GenProfile.cs 1.9 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667
  1. using System;
  2. using System.IO;
  3. using System.IO.Compression;
  4. using System.Xml;
  5. using NLangDetect.Core.Utils;
  6. namespace NLangDetect.Core
  7. {
  8. // TODO IMM HI: xml reader not tested
  /// <summary>
  /// Builds a <see cref="LangProfile"/> for one language by streaming an XML
  /// file through a <see cref="TagExtractor"/>.
  /// </summary>
  public static class GenProfile
  {
    #region Public methods

    /// <summary>
    /// Reads the XML document at <paramref name="file"/> and feeds its element
    /// names, text nodes and end-of-element events to a <see cref="TagExtractor"/>
    /// that updates a new <see cref="LangProfile"/> for <paramref name="lang"/>.
    /// </summary>
    /// <param name="lang">Language identifier passed to the <see cref="LangProfile"/> constructor.</param>
    /// <param name="file">
    /// Path of the XML input. When the extension is ".gz" (compared
    /// case-insensitively) the content is decompressed with <see cref="GZipStream"/>.
    /// </param>
    /// <returns>The profile instance that was handed to the extractor.</returns>
    /// <remarks>
    /// As a side effect, writes "<paramref name="lang"/>: count" to the console,
    /// where count is <c>TagExtractor.Count</c> after the whole file was read.
    /// Exceptions raised while opening or parsing the file propagate to the caller.
    /// </remarks>
    public static LangProfile load(string lang, string file)
    {
      var profile = new LangProfile(lang);

      // NOTE(review): presumably "abstract" is the element whose text is collected
      // and 100 is a threshold on that text - confirm against TagExtractor.
      var tagextractor = new TagExtractor("abstract", 100);

      // XmlReader.Create(Stream) does not close the stream by default, so the
      // outer using is what releases the file handle. Disposal order is
      // reader first, then stream.
      using (Stream inputStream = OpenInput(file))
      using (XmlReader xmlReader = XmlReader.Create(inputStream))
      {
        while (xmlReader.Read())
        {
          switch (xmlReader.NodeType)
          {
            case XmlNodeType.Element:
              tagextractor.SetTag(xmlReader.Name);
              break;

            case XmlNodeType.Text:
              tagextractor.Add(xmlReader.Value);
              break;

            case XmlNodeType.EndElement:
              tagextractor.CloseTag(profile);
              break;
          }
        }
      }

      Console.WriteLine(lang + ": " + tagextractor.Count);

      return profile;
    }

    #endregion

    #region Private helpers

    /// <summary>
    /// Opens <paramref name="file"/> for reading and, for a ".gz" extension,
    /// wraps it in a decompressing <see cref="GZipStream"/>.
    /// </summary>
    /// <param name="file">Path of the file to open.</param>
    /// <returns>A readable stream the caller must dispose.</returns>
    private static Stream OpenInput(string file)
    {
      Stream fileStream = File.OpenRead(file);

      try
      {
        string extension = Path.GetExtension(file) ?? "";

        // Ordinal, case-insensitive match: a file extension is an identifier,
        // not linguistic text, so the current culture must not influence it.
        if (string.Equals(extension, ".gz", StringComparison.OrdinalIgnoreCase))
        {
          // Disposing the GZipStream also disposes the wrapped file stream.
          return new GZipStream(fileStream, CompressionMode.Decompress);
        }

        return fileStream;
      }
      catch
      {
        // Ownership was not handed to the caller; do not leak the file handle.
        fileStream.Dispose();
        throw;
      }
    }

    #endregion
  }
  56. }