DetectorFactory.cs 3.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127
  1. using System;
  2. using System.Collections.Generic;
  3. using System.IO;
  4. using System.IO.Compression;
  5. using NLangDetect.Core.Utils;
  6. using MediaBrowser.Model.Serialization;
  7. using System.Linq;
  8. namespace NLangDetect.Core
  9. {
  10. public class DetectorFactory
  11. {
  12. public Dictionary<string, ProbVector> WordLangProbMap;
  13. public List<string> Langlist;
  14. private static readonly DetectorFactory _instance = new DetectorFactory();
  15. #region Constructor(s)
  16. private DetectorFactory()
  17. {
  18. WordLangProbMap = new Dictionary<string, ProbVector>();
  19. Langlist = new List<string>();
  20. }
  21. #endregion
  22. #region Public methods
  23. public static void LoadProfiles(IJsonSerializer json)
  24. {
  25. var assembly = typeof(DetectorFactory).Assembly;
  26. var names = assembly.GetManifestResourceNames()
  27. .Where(i => i.IndexOf("NLangDetect.Profiles", StringComparison.Ordinal) != -1)
  28. .ToList();
  29. var index = 0;
  30. foreach (var name in names)
  31. {
  32. using (var stream = assembly.GetManifestResourceStream(name))
  33. {
  34. var langProfile = (LangProfile)json.DeserializeFromStream(stream, typeof(LangProfile));
  35. AddProfile(langProfile, index);
  36. }
  37. index++;
  38. }
  39. }
  40. public static Detector Create()
  41. {
  42. return CreateDetector();
  43. }
  44. public static Detector Create(double alpha)
  45. {
  46. Detector detector = CreateDetector();
  47. detector.SetAlpha(alpha);
  48. return detector;
  49. }
  50. public static void SetSeed(int? seed)
  51. {
  52. _instance.Seed = seed;
  53. }
  54. #endregion
  55. #region Internal methods
  56. internal static void AddProfile(LangProfile profile, int index)
  57. {
  58. var lang = profile.name;
  59. if (_instance.Langlist.Contains(lang))
  60. {
  61. throw new NLangDetectException("duplicate the same language profile", ErrorCode.DuplicateLangError);
  62. }
  63. _instance.Langlist.Add(lang);
  64. foreach (string word in profile.freq.Keys)
  65. {
  66. if (!_instance.WordLangProbMap.ContainsKey(word))
  67. {
  68. _instance.WordLangProbMap.Add(word, new ProbVector());
  69. }
  70. double prob = (double)profile.freq[word] / profile.n_words[word.Length - 1];
  71. _instance.WordLangProbMap[word][index] = prob;
  72. }
  73. }
  74. internal static void Clear()
  75. {
  76. _instance.Langlist.Clear();
  77. _instance.WordLangProbMap.Clear();
  78. }
  79. #endregion
  80. #region Private helper methods
  81. private static Detector CreateDetector()
  82. {
  83. if (_instance.Langlist.Count == 0)
  84. {
  85. throw new NLangDetectException("need to load profiles", ErrorCode.NeedLoadProfileError);
  86. }
  87. return new Detector(_instance);
  88. }
  89. #endregion
  90. #region Properties
  91. public int? Seed { get; private set; }
  92. #endregion
  93. }
  94. }