2
0

LuceneSearchEngine.cs 11 KB


  1. using System;
  2. using System.Collections.Generic;
  3. using System.IO;
  4. using System.Linq;
  5. using System.Threading.Tasks;
  6. using Lucene.Net.Analysis.Standard;
  7. using Lucene.Net.Documents;
  8. using Lucene.Net.Index;
  9. using Lucene.Net.QueryParsers;
  10. using Lucene.Net.Search;
  11. using Lucene.Net.Store;
  12. using MediaBrowser.Controller;
  13. using MediaBrowser.Controller.Entities;
  14. using MediaBrowser.Controller.Library;
  15. using MediaBrowser.Model.Logging;
  16. namespace MediaBrowser.Server.Implementations.Library
  17. {
  /// <summary>
  /// Library search engine that keeps a Lucene.Net index of library items on disk
  /// (in a "SearchIndexDB" folder below the server data path) and updates it whenever
  /// the library manager raises <c>LibraryChanged</c>.
  /// Based on http://www.codeproject.com/Articles/320219/Lucene-Net-ultra-fast-search-for-MVC-or-WebForms
  /// </summary>
  public class LuceneSearchEngine : ILibrarySearchEngine, IDisposable
  {
      /// <summary>Logger used to report failures of background index updates.</summary>
      private readonly ILogger _logger;

      /// <summary>
      /// Prepares the index folder, initializes <see cref="LuceneSearch"/> and subscribes
      /// to library change notifications.
      /// </summary>
      /// <param name="serverPaths">Provides the data path under which the index folder lives.</param>
      /// <param name="logManager">Used to obtain the "Lucene" logger.</param>
      public LuceneSearchEngine(IServerApplicationPaths serverPaths, ILogManager logManager)
      {
          // Path.Combine instead of hard-coded "\\" so the folder is resolved correctly
          // regardless of the platform's directory separator.
          string luceneDbPath = System.IO.Path.Combine(serverPaths.DataPath, "SearchIndexDB");
          string writeLockPath = System.IO.Path.Combine(luceneDbPath, "write.lock");

          if (!System.IO.Directory.Exists(luceneDbPath))
          {
              System.IO.Directory.CreateDirectory(luceneDbPath);
          }
          else if (File.Exists(writeLockPath))
          {
              // A write.lock left in the folder (presumably by an unclean shutdown) is
              // removed before the index writer is opened by Init below.
              File.Delete(writeLockPath);
          }

          _logger = logManager.GetLogger("Lucene");
          LuceneSearch.Init(luceneDbPath, _logger);
          BaseItem.LibraryManager.LibraryChanged += LibraryChanged;
      }

      /// <summary>
      /// Handles a library change by updating the index on a background task:
      /// added and updated items are (re)indexed, removed items are deleted from the index.
      /// </summary>
      /// <param name="source">The event sender (unused).</param>
      /// <param name="changeInformation">The added, updated and removed items.</param>
      public void LibraryChanged(object source, ChildrenChangedEventArgs changeInformation)
      {
          Task.Run(() =>
          {
              // The task is fire-and-forget, so an exception escaping it would never be
              // observed by anyone; log it here instead of losing it silently.
              try
              {
                  if (changeInformation.ItemsAdded.Count + changeInformation.ItemsUpdated.Count > 0)
                  {
                      LuceneSearch.AddUpdateLuceneIndex(changeInformation.ItemsAdded.Concat(changeInformation.ItemsUpdated));
                  }

                  if (changeInformation.ItemsRemoved.Count > 0)
                  {
                      LuceneSearch.RemoveFromLuceneIndex(changeInformation.ItemsRemoved);
                  }
              }
              catch (Exception e)
              {
                  _logger.ErrorException("Error in Lucene while processing library change", e);
              }
          });
      }

      /// <summary>
      /// Adds the given items to the index, replacing any existing entries for them.
      /// </summary>
      /// <param name="items">The items to index.</param>
      public void AddItemsToIndex(IEnumerable<BaseItem> items)
      {
          LuceneSearch.AddUpdateLuceneIndex(items);
      }

      /// <summary>
      /// Searches items and returns them in order of relevance.
      /// Only hits whose Id belongs to one of <paramref name="items"/> are returned.
      /// </summary>
      /// <param name="items">The items.</param>
      /// <param name="searchTerm">The search term.</param>
      /// <returns>IEnumerable{BaseItem}.</returns>
      /// <exception cref="System.ArgumentNullException">searchTerm or items</exception>
      public IEnumerable<BaseItem> Search(IEnumerable<BaseItem> items, string searchTerm)
      {
          if (string.IsNullOrEmpty(searchTerm))
          {
              throw new ArgumentNullException("searchTerm");
          }

          if (items == null)
          {
              throw new ArgumentNullException("items");
          }

          // Enumerate the candidate sequence exactly once; it may be a deferred query.
          var candidates = items as ICollection<BaseItem> ?? items.ToList();
          if (candidates.Count == 0)
          {
              // Nothing can match, and a hit limit of 0 is not a valid Lucene request.
              return Enumerable.Empty<BaseItem>();
          }

          // Hash lookup instead of scanning every candidate for every hit.
          var candidateIds = new HashSet<Guid>(candidates.Select(candidate => candidate.Id));

          var hits = LuceneSearch.Search(searchTerm, candidates.Count);

          // The null check guards against an index entry that no longer resolves to a
          // library item.
          return hits.Where(searchHit => searchHit != null && candidateIds.Contains(searchHit.Id));
      }

      /// <summary>
      /// Unsubscribes from library change notifications and closes the Lucene resources.
      /// </summary>
      public void Dispose()
      {
          BaseItem.LibraryManager.LibraryChanged -= LibraryChanged;
          LuceneSearch.CloseAll();
      }
  }
  /// <summary>
  /// Static holder of the Lucene resources (directory, index writer, analyzer, searcher)
  /// behind <see cref="LuceneSearchEngine"/>. Documents carry three fields: "Id", "Name"
  /// and "Overview". Access to the shared resources is serialized through one lock object.
  /// </summary>
  public static class LuceneSearch
  {
      private static ILogger logger;
      private static string path;
      private static readonly object lockOb = new object();

      private static FSDirectory _directory;

      /// <summary>The index directory; opened on first use from <see cref="path"/>.</summary>
      private static FSDirectory directory
      {
          get
          {
              if (_directory == null)
              {
                  logger.Info("Opening new Directory: " + path);
                  _directory = FSDirectory.Open(path);
              }
              return _directory;
          }
      }

      private static StandardAnalyzer _analyzer;

      /// <summary>
      /// The analyzer shared by the writer and the query parser. Created on demand so that
      /// the class can be initialized again after <see cref="CloseAll"/> disposed it.
      /// </summary>
      private static StandardAnalyzer analyzer
      {
          get
          {
              if (_analyzer == null)
              {
                  _analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
              }
              return _analyzer;
          }
      }

      private static IndexWriter _writer;

      /// <summary>The index writer; opened on first use.</summary>
      private static IndexWriter writer
      {
          get
          {
              if (_writer == null)
              {
                  logger.Info("Opening new IndexWriter");
                  _writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
              }
              return _writer;
          }
      }

      /// <summary>Per-field boosts handed to the query parser.</summary>
      private static Dictionary<string, float> bonusTerms;

      /// <summary>Cached searcher; null means "reopen on next search".</summary>
      private static Searcher searcher = null;

      /// <summary>
      /// Stores the index location and logger, sets up the field boosts and opens the
      /// index writer (optimizing the index once).
      /// </summary>
      /// <param name="path">Folder that holds the index files.</param>
      /// <param name="logger">Logger used for all Lucene messages.</param>
      public static void Init(string path, ILogger logger)
      {
          logger.Info("Lucene: Init");

          bonusTerms = new Dictionary<string, float>
          {
              { "Name", 2 },
              { "Overview", 1 }
          };

          LuceneSearch.logger = logger;
          LuceneSearch.path = path;

          // Optimize the DB on initialization
          // TODO: Test whether this has..
          // Any effect what-so-ever (apart from initializing the indexwriter on the mainthread context, which makes things a whole lot easier)
          // Costs too much time
          // Is heavy on the CPU / Memory
          lock (lockOb)
          {
              writer.Optimize();
          }
      }

      /// <summary>
      /// Builds the Lucene document for an item.
      /// </summary>
      /// <param name="data">The item to convert.</param>
      /// <returns>A document with the "Id", "Name" and "Overview" fields.</returns>
      private static Document createDocument(BaseItem data)
      {
          Document doc = new Document();

          // The Id must be indexed (as a single, un-analyzed term): documents are deleted
          // through a TermQuery on "Id", which can only match indexed fields. With
          // Field.Index.NO the delete never matched, so updates produced duplicates and
          // removals had no effect.
          doc.Add(new Field("Id", data.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

          // Field values may not be null; fall back to an empty string like Overview does.
          doc.Add(new Field("Name", data.Name ?? "", Field.Store.YES, Field.Index.ANALYZED) { Boost = 2 });
          doc.Add(new Field("Overview", data.Overview ?? "", Field.Store.YES, Field.Index.ANALYZED));
          return doc;
      }

      /// <summary>
      /// Replaces the index entry of one item (delete by Id, then add). Failures are
      /// logged and do not abort the surrounding batch.
      /// </summary>
      /// <param name="item">The item to (re)index.</param>
      private static void Create(BaseItem item)
      {
          lock (lockOb)
          {
              try
              {
                  // Remove first to prevent double entries.
                  _removeFromLuceneIndex(item);
                  _addToLuceneIndex(item);
              }
              catch (Exception e)
              {
                  logger.ErrorException("Error in Lucene while creating index", e, item);
              }
          }
      }

      /// <summary>Adds the document for an item to the writer (no commit).</summary>
      private static void _addToLuceneIndex(BaseItem data)
      {
          var doc = createDocument(data);
          writer.AddDocument(doc);
      }

      /// <summary>Deletes all documents whose "Id" term equals the item's Id (no commit).</summary>
      private static void _removeFromLuceneIndex(BaseItem data)
      {
          var query = new TermQuery(new Term("Id", data.Id.ToString()));
          writer.DeleteDocuments(query);
      }

      /// <summary>
      /// Disposes the cached searcher so that the next search opens a fresh one.
      /// Must be called while holding <see cref="lockOb"/>.
      /// </summary>
      private static void ResetSearcher()
      {
          if (searcher == null)
          {
              return;
          }

          try
          {
              searcher.Dispose();
          }
          catch (Exception e)
          {
              logger.ErrorException("Error in Lucene while disposing alive searcher", e);
          }
          searcher = null;
      }

      /// <summary>
      /// Commits pending writer changes and drops the cached searcher. The searcher is
      /// reset after the commit: one opened before it would keep serving the
      /// pre-commit state of the index.
      /// </summary>
      private static void CommitAndRefresh()
      {
          lock (lockOb)
          {
              // Commit() writes all pending changes; a separate Flush afterwards is redundant.
              writer.Commit();
              ResetSearcher();
          }
      }

      /// <summary>
      /// Adds the given items to the index, replacing existing entries with the same Id,
      /// and commits the changes.
      /// </summary>
      /// <param name="items">The items to add or update.</param>
      public static void AddUpdateLuceneIndex(IEnumerable<BaseItem> items)
      {
          foreach (var item in items)
          {
              logger.Info("Adding/Updating BaseItem " + item.Name + "(" + item.Id.ToString() + ") to/on Lucene Index");
              Create(item);
          }

          CommitAndRefresh();
      }

      /// <summary>
      /// Removes the given items from the index and commits the changes.
      /// </summary>
      /// <param name="items">The items to remove.</param>
      public static void RemoveFromLuceneIndex(IEnumerable<BaseItem> items)
      {
          foreach (var item in items)
          {
              logger.Info("Removing BaseItem " + item.Name + "(" + item.Id.ToString() + ") from Lucene Index");
              lock (lockOb)
              {
                  _removeFromLuceneIndex(item);
              }
          }

          // Also resets the searcher, so removed items stop appearing in results.
          CommitAndRefresh();
      }

      /// <summary>
      /// Searches the "Name" and "Overview" fields. Every space-separated term of the query
      /// is matched both fuzzily (similarity 0.75) and as a prefix; any match counts.
      /// Errors are logged and lead to an empty (or partial) result rather than an exception.
      /// </summary>
      /// <param name="searchQuery">The raw user input.</param>
      /// <param name="maxHits">Maximum number of hits to retrieve.</param>
      /// <returns>The library items of the hits, best match first.</returns>
      public static IEnumerable<BaseItem> Search(string searchQuery, int maxHits)
      {
          var results = new List<BaseItem>();

          // Nothing to look for, or nothing may be returned (Lucene rejects a hit limit of 0).
          if (string.IsNullOrWhiteSpace(searchQuery) || maxHits <= 0)
          {
              return results;
          }

          lock (lockOb)
          {
              try
              {
                  if (searcher == null)
                  {
                      searcher = new IndexSearcher(directory, true);
                  }

                  BooleanQuery finalQuery = new BooleanQuery();
                  MultiFieldQueryParser parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, new string[] { "Name", "Overview" }, analyzer, bonusTerms);
                  string[] terms = searchQuery.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);

                  var fuzzyClauses = new List<Query>();
                  var prefixClauses = new List<Query>();
                  foreach (string term in terms)
                  {
                      // Drop the operators that are appended below, then escape whatever
                      // query syntax is left so user input is treated as plain text.
                      string plain = term.Replace("~", "").Replace("*", "");
                      if (plain.Length == 0)
                      {
                          continue;
                      }

                      string escaped = QueryParser.Escape(plain);
                      try
                      {
                          var fuzzy = parser.Parse(escaped + "~0.75");
                          var prefix = parser.Parse(escaped + "*");
                          fuzzyClauses.Add(fuzzy);
                          prefixClauses.Add(prefix);
                      }
                      catch (ParseException e)
                      {
                          // One unparsable term must not make the whole search fail.
                          logger.Debug("Skipping unparsable search term '" + term + "': " + e.Message);
                      }
                  }

                  // Same clause order as before: all fuzzy clauses, then all prefix clauses.
                  foreach (var clause in fuzzyClauses)
                      finalQuery.Add(clause, Occur.SHOULD);
                  foreach (var clause in prefixClauses)
                      finalQuery.Add(clause, Occur.SHOULD);

                  logger.Debug("Querying Lucene with query: " + finalQuery.ToString());

                  // Stopwatch is monotonic and far more precise than DateTime.Now.
                  var timer = System.Diagnostics.Stopwatch.StartNew();
                  var searchResult = searcher.Search(finalQuery, maxHits);
                  foreach (var searchHit in searchResult.ScoreDocs)
                  {
                      Document hit = searcher.Doc(searchHit.Doc);
                      var item = BaseItem.LibraryManager.GetItemById(Guid.Parse(hit.Get("Id")));

                      // Guard against index entries that no longer resolve to a library item.
                      if (item != null)
                      {
                          results.Add(item);
                      }
                  }
                  timer.Stop();

                  float msTotal = (float)timer.Elapsed.TotalMilliseconds;
                  logger.Debug(searchResult.ScoreDocs.Length + " result" + (searchResult.ScoreDocs.Length == 1 ? "" : "s") + " in " + msTotal + " ms.");
              }
              catch (Exception e)
              {
                  logger.ErrorException("Error while searching Lucene index", e);
              }
          }
          return results;
      }

      /// <summary>
      /// Commits and closes the writer, then disposes the analyzer, the searcher and the
      /// directory. Only resources that are actually open are touched.
      /// </summary>
      public static void CloseAll()
      {
          logger.Debug("Lucene: CloseAll");

          lock (lockOb)
          {
              // Check the backing fields, not the lazy properties: the properties never
              // return null and would open a writer/directory just to close it again.
              if (_writer != null)
              {
                  logger.Debug("Lucene: CloseAll - Writer is alive");
                  _writer.Commit();
                  _writer.WaitForMerges();
                  _writer.Dispose();
                  _writer = null;
              }
              if (_analyzer != null)
              {
                  logger.Debug("Lucene: CloseAll - Analyzer is alive");
                  _analyzer.Dispose();
                  _analyzer = null;
              }
              if (searcher != null)
              {
                  logger.Debug("Lucene: CloseAll - Searcher is alive");
                  searcher.Dispose();
                  searcher = null;
              }
              if (_directory != null)
              {
                  logger.Debug("Lucene: CloseAll - Directory is alive");
                  _directory.Dispose();
                  _directory = null;
              }
          }
      }
  }
  268. }