LuceneSearchEngine.cs 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478
  1. using Lucene.Net.Analysis.Standard;
  2. using Lucene.Net.Documents;
  3. using Lucene.Net.Index;
  4. using Lucene.Net.QueryParsers;
  5. using Lucene.Net.Search;
  6. using Lucene.Net.Store;
  7. using MediaBrowser.Controller;
  8. using MediaBrowser.Controller.Entities;
  9. using MediaBrowser.Controller.Entities.Audio;
  10. using MediaBrowser.Controller.Library;
  11. using MediaBrowser.Model.Logging;
  12. using System;
  13. using System.Collections.Generic;
  14. using System.Linq;
  15. using System.Threading.Tasks;
  16. namespace MediaBrowser.Server.Implementations.Library
  17. {
  18. /// <summary>
  19. /// Class LuceneSearchEngine
  20. /// http://www.codeproject.com/Articles/320219/Lucene-Net-ultra-fast-search-for-MVC-or-WebForms
  21. /// </summary>
  22. public class LuceneSearchEngine : ILibrarySearchEngine, IDisposable
  23. {
  /// <summary>
  /// Library manager used to resolve artists, genres, studios and people for search hints.
  /// </summary>
  private readonly ILibraryManager _libraryManager;

  /// <summary>
  /// Logger obtained from the log manager under the name "Lucene".
  /// </summary>
  private readonly ILogger _logger;

  /// <summary>
  /// Initializes a new instance of the <see cref="LuceneSearchEngine" /> class.
  /// </summary>
  /// <param name="serverPaths">The server paths. Not used while index initialization is disabled.</param>
  /// <param name="logManager">The log manager that supplies the "Lucene" logger.</param>
  /// <param name="libraryManager">The library manager.</param>
  public LuceneSearchEngine(IServerApplicationPaths serverPaths, ILogManager logManager, ILibraryManager libraryManager)
  {
      _logger = logManager.GetLogger("Lucene");
      _libraryManager = libraryManager;

      // Index initialization is currently disabled. The original setup is kept for reference:
      //string luceneDbPath = serverPaths.DataPath + "\\SearchIndexDB";
      //if (!System.IO.Directory.Exists(luceneDbPath))
      //    System.IO.Directory.CreateDirectory(luceneDbPath);
      //else if(File.Exists(luceneDbPath + "\\write.lock"))
      //    File.Delete(luceneDbPath + "\\write.lock");
      //LuceneSearch.Init(luceneDbPath, _logger);
      //BaseItem.LibraryManager.LibraryChanged += LibraryChanged;
  }
  /// <summary>
  /// Handles a library change by updating the Lucene index on a background task.
  /// </summary>
  /// <param name="source">The event source (not used).</param>
  /// <param name="changeInformation">The items that were added, updated and removed.</param>
  public void LibraryChanged(object source, ChildrenChangedEventArgs changeInformation)
  {
      // Fire-and-forget: the returned task is intentionally not awaited.
      Task.Run(() =>
      {
          try
          {
              if (changeInformation.ItemsAdded.Count + changeInformation.ItemsUpdated.Count > 0)
              {
                  LuceneSearch.AddUpdateLuceneIndex(changeInformation.ItemsAdded.Concat(changeInformation.ItemsUpdated));
              }

              if (changeInformation.ItemsRemoved.Count > 0)
              {
                  LuceneSearch.RemoveFromLuceneIndex(changeInformation.ItemsRemoved);
              }
          }
          catch (Exception ex)
          {
              // Nothing observes this task, so without logging here a failure would go unnoticed.
              _logger.ErrorException("Error updating Lucene index after library change", ex);
          }
      });
  }
  /// <summary>
  /// Adds the given items to the Lucene index, replacing entries that already exist for them.
  /// </summary>
  /// <param name="items">The items to add or update.</param>
  /// <exception cref="System.ArgumentNullException">items</exception>
  public void AddItemsToIndex(IEnumerable<BaseItem> items)
  {
      // Fail at the boundary instead of with a NullReferenceException inside the index update.
      if (items == null)
      {
          throw new ArgumentNullException("items");
      }

      LuceneSearch.AddUpdateLuceneIndex(items);
  }
  /// <summary>
  /// Searches the Lucene index and returns the hits that belong to the given items,
  /// in the order the index returned them.
  /// </summary>
  /// <param name="items">The items the result is restricted to.</param>
  /// <param name="searchTerm">The search term.</param>
  /// <returns>IEnumerable{BaseItem}.</returns>
  /// <exception cref="System.ArgumentNullException">searchTerm or items</exception>
  public IEnumerable<BaseItem> Search(IEnumerable<BaseItem> items, string searchTerm)
  {
      if (string.IsNullOrEmpty(searchTerm))
      {
          throw new ArgumentNullException("searchTerm");
      }

      if (items == null)
      {
          throw new ArgumentNullException("items");
      }

      // Enumerate the candidates exactly once and keep their ids for O(1) membership tests,
      // instead of re-scanning the whole sequence for every hit.
      var allowedIds = new HashSet<Guid>(items.Where(i => i != null).Select(i => i.Id));

      if (allowedIds.Count == 0)
      {
          // Nothing can match, and asking the index for zero hits is pointless.
          return Enumerable.Empty<BaseItem>();
      }

      var hits = LuceneSearch.Search(searchTerm, allowedIds.Count);

      // Skip null entries defensively before dereferencing the hit.
      return hits.Where(hit => hit != null && allowedIds.Contains(hit.Id));
  }
  /// <summary>
  /// Releases resources held by the search engine. Currently a no-op because
  /// the index is never initialized by the constructor.
  /// </summary>
  public void Dispose()
  {
      // Counterpart of the disabled initialization in the constructor:
      //BaseItem.LibraryManager.LibraryChanged -= LibraryChanged;
      //LuceneSearch.CloseAll();
  }
  /// <summary>
  /// Gets the search hints: items, artists, genres, studios and people with a name
  /// that has a word containing the search term.
  /// </summary>
  /// <param name="inputItems">The input items.</param>
  /// <param name="searchTerm">The search term.</param>
  /// <returns>The matching entities, ordered by the position of the first matching word in their name.</returns>
  /// <exception cref="System.ArgumentNullException">searchTerm</exception>
  public async Task<IEnumerable<BaseItem>> GetSearchHints(IEnumerable<BaseItem> inputItems, string searchTerm)
  {
      if (string.IsNullOrEmpty(searchTerm))
      {
          throw new ArgumentNullException("searchTerm");
      }

      var hints = new List<Tuple<BaseItem, int>>();

      // MusicArtist items are left out here; artists are matched below through the
      // artist names carried by the audio items.
      var items = inputItems.Where(i => !(i is MusicArtist)).ToList();

      foreach (var item in items)
      {
          // An item without a name cannot match, and a null name would break the word split.
          if (string.IsNullOrEmpty(item.Name))
          {
              continue;
          }

          var index = IndexOf(item.Name, searchTerm);

          if (index != -1)
          {
              hints.Add(new Tuple<BaseItem, int>(item, index));
          }
      }

      // Find artists. De-duplicated like genres, studios and persons, so that an artist
      // shared by many tracks yields one hint and one lookup.
      var artists = items.OfType<Audio>()
          .SelectMany(i => new[] { i.Artist, i.AlbumArtist })
          .Where(i => !string.IsNullOrEmpty(i))
          .Distinct(StringComparer.OrdinalIgnoreCase)
          .ToList();

      foreach (var item in artists)
      {
          var index = IndexOf(item, searchTerm);

          if (index != -1)
          {
              var artist = await _libraryManager.GetArtist(item).ConfigureAwait(false);

              hints.Add(new Tuple<BaseItem, int>(artist, index));
          }
      }

      // Find genres
      var genres = items.SelectMany(i => i.Genres)
          .Where(i => !string.IsNullOrEmpty(i))
          .Distinct(StringComparer.OrdinalIgnoreCase)
          .ToList();

      foreach (var item in genres)
      {
          var index = IndexOf(item, searchTerm);

          if (index != -1)
          {
              var genre = await _libraryManager.GetGenre(item).ConfigureAwait(false);

              hints.Add(new Tuple<BaseItem, int>(genre, index));
          }
      }

      // Find studios
      var studios = items.SelectMany(i => i.Studios)
          .Where(i => !string.IsNullOrEmpty(i))
          .Distinct(StringComparer.OrdinalIgnoreCase)
          .ToList();

      foreach (var item in studios)
      {
          var index = IndexOf(item, searchTerm);

          if (index != -1)
          {
              var studio = await _libraryManager.GetStudio(item).ConfigureAwait(false);

              hints.Add(new Tuple<BaseItem, int>(studio, index));
          }
      }

      // Find persons
      var persons = items.SelectMany(i => i.People)
          .Select(i => i.Name)
          .Where(i => !string.IsNullOrEmpty(i))
          .Distinct(StringComparer.OrdinalIgnoreCase)
          .ToList();

      foreach (var item in persons)
      {
          var index = IndexOf(item, searchTerm);

          if (index != -1)
          {
              var person = await _libraryManager.GetPerson(item).ConfigureAwait(false);

              hints.Add(new Tuple<BaseItem, int>(person, index));
          }
      }

      // A lower word position means the term appears earlier in the name, so it ranks first.
      return hints.OrderBy(i => i.Item2).Select(i => i.Item1);
  }
  /// <summary>
  /// Builds the hint list for a single item: one entry when a word of the item's name
  /// contains the search term, otherwise none.
  /// </summary>
  /// <param name="item">The item.</param>
  /// <param name="searchTerm">The search term.</param>
  /// <returns>Pairs of the item and the position of its first matching word.</returns>
  private async Task<IEnumerable<Tuple<BaseItem, int>>> GetHints(BaseItem item, string searchTerm)
  {
      var matches = new List<Tuple<BaseItem, int>>();
      var wordPosition = IndexOf(item.Name, searchTerm);

      // IndexOf reports -1 for "no match"; every other value is a zero-based word position.
      if (wordPosition >= 0)
      {
          matches.Add(Tuple.Create(item, wordPosition));
      }

      return matches;
  }
  /// <summary>
  /// Splits a term into words at each single space character.
  /// Consecutive spaces produce empty entries.
  /// </summary>
  /// <param name="term">The term.</param>
  /// <returns>System.String[].</returns>
  private string[] GetWords(string term)
  {
      // TODO: Improve this to be more accurate and respect culture
      return term.Split(' ');
  }
  /// <summary>
  /// Finds the zero-based position of the first word in the input that contains the term,
  /// ignoring case.
  /// </summary>
  /// <param name="input">The text to look in. May be null.</param>
  /// <param name="term">The term to look for.</param>
  /// <returns>The position of the first matching word, or -1 when no word matches or the input is null.</returns>
  private int IndexOf(string input, string term)
  {
      // A missing name simply has no matching word; do not fail on it.
      if (input == null)
      {
          return -1;
      }

      var words = GetWords(input);

      for (var position = 0; position < words.Length; position++)
      {
          if (words[position].IndexOf(term, StringComparison.OrdinalIgnoreCase) != -1)
          {
              return position;
          }
      }

      return -1;
  }
  205. }
  206. public static class LuceneSearch
  207. {
  // Logger and index path; both are assigned by Init.
  private static ILogger logger;
  private static string path;

  // Lock object taken by Create and Search.
  private static object lockOb = new object();

  // Backing field for the lazily opened index directory.
  private static FSDirectory _directory;

  /// <summary>
  /// Gets the index directory, opening it at <c>path</c> on first access, or sets the cached instance.
  /// </summary>
  private static FSDirectory directory
  {
      get
      {
          if (_directory != null)
          {
              return _directory;
          }

          logger.Info("Opening new Directory: " + path);
          _directory = FSDirectory.Open(path);
          return _directory;
      }
      set { _directory = value; }
  }
  // Backing field for the lazily created index writer.
  private static IndexWriter _writer;

  /// <summary>
  /// Gets the index writer, creating it on first access over the index directory with the
  /// shared analyzer and no field length limit, or sets the cached instance.
  /// </summary>
  private static IndexWriter writer
  {
      get
      {
          if (_writer != null)
          {
              return _writer;
          }

          logger.Info("Opening new IndexWriter");
          _writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
          return _writer;
      }
      set { _writer = value; }
  }
  // Per-field boosts handed to the multi-field query parser in Search.
  private static Dictionary<string, float> bonusTerms;

  /// <summary>
  /// Initializes the search index: sets the field boosts, stores the logger and index path,
  /// then opens the index writer and optimizes the index.
  /// </summary>
  /// <param name="path">The directory that holds the index.</param>
  /// <param name="logger">The logger used for all index operations.</param>
  public static void Init(string path, ILogger logger)
  {
      logger.Info("Lucene: Init");

      bonusTerms = new Dictionary<string, float>
      {
          { "Name", 2f },
          { "Overview", 1f }
      };

      // The static fields must be set before the writer is first touched below,
      // because the lazy directory and writer properties read them.
      LuceneSearch.logger = logger;
      LuceneSearch.path = path;

      // Optimize the DB on initialization.
      // TODO: Test whether this...
      //  - has any effect at all (apart from creating the IndexWriter on the calling
      //    thread, which makes things a whole lot easier)
      //  - costs too much time
      //  - is heavy on CPU / memory
      writer.Optimize();
  }
  // Analyzer shared by the index writer and the query parser.
  private static StandardAnalyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

  // Cached searcher; created on demand by Search and dropped by Create.
  private static Searcher searcher = null;

  /// <summary>
  /// Builds the Lucene document for an item with the fields "Id", "Name" and "Overview".
  /// </summary>
  /// <param name="data">The item to index.</param>
  /// <returns>The document to add to the index.</returns>
  private static Document createDocument(BaseItem data)
  {
      var doc = new Document();

      // The id must be indexed as a single exact term: _removeFromLuceneIndex deletes by a
      // term query on "Id", which cannot match a field that is stored but not indexed.
      // NOTE(review): documents written before this change presumably stay unmatched until re-indexed.
      doc.Add(new Field("Id", data.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

      // Name gets the same null fallback as Overview so an unnamed item can still be indexed.
      doc.Add(new Field("Name", data.Name ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED) { Boost = 2 });
      doc.Add(new Field("Overview", data.Overview ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED));

      return doc;
  }
  /// <summary>
  /// Replaces the index entry of an item: drops the cached searcher, deletes the item's
  /// existing entry and adds a fresh document. Failures are logged, not thrown.
  /// </summary>
  /// <param name="item">The item to (re)index.</param>
  private static void Create(BaseItem item)
  {
      lock (lockOb)
      {
          try
          {
              var staleSearcher = searcher;

              if (staleSearcher != null)
              {
                  // A failing dispose is logged but must not stop the index update.
                  try
                  {
                      staleSearcher.Dispose();
                  }
                  catch (Exception disposeError)
                  {
                      logger.ErrorException("Error in Lucene while creating index (disposing alive searcher)", disposeError, item);
                  }

                  searcher = null;
              }

              // Delete first so that an update does not leave a second entry behind.
              _removeFromLuceneIndex(item);
              _addToLuceneIndex(item);
          }
          catch (Exception indexError)
          {
              logger.ErrorException("Error in Lucene while creating index", indexError, item);
          }
      }
  }
  /// <summary>
  /// Adds a document for the item to the index. This does not check for an existing
  /// entry; Create deletes the old entry before calling this.
  /// </summary>
  /// <param name="data">The item to add.</param>
  private static void _addToLuceneIndex(BaseItem data)
  {
      // Build the document before touching the (lazily created) writer.
      Document itemDocument = createDocument(data);

      writer.AddDocument(itemDocument);
  }
  /// <summary>
  /// Deletes the documents whose "Id" term equals the item's id.
  /// The match relies on the "Id" field being searchable as an exact term.
  /// </summary>
  /// <param name="data">The item whose entry is removed.</param>
  private static void _removeFromLuceneIndex(BaseItem data)
  {
      var idTerm = new Term("Id", data.Id.ToString());
      var byId = new TermQuery(idTerm);

      writer.DeleteDocuments(byId);
  }
  /// <summary>
  /// Adds the items to the index, replacing existing entries, then commits and flushes the writer.
  /// </summary>
  /// <param name="items">The items to add or update.</param>
  public static void AddUpdateLuceneIndex(IEnumerable<BaseItem> items)
  {
      foreach (var entry in items)
      {
          var message = "Adding/Updating BaseItem " + entry.Name + "(" + entry.Id.ToString() + ") to/on Lucene Index";
          logger.Info(message);

          Create(entry);
      }

      // Persist the whole batch once, after all items were processed.
      writer.Commit();
      writer.Flush(true, true, true);
  }
  /// <summary>
  /// Removes the items from the index, commits the change and drops the cached searcher
  /// so that the next search opens a fresh one.
  /// </summary>
  /// <param name="items">The items to remove.</param>
  public static void RemoveFromLuceneIndex(IEnumerable<BaseItem> items)
  {
      // Same lock as Create and Search, because the shared searcher is replaced below.
      lock (lockOb)
      {
          foreach (var item in items)
          {
              logger.Info("Removing BaseItem " + item.Name + "(" + item.Id.ToString() + ") from Lucene Index");
              _removeFromLuceneIndex(item);
          }

          writer.Commit();
          writer.Flush(true, true, true);

          // The cached searcher was opened before these deletions. Drop it, as Create does,
          // so that Search does not keep answering from the old state.
          if (searcher != null)
          {
              try
              {
                  searcher.Dispose();
              }
              catch (Exception e)
              {
                  logger.ErrorException("Error in Lucene while removing from index (disposing alive searcher)", e);
              }

              searcher = null;
          }
      }
  }
  /// <summary>
  /// Searches the "Name" and "Overview" fields. Each whitespace-separated word of the query
  /// is matched both as a fuzzy term and as a prefix; any of them may match.
  /// Failures are logged and yield the results collected so far.
  /// </summary>
  /// <param name="searchQuery">The raw user query.</param>
  /// <param name="maxHits">The maximum number of hits to return.</param>
  /// <returns>The library items for the hits, in the order returned by the index.</returns>
  public static IEnumerable<BaseItem> Search(string searchQuery, int maxHits)
  {
      var results = new List<BaseItem>();

      // Nothing to look for, or no hits requested.
      if (string.IsNullOrWhiteSpace(searchQuery) || maxHits <= 0)
      {
          return results;
      }

      lock (lockOb)
      {
          try
          {
              if (searcher == null)
              {
                  searcher = new IndexSearcher(directory, true);
              }

              var finalQuery = new BooleanQuery();
              var parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, new string[] { "Name", "Overview" }, analyzer, bonusTerms);
              string[] terms = searchQuery.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);

              foreach (string term in terms)
              {
                  // Strip the user's own fuzzy marker, then escape the rest so that query
                  // syntax characters in user input cannot break parsing.
                  var fuzzyBase = term.Replace("~", "");

                  if (fuzzyBase.Length > 0)
                  {
                      finalQuery.Add(parser.Parse(QueryParser.Escape(fuzzyBase) + "~0.75"), Occur.SHOULD);
                  }
              }

              foreach (string term in terms)
              {
                  var prefixBase = term.Replace("*", "");

                  if (prefixBase.Length > 0)
                  {
                      finalQuery.Add(parser.Parse(QueryParser.Escape(prefixBase) + "*"), Occur.SHOULD);
                  }
              }

              logger.Debug("Querying Lucene with query: " + finalQuery.ToString());

              var timer = System.Diagnostics.Stopwatch.StartNew();
              var searchResult = searcher.Search(finalQuery, maxHits);

              foreach (var searchHit in searchResult.ScoreDocs)
              {
                  Document hit = searcher.Doc(searchHit.Doc);
                  var item = BaseItem.LibraryManager.GetItemById(Guid.Parse(hit.Get("Id")));

                  // Presumably the lookup yields null for ids no longer in the library;
                  // keep such entries out of the results.
                  if (item != null)
                  {
                      results.Add(item);
                  }
              }

              timer.Stop();
              logger.Debug(searchResult.ScoreDocs.Length + " result" + (searchResult.ScoreDocs.Length == 1 ? "" : "s") + " in " + timer.Elapsed.TotalMilliseconds + " ms.");
          }
          catch (Exception e)
          {
              logger.ErrorException("Error while searching Lucene index", e);
          }
      }

      return results;
  }
  /// <summary>
  /// Flushes and disposes the writer, analyzer, searcher and directory that are currently open.
  /// </summary>
  public static void CloseAll()
  {
      logger.Debug("Lucene: CloseAll");

      // Same lock as Create and Search, which also replace the shared searcher.
      lock (lockOb)
      {
          // Check the backing fields, not the lazy properties: the properties are never
          // null and would open a new writer or directory only to close it again.
          if (_writer != null)
          {
              logger.Debug("Lucene: CloseAll - Writer is alive");
              _writer.Flush(true, true, true);
              _writer.Commit();
              _writer.WaitForMerges();
              _writer.Dispose();
              _writer = null;
          }

          if (analyzer != null)
          {
              logger.Debug("Lucene: CloseAll - Analyzer is alive");
              analyzer.Close();
              analyzer.Dispose();
              analyzer = null;
          }

          if (searcher != null)
          {
              logger.Debug("Lucene: CloseAll - Searcher is alive");
              searcher.Dispose();
              searcher = null;
          }

          if (_directory != null)
          {
              logger.Debug("Lucene: CloseAll - Directory is alive");
              _directory.Dispose();
              _directory = null;
          }
      }
  }
  398. }
  399. }