LuceneSearchEngine.cs 20 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590
  1. using Lucene.Net.Analysis.Standard;
  2. using Lucene.Net.Documents;
  3. using Lucene.Net.Index;
  4. using Lucene.Net.QueryParsers;
  5. using Lucene.Net.Search;
  6. using Lucene.Net.Store;
  7. using MediaBrowser.Controller;
  8. using MediaBrowser.Controller.Entities;
  9. using MediaBrowser.Controller.Entities.Audio;
  10. using MediaBrowser.Controller.Library;
  11. using MediaBrowser.Model.Logging;
  12. using System;
  13. using System.Collections.Generic;
  14. using System.Linq;
  15. using System.Threading.Tasks;
  16. namespace MediaBrowser.Server.Implementations.Library
  17. {
  /// <summary>
  /// Class LuceneSearchEngine.
  /// Implements <see cref="ILibrarySearchEngine"/> by delegating full-text queries to the static
  /// <see cref="LuceneSearch"/> index and by building search hints through in-memory name matching.
  /// http://www.codeproject.com/Articles/320219/Lucene-Net-ultra-fast-search-for-MVC-or-WebForms
  /// </summary>
  public class LuceneSearchEngine : ILibrarySearchEngine, IDisposable
  {
      private readonly ILibraryManager _libraryManager;
      private readonly ILogger _logger;

      /// <summary>
      /// Initializes a new instance of the <see cref="LuceneSearchEngine"/> class.
      /// </summary>
      /// <param name="serverPaths">The server paths (only referenced by the disabled index setup below).</param>
      /// <param name="logManager">The log manager; used to obtain the "Lucene" logger.</param>
      /// <param name="libraryManager">The library manager used to resolve artists, genres, studios and persons.</param>
      public LuceneSearchEngine(IServerApplicationPaths serverPaths, ILogManager logManager, ILibraryManager libraryManager)
      {
          _libraryManager = libraryManager;
          _logger = logManager.GetLogger("Lucene");

          // NOTE(review): index initialisation is currently disabled. LuceneSearch.Init is what assigns the
          // index path and logger, so unless it is called elsewhere the index-backed members
          // (AddItemsToIndex, Search) cannot work - confirm before relying on them.
          //string luceneDbPath = serverPaths.DataPath + "\\SearchIndexDB";
          //if (!System.IO.Directory.Exists(luceneDbPath))
          //    System.IO.Directory.CreateDirectory(luceneDbPath);
          //else if(File.Exists(luceneDbPath + "\\write.lock"))
          //    File.Delete(luceneDbPath + "\\write.lock");
          //LuceneSearch.Init(luceneDbPath, _logger);
          //BaseItem.LibraryManager.LibraryChanged += LibraryChanged;
      }

      //public void LibraryChanged(object source, ChildrenChangedEventArgs changeInformation)
      //{
      //    Task.Run(() =>
      //    {
      //        if (changeInformation.ItemsAdded.Count + changeInformation.ItemsUpdated.Count > 0)
      //        {
      //            LuceneSearch.AddUpdateLuceneIndex(changeInformation.ItemsAdded.Concat(changeInformation.ItemsUpdated));
      //        }
      //        if (changeInformation.ItemsRemoved.Count > 0)
      //        {
      //            LuceneSearch.RemoveFromLuceneIndex(changeInformation.ItemsRemoved);
      //        }
      //    });
      //}

      /// <summary>
      /// Adds the items to the Lucene index, replacing any existing entries for the same items.
      /// </summary>
      /// <param name="items">The items to index.</param>
      /// <exception cref="System.ArgumentNullException">items</exception>
      public void AddItemsToIndex(IEnumerable<BaseItem> items)
      {
          if (items == null)
          {
              throw new ArgumentNullException("items");
          }

          LuceneSearch.AddUpdateLuceneIndex(items);
      }

      /// <summary>
      /// Searches items and returns them in order of relevance.
      /// Only index hits whose id belongs to one of <paramref name="items"/> are returned.
      /// </summary>
      /// <param name="items">The items.</param>
      /// <param name="searchTerm">The search term.</param>
      /// <returns>IEnumerable{BaseItem}.</returns>
      /// <exception cref="System.ArgumentNullException">searchTerm or items</exception>
      public IEnumerable<BaseItem> Search(IEnumerable<BaseItem> items, string searchTerm)
      {
          if (string.IsNullOrEmpty(searchTerm))
          {
              throw new ArgumentNullException("searchTerm");
          }
          if (items == null)
          {
              throw new ArgumentNullException("items");
          }

          // Enumerate the (possibly lazy) input exactly once.
          var candidates = items.Where(i => i != null).ToList();
          if (candidates.Count == 0)
          {
              // Nothing can match, and the index search needs a positive hit limit.
              return Enumerable.Empty<BaseItem>();
          }

          // Hash the ids so filtering the hits is O(hits) instead of O(hits * items).
          var allowedIds = new HashSet<Guid>(candidates.Select(i => i.Id));

          var hits = LuceneSearch.Search(searchTerm, candidates.Count);

          // The index may hand back entries that no longer resolve to a library item; skip those.
          return hits.Where(hit => hit != null && allowedIds.Contains(hit.Id)).ToList();
      }

      /// <summary>
      /// Releases resources. Currently a no-op because the index wiring is disabled.
      /// </summary>
      public void Dispose()
      {
          //BaseItem.LibraryManager.LibraryChanged -= LibraryChanged;
          //LuceneSearch.CloseAll();
      }

      /// <summary>
      /// Gets the search hints.
      /// Matches the search term against item names and against the artists, genres, music genres,
      /// game genres, studios and persons referenced by the items, then returns all matches ordered
      /// from best (lowest score) to worst.
      /// </summary>
      /// <param name="inputItems">The input items.</param>
      /// <param name="searchTerm">The search term.</param>
      /// <returns>IEnumerable{SearchHintResult}.</returns>
      /// <exception cref="System.ArgumentNullException">searchTerm</exception>
      public async Task<IEnumerable<SearchHintInfo>> GetSearchHints(IEnumerable<BaseItem> inputItems, string searchTerm)
      {
          if (string.IsNullOrEmpty(searchTerm))
          {
              throw new ArgumentNullException("searchTerm");
          }

          var terms = GetWords(searchTerm);
          var hints = new List<Tuple<BaseItem, string, int>>();

          // MusicArtist items are excluded here; artists are resolved by name further down.
          var items = inputItems.Where(i => !(i is MusicArtist)).ToList();

          // Add search hints based on item name (non-matches get -1 and are filtered out at the end)
          hints.AddRange(items.Where(i => !string.IsNullOrEmpty(i.Name)).Select(item =>
          {
              var index = GetIndex(item.Name, searchTerm, terms);
              return new Tuple<BaseItem, string, int>(item, index.Item1, index.Item2);
          }));

          // Find artists
          var artists = items.OfType<Audio>()
              .SelectMany(i =>
              {
                  var list = new List<string>();
                  if (!string.IsNullOrEmpty(i.AlbumArtist))
                  {
                      list.Add(i.AlbumArtist);
                  }
                  if (i.Artists != null)
                  {
                      list.AddRange(i.Artists);
                  }
                  return list;
              })
              // GetIndex rejects null/empty input, so blank artist entries must be dropped first.
              .Where(i => !string.IsNullOrEmpty(i))
              .Distinct(StringComparer.OrdinalIgnoreCase)
              .ToList();

          await AddNamedHints(hints, artists, searchTerm, terms, name => _libraryManager.GetArtist(name)).ConfigureAwait(false);

          // Find genres, from non-audio items
          var genres = items.Where(i => !(i is Audio) && !(i is MusicAlbum) && !(i is MusicArtist) && !(i is MusicVideo) && !(i is Game))
              .SelectMany(i => i.Genres)
              .Where(i => !string.IsNullOrEmpty(i))
              .Distinct(StringComparer.OrdinalIgnoreCase)
              .ToList();

          await AddNamedHints(hints, genres, searchTerm, terms, name => _libraryManager.GetGenre(name)).ConfigureAwait(false);

          // Find music genres
          var musicGenres = items.Where(i => (i is Audio) || (i is MusicAlbum) || (i is MusicArtist) || (i is MusicVideo))
              .SelectMany(i => i.Genres)
              .Where(i => !string.IsNullOrEmpty(i))
              .Distinct(StringComparer.OrdinalIgnoreCase)
              .ToList();

          await AddNamedHints(hints, musicGenres, searchTerm, terms, name => _libraryManager.GetMusicGenre(name)).ConfigureAwait(false);

          // Find game genres
          var gameGenres = items.OfType<Game>()
              .SelectMany(i => i.Genres)
              .Where(i => !string.IsNullOrEmpty(i))
              .Distinct(StringComparer.OrdinalIgnoreCase)
              .ToList();

          await AddNamedHints(hints, gameGenres, searchTerm, terms, name => _libraryManager.GetGameGenre(name)).ConfigureAwait(false);

          // Find studios
          var studios = items.SelectMany(i => i.Studios)
              .Where(i => !string.IsNullOrEmpty(i))
              .Distinct(StringComparer.OrdinalIgnoreCase)
              .ToList();

          await AddNamedHints(hints, studios, searchTerm, terms, name => _libraryManager.GetStudio(name)).ConfigureAwait(false);

          // Find persons
          var persons = items.SelectMany(i => i.People)
              .Select(i => i.Name)
              .Where(i => !string.IsNullOrEmpty(i))
              .Distinct(StringComparer.OrdinalIgnoreCase)
              .ToList();

          await AddNamedHints(hints, persons, searchTerm, terms, name => _libraryManager.GetPerson(name)).ConfigureAwait(false);

          return hints.Where(i => i.Item3 >= 0).OrderBy(i => i.Item3).Select(i => new SearchHintInfo
          {
              Item = i.Item1,
              MatchedTerm = i.Item2
          });
      }

      /// <summary>
      /// Scores each name against the search term and, for every name that matches, resolves the
      /// corresponding library entity and appends it to <paramref name="hints"/>.
      /// A failure to resolve one name is logged and does not stop the remaining names.
      /// </summary>
      /// <typeparam name="T">The entity type produced by <paramref name="getItem"/>.</typeparam>
      /// <param name="hints">The list the matching hints are appended to.</param>
      /// <param name="names">The candidate names; must not contain null or empty strings.</param>
      /// <param name="searchTerm">The full search term.</param>
      /// <param name="terms">The search term split into words.</param>
      /// <param name="getItem">Resolves a name to its library entity.</param>
      /// <returns>Task.</returns>
      private async Task AddNamedHints<T>(List<Tuple<BaseItem, string, int>> hints, IEnumerable<string> names, string searchTerm, string[] terms, Func<string, Task<T>> getItem)
          where T : BaseItem
      {
          foreach (var name in names)
          {
              var index = GetIndex(name, searchTerm, terms);
              if (index.Item2 == -1)
              {
                  continue;
              }

              try
              {
                  var entity = await getItem(name).ConfigureAwait(false);
                  hints.Add(new Tuple<BaseItem, string, int>(entity, index.Item1, index.Item2));
              }
              catch (Exception ex)
              {
                  _logger.ErrorException("Error getting {0}", ex, name);
              }
          }
      }

      /// <summary>
      /// Scores how well <paramref name="input"/> matches the search input. Lower is better:
      /// 0 = equal to the whole search input, 1 = starts with it, 2 = contains it.
      /// Otherwise the first search word that equals (3+), starts (4+) or occurs inside (5+) a word
      /// of the input wins, offset by (search word position) * (input word position), both 1-based.
      /// </summary>
      /// <param name="input">The text to score.</param>
      /// <param name="searchInput">The complete search input.</param>
      /// <param name="searchWords">The search input split into words.</param>
      /// <returns>The matched term and its score, or (null, -1) when nothing matches.</returns>
      /// <exception cref="System.ArgumentNullException">input</exception>
      private Tuple<string, int> GetIndex(string input, string searchInput, string[] searchWords)
      {
          if (string.IsNullOrEmpty(input))
          {
              throw new ArgumentNullException("input");
          }

          if (string.Equals(input, searchInput, StringComparison.OrdinalIgnoreCase))
          {
              return new Tuple<string, int>(searchInput, 0);
          }

          var index = input.IndexOf(searchInput, StringComparison.OrdinalIgnoreCase);

          if (index == 0)
          {
              return new Tuple<string, int>(searchInput, 1);
          }
          if (index > 0)
          {
              return new Tuple<string, int>(searchInput, 2);
          }

          var items = GetWords(input);

          for (var i = 0; i < searchWords.Length; i++)
          {
              var searchTerm = searchWords[i];

              for (var j = 0; j < items.Length; j++)
              {
                  var item = items[j];

                  if (string.Equals(item, searchTerm, StringComparison.OrdinalIgnoreCase))
                  {
                      return new Tuple<string, int>(searchTerm, 3 + (i + 1) * (j + 1));
                  }

                  index = item.IndexOf(searchTerm, StringComparison.OrdinalIgnoreCase);

                  if (index == 0)
                  {
                      return new Tuple<string, int>(searchTerm, 4 + (i + 1) * (j + 1));
                  }
                  if (index > 0)
                  {
                      return new Tuple<string, int>(searchTerm, 5 + (i + 1) * (j + 1));
                  }
              }
          }

          return new Tuple<string, int>(null, -1);
      }

      /// <summary>
      /// Splits the term on whitespace and drops blank entries.
      /// </summary>
      /// <param name="term">The term.</param>
      /// <returns>System.String[].</returns>
      private string[] GetWords(string term)
      {
          return term.Split().Where(i => !string.IsNullOrWhiteSpace(i)).ToArray();
      }
  }
  /// <summary>
  /// Static wrapper around a single file-system Lucene index holding one document per
  /// <see cref="BaseItem"/> (fields "Id", "Name" and "Overview").
  /// <see cref="Init"/> must be called before any other member is used.
  /// </summary>
  public static class LuceneSearch
  {
      private static ILogger logger;
      private static string path;

      // Guards the cached searcher and index mutations.
      private static readonly object lockOb = new object();

      private static FSDirectory _directory;

      /// <summary>
      /// Gets or sets the index directory; opened on first access from <see cref="path"/>.
      /// </summary>
      private static FSDirectory directory
      {
          get
          {
              if (_directory == null)
              {
                  logger.Info("Opening new Directory: " + path);
                  _directory = FSDirectory.Open(path);
              }
              return _directory;
          }
          set
          {
              _directory = value;
          }
      }

      private static IndexWriter _writer;

      /// <summary>
      /// Gets or sets the index writer; created on first access.
      /// </summary>
      private static IndexWriter writer
      {
          get
          {
              if (_writer == null)
              {
                  logger.Info("Opening new IndexWriter");
                  _writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
              }
              return _writer;
          }
          set
          {
              _writer = value;
          }
      }

      private static StandardAnalyzer _analyzer;

      /// <summary>
      /// Gets or sets the analyzer; created on first access so that the class stays usable
      /// after <see cref="CloseAll"/> has disposed the previous instance.
      /// </summary>
      private static StandardAnalyzer analyzer
      {
          get
          {
              if (_analyzer == null)
              {
                  _analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);
              }
              return _analyzer;
          }
          set
          {
              _analyzer = value;
          }
      }

      // Cached read-only searcher; dropped whenever the index is modified.
      private static Searcher searcher = null;

      // Per-field boosts handed to the query parser.
      private static Dictionary<string, float> bonusTerms;

      /// <summary>
      /// Initializes the index: stores the path and logger, sets the field boosts and opens
      /// (and optimizes) the index writer.
      /// </summary>
      /// <param name="path">The directory that holds the index.</param>
      /// <param name="logger">The logger.</param>
      /// <exception cref="System.ArgumentNullException">logger or path</exception>
      public static void Init(string path, ILogger logger)
      {
          if (logger == null)
          {
              throw new ArgumentNullException("logger");
          }
          if (string.IsNullOrEmpty(path))
          {
              throw new ArgumentNullException("path");
          }

          logger.Info("Lucene: Init");

          bonusTerms = new Dictionary<string, float>();
          bonusTerms.Add("Name", 2);
          bonusTerms.Add("Overview", 1);

          // Optimize the DB on initialization
          // TODO: Test whether this has..
          // Any effect what-so-ever (apart from initializing the indexwriter on the mainthread context, which makes things a whole lot easier)
          // Costs too much time
          // Is heavy on the CPU / Memory
          LuceneSearch.logger = logger;
          LuceneSearch.path = path;
          writer.Optimize();
      }

      /// <summary>
      /// Throws a descriptive exception instead of a NullReferenceException when the class is
      /// used before <see cref="Init"/>.
      /// </summary>
      /// <exception cref="System.InvalidOperationException">Init has not been called.</exception>
      private static void EnsureInitialized()
      {
          if (logger == null || path == null)
          {
              throw new InvalidOperationException("LuceneSearch.Init must be called before the index is used.");
          }
      }

      /// <summary>
      /// Builds the index document for an item.
      /// </summary>
      /// <param name="data">The item.</param>
      /// <returns>Document.</returns>
      private static Document createDocument(BaseItem data)
      {
          Document doc = new Document();

          // The id must be indexed (as a single un-analyzed term); otherwise the TermQuery used for
          // deletion can never match and every update leaves a duplicate document behind.
          // Documents written earlier with an un-indexed id are not affected by this change.
          doc.Add(new Field("Id", data.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

          // Field values must not be null, so fall back to an empty string.
          doc.Add(new Field("Name", data.Name ?? "", Field.Store.YES, Field.Index.ANALYZED) { Boost = 2 });
          doc.Add(new Field("Overview", data.Overview ?? "", Field.Store.YES, Field.Index.ANALYZED));
          return doc;
      }

      /// <summary>
      /// Disposes the cached searcher (if any) so that the next search sees the modified index.
      /// Must be called while holding <see cref="lockOb"/>.
      /// </summary>
      /// <param name="errorMessage">The message logged when disposing fails.</param>
      /// <param name="item">The item being processed; passed to the logger.</param>
      private static void ResetSearcher(string errorMessage, BaseItem item)
      {
          if (searcher == null)
          {
              return;
          }

          try
          {
              searcher.Dispose();
          }
          catch (Exception e)
          {
              logger.ErrorException(errorMessage, e, item);
          }
          searcher = null;
      }

      /// <summary>
      /// Replaces the index entry of a single item. Errors are logged, not thrown.
      /// </summary>
      /// <param name="item">The item.</param>
      private static void Create(BaseItem item)
      {
          lock (lockOb)
          {
              try
              {
                  ResetSearcher("Error in Lucene while creating index (disposing alive searcher)", item);

                  // Delete first to prevent double entries.
                  _removeFromLuceneIndex(item);
                  _addToLuceneIndex(item);
              }
              catch (Exception e)
              {
                  logger.ErrorException("Error in Lucene while creating index", e, item);
              }
          }
      }

      /// <summary>
      /// Adds the item's document to the index without checking for an existing entry.
      /// </summary>
      /// <param name="data">The item.</param>
      private static void _addToLuceneIndex(BaseItem data)
      {
          var doc = createDocument(data);
          writer.AddDocument(doc);
      }

      /// <summary>
      /// Deletes every document whose "Id" term equals the item's id.
      /// </summary>
      /// <param name="data">The item.</param>
      private static void _removeFromLuceneIndex(BaseItem data)
      {
          var query = new TermQuery(new Term("Id", data.Id.ToString()));
          writer.DeleteDocuments(query);
      }

      /// <summary>
      /// Adds the items to the index, replacing existing entries, then commits.
      /// </summary>
      /// <param name="items">The items.</param>
      /// <exception cref="System.ArgumentNullException">items</exception>
      /// <exception cref="System.InvalidOperationException">Init has not been called.</exception>
      public static void AddUpdateLuceneIndex(IEnumerable<BaseItem> items)
      {
          if (items == null)
          {
              throw new ArgumentNullException("items");
          }
          EnsureInitialized();

          foreach (var item in items)
          {
              logger.Info("Adding/Updating BaseItem " + item.Name + "(" + item.Id.ToString() + ") to/on Lucene Index");
              Create(item);
          }

          writer.Commit();
          writer.Flush(true, true, true);
      }

      /// <summary>
      /// Removes the items from the index, then commits.
      /// </summary>
      /// <param name="items">The items.</param>
      /// <exception cref="System.ArgumentNullException">items</exception>
      /// <exception cref="System.InvalidOperationException">Init has not been called.</exception>
      public static void RemoveFromLuceneIndex(IEnumerable<BaseItem> items)
      {
          if (items == null)
          {
              throw new ArgumentNullException("items");
          }
          EnsureInitialized();

          foreach (var item in items)
          {
              logger.Info("Removing BaseItem " + item.Name + "(" + item.Id.ToString() + ") from Lucene Index");

              lock (lockOb)
              {
                  // Same discipline as Create: drop the cached searcher so that removed
                  // documents are not returned by later searches.
                  ResetSearcher("Error in Lucene while removing from index (disposing alive searcher)", item);
                  _removeFromLuceneIndex(item);
              }
          }

          writer.Commit();
          writer.Flush(true, true, true);
      }

      /// <summary>
      /// Runs a fuzzy + prefix query for every whitespace-separated word of the search query
      /// against the "Name" and "Overview" fields and resolves the hits to library items.
      /// Errors while searching are logged and yield the results collected so far.
      /// </summary>
      /// <param name="searchQuery">The user's search text.</param>
      /// <param name="maxHits">The maximum number of hits to return.</param>
      /// <returns>The matching items in the order returned by the searcher.</returns>
      /// <exception cref="System.InvalidOperationException">Init has not been called.</exception>
      public static IEnumerable<BaseItem> Search(string searchQuery, int maxHits)
      {
          var results = new List<BaseItem>();

          EnsureInitialized();

          // The searcher requires a positive hit count, and there is nothing to look for in a blank query.
          if (string.IsNullOrWhiteSpace(searchQuery) || maxHits <= 0)
          {
              return results;
          }

          lock (lockOb)
          {
              try
              {
                  if (searcher == null)
                  {
                      searcher = new IndexSearcher(directory, true);
                  }

                  BooleanQuery finalQuery = new BooleanQuery();
                  MultiFieldQueryParser parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, new string[] { "Name", "Overview" }, analyzer, bonusTerms);

                  string[] terms = searchQuery.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);

                  // Fuzzy match per word. The word is escaped so that query syntax typed by the
                  // user (quotes, brackets, colons, ...) cannot break parsing of the whole query.
                  foreach (string term in terms)
                  {
                      var fuzzyTerm = term.Replace("~", "");
                      if (fuzzyTerm.Length > 0)
                      {
                          finalQuery.Add(parser.Parse(QueryParser.Escape(fuzzyTerm) + "~0.75"), Occur.SHOULD);
                      }
                  }

                  // Prefix match per word.
                  foreach (string term in terms)
                  {
                      var prefixTerm = term.Replace("*", "");
                      if (prefixTerm.Length > 0)
                      {
                          finalQuery.Add(parser.Parse(QueryParser.Escape(prefixTerm) + "*"), Occur.SHOULD);
                      }
                  }

                  logger.Debug("Querying Lucene with query: " + finalQuery.ToString());

                  var stopwatch = System.Diagnostics.Stopwatch.StartNew();
                  var searchResult = searcher.Search(finalQuery, maxHits);

                  foreach (var searchHit in searchResult.ScoreDocs)
                  {
                      Document hit = searcher.Doc(searchHit.Doc);

                      Guid id;
                      if (!Guid.TryParse(hit.Get("Id"), out id))
                      {
                          continue;
                      }

                      // Skip index entries that no longer resolve to a library item.
                      var item = BaseItem.LibraryManager.GetItemById(id);
                      if (item != null)
                      {
                          results.Add(item);
                      }
                  }

                  stopwatch.Stop();
                  float msTotal = (float)stopwatch.Elapsed.TotalMilliseconds;
                  logger.Debug(searchResult.ScoreDocs.Length + " result" + (searchResult.ScoreDocs.Length == 1 ? "" : "s") + " in " + msTotal + " ms.");
              }
              catch (Exception e)
              {
                  logger.ErrorException("Error while searching Lucene index", e);
              }
          }

          return results;
      }

      /// <summary>
      /// Flushes and disposes the writer, analyzer, searcher and directory if they were created.
      /// Does nothing when <see cref="Init"/> was never called.
      /// </summary>
      public static void CloseAll()
      {
          if (logger == null)
          {
              // Never initialised, so nothing can have been opened.
              return;
          }

          logger.Debug("Lucene: CloseAll");

          lock (lockOb)
          {
              // Check the backing fields: the lazy properties would open a new writer/directory
              // just so that it can be closed again.
              if (_writer != null)
              {
                  logger.Debug("Lucene: CloseAll - Writer is alive");
                  _writer.Flush(true, true, true);
                  _writer.Commit();
                  _writer.WaitForMerges();
                  _writer.Dispose();
                  _writer = null;
              }

              if (_analyzer != null)
              {
                  logger.Debug("Lucene: CloseAll - Analyzer is alive");
                  _analyzer.Dispose();
                  _analyzer = null;
              }

              if (searcher != null)
              {
                  logger.Debug("Lucene: CloseAll - Searcher is alive");
                  searcher.Dispose();
                  searcher = null;
              }

              if (_directory != null)
              {
                  logger.Debug("Lucene: CloseAll - Directory is alive");
                  _directory.Dispose();
                  _directory = null;
              }
          }
      }
  }
  498. }