2
0

LuceneSearchEngine.cs 19 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550
  1. using System.Text.RegularExpressions;
  2. using Lucene.Net.Analysis.Standard;
  3. using Lucene.Net.Documents;
  4. using Lucene.Net.Index;
  5. using Lucene.Net.QueryParsers;
  6. using Lucene.Net.Search;
  7. using Lucene.Net.Store;
  8. using MediaBrowser.Controller;
  9. using MediaBrowser.Controller.Entities;
  10. using MediaBrowser.Controller.Entities.Audio;
  11. using MediaBrowser.Controller.Library;
  12. using MediaBrowser.Model.Logging;
  13. using System;
  14. using System.Collections.Generic;
  15. using System.Linq;
  16. using System.Threading.Tasks;
  17. namespace MediaBrowser.Server.Implementations.Library
  18. {
  19. /// <summary>
  20. /// Class LuceneSearchEngine
  21. /// http://www.codeproject.com/Articles/320219/Lucene-Net-ultra-fast-search-for-MVC-or-WebForms
  22. /// </summary>
  23. public class LuceneSearchEngine : ILibrarySearchEngine, IDisposable
  24. {
  25. private readonly ILibraryManager _libraryManager;
  26. private readonly ILogger _logger;
  /// <summary>
  /// Initializes a new instance of the <see cref="LuceneSearchEngine"/> class.
  /// </summary>
  /// <param name="serverPaths">The server application paths. Not read while the index setup below stays disabled.</param>
  /// <param name="logManager">The log manager that supplies the "Lucene" logger.</param>
  /// <param name="libraryManager">The library manager used to resolve artists, genres, studios and people.</param>
  public LuceneSearchEngine(IServerApplicationPaths serverPaths, ILogManager logManager, ILibraryManager libraryManager)
  {
      _logger = logManager.GetLogger("Lucene");
      _libraryManager = libraryManager;

      // On-disk index initialization is currently disabled; kept for reference.
      //string luceneDbPath = serverPaths.DataPath + "\\SearchIndexDB";
      //if (!System.IO.Directory.Exists(luceneDbPath))
      //    System.IO.Directory.CreateDirectory(luceneDbPath);
      //else if(File.Exists(luceneDbPath + "\\write.lock"))
      //    File.Delete(luceneDbPath + "\\write.lock");
      //LuceneSearch.Init(luceneDbPath, _logger);
      //BaseItem.LibraryManager.LibraryChanged += LibraryChanged;
  }
  39. //public void LibraryChanged(object source, ChildrenChangedEventArgs changeInformation)
  40. //{
  41. // Task.Run(() =>
  42. // {
  43. // if (changeInformation.ItemsAdded.Count + changeInformation.ItemsUpdated.Count > 0)
  44. // {
  45. // LuceneSearch.AddUpdateLuceneIndex(changeInformation.ItemsAdded.Concat(changeInformation.ItemsUpdated));
  46. // }
  47. // if (changeInformation.ItemsRemoved.Count > 0)
  48. // {
  49. // LuceneSearch.RemoveFromLuceneIndex(changeInformation.ItemsRemoved);
  50. // }
  51. // });
  52. //}
  /// <summary>
  /// Adds the given items to the Lucene index, replacing any entries already stored for them.
  /// </summary>
  /// <param name="items">The items to add or update.</param>
  public void AddItemsToIndex(IEnumerable<BaseItem> items)
  {
      // NOTE(review): the LuceneSearch.Init call in the constructor is commented out, so the
      // static index state (logger, path) looks uninitialized here - confirm before relying on this.
      LuceneSearch.AddUpdateLuceneIndex(items);
  }
  /// <summary>
  /// Searches items and returns them in order of relevance.
  /// </summary>
  /// <param name="items">The items the result is restricted to; hits that are not in this set are dropped.</param>
  /// <param name="searchTerm">The search term.</param>
  /// <returns>IEnumerable{BaseItem}.</returns>
  /// <exception cref="System.ArgumentNullException">searchTerm</exception>
  public IEnumerable<BaseItem> Search(IEnumerable<BaseItem> items, string searchTerm)
  {
      if (string.IsNullOrEmpty(searchTerm))
      {
          throw new ArgumentNullException("searchTerm");
      }

      // Enumerate the (possibly deferred) candidate sequence exactly once.
      var candidates = items.ToList();

      if (candidates.Count == 0)
      {
          // Nothing can match, and the index must not be queried for zero hits.
          return Enumerable.Empty<BaseItem>();
      }

      // Hash lookup instead of scanning every candidate for every hit.
      var allowedIds = new HashSet<Guid>(candidates.Select(i => i.Id));

      var hits = LuceneSearch.Search(searchTerm, candidates.Count);

      // A hit may be null when the index refers to an item the library no longer resolves.
      return hits.Where(hit => hit != null && allowedIds.Contains(hit.Id));
  }
  /// <summary>
  /// Releases resources held by the search engine. Currently a no-op because the
  /// index teardown below is disabled.
  /// </summary>
  public void Dispose()
  {
      // Disabled together with the index initialization in the constructor:
      //BaseItem.LibraryManager.LibraryChanged -= LibraryChanged;
      //LuceneSearch.CloseAll();
  }
  /// <summary>
  /// Gets the search hints.
  /// </summary>
  /// <remarks>
  /// Hints are collected from item names, then from artists, genres, music genres,
  /// studios and people referenced by the items. Each hint carries the rank computed
  /// by GetIndex; unmatched entries (rank -1) are dropped and the rest are returned
  /// in ascending rank order.
  /// </remarks>
  /// <param name="inputItems">The input items.</param>
  /// <param name="searchTerm">The search term.</param>
  /// <returns>IEnumerable{SearchHintResult}.</returns>
  /// <exception cref="System.ArgumentNullException">searchTerm</exception>
  public async Task<IEnumerable<SearchHintInfo>> GetSearchHints(IEnumerable<BaseItem> inputItems, string searchTerm)
  {
      if (string.IsNullOrEmpty(searchTerm))
      {
          throw new ArgumentNullException("searchTerm");
      }

      var terms = GetWords(searchTerm);

      var hints = new List<Tuple<BaseItem, string, int>>();

      var items = inputItems.Where(i => !(i is MusicArtist)).ToList();

      // Add search hints based on item name. Items without a name are skipped:
      // an empty name can never match and a null name would crash the ranking.
      hints.AddRange(items
          .Where(item => !string.IsNullOrEmpty(item.Name))
          .Select(item =>
          {
              var index = GetIndex(item.Name, searchTerm, terms);

              return new Tuple<BaseItem, string, int>(item, index.Item1, index.Item2);
          }));

      // Find artists
      var artistNames = items.OfType<Audio>()
          .SelectMany(i => new[] { i.Artist, i.AlbumArtist });

      await AddNamedEntityHints(hints, artistNames, searchTerm, terms,
          async name => await _libraryManager.GetArtist(name).ConfigureAwait(false)).ConfigureAwait(false);

      // Find genres, from non-audio items
      var genreNames = items
          .Where(i => !(i is Audio) && !(i is MusicAlbum) && !(i is MusicAlbumDisc) && !(i is MusicArtist) && !(i is MusicVideo))
          .SelectMany(i => i.Genres);

      await AddNamedEntityHints(hints, genreNames, searchTerm, terms,
          async name => await _libraryManager.GetGenre(name).ConfigureAwait(false)).ConfigureAwait(false);

      // Find music genres
      var musicGenreNames = items
          .Where(i => (i is Audio) || (i is MusicAlbum) || (i is MusicAlbumDisc) || (i is MusicArtist) || (i is MusicVideo))
          .SelectMany(i => i.Genres);

      await AddNamedEntityHints(hints, musicGenreNames, searchTerm, terms,
          async name => await _libraryManager.GetMusicGenre(name).ConfigureAwait(false)).ConfigureAwait(false);

      // Find studios
      var studioNames = items.SelectMany(i => i.Studios);

      await AddNamedEntityHints(hints, studioNames, searchTerm, terms,
          async name => await _libraryManager.GetStudio(name).ConfigureAwait(false)).ConfigureAwait(false);

      // Find persons
      var personNames = items.SelectMany(i => i.People).Select(i => i.Name);

      await AddNamedEntityHints(hints, personNames, searchTerm, terms,
          async name => await _libraryManager.GetPerson(name).ConfigureAwait(false)).ConfigureAwait(false);

      return hints.Where(i => i.Item3 >= 0).OrderBy(i => i.Item3).Select(i => new SearchHintInfo
      {
          Item = i.Item1,
          MatchedTerm = i.Item2
      });
  }

  /// <summary>
  /// Ranks each distinct, non-empty name against the search input and, for every match,
  /// resolves the name to a library entity and appends it to the hint list.
  /// </summary>
  /// <param name="hints">The hint list to append to.</param>
  /// <param name="names">The candidate names; null/empty entries and case-insensitive duplicates are ignored.</param>
  /// <param name="searchTerm">The full search term.</param>
  /// <param name="terms">The individual words of the search term.</param>
  /// <param name="resolve">Resolves a name to its library entity.</param>
  /// <returns>Task.</returns>
  private async Task AddNamedEntityHints(List<Tuple<BaseItem, string, int>> hints, IEnumerable<string> names, string searchTerm, string[] terms, Func<string, Task<BaseItem>> resolve)
  {
      var candidates = names
          .Where(i => !string.IsNullOrEmpty(i))
          .Distinct(StringComparer.OrdinalIgnoreCase)
          .ToList();

      foreach (var name in candidates)
      {
          var index = GetIndex(name, searchTerm, terms);

          if (index.Item2 == -1)
          {
              continue;
          }

          try
          {
              var entity = await resolve(name).ConfigureAwait(false);

              hints.Add(new Tuple<BaseItem, string, int>(entity, index.Item1, index.Item2));
          }
          catch (Exception ex)
          {
              // Best effort: a failed lookup only costs this one hint.
              _logger.ErrorException("Error getting {0}", ex, name);
          }
      }
  }
  /// <summary>
  /// Ranks how well <paramref name="input"/> matches the search input. Lower is better.
  /// </summary>
  /// <remarks>
  /// 0: the input equals the whole search input (ignoring case).
  /// 1: the input starts with the search input.
  /// 2: the input contains the search input.
  /// Otherwise the words of both are compared pairwise; an exact word match scores 3,
  /// a word prefix 4 and a word substring 5, each plus (search word position) * (input word position),
  /// so matches on earlier words rank higher.
  /// </remarks>
  /// <param name="input">The text to examine. Null or empty never matches.</param>
  /// <param name="searchInput">The complete search input.</param>
  /// <param name="searchWords">The individual words of the search input.</param>
  /// <returns>The matched term and its rank, or a null term with rank -1 when nothing matches.</returns>
  private Tuple<string, int> GetIndex(string input, string searchInput, string[] searchWords)
  {
      // Guard against items without a name; IndexOf below would throw on null.
      if (string.IsNullOrEmpty(input))
      {
          return new Tuple<string, int>(null, -1);
      }

      if (string.Equals(input, searchInput, StringComparison.OrdinalIgnoreCase))
      {
          return new Tuple<string, int>(searchInput, 0);
      }

      var index = input.IndexOf(searchInput, StringComparison.OrdinalIgnoreCase);

      if (index == 0)
      {
          return new Tuple<string, int>(searchInput, 1);
      }
      if (index > 0)
      {
          return new Tuple<string, int>(searchInput, 2);
      }

      var items = GetWords(input);

      for (var i = 0; i < searchWords.Length; i++)
      {
          var searchTerm = searchWords[i];

          for (var j = 0; j < items.Length; j++)
          {
              var item = items[j];

              // Position-weighted penalty shared by all three word-level ranks.
              var positionPenalty = (i + 1) * (j + 1);

              if (string.Equals(item, searchTerm, StringComparison.OrdinalIgnoreCase))
              {
                  return new Tuple<string, int>(searchTerm, 3 + positionPenalty);
              }

              index = item.IndexOf(searchTerm, StringComparison.OrdinalIgnoreCase);

              if (index == 0)
              {
                  return new Tuple<string, int>(searchTerm, 4 + positionPenalty);
              }
              if (index > 0)
              {
                  return new Tuple<string, int>(searchTerm, 5 + positionPenalty);
              }
          }
      }

      return new Tuple<string, int>(null, -1);
  }
  /// <summary>
  /// Splits a term into its whitespace-separated words.
  /// </summary>
  /// <param name="term">The term.</param>
  /// <returns>The non-empty words of the term, in order.</returns>
  private string[] GetWords(string term)
  {
      // A null separator array splits on white-space characters; empty entries
      // produced by consecutive white space are dropped.
      char[] whitespace = null;

      return term.Split(whitespace, StringSplitOptions.RemoveEmptyEntries);
  }
  271. }
  272. public static class LuceneSearch
  273. {
  // Logger and index directory path; both are assigned by Init.
  private static ILogger logger;
  private static string path;

  // Guards the cached searcher and index modifications. Read-only so the
  // monitor object can never be swapped out while a thread holds the lock.
  private static readonly object lockOb = new object();
  private static FSDirectory _directory;

  /// <summary>
  /// Gets the index directory, opening it at <c>path</c> on first access.
  /// Setting it replaces the cached instance (null forces a re-open on next access).
  /// </summary>
  private static FSDirectory directory
  {
      get
      {
          if (_directory != null)
          {
              return _directory;
          }

          logger.Info("Opening new Directory: " + path);
          _directory = FSDirectory.Open(path);
          return _directory;
      }
      set
      {
          _directory = value;
      }
  }
  private static IndexWriter _writer;

  /// <summary>
  /// Gets the index writer, creating it over the index directory on first access.
  /// Setting it replaces the cached instance (null forces re-creation on next access).
  /// </summary>
  private static IndexWriter writer
  {
      get
      {
          if (_writer != null)
          {
              return _writer;
          }

          logger.Info("Opening new IndexWriter");
          _writer = new IndexWriter(directory, analyzer, IndexWriter.MaxFieldLength.UNLIMITED);
          return _writer;
      }
      set
      {
          _writer = value;
      }
  }
  // Per-field boosts handed to the query parser when searching.
  private static Dictionary<string, float> bonusTerms;

  /// <summary>
  /// Initializes the static index state: field boosts, logger and index path,
  /// then opens the index writer and optimizes the index.
  /// </summary>
  /// <param name="path">The directory that holds the index.</param>
  /// <param name="logger">The logger used by all index operations.</param>
  public static void Init(string path, ILogger logger)
  {
      logger.Info("Lucene: Init");

      bonusTerms = new Dictionary<string, float>
      {
          { "Name", 2 },
          { "Overview", 1 }
      };

      LuceneSearch.logger = logger;
      LuceneSearch.path = path;

      // Optimize the DB on initialization.
      // TODO: Test whether this
      //  - has any effect at all (apart from creating the IndexWriter on the calling thread, which makes things a whole lot easier)
      //  - costs too much time
      //  - is heavy on the CPU / memory
      writer.Optimize();
  }
  // Analyzer shared by the index writer and the query parser.
  private static StandardAnalyzer analyzer = new StandardAnalyzer(Lucene.Net.Util.Version.LUCENE_30);

  // Cached searcher; created on the first search and dropped whenever an item is (re)indexed.
  private static Searcher searcher;
  /// <summary>
  /// Builds the Lucene document for an item: its id, name (boosted) and overview.
  /// </summary>
  /// <param name="data">The item to index.</param>
  /// <returns>The document to add to the index.</returns>
  private static Document createDocument(BaseItem data)
  {
      var doc = new Document();

      // The id must be indexed (as a single, un-analyzed term): entries are deleted with a
      // term query on "Id", which cannot match a field that is stored but not indexed.
      doc.Add(new Field("Id", data.Id.ToString(), Field.Store.YES, Field.Index.NOT_ANALYZED));

      // Field values must not be null, so fall back to an empty string like Overview does.
      doc.Add(new Field("Name", data.Name ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED) { Boost = 2 });
      doc.Add(new Field("Overview", data.Overview ?? string.Empty, Field.Store.YES, Field.Index.ANALYZED));

      return doc;
  }
  /// <summary>
  /// Replaces the index entry of a single item: drops the cached searcher, deletes the
  /// item's existing entry and adds a fresh one. Failures are logged, not thrown.
  /// </summary>
  /// <param name="item">The item to (re)index.</param>
  private static void Create(BaseItem item)
  {
      lock (lockOb)
      {
          try
          {
              var staleSearcher = searcher;

              if (staleSearcher != null)
              {
                  // The cached searcher is discarded before the index is modified.
                  try
                  {
                      staleSearcher.Dispose();
                  }
                  catch (Exception disposeError)
                  {
                      logger.ErrorException("Error in Lucene while creating index (disposing alive searcher)", disposeError, item);
                  }

                  searcher = null;
              }

              _removeFromLuceneIndex(item);
              _addToLuceneIndex(item);
          }
          catch (Exception indexError)
          {
              logger.ErrorException("Error in Lucene while creating index", indexError, item);
          }
      }
  }
  /// <summary>
  /// Adds a document for the item to the index. Does not check for an existing
  /// entry; callers delete the old entry first.
  /// </summary>
  /// <param name="data">The item to add.</param>
  private static void _addToLuceneIndex(BaseItem data)
  {
      writer.AddDocument(createDocument(data));
  }
  /// <summary>
  /// Deletes the documents whose "Id" term equals the item's id.
  /// </summary>
  /// <param name="data">The item to remove.</param>
  private static void _removeFromLuceneIndex(BaseItem data)
  {
      // NOTE(review): a term query only matches indexed fields - confirm "Id" is written as an indexed field.
      var idTerm = new Term("Id", data.Id.ToString());

      writer.DeleteDocuments(new TermQuery(idTerm));
  }
  /// <summary>
  /// Adds each item to the index (replacing its previous entry), then commits and flushes the writer.
  /// </summary>
  /// <param name="items">The items to add or update.</param>
  public static void AddUpdateLuceneIndex(IEnumerable<BaseItem> items)
  {
      foreach (var entry in items)
      {
          logger.Info("Adding/Updating BaseItem " + entry.Name + "(" + entry.Id.ToString() + ") to/on Lucene Index");
          Create(entry);
      }

      var indexWriter = writer;
      indexWriter.Commit();
      indexWriter.Flush(true, true, true);
  }
  /// <summary>
  /// Removes the given items from the index, then commits and flushes the writer.
  /// </summary>
  /// <remarks>
  /// Runs under the same lock as indexing and searching, and drops the cached searcher
  /// so that later searches no longer see the removed items.
  /// </remarks>
  /// <param name="items">The items to remove.</param>
  public static void RemoveFromLuceneIndex(IEnumerable<BaseItem> items)
  {
      lock (lockOb)
      {
          // Mirror Create: discard the cached searcher before modifying the index.
          if (searcher != null)
          {
              try
              {
                  searcher.Dispose();
              }
              catch (Exception e)
              {
                  logger.ErrorException("Error in Lucene while removing from index (disposing alive searcher)", e);
              }

              searcher = null;
          }

          foreach (var item in items)
          {
              logger.Info("Removing BaseItem " + item.Name + "(" + item.Id.ToString() + ") from Lucene Index");
              _removeFromLuceneIndex(item);
          }

          writer.Commit();
          writer.Flush(true, true, true);
      }
  }
  /// <summary>
  /// Searches the "Name" and "Overview" fields of the index. Every word of the query is
  /// matched both fuzzily and as a prefix; any single match is enough for a hit.
  /// </summary>
  /// <param name="searchQuery">The user-entered query. Blank queries yield no results.</param>
  /// <param name="maxHits">The maximum number of hits to return. Values below 1 yield no results.</param>
  /// <returns>The matching library items, best match first. Empty when nothing matches or the search fails (failures are logged).</returns>
  public static IEnumerable<BaseItem> Search(string searchQuery, int maxHits)
  {
      var results = new List<BaseItem>();

      // Nothing to ask the index for; avoids a failing (and logged) Lucene call.
      if (string.IsNullOrWhiteSpace(searchQuery) || maxHits <= 0)
      {
          return results;
      }

      lock (lockOb)
      {
          try
          {
              if (searcher == null)
              {
                  searcher = new IndexSearcher(directory, true);
              }

              var finalQuery = new BooleanQuery();
              var parser = new MultiFieldQueryParser(Lucene.Net.Util.Version.LUCENE_30, new string[] { "Name", "Overview" }, analyzer, bonusTerms);

              string[] terms = searchQuery.Split(new[] { " " }, StringSplitOptions.RemoveEmptyEntries);

              // Terms are user input: escape query syntax so characters such as ':' or '('
              // cannot break parsing and fail the whole search.
              foreach (string term in terms)
              {
                  var fuzzyTerm = term.Replace("~", "");

                  if (fuzzyTerm.Length > 0)
                  {
                      finalQuery.Add(parser.Parse(QueryParser.Escape(fuzzyTerm) + "~0.75"), Occur.SHOULD);
                  }
              }

              foreach (string term in terms)
              {
                  var prefixTerm = term.Replace("*", "");

                  if (prefixTerm.Length > 0)
                  {
                      finalQuery.Add(parser.Parse(QueryParser.Escape(prefixTerm) + "*"), Occur.SHOULD);
                  }
              }

              logger.Debug("Querying Lucene with query: " + finalQuery.ToString());

              var stopwatch = System.Diagnostics.Stopwatch.StartNew();

              var searchResult = searcher.Search(finalQuery, maxHits);

              foreach (var searchHit in searchResult.ScoreDocs)
              {
                  Document hit = searcher.Doc(searchHit.Doc);
                  var item = BaseItem.LibraryManager.GetItemById(Guid.Parse(hit.Get("Id")));

                  // Presumably the lookup yields null for items that left the library after
                  // being indexed; never hand null entries to callers.
                  if (item != null)
                  {
                      results.Add(item);
                  }
              }

              stopwatch.Stop();
              float msTotal = (float)stopwatch.Elapsed.TotalMilliseconds;

              logger.Debug(searchResult.ScoreDocs.Length + " result" + (searchResult.ScoreDocs.Length == 1 ? "" : "s") + " in " + msTotal + " ms.");
          }
          catch (Exception e)
          {
              // Best effort: a failed search returns whatever was collected so far.
              logger.ErrorException("Error while searching Lucene index", e);
          }
      }

      return results;
  }
  /// <summary>
  /// Flushes and disposes the writer, analyzer, searcher and directory, if they exist.
  /// </summary>
  /// <remarks>
  /// The backing fields are checked instead of the lazy properties: reading the
  /// properties would open a new writer/directory only to close it again.
  /// </remarks>
  public static void CloseAll()
  {
      logger.Debug("Lucene: CloseAll");

      // Same lock as indexing/searching, so nothing is disposed mid-operation.
      lock (lockOb)
      {
          if (_writer != null)
          {
              logger.Debug("Lucene: CloseAll - Writer is alive");
              _writer.Flush(true, true, true);
              _writer.Commit();
              _writer.WaitForMerges();
              _writer.Dispose();
              _writer = null;
          }

          if (analyzer != null)
          {
              logger.Debug("Lucene: CloseAll - Analyzer is alive");
              analyzer.Close();
              analyzer.Dispose();
              analyzer = null;
          }

          if (searcher != null)
          {
              logger.Debug("Lucene: CloseAll - Searcher is alive");
              searcher.Dispose();
              searcher = null;
          }

          if (_directory != null)
          {
              logger.Debug("Lucene: CloseAll - Directory is alive");
              _directory.Dispose();
              _directory = null;
          }
      }
  }
  464. }
  465. }