2
0

TmdbSearch.cs 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Globalization;
  4. using System.Linq;
  5. using System.Net;
  6. using System.Threading;
  7. using System.Threading.Tasks;
  8. using System.Text.RegularExpressions;
  9. using MediaBrowser.Common.Net;
  10. using MediaBrowser.Controller.Library;
  11. using MediaBrowser.Controller.Providers;
  12. using MediaBrowser.Model.Entities;
  13. using MediaBrowser.Model.Providers;
  14. using MediaBrowser.Model.Serialization;
  15. using MediaBrowser.Providers.Tmdb.Models.Search;
  16. using Microsoft.Extensions.Logging;
  17. namespace MediaBrowser.Providers.Tmdb.Movies
  18. {
  /// <summary>
  /// Runs name-based searches against the TMDb search endpoint and maps the
  /// returned entries to <see cref="RemoteSearchResult"/> instances.
  /// </summary>
  public class TmdbSearch
  {
      // Placeholders: {0} = url-encoded query, {1} = api key, {2} = language, {3} = search type.
      private const string _searchURL = TmdbUtils.BaseTmdbApiUrl + @"3/search/{3}?api_key={1}&query={0}&language={2}";

      // The release / first-air dates in search hits are always in this exact format.
      private const string _dateFormat = "yyyy-MM-dd";

      private static readonly CultureInfo _usCulture = new CultureInfo("en-US");

      // Matches everything from the first opening bracket-like character to the last closing one.
      private static readonly Regex _cleanEnclosed = new Regex(@"\p{Ps}.*\p{Pe}", RegexOptions.Compiled);

      // Matches runs of non-word characters (underscores included).
      private static readonly Regex _cleanNonWord = new Regex(@"[\W_]+", RegexOptions.Compiled);

      // Matches the first common release token (year, season, resolution, codec, ...)
      // together with the remainder of the string.
      private static readonly Regex _cleanStopWords = new Regex(
          @"\b( # Start at word boundary
              19[0-9]{2}|20[0-9]{2}| # 1900-2099
              S[0-9]{2}| # Season
              E[0-9]{2}| # Episode
              (2160|1080|720|576|480)[ip]?| # Resolution
              [xh]?264| # Encoding
              (web|dvd|bd|hdtv|hd)rip| # *Rip
              web|hdtv|mp4|bluray|ktr|dl|single|imageset|internal|doku|dubbed|retail|xxx|flac
          ).* # Match rest of string",
          RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase);

      private readonly ILogger _logger;
      private readonly IJsonSerializer _json;
      private readonly ILibraryManager _libraryManager;

      /// <summary>
      /// Initializes a new instance of the <see cref="TmdbSearch"/> class.
      /// </summary>
      /// <param name="logger">Logger used to report which name is being searched.</param>
      /// <param name="json">Serializer used to deserialize the search responses.</param>
      /// <param name="libraryManager">Library manager used to split a raw name into name and year.</param>
      public TmdbSearch(ILogger logger, IJsonSerializer json, ILibraryManager libraryManager)
      {
          _logger = logger;
          _json = json;
          _libraryManager = libraryManager;
      }

      /// <summary>
      /// Searches for TV series matching the given lookup info (search type "tv").
      /// </summary>
      /// <param name="idInfo">The series lookup info; its name, year and metadata language are used.</param>
      /// <param name="cancellationToken">Token passed on to the underlying requests.</param>
      /// <returns>The matching results, filtered by year when both sides have one.</returns>
      public Task<IEnumerable<RemoteSearchResult>> GetSearchResults(SeriesInfo idInfo, CancellationToken cancellationToken)
      {
          return GetSearchResults(idInfo, "tv", cancellationToken);
      }

      /// <summary>
      /// Searches for movies matching the given lookup info (search type "movie").
      /// </summary>
      /// <param name="idInfo">The lookup info; its name, year and metadata language are used.</param>
      /// <param name="cancellationToken">Token passed on to the underlying requests.</param>
      /// <returns>The matching results, filtered by year when both sides have one.</returns>
      public Task<IEnumerable<RemoteSearchResult>> GetMovieSearchResults(ItemLookupInfo idInfo, CancellationToken cancellationToken)
      {
          return GetSearchResults(idInfo, "movie", cancellationToken);
      }

      /// <summary>
      /// Searches for collections matching the given lookup info (search type "collection").
      /// </summary>
      /// <param name="idInfo">The box set lookup info; its name, year and metadata language are used.</param>
      /// <param name="cancellationToken">Token passed on to the underlying requests.</param>
      /// <returns>The matching results, filtered by year when both sides have one.</returns>
      public Task<IEnumerable<RemoteSearchResult>> GetSearchResults(BoxSetInfo idInfo, CancellationToken cancellationToken)
      {
          return GetSearchResults(idInfo, "collection", cancellationToken);
      }

      /// <summary>
      /// Shared search routine: parses the name, searches in the item's metadata
      /// language with an English fallback, retries once with a more aggressively
      /// cleaned name, and finally filters the hits by year.
      /// </summary>
      /// <param name="idInfo">The lookup info to search for.</param>
      /// <param name="searchType">The search type segment of the url ("tv", "movie", "collection").</param>
      /// <param name="cancellationToken">Token passed on to the underlying requests.</param>
      /// <returns>
      /// An empty sequence when there is nothing searchable in the name; otherwise the
      /// hits whose production year is unknown or within one year of the requested year.
      /// </returns>
      private async Task<IEnumerable<RemoteSearchResult>> GetSearchResults(ItemLookupInfo idInfo, string searchType, CancellationToken cancellationToken)
      {
          var name = idInfo.Name;
          var year = idInfo.Year;

          if (string.IsNullOrWhiteSpace(name))
          {
              return new List<RemoteSearchResult>();
          }

          var tmdbSettings = await TmdbMovieProvider.Current.GetTmdbSettings(cancellationToken).ConfigureAwait(false);
          var tmdbImageUrl = tmdbSettings.images.GetImageUrl("original");

          // TODO: Investigate: Does this mean we are reparsing already parsed ItemLookupInfo?
          var parsedName = _libraryManager.ParseName(name);
          name = parsedName.Name;

          // An explicitly supplied year wins over a year found inside the name.
          year ??= parsedName.Year;

          _logger.LogInformation("TmdbSearch: Finding id for item: {0} ({1})", name, year);
          var language = idInfo.MetadataLanguage.ToLowerInvariant();

          // Replace sequences of non-word characters with space.
          // TMDB expects a space separated list of words, make sure that is the case.
          name = _cleanNonWord.Replace(name, " ").Trim();

          // A name made up solely of punctuation is empty at this point; there is
          // nothing to query, so report "no results" instead of letting the request
          // builder throw.
          if (string.IsNullOrWhiteSpace(name))
          {
              return new List<RemoteSearchResult>();
          }

          var results = await SearchWithEnglishFallback(name, searchType, year, language, tmdbImageUrl, cancellationToken).ConfigureAwait(false);

          // TODO: retrying alternatives should be done outside the search
          // provider so that the retry logic can be common for all search
          // providers
          if (results.Count == 0)
          {
              var name2 = CleanName(parsedName.Name);

              // Search again only if the cleaned name is different and still has content.
              if (!string.Equals(name2, name) && !string.IsNullOrWhiteSpace(name2))
              {
                  _logger.LogInformation("TmdbSearch: Finding id for item: {0} ({1})", name2, year);
                  results = await SearchWithEnglishFallback(name2, searchType, year, language, tmdbImageUrl, cancellationToken).ConfigureAwait(false);
              }
          }

          return results.Where(i =>
          {
              if (year.HasValue && i.ProductionYear.HasValue)
              {
                  // Allow one year tolerance
                  return Math.Abs(year.Value - i.ProductionYear.Value) <= 1;
              }

              return true;
          });
      }

      /// <summary>
      /// Searches in <paramref name="language"/> and, when that yields nothing and the
      /// language is not already English, searches once more in English.
      /// </summary>
      private async Task<List<RemoteSearchResult>> SearchWithEnglishFallback(string name, string searchType, int? year, string language, string baseImageUrl, CancellationToken cancellationToken)
      {
          var results = await GetSearchResults(name, searchType, year, language, baseImageUrl, cancellationToken).ConfigureAwait(false);

          if (results.Count == 0 && !string.Equals(language, "en", StringComparison.OrdinalIgnoreCase))
          {
              results = await GetSearchResults(name, searchType, year, "en", baseImageUrl, cancellationToken).ConfigureAwait(false);
          }

          return results;
      }

      /// <summary>
      /// Produces the fallback search name: drops bracketed sections, collapses
      /// non-word characters to spaces, cuts the name at the first stop word and trims.
      /// </summary>
      private static string CleanName(string name)
      {
          // Remove things enclosed in []{}() etc
          var cleaned = _cleanEnclosed.Replace(name, string.Empty);

          // Replace sequences of non-word characters with space
          cleaned = _cleanNonWord.Replace(cleaned, " ");

          // Clean based on common stop words / tokens
          cleaned = _cleanStopWords.Replace(cleaned, string.Empty);

          return cleaned.Trim();
      }

      /// <summary>
      /// Routes a search to the TV-specific or the generic implementation based on <paramref name="type"/>.
      /// </summary>
      private Task<List<RemoteSearchResult>> GetSearchResults(string name, string type, int? year, string language, string baseImageUrl, CancellationToken cancellationToken)
      {
          return type switch
          {
              "tv" => GetSearchResultsTv(name, year, language, baseImageUrl, cancellationToken),
              _ => GetSearchResultsGeneric(name, type, year, language, baseImageUrl, cancellationToken)
          };
      }

      /// <summary>
      /// Performs a non-TV search and maps the <see cref="MovieResult"/> entries of the response.
      /// </summary>
      /// <exception cref="ArgumentException"><paramref name="name"/> is null, empty or whitespace.</exception>
      private async Task<List<RemoteSearchResult>> GetSearchResultsGeneric(string name, string type, int? year, string language, string baseImageUrl, CancellationToken cancellationToken)
      {
          var request = CreateSearchRequest(name, type, language, cancellationToken);

          using (var response = await TmdbMovieProvider.Current.GetMovieDbResponse(request).ConfigureAwait(false))
          using (var json = response.Content)
          {
              var searchResults = await _json.DeserializeFromStreamAsync<TmdbSearchResult<MovieResult>>(json).ConfigureAwait(false);
              var results = searchResults.Results ?? new List<MovieResult>();

              return results
                  .Select(i => CreateRemoteResult(
                      i.Title ?? i.Name ?? i.Original_Title,
                      i.Poster_Path,
                      i.Release_Date,
                      i.Id.ToString(_usCulture),
                      baseImageUrl))
                  .ToList();
          }
      }

      /// <summary>
      /// Performs a "tv" search and maps the <see cref="TvResult"/> entries of the response.
      /// </summary>
      /// <exception cref="ArgumentException"><paramref name="name"/> is null, empty or whitespace.</exception>
      private async Task<List<RemoteSearchResult>> GetSearchResultsTv(string name, int? year, string language, string baseImageUrl, CancellationToken cancellationToken)
      {
          var request = CreateSearchRequest(name, "tv", language, cancellationToken);

          using (var response = await TmdbMovieProvider.Current.GetMovieDbResponse(request).ConfigureAwait(false))
          using (var json = response.Content)
          {
              var searchResults = await _json.DeserializeFromStreamAsync<TmdbSearchResult<TvResult>>(json).ConfigureAwait(false);
              var results = searchResults.Results ?? new List<TvResult>();

              return results
                  .Select(i => CreateRemoteResult(
                      i.Name ?? i.Original_Name,
                      i.Poster_Path,
                      i.First_Air_Date,
                      i.Id.ToString(_usCulture),
                      baseImageUrl))
                  .ToList();
          }
      }

      /// <summary>
      /// Validates the search name and builds the request options for a search call.
      /// </summary>
      /// <exception cref="ArgumentException"><paramref name="name"/> is null, empty or whitespace.</exception>
      private static HttpRequestOptions CreateSearchRequest(string name, string type, string language, CancellationToken cancellationToken)
      {
          if (string.IsNullOrWhiteSpace(name))
          {
              // The first constructor argument is the message; the parameter name goes second.
              throw new ArgumentException("The search name must not be null, empty or whitespace.", nameof(name));
          }

          return new HttpRequestOptions
          {
              Url = string.Format(CultureInfo.InvariantCulture, _searchURL, WebUtility.UrlEncode(name), TmdbUtils.ApiKey, language, type),
              CancellationToken = cancellationToken,
              AcceptHeader = TmdbUtils.AcceptHeader
          };
      }

      /// <summary>
      /// Maps the fields of a single search hit to a <see cref="RemoteSearchResult"/>.
      /// </summary>
      /// <param name="name">The display name chosen for the hit.</param>
      /// <param name="posterPath">The poster path of the hit; no image url is set when it is blank.</param>
      /// <param name="date">The release / first-air date string of the hit; may be blank.</param>
      /// <param name="tmdbId">The already formatted id stored under <see cref="MetadataProviders.Tmdb"/>.</param>
      /// <param name="baseImageUrl">The prefix prepended to <paramref name="posterPath"/>.</param>
      private static RemoteSearchResult CreateRemoteResult(string name, string posterPath, string date, string tmdbId, string baseImageUrl)
      {
          var remoteResult = new RemoteSearchResult
          {
              SearchProviderName = TmdbMovieProvider.Current.Name,
              Name = name,
              ImageUrl = string.IsNullOrWhiteSpace(posterPath) ? null : baseImageUrl + posterPath
          };

          // Interpret the date-only value as UTC while parsing. Parsing it as an
          // unspecified-kind value and calling ToUniversalTime() afterwards would
          // apply the host's local offset and could move the date (and with it the
          // production year) across a day / year boundary.
          if (!string.IsNullOrWhiteSpace(date)
              && DateTime.TryParseExact(
                  date,
                  _dateFormat,
                  _usCulture,
                  DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal,
                  out var premiereDate))
          {
              remoteResult.PremiereDate = premiereDate;
              remoteResult.ProductionYear = premiereDate.Year;
          }

          remoteResult.SetProviderId(MetadataProviders.Tmdb, tmdbId);
          return remoteResult;
      }
  }
  217. }
  218. }