// TmdbSearch.cs (11 KB)
  1. using System;
  2. using System.Collections.Generic;
  3. using System.Globalization;
  4. using System.Linq;
  5. using System.Net;
  6. using System.Text.RegularExpressions;
  7. using System.Threading;
  8. using System.Threading.Tasks;
  9. using MediaBrowser.Common.Net;
  10. using MediaBrowser.Controller.Library;
  11. using MediaBrowser.Controller.Providers;
  12. using MediaBrowser.Model.Entities;
  13. using MediaBrowser.Model.Providers;
  14. using MediaBrowser.Model.Serialization;
  15. using MediaBrowser.Providers.Plugins.Tmdb.Models.Search;
  16. using Microsoft.Extensions.Logging;
  namespace MediaBrowser.Providers.Plugins.Tmdb.Movies
  {
      /// <summary>
      /// Runs name-based searches against the TMDb <c>3/search/{type}</c> endpoint and
      /// converts the responses into <see cref="RemoteSearchResult"/> instances.
      /// </summary>
      public class TmdbSearch
      {
          private static readonly CultureInfo _usCulture = new CultureInfo("en-US");

          // Anything between an opening and a closing bracket-like punctuation character.
          private static readonly Regex _cleanEnclosed = new Regex(@"\p{Ps}.*\p{Pe}", RegexOptions.Compiled);

          // Runs of non-word characters (and underscores).
          private static readonly Regex _cleanNonWord = new Regex(@"[\W_]+", RegexOptions.Compiled);

          // Common release tokens and everything that follows them.
          // The pattern is compiled with IgnorePatternWhitespace, so the layout
          // (indentation, line breaks, '#' comments) is not part of the match.
          private static readonly Regex _cleanStopWords = new Regex(@"\b( # Start at word boundary
              19[0-9]{2}|20[0-9]{2}| # 1900-2099
              S[0-9]{2}| # Season
              E[0-9]{2}| # Episode
              (2160|1080|720|576|480)[ip]?| # Resolution
              [xh]?264| # Encoding
              (web|dvd|bd|hdtv|hd)rip| # *Rip
              web|hdtv|mp4|bluray|ktr|dl|single|imageset|internal|doku|dubbed|retail|xxx|flac
              ).* # Match rest of string",
              RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase);

          // Placeholders: {0} = url-encoded query, {1} = api key, {2} = language, {3} = search type.
          private const string _searchURL = TmdbUtils.BaseTmdbApiUrl + @"3/search/{3}?api_key={1}&query={0}&language={2}";

          private readonly ILogger _logger;
          private readonly IJsonSerializer _json;
          private readonly ILibraryManager _libraryManager;

          /// <summary>
          /// Initializes a new instance of the <see cref="TmdbSearch"/> class.
          /// </summary>
          /// <param name="logger">Logger used to record each search attempt.</param>
          /// <param name="json">Serializer used to read the TMDb responses.</param>
          /// <param name="libraryManager">Library manager used to parse name and year out of the lookup name.</param>
          public TmdbSearch(ILogger logger, IJsonSerializer json, ILibraryManager libraryManager)
          {
              _logger = logger;
              _json = json;
              _libraryManager = libraryManager;
          }

          /// <summary>
          /// Searches TMDb for series (search type <c>tv</c>) matching the lookup info.
          /// </summary>
          /// <param name="idInfo">The lookup info providing name, year and metadata language.</param>
          /// <param name="cancellationToken">The cancellation token.</param>
          /// <returns>The matching results, filtered by year when both sides have one.</returns>
          public Task<IEnumerable<RemoteSearchResult>> GetSearchResults(SeriesInfo idInfo, CancellationToken cancellationToken)
          {
              return GetSearchResults(idInfo, "tv", cancellationToken);
          }

          /// <summary>
          /// Searches TMDb for movies (search type <c>movie</c>) matching the lookup info.
          /// </summary>
          /// <param name="idInfo">The lookup info providing name, year and metadata language.</param>
          /// <param name="cancellationToken">The cancellation token.</param>
          /// <returns>The matching results, filtered by year when both sides have one.</returns>
          public Task<IEnumerable<RemoteSearchResult>> GetMovieSearchResults(ItemLookupInfo idInfo, CancellationToken cancellationToken)
          {
              return GetSearchResults(idInfo, "movie", cancellationToken);
          }

          /// <summary>
          /// Searches TMDb for collections (search type <c>collection</c>) matching the lookup info.
          /// </summary>
          /// <param name="idInfo">The lookup info providing name, year and metadata language.</param>
          /// <param name="cancellationToken">The cancellation token.</param>
          /// <returns>The matching results, filtered by year when both sides have one.</returns>
          public Task<IEnumerable<RemoteSearchResult>> GetSearchResults(BoxSetInfo idInfo, CancellationToken cancellationToken)
          {
              return GetSearchResults(idInfo, "collection", cancellationToken);
          }

          /// <summary>
          /// Shared search pipeline: parses and cleans the name, searches in the requested
          /// language, retries in English, then retries both with a more aggressively
          /// cleaned name, and finally applies a one-year tolerance filter.
          /// </summary>
          /// <param name="idInfo">The lookup info providing name, year and metadata language.</param>
          /// <param name="searchType">The TMDb search type segment ("tv", "movie", "collection").</param>
          /// <param name="cancellationToken">The cancellation token.</param>
          /// <returns>The (lazily) year-filtered results; empty when there is nothing to search for.</returns>
          private async Task<IEnumerable<RemoteSearchResult>> GetSearchResults(ItemLookupInfo idInfo, string searchType, CancellationToken cancellationToken)
          {
              var name = idInfo.Name;
              var year = idInfo.Year;

              if (string.IsNullOrWhiteSpace(name))
              {
                  return Enumerable.Empty<RemoteSearchResult>();
              }

              var tmdbSettings = await TmdbMovieProvider.Current.GetTmdbSettings(cancellationToken).ConfigureAwait(false);
              var tmdbImageUrl = tmdbSettings.images.GetImageUrl("original");

              // ParseName is required here.
              // Caller provides the filename with extension stripped and NOT the parsed filename
              var parsedName = _libraryManager.ParseName(name);
              year ??= parsedName.Year;

              // Fall back to English (the retry language used below) instead of
              // dereferencing a missing metadata language.
              var language = (idInfo.MetadataLanguage ?? "en").ToLowerInvariant();
              var isEnglish = string.Equals(language, "en", StringComparison.OrdinalIgnoreCase);

              // Replace sequences of non-word characters with space
              // TMDB expects a space separated list of words make sure that is the case
              name = _cleanNonWord.Replace(parsedName.Name, " ").Trim();

              if (name.Length == 0)
              {
                  // The name consisted solely of punctuation; the alternative name built
                  // below would be empty as well, so there is nothing left to query.
                  return Enumerable.Empty<RemoteSearchResult>();
              }

              var results = await SearchWithEnglishFallback(name).ConfigureAwait(false);

              // TODO: retrying alternatives should be done outside the search
              // provider so that the retry logic can be common for all search
              // providers
              if (results.Count == 0)
              {
                  // Remove things enclosed in []{}() etc
                  var alternativeName = _cleanEnclosed.Replace(parsedName.Name, string.Empty);

                  // Replace sequences of non-word characters with space
                  alternativeName = _cleanNonWord.Replace(alternativeName, " ");

                  // Clean based on common stop words / tokens, then trim whitespace
                  alternativeName = _cleanStopWords.Replace(alternativeName, string.Empty).Trim();

                  // Search again only if the new name is usable and actually different
                  if (!string.IsNullOrWhiteSpace(alternativeName)
                      && !string.Equals(alternativeName, name, StringComparison.Ordinal))
                  {
                      results = await SearchWithEnglishFallback(alternativeName).ConfigureAwait(false);
                  }
              }

              // Allow one year tolerance; results without a year (or a search without one) always pass.
              return results.Where(i =>
                  !year.HasValue
                  || !i.ProductionYear.HasValue
                  || Math.Abs(year.Value - i.ProductionYear.Value) <= 1);

              // Searches in the requested language first and, when that yields nothing
              // and the requested language is not English, once more in English.
              async Task<List<RemoteSearchResult>> SearchWithEnglishFallback(string query)
              {
                  _logger.LogInformation("TmdbSearch: Finding id for item: {0} ({1})", query, year);

                  var found = await GetSearchResults(query, searchType, year, language, tmdbImageUrl, cancellationToken).ConfigureAwait(false);

                  if (found.Count == 0 && !isEnglish)
                  {
                      found = await GetSearchResults(query, searchType, year, "en", tmdbImageUrl, cancellationToken).ConfigureAwait(false);
                  }

                  return found;
              }
          }

          /// <summary>
          /// Dispatches to the TV specific search for type "tv" and to the generic search otherwise.
          /// </summary>
          private Task<List<RemoteSearchResult>> GetSearchResults(string name, string type, int? year, string language, string baseImageUrl, CancellationToken cancellationToken)
              => type switch
              {
                  "tv" => GetSearchResultsTv(name, year, language, baseImageUrl, cancellationToken),
                  _ => GetSearchResultsGeneric(name, type, year, language, baseImageUrl, cancellationToken)
              };

          /// <summary>
          /// Searches the given TMDb search type and maps the movie-shaped results.
          /// </summary>
          /// <param name="name">The cleaned search text.</param>
          /// <param name="type">The TMDb search type segment.</param>
          /// <param name="year">Not sent to TMDb; year filtering is done by the calling pipeline.</param>
          /// <param name="language">The language code placed in the query string.</param>
          /// <param name="baseImageUrl">Prefix prepended to each result's poster path.</param>
          /// <param name="cancellationToken">The cancellation token.</param>
          /// <exception cref="ArgumentException"><paramref name="name"/> is null, empty or whitespace.</exception>
          private async Task<List<RemoteSearchResult>> GetSearchResultsGeneric(string name, string type, int? year, string language, string baseImageUrl, CancellationToken cancellationToken)
          {
              var searchResults = await GetSearchResponse<TmdbSearchResult<MovieResult>>(name, type, language, cancellationToken).ConfigureAwait(false);

              // A missing result list (or an empty response body) is treated as "no matches".
              var results = searchResults?.Results ?? new List<MovieResult>();

              return results
                  .Select(i => CreateRemoteResult(
                      i.Title ?? i.Name ?? i.Original_Title,
                      i.Poster_Path,
                      i.Release_Date,
                      i.Id.ToString(_usCulture),
                      baseImageUrl))
                  .ToList();
          }

          /// <summary>
          /// Searches the TMDb "tv" search type and maps the series-shaped results.
          /// </summary>
          /// <param name="name">The cleaned search text.</param>
          /// <param name="year">Not sent to TMDb; year filtering is done by the calling pipeline.</param>
          /// <param name="language">The language code placed in the query string.</param>
          /// <param name="baseImageUrl">Prefix prepended to each result's poster path.</param>
          /// <param name="cancellationToken">The cancellation token.</param>
          /// <exception cref="ArgumentException"><paramref name="name"/> is null, empty or whitespace.</exception>
          private async Task<List<RemoteSearchResult>> GetSearchResultsTv(string name, int? year, string language, string baseImageUrl, CancellationToken cancellationToken)
          {
              var searchResults = await GetSearchResponse<TmdbSearchResult<TvResult>>(name, "tv", language, cancellationToken).ConfigureAwait(false);

              // A missing result list (or an empty response body) is treated as "no matches".
              var results = searchResults?.Results ?? new List<TvResult>();

              return results
                  .Select(i => CreateRemoteResult(
                      i.Name ?? i.Original_Name,
                      i.Poster_Path,
                      i.First_Air_Date,
                      i.Id.ToString(_usCulture),
                      baseImageUrl))
                  .ToList();
          }

          /// <summary>
          /// Builds the search url, performs the request and deserializes the response body.
          /// </summary>
          /// <typeparam name="T">The type the response is deserialized into.</typeparam>
          /// <exception cref="ArgumentException"><paramref name="name"/> is null, empty or whitespace.</exception>
          private async Task<T> GetSearchResponse<T>(string name, string type, string language, CancellationToken cancellationToken)
          {
              if (string.IsNullOrWhiteSpace(name))
              {
                  throw new ArgumentException("A non-empty search name is required.", nameof(name));
              }

              var url = string.Format(
                  CultureInfo.InvariantCulture,
                  _searchURL,
                  WebUtility.UrlEncode(name),
                  TmdbUtils.ApiKey,
                  language,
                  type);

              using var response = await TmdbMovieProvider.Current.GetMovieDbResponse(new HttpRequestOptions
              {
                  Url = url,
                  CancellationToken = cancellationToken,
                  AcceptHeader = TmdbUtils.AcceptHeader
              }).ConfigureAwait(false);

              using var json = response.Content;

              return await _json.DeserializeFromStreamAsync<T>(json).ConfigureAwait(false);
          }

          /// <summary>
          /// Creates a <see cref="RemoteSearchResult"/> from the fields shared by all TMDb result shapes.
          /// </summary>
          /// <param name="name">The display name of the result.</param>
          /// <param name="posterPath">The poster path; no image url is set when blank.</param>
          /// <param name="date">The release / first air date in <c>yyyy-MM-dd</c> form; ignored when blank or unparsable.</param>
          /// <param name="tmdbId">The TMDb id, already formatted as a string.</param>
          /// <param name="baseImageUrl">Prefix prepended to <paramref name="posterPath"/>.</param>
          private static RemoteSearchResult CreateRemoteResult(string name, string posterPath, string date, string tmdbId, string baseImageUrl)
          {
              var remoteResult = new RemoteSearchResult
              {
                  SearchProviderName = TmdbMovieProvider.Current.Name,
                  Name = name,
                  ImageUrl = string.IsNullOrWhiteSpace(posterPath) ? null : baseImageUrl + posterPath
              };

              // These dates are always in this exact format
              if (!string.IsNullOrWhiteSpace(date)
                  && DateTime.TryParseExact(date, "yyyy-MM-dd", _usCulture, DateTimeStyles.None, out var parsedDate))
              {
                  remoteResult.PremiereDate = parsedDate.ToUniversalTime();

                  // Use the calendar year of the date as published. The parsed value has an
                  // unspecified kind, so ToUniversalTime() treats it as local time and can
                  // shift a January 1st date into the previous year on servers east of UTC.
                  remoteResult.ProductionYear = parsedDate.Year;
              }

              remoteResult.SetProviderId(MetadataProvider.Tmdb, tmdbId);

              return remoteResult;
          }
      }
  }