TmdbSearch.cs 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265
  1. #pragma warning disable CS1591
  2. using System;
  3. using System.Collections.Generic;
  4. using System.Globalization;
  5. using System.Linq;
  6. using System.Net;
  7. using System.Text.RegularExpressions;
  8. using System.Threading;
  9. using System.Threading.Tasks;
  10. using MediaBrowser.Common.Net;
  11. using MediaBrowser.Controller.Library;
  12. using MediaBrowser.Controller.Providers;
  13. using MediaBrowser.Model.Entities;
  14. using MediaBrowser.Model.Providers;
  15. using MediaBrowser.Model.Serialization;
  16. using MediaBrowser.Providers.Plugins.Tmdb.Models.Search;
  17. using Microsoft.Extensions.Logging;
  18. namespace MediaBrowser.Providers.Plugins.Tmdb.Movies
  19. {
  20. public class TmdbSearch
  21. {
  /// <summary>
  /// Composite format string for the TMDB search endpoint.
  /// Arguments: {0} = URL-encoded query, {1} = API key, {2} = language, {3} = search type.
  /// </summary>
  private const string _searchURL = TmdbUtils.BaseTmdbApiUrl + @"3/search/{3}?api_key={1}&query={0}&language={2}";

  /// <summary>Culture used when parsing response dates and formatting ids.</summary>
  private static readonly CultureInfo _usCulture = new CultureInfo("en-US");

  /// <summary>
  /// Matches from an opening bracket-like character (Unicode category Ps) through the
  /// last closing one (category Pe); the match is greedy.
  /// </summary>
  private static readonly Regex _cleanEnclosed = new Regex(@"\p{Ps}.*\p{Pe}", RegexOptions.Compiled);

  /// <summary>Matches one or more consecutive non-word characters or underscores.</summary>
  private static readonly Regex _cleanNonWord = new Regex(@"[\W_]+", RegexOptions.Compiled);

  /// <summary>
  /// Matches the first stop word / release token (year, season, episode, resolution,
  /// encoding, rip type, ...) together with everything that follows it.
  /// Whitespace and '#' comments inside the pattern are ignored by the regex engine.
  /// </summary>
  private static readonly Regex _cleanStopWords = new Regex(
      @"\b( # Start at word boundary
          19[0-9]{2}|20[0-9]{2}| # 1900-2099
          S[0-9]{2}| # Season
          E[0-9]{2}| # Episode
          (2160|1080|720|576|480)[ip]?| # Resolution
          [xh]?264| # Encoding
          (web|dvd|bd|hdtv|hd)rip| # *Rip
          web|hdtv|mp4|bluray|ktr|dl|single|imageset|internal|doku|dubbed|retail|xxx|flac
          ).* # Match rest of string",
      RegexOptions.IgnoreCase | RegexOptions.IgnorePatternWhitespace | RegexOptions.Compiled);

  /// <summary>Logger that receives search progress messages.</summary>
  private readonly ILogger _logger;

  /// <summary>Serializer used to deserialize TMDB responses.</summary>
  private readonly IJsonSerializer _json;

  /// <summary>Library manager used to parse the raw item name.</summary>
  private readonly ILibraryManager _libraryManager;
  /// <summary>
  /// Initializes a new instance of the <see cref="TmdbSearch"/> class.
  /// </summary>
  /// <param name="logger">Logger that receives search progress messages.</param>
  /// <param name="json">Serializer used to deserialize TMDB responses.</param>
  /// <param name="libraryManager">Library manager used to parse item names.</param>
  public TmdbSearch(ILogger logger, IJsonSerializer json, ILibraryManager libraryManager)
      => (_logger, _json, _libraryManager) = (logger, json, libraryManager);
  /// <summary>
  /// Searches TMDB for a series, using the "tv" search type.
  /// </summary>
  /// <param name="idInfo">Lookup info describing the series to find.</param>
  /// <param name="cancellationToken">Token used to cancel the search.</param>
  /// <returns>A task producing the matching remote search results.</returns>
  public Task<IEnumerable<RemoteSearchResult>> GetSearchResults(SeriesInfo idInfo, CancellationToken cancellationToken)
      => GetSearchResults(idInfo, "tv", cancellationToken);
  /// <summary>
  /// Searches TMDB for a movie, using the "movie" search type.
  /// </summary>
  /// <param name="idInfo">Lookup info describing the movie to find.</param>
  /// <param name="cancellationToken">Token used to cancel the search.</param>
  /// <returns>A task producing the matching remote search results.</returns>
  public Task<IEnumerable<RemoteSearchResult>> GetMovieSearchResults(ItemLookupInfo idInfo, CancellationToken cancellationToken)
      => GetSearchResults(idInfo, "movie", cancellationToken);
  /// <summary>
  /// Searches TMDB for a box set, using the "collection" search type.
  /// </summary>
  /// <param name="idInfo">Lookup info describing the box set to find.</param>
  /// <param name="cancellationToken">Token used to cancel the search.</param>
  /// <returns>A task producing the matching remote search results.</returns>
  public Task<IEnumerable<RemoteSearchResult>> GetSearchResults(BoxSetInfo idInfo, CancellationToken cancellationToken)
      => GetSearchResults(idInfo, "collection", cancellationToken);
  /// <summary>
  /// Searches TMDB for the item described by <paramref name="idInfo"/>. The parsed name is
  /// searched first; when that yields nothing, a more aggressively cleaned name is tried.
  /// Each attempt is repeated in English when the requested language returns no results.
  /// </summary>
  /// <param name="idInfo">Lookup info supplying the raw name, optional year and metadata language.</param>
  /// <param name="searchType">Search type segment placed in the request URL ("tv", "movie", "collection").</param>
  /// <param name="cancellationToken">Token used to cancel the remote requests.</param>
  /// <returns>
  /// The results found. When both the requested year and a result's production year are known,
  /// results more than one year apart are filtered out. Empty when the name is blank or contains
  /// nothing searchable after cleaning.
  /// </returns>
  private async Task<IEnumerable<RemoteSearchResult>> GetSearchResults(ItemLookupInfo idInfo, string searchType, CancellationToken cancellationToken)
  {
      var name = idInfo.Name;
      var year = idInfo.Year;

      if (string.IsNullOrWhiteSpace(name))
      {
          return new List<RemoteSearchResult>();
      }

      var tmdbSettings = await TmdbMovieProvider.Current.GetTmdbSettings(cancellationToken).ConfigureAwait(false);
      var tmdbImageUrl = tmdbSettings.images.GetImageUrl("original");

      // ParseName is required here.
      // Caller provides the filename with extension stripped and NOT the parsed filename
      var parsedName = _libraryManager.ParseName(name);

      // Treat a missing parsed name as empty so the regex helpers below cannot throw on null.
      var rawName = parsedName.Name ?? string.Empty;

      // A year supplied by the caller wins over one found in the name.
      year ??= parsedName.Year;

      var language = idInfo.MetadataLanguage.ToLowerInvariant();

      // Replace sequences of non-word characters with space
      // TMDB expects a space separated list of words make sure that is the case
      name = _cleanNonWord.Replace(rawName, " ").Trim();

      var results = new List<RemoteSearchResult>();

      // A name made up solely of non-word characters is empty at this point. The search
      // methods reject blank names with an ArgumentException, so skip the request instead.
      if (name.Length > 0)
      {
          _logger.LogInformation("TmdbSearch: Finding id for item: {0} ({1})", name, year);
          results = await SearchWithEnglishFallback(name, searchType, year, language, tmdbImageUrl, cancellationToken).ConfigureAwait(false);
      }

      // TODO: retrying alternatives should be done outside the search
      // provider so that the retry logic can be common for all search
      // providers
      if (results.Count == 0)
      {
          // Remove things enclosed in []{}() etc
          var name2 = _cleanEnclosed.Replace(rawName, string.Empty);

          // Replace sequences of non-word characters with space
          name2 = _cleanNonWord.Replace(name2, " ");

          // Clean based on common stop words / tokens, then trim whitespace
          name2 = _cleanStopWords.Replace(name2, string.Empty).Trim();

          // Search again only if the new name is usable and differs from the one already tried
          if (name2.Length > 0 && !string.Equals(name2, name, StringComparison.Ordinal))
          {
              _logger.LogInformation("TmdbSearch: Finding id for item: {0} ({1})", name2, year);
              results = await SearchWithEnglishFallback(name2, searchType, year, language, tmdbImageUrl, cancellationToken).ConfigureAwait(false);
          }
      }

      // Allow one year tolerance; results without a year (or when no year was requested) are kept.
      return results.Where(i =>
          !year.HasValue
          || !i.ProductionYear.HasValue
          || Math.Abs(year.Value - i.ProductionYear.Value) <= 1);
  }

  /// <summary>
  /// Runs one search in <paramref name="language"/> and, when it returns nothing and the
  /// language is not already English, repeats it once with "en".
  /// </summary>
  private async Task<List<RemoteSearchResult>> SearchWithEnglishFallback(string name, string searchType, int? year, string language, string baseImageUrl, CancellationToken cancellationToken)
  {
      var results = await GetSearchResults(name, searchType, year, language, baseImageUrl, cancellationToken).ConfigureAwait(false);

      if (results.Count == 0 && !string.Equals(language, "en", StringComparison.OrdinalIgnoreCase))
      {
          results = await GetSearchResults(name, searchType, year, "en", baseImageUrl, cancellationToken).ConfigureAwait(false);
      }

      return results;
  }
  /// <summary>
  /// Routes a search to the method that handles the requested search type.
  /// </summary>
  /// <param name="name">Cleaned name to search for.</param>
  /// <param name="type">Search type; "tv" goes to the TV search, anything else to the generic search.</param>
  /// <param name="year">Optional year, passed through unchanged.</param>
  /// <param name="language">Language code placed in the request.</param>
  /// <param name="baseImageUrl">Prefix for poster paths in the results.</param>
  /// <param name="cancellationToken">Token used to cancel the request.</param>
  /// <returns>A task producing the list of results.</returns>
  private Task<List<RemoteSearchResult>> GetSearchResults(string name, string type, int? year, string language, string baseImageUrl, CancellationToken cancellationToken)
  {
      // TV responses are deserialized by a dedicated method; every other type shares the generic one.
      if (string.Equals(type, "tv", StringComparison.Ordinal))
      {
          return GetSearchResultsTv(name, year, language, baseImageUrl, cancellationToken);
      }

      return GetSearchResultsGeneric(name, type, year, language, baseImageUrl, cancellationToken);
  }
  /// <summary>
  /// Queries the TMDB search endpoint for the given search type and maps each returned
  /// <see cref="MovieResult"/> to a <see cref="RemoteSearchResult"/>.
  /// </summary>
  /// <param name="name">Name to search for; URL-encoded before being placed in the request.</param>
  /// <param name="type">Search type segment of the request URL.</param>
  /// <param name="year">Not used by this method.</param>
  /// <param name="language">Language code placed in the request.</param>
  /// <param name="baseImageUrl">Prefix prepended to each result's poster path.</param>
  /// <param name="cancellationToken">Token used to cancel the request.</param>
  /// <returns>The mapped results; empty when the response contains none.</returns>
  /// <exception cref="ArgumentException"><paramref name="name"/> is null, empty or whitespace.</exception>
  private async Task<List<RemoteSearchResult>> GetSearchResultsGeneric(string name, string type, int? year, string language, string baseImageUrl, CancellationToken cancellationToken)
  {
      if (string.IsNullOrWhiteSpace(name))
      {
          // The single-argument constructor takes a message, not a parameter name.
          throw new ArgumentException("Search name must not be null or whitespace.", nameof(name));
      }

      var url3 = string.Format(CultureInfo.InvariantCulture, _searchURL, WebUtility.UrlEncode(name), TmdbUtils.ApiKey, language, type);

      using (var response = await TmdbMovieProvider.Current.GetMovieDbResponse(new HttpRequestOptions
      {
          Url = url3,
          CancellationToken = cancellationToken,
          AcceptHeader = TmdbUtils.AcceptHeader
      }).ConfigureAwait(false))
      using (var json = response.Content)
      {
          var searchResults = await _json.DeserializeFromStreamAsync<TmdbSearchResult<MovieResult>>(json).ConfigureAwait(false);

          // A null payload or a missing result list is treated as "no results".
          var results = searchResults?.Results ?? new List<MovieResult>();

          return results
              .Select(i =>
              {
                  var remoteResult = new RemoteSearchResult
                  {
                      SearchProviderName = TmdbMovieProvider.Current.Name,
                      Name = i.Title ?? i.Name ?? i.Original_Title,
                      ImageUrl = string.IsNullOrWhiteSpace(i.Poster_Path) ? null : baseImageUrl + i.Poster_Path
                  };

                  // The value is a plain calendar date. Reading it as UTC keeps the day and
                  // year independent of the server's time zone; converting an unspecified-kind
                  // value with ToUniversalTime() would shift it by the local offset.
                  if (!string.IsNullOrWhiteSpace(i.Release_Date)
                      && DateTime.TryParseExact(
                          i.Release_Date,
                          "yyyy-MM-dd",
                          _usCulture,
                          DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal,
                          out var releaseDate))
                  {
                      remoteResult.PremiereDate = releaseDate;
                      remoteResult.ProductionYear = releaseDate.Year;
                  }

                  remoteResult.SetProviderId(MetadataProvider.Tmdb, i.Id.ToString(_usCulture));
                  return remoteResult;
              })
              .ToList();
      }
  }
  /// <summary>
  /// Queries the TMDB "tv" search endpoint and maps each returned <see cref="TvResult"/>
  /// to a <see cref="RemoteSearchResult"/>.
  /// </summary>
  /// <param name="name">Name to search for; URL-encoded before being placed in the request.</param>
  /// <param name="year">Not used by this method.</param>
  /// <param name="language">Language code placed in the request.</param>
  /// <param name="baseImageUrl">Prefix prepended to each result's poster path.</param>
  /// <param name="cancellationToken">Token used to cancel the request.</param>
  /// <returns>The mapped results; empty when the response contains none.</returns>
  /// <exception cref="ArgumentException"><paramref name="name"/> is null, empty or whitespace.</exception>
  private async Task<List<RemoteSearchResult>> GetSearchResultsTv(string name, int? year, string language, string baseImageUrl, CancellationToken cancellationToken)
  {
      if (string.IsNullOrWhiteSpace(name))
      {
          // The single-argument constructor takes a message, not a parameter name.
          throw new ArgumentException("Search name must not be null or whitespace.", nameof(name));
      }

      var url3 = string.Format(CultureInfo.InvariantCulture, _searchURL, WebUtility.UrlEncode(name), TmdbUtils.ApiKey, language, "tv");

      using (var response = await TmdbMovieProvider.Current.GetMovieDbResponse(new HttpRequestOptions
      {
          Url = url3,
          CancellationToken = cancellationToken,
          AcceptHeader = TmdbUtils.AcceptHeader
      }).ConfigureAwait(false))
      using (var json = response.Content)
      {
          var searchResults = await _json.DeserializeFromStreamAsync<TmdbSearchResult<TvResult>>(json).ConfigureAwait(false);

          // A null payload or a missing result list is treated as "no results".
          var results = searchResults?.Results ?? new List<TvResult>();

          return results
              .Select(i =>
              {
                  var remoteResult = new RemoteSearchResult
                  {
                      SearchProviderName = TmdbMovieProvider.Current.Name,
                      Name = i.Name ?? i.Original_Name,
                      ImageUrl = string.IsNullOrWhiteSpace(i.Poster_Path) ? null : baseImageUrl + i.Poster_Path
                  };

                  // The value is a plain calendar date. Reading it as UTC keeps the day and
                  // year independent of the server's time zone; converting an unspecified-kind
                  // value with ToUniversalTime() would shift it by the local offset.
                  if (!string.IsNullOrWhiteSpace(i.First_Air_Date)
                      && DateTime.TryParseExact(
                          i.First_Air_Date,
                          "yyyy-MM-dd",
                          _usCulture,
                          DateTimeStyles.AssumeUniversal | DateTimeStyles.AdjustToUniversal,
                          out var firstAirDate))
                  {
                      remoteResult.PremiereDate = firstAirDate;
                      remoteResult.ProductionYear = firstAirDate.Year;
                  }

                  remoteResult.SetProviderId(MetadataProvider.Tmdb, i.Id.ToString(_usCulture));
                  return remoteResult;
              })
              .ToList();
      }
  }
  219. }
  220. }