TmdbSearch.cs 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290
  1. #pragma warning disable CS1591
  2. using System;
  3. using System.Collections.Generic;
  4. using System.Globalization;
  5. using System.Linq;
  6. using System.Net;
  7. using System.Net.Http;
  8. using System.Net.Http.Headers;
  9. using System.Text.RegularExpressions;
  10. using System.Threading;
  11. using System.Threading.Tasks;
  12. using MediaBrowser.Controller.Library;
  13. using MediaBrowser.Controller.Providers;
  14. using MediaBrowser.Model.Entities;
  15. using MediaBrowser.Model.Providers;
  16. using MediaBrowser.Model.Serialization;
  17. using MediaBrowser.Providers.Plugins.Tmdb.Models.Search;
  18. using Microsoft.Extensions.Logging;
  19. namespace MediaBrowser.Providers.Plugins.Tmdb.Movies
  20. {
  21. public class TmdbSearch
  22. {
  // URL templates for the TMDb v3 search endpoints.
  // Placeholders: {0} = url-encoded query, {1} = api key, {2} = language.
  // {3} is the search type for the generic template and the year for the "WithYear" templates.
  private const string SearchUrl = TmdbUtils.BaseTmdbApiUrl + @"3/search/{3}?api_key={1}&query={0}&language={2}";
  private const string SearchUrlMovieWithYear = TmdbUtils.BaseTmdbApiUrl + @"3/search/movie?api_key={1}&query={0}&language={2}&primary_release_year={3}";
  private const string SearchUrlTvWithYear = TmdbUtils.BaseTmdbApiUrl + @"3/search/tv?api_key={1}&query={0}&language={2}&first_air_date_year={3}";

  // Culture used when parsing dates and formatting ids exchanged with TMDb.
  private static readonly CultureInfo _usCulture = new CultureInfo("en-US");

  // Matches any run of non-word characters (underscore included).
  private static readonly Regex _cleanNonWord = new Regex(@"[\W_]+", RegexOptions.Compiled);

  // Matches everything from an opening punctuation mark up to a closing punctuation mark, e.g. "[...]" or "(...)".
  private static readonly Regex _cleanEnclosed = new Regex(@"\p{Ps}.*\p{Pe}", RegexOptions.Compiled);

  // Matches the first release-style token (year, season/episode marker, resolution, codec, rip tag, ...)
  // together with everything that follows it.
  private static readonly Regex _cleanStopWords = new Regex(@"\b( # Start at word boundary
      19[0-9]{2}|20[0-9]{2}| # 1900-2099
      S[0-9]{2}| # Season
      E[0-9]{2}| # Episode
      (2160|1080|720|576|480)[ip]?| # Resolution
      [xh]?264| # Encoding
      (web|dvd|bd|hdtv|hd)rip| # *Rip
      web|hdtv|mp4|bluray|ktr|dl|single|imageset|internal|doku|dubbed|retail|xxx|flac
      ).* # Match rest of string",
      RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase);

  // Collaborators supplied through the constructor.
  private readonly ILogger _logger;
  private readonly IJsonSerializer _json;
  private readonly ILibraryManager _libraryManager;
  /// <summary>
  /// Initializes a new instance of the <see cref="TmdbSearch"/> class.
  /// </summary>
  /// <param name="logger">Logger stored for later use by the search methods.</param>
  /// <param name="json">JSON serializer stored for later use by the search methods.</param>
  /// <param name="libraryManager">Library manager stored for later use by the search methods.</param>
  public TmdbSearch(ILogger logger, IJsonSerializer json, ILibraryManager libraryManager)
      => (_logger, _json, _libraryManager) = (logger, json, libraryManager);
  /// <summary>
  /// Runs a search of type "tv" for the given series lookup info.
  /// </summary>
  /// <param name="idInfo">Lookup info describing the series to find.</param>
  /// <param name="cancellationToken">Token passed through to the shared search routine.</param>
  /// <returns>A task producing the matching remote search results.</returns>
  public Task<IEnumerable<RemoteSearchResult>> GetSearchResults(SeriesInfo idInfo, CancellationToken cancellationToken)
      => GetSearchResults(idInfo, "tv", cancellationToken);
  /// <summary>
  /// Runs a search of type "movie" for the given lookup info.
  /// </summary>
  /// <param name="idInfo">Lookup info describing the movie to find.</param>
  /// <param name="cancellationToken">Token passed through to the shared search routine.</param>
  /// <returns>A task producing the matching remote search results.</returns>
  public Task<IEnumerable<RemoteSearchResult>> GetMovieSearchResults(ItemLookupInfo idInfo, CancellationToken cancellationToken)
      => GetSearchResults(idInfo, "movie", cancellationToken);
  /// <summary>
  /// Runs a search of type "collection" for the given box set lookup info.
  /// </summary>
  /// <param name="idInfo">Lookup info describing the box set to find.</param>
  /// <param name="cancellationToken">Token passed through to the shared search routine.</param>
  /// <returns>A task producing the matching remote search results.</returns>
  public Task<IEnumerable<RemoteSearchResult>> GetSearchResults(BoxSetInfo idInfo, CancellationToken cancellationToken)
      => GetSearchResults(idInfo, "collection", cancellationToken);
  /// <summary>
  /// Shared search routine behind the public entry points.
  /// </summary>
  /// <remarks>
  /// The name is parsed and cleaned, then searched in the item's metadata language with an
  /// English retry. If that yields nothing, a more aggressively cleaned variant of the name
  /// (enclosed text and release-style tokens removed) is tried the same way. Results whose
  /// production year differs from the requested year by more than one are filtered out.
  /// </remarks>
  /// <param name="idInfo">Lookup info supplying the name, year and metadata language.</param>
  /// <param name="searchType">Search type segment, e.g. "tv", "movie" or "collection".</param>
  /// <param name="cancellationToken">Token passed to the settings lookup and the search helpers.</param>
  /// <returns>The (lazily filtered) search results; empty when there is nothing to search for.</returns>
  private async Task<IEnumerable<RemoteSearchResult>> GetSearchResults(ItemLookupInfo idInfo, string searchType, CancellationToken cancellationToken)
  {
      var name = idInfo.Name;
      var year = idInfo.Year;

      if (string.IsNullOrWhiteSpace(name))
      {
          return new List<RemoteSearchResult>();
      }

      var tmdbSettings = await TmdbMovieProvider.Current.GetTmdbSettings(cancellationToken).ConfigureAwait(false);
      var tmdbImageUrl = tmdbSettings.images.GetImageUrl("original");

      // ParseName is required here.
      // Caller provides the filename with extension stripped and NOT the parsed filename
      var parsedName = _libraryManager.ParseName(name);
      name = parsedName.Name;

      // An explicitly supplied year wins over a year found in the name.
      year ??= parsedName.Year;

      var language = idInfo.MetadataLanguage.ToLowerInvariant();
      var isEnglish = string.Equals(language, "en", StringComparison.OrdinalIgnoreCase);

      // Replace sequences of non-word characters with space
      // TMDB expects a space separated list of words make sure that is the case
      name = _cleanNonWord.Replace(name, " ").Trim();

      // A name made up solely of non-word characters is empty at this point. The search
      // helpers reject blank names with an ArgumentException, so report "no results" instead.
      if (name.Length == 0)
      {
          return new List<RemoteSearchResult>();
      }

      // Searches in the item's language first and retries once in English when nothing was found.
      async Task<List<RemoteSearchResult>> SearchWithEnglishFallback(string query)
      {
          _logger.LogInformation("TmdbSearch: Finding id for item: {0} ({1})", query, year);
          var found = await GetSearchResults(query, searchType, year, language, tmdbImageUrl, cancellationToken).ConfigureAwait(false);

          if (found.Count == 0 && !isEnglish)
          {
              found = await GetSearchResults(query, searchType, year, "en", tmdbImageUrl, cancellationToken).ConfigureAwait(false);
          }

          return found;
      }

      var results = await SearchWithEnglishFallback(name).ConfigureAwait(false);

      // TODO: retrying alternatives should be done outside the search
      // provider so that the retry logic can be common for all search
      // providers
      if (results.Count == 0)
      {
          // Remove things enclosed in []{}() etc
          var fallbackName = _cleanEnclosed.Replace(parsedName.Name, string.Empty);

          // Replace sequences of non-word characters with space
          fallbackName = _cleanNonWord.Replace(fallbackName, " ");

          // Clean based on common stop words / tokens, then trim whitespace
          fallbackName = _cleanStopWords.Replace(fallbackName, string.Empty).Trim();

          // Search again only if the new name is usable and actually different
          if (!string.IsNullOrWhiteSpace(fallbackName) && !string.Equals(fallbackName, name, StringComparison.Ordinal))
          {
              results = await SearchWithEnglishFallback(fallbackName).ConfigureAwait(false);
          }
      }

      // Allow one year tolerance; results without a year (or searches without one) always pass.
      return results.Where(i =>
          !year.HasValue
          || !i.ProductionYear.HasValue
          || Math.Abs(year.Value - i.ProductionYear.Value) <= 1);
  }
  /// <summary>
  /// Routes a search to the TV-specific helper when <paramref name="type"/> is "tv",
  /// and to the generic helper for every other type.
  /// </summary>
  /// <param name="name">Cleaned name to search for.</param>
  /// <param name="type">Search type segment.</param>
  /// <param name="year">Optional year forwarded to the helper.</param>
  /// <param name="language">Language code forwarded to the helper.</param>
  /// <param name="baseImageUrl">Base url forwarded to the helper for building image urls.</param>
  /// <param name="cancellationToken">Token forwarded to the helper.</param>
  /// <returns>The task returned by the selected helper.</returns>
  private Task<List<RemoteSearchResult>> GetSearchResults(string name, string type, int? year, string language, string baseImageUrl, CancellationToken cancellationToken)
      => type switch
      {
          "tv" => GetSearchResultsTv(name, year, language, baseImageUrl, cancellationToken),
          _ => GetSearchResultsGeneric(name, type, year, language, baseImageUrl, cancellationToken)
      };
  /// <summary>
  /// Queries the TMDb search endpoint for the given type and maps the response to
  /// <see cref="RemoteSearchResult"/> instances.
  /// </summary>
  /// <param name="name">Name to search for; must not be null or whitespace.</param>
  /// <param name="type">Search type segment. The year filter is only applied for "movie".</param>
  /// <param name="year">Optional release year.</param>
  /// <param name="language">Language code placed in the request url.</param>
  /// <param name="baseImageUrl">Prefix prepended to poster paths.</param>
  /// <param name="cancellationToken">Currently unused by this method.</param>
  /// <returns>The mapped results; empty when the response carries no result list.</returns>
  /// <exception cref="ArgumentException"><paramref name="name"/> is null or whitespace.</exception>
  private async Task<List<RemoteSearchResult>> GetSearchResultsGeneric(string name, string type, int? year, string language, string baseImageUrl, CancellationToken cancellationToken)
  {
      if (string.IsNullOrWhiteSpace(name))
      {
          throw new ArgumentException("String can't be null or empty.", nameof(name));
      }

      var encodedName = WebUtility.UrlEncode(name);

      // Only the movie endpoint gets a year-restricted query; every other type uses the plain search url.
      var useMovieYearFilter = year != null && string.Equals(type, "movie", StringComparison.OrdinalIgnoreCase);
      var url3 = useMovieYearFilter
          ? string.Format(CultureInfo.InvariantCulture, SearchUrlMovieWithYear, encodedName, TmdbUtils.ApiKey, language, year)
          : string.Format(CultureInfo.InvariantCulture, SearchUrl, encodedName, TmdbUtils.ApiKey, language, type);

      // Dispose the request once the response has been consumed (matches GetSearchResultsTv).
      using var requestMessage = new HttpRequestMessage(HttpMethod.Get, url3);
      foreach (var header in TmdbUtils.AcceptHeaders)
      {
          requestMessage.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue(header));
      }

      using var response = await TmdbMovieProvider.Current.GetMovieDbResponse(requestMessage).ConfigureAwait(false);
      await using var stream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false);
      var searchResults = await _json.DeserializeFromStreamAsync<TmdbSearchResult<MovieResult>>(stream).ConfigureAwait(false);

      var results = searchResults.Results ?? new List<MovieResult>();

      return results
          .Select(i =>
          {
              var remoteResult = new RemoteSearchResult
              {
                  SearchProviderName = TmdbMovieProvider.Current.Name,
                  Name = i.Title ?? i.Name ?? i.Original_Title,
                  ImageUrl = string.IsNullOrWhiteSpace(i.Poster_Path) ? null : baseImageUrl + i.Poster_Path
              };

              // These dates are always in this exact format
              if (!string.IsNullOrWhiteSpace(i.Release_Date)
                  && DateTime.TryParseExact(i.Release_Date, "yyyy-MM-dd", _usCulture, DateTimeStyles.None, out var r))
              {
                  remoteResult.PremiereDate = r.ToUniversalTime();

                  // Take the year from the date as published. The parsed value has Kind = Unspecified,
                  // so ToUniversalTime() treats it as local time and may move a January 1st release
                  // into the previous year on hosts east of UTC.
                  remoteResult.ProductionYear = r.Year;
              }

              remoteResult.SetProviderId(MetadataProvider.Tmdb, i.Id.ToString(_usCulture));

              return remoteResult;
          })
          .ToList();
  }
  /// <summary>
  /// Queries the TMDb TV search endpoint and maps the response to
  /// <see cref="RemoteSearchResult"/> instances.
  /// </summary>
  /// <param name="name">Name to search for; must not be null or whitespace.</param>
  /// <param name="year">Optional first-air-date year; when set the year-restricted url is used.</param>
  /// <param name="language">Language code placed in the request url.</param>
  /// <param name="baseImageUrl">Prefix prepended to poster paths.</param>
  /// <param name="cancellationToken">Currently unused by this method.</param>
  /// <returns>The mapped results; empty when the response carries no result list.</returns>
  /// <exception cref="ArgumentException"><paramref name="name"/> is null or whitespace.</exception>
  private async Task<List<RemoteSearchResult>> GetSearchResultsTv(string name, int? year, string language, string baseImageUrl, CancellationToken cancellationToken)
  {
      if (string.IsNullOrWhiteSpace(name))
      {
          throw new ArgumentException("String can't be null or empty.", nameof(name));
      }

      var encodedName = WebUtility.UrlEncode(name);
      var url3 = year == null
          ? string.Format(CultureInfo.InvariantCulture, SearchUrl, encodedName, TmdbUtils.ApiKey, language, "tv")
          : string.Format(CultureInfo.InvariantCulture, SearchUrlTvWithYear, encodedName, TmdbUtils.ApiKey, language, year);

      using var requestMessage = new HttpRequestMessage(HttpMethod.Get, url3);
      foreach (var header in TmdbUtils.AcceptHeaders)
      {
          requestMessage.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue(header));
      }

      using var response = await TmdbMovieProvider.Current.GetMovieDbResponse(requestMessage).ConfigureAwait(false);
      await using var stream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false);
      var searchResults = await _json.DeserializeFromStreamAsync<TmdbSearchResult<TvResult>>(stream).ConfigureAwait(false);

      var results = searchResults.Results ?? new List<TvResult>();

      return results
          .Select(i =>
          {
              var remoteResult = new RemoteSearchResult
              {
                  SearchProviderName = TmdbMovieProvider.Current.Name,
                  Name = i.Name ?? i.Original_Name,
                  ImageUrl = string.IsNullOrWhiteSpace(i.Poster_Path) ? null : baseImageUrl + i.Poster_Path
              };

              // These dates are always in this exact format
              if (!string.IsNullOrWhiteSpace(i.First_Air_Date)
                  && DateTime.TryParseExact(i.First_Air_Date, "yyyy-MM-dd", _usCulture, DateTimeStyles.None, out var r))
              {
                  remoteResult.PremiereDate = r.ToUniversalTime();

                  // Take the year from the date as published. The parsed value has Kind = Unspecified,
                  // so ToUniversalTime() treats it as local time and may move a January 1st premiere
                  // into the previous year on hosts east of UTC.
                  remoteResult.ProductionYear = r.Year;
              }

              remoteResult.SetProviderId(MetadataProvider.Tmdb, i.Id.ToString(_usCulture));

              return remoteResult;
          })
          .ToList();
  }
  244. }
  245. }