// TmdbSearch.cs
  1. #pragma warning disable CS1591
  2. using System;
  3. using System.Collections.Generic;
  4. using System.Globalization;
  5. using System.Linq;
  6. using System.Net;
  7. using System.Net.Http;
  8. using System.Net.Http.Headers;
  9. using System.Text.RegularExpressions;
  10. using System.Threading;
  11. using System.Threading.Tasks;
  12. using MediaBrowser.Controller.Library;
  13. using MediaBrowser.Controller.Providers;
  14. using MediaBrowser.Model.Entities;
  15. using MediaBrowser.Model.Providers;
  16. using MediaBrowser.Model.Serialization;
  17. using MediaBrowser.Providers.Plugins.Tmdb.Models.Search;
  18. using Microsoft.Extensions.Logging;
  19. namespace MediaBrowser.Providers.Plugins.Tmdb.Movies
  20. {
  21. public class TmdbSearch
  22. {
  /// <summary>
  /// Generic search URL template: {0} = URL-encoded query, {1} = API key, {2} = language, {3} = search type segment.
  /// </summary>
  private const string SearchUrl = TmdbUtils.BaseTmdbApiUrl + @"3/search/{3}?api_key={1}&query={0}&language={2}";

  /// <summary>
  /// TV search URL template with a year filter: {0} = URL-encoded query, {1} = API key, {2} = language, {3} = first air date year.
  /// </summary>
  private const string SearchUrlTvWithYear = TmdbUtils.BaseTmdbApiUrl + @"3/search/tv?api_key={1}&query={0}&language={2}&first_air_date_year={3}";

  /// <summary>
  /// Movie search URL template with a year filter: {0} = URL-encoded query, {1} = API key, {2} = language, {3} = primary release year.
  /// </summary>
  private const string SearchUrlMovieWithYear = TmdbUtils.BaseTmdbApiUrl + @"3/search/movie?api_key={1}&query={0}&language={2}&primary_release_year={3}";

  /// <summary>
  /// Culture used when parsing dates and formatting ids.
  /// </summary>
  private static readonly CultureInfo _usCulture = new CultureInfo("en-US");

  /// <summary>
  /// Matches everything from an opening bracket-like character to a closing one (greedy).
  /// </summary>
  private static readonly Regex _cleanEnclosed = new Regex(@"\p{Ps}.*\p{Pe}", RegexOptions.Compiled);

  /// <summary>
  /// Matches runs of non-word characters and underscores.
  /// </summary>
  private static readonly Regex _cleanNonWord = new Regex(@"[\W_]+", RegexOptions.Compiled);

  /// <summary>
  /// Matches the first release-style token (year, season/episode marker, resolution, codec, rip tag, ...)
  /// and everything that follows it.
  /// </summary>
  /// <remarks>
  /// The pattern is compiled with <see cref="RegexOptions.IgnorePatternWhitespace"/>, so the line breaks,
  /// indentation and <c>#</c> comments inside the literal are not part of the expression. Anything else
  /// placed inside the literal is, which is why it must contain only the pattern and its comments.
  /// </remarks>
  private static readonly Regex _cleanStopWords = new Regex(
      @"\b( # Start at word boundary
          19[0-9]{2}|20[0-9]{2}| # 1900-2099
          S[0-9]{2}| # Season
          E[0-9]{2}| # Episode
          (2160|1080|720|576|480)[ip]?| # Resolution
          [xh]?264| # Encoding
          (web|dvd|bd|hdtv|hd)rip| # *Rip
          web|hdtv|mp4|bluray|ktr|dl|single|imageset|internal|doku|dubbed|retail|xxx|flac
      ).* # Match rest of string",
      RegexOptions.Compiled | RegexOptions.IgnorePatternWhitespace | RegexOptions.IgnoreCase);

  private readonly ILogger _logger;
  private readonly IJsonSerializer _json;
  private readonly ILibraryManager _libraryManager;
  /// <summary>
  /// Initializes a new instance of the <see cref="TmdbSearch"/> class.
  /// </summary>
  /// <param name="logger">Logger used to report each search attempt.</param>
  /// <param name="json">Serializer used to deserialize the search responses.</param>
  /// <param name="libraryManager">Library manager used to parse item names.</param>
  public TmdbSearch(ILogger logger, IJsonSerializer json, ILibraryManager libraryManager)
      => (_logger, _json, _libraryManager) = (logger, json, libraryManager);
  /// <summary>
  /// Runs a search of type "tv" for the given series lookup info.
  /// </summary>
  /// <param name="idInfo">The series lookup info.</param>
  /// <param name="cancellationToken">Token used to cancel the operation.</param>
  /// <returns>A task producing the matching remote search results.</returns>
  public Task<IEnumerable<RemoteSearchResult>> GetSearchResults(SeriesInfo idInfo, CancellationToken cancellationToken)
      => GetSearchResults(idInfo, "tv", cancellationToken);
  /// <summary>
  /// Runs a search of type "movie" for the given lookup info.
  /// </summary>
  /// <param name="idInfo">The item lookup info.</param>
  /// <param name="cancellationToken">Token used to cancel the operation.</param>
  /// <returns>A task producing the matching remote search results.</returns>
  public Task<IEnumerable<RemoteSearchResult>> GetMovieSearchResults(ItemLookupInfo idInfo, CancellationToken cancellationToken)
      => GetSearchResults(idInfo, "movie", cancellationToken);
  /// <summary>
  /// Runs a search of type "collection" for the given box set lookup info.
  /// </summary>
  /// <param name="idInfo">The box set lookup info.</param>
  /// <param name="cancellationToken">Token used to cancel the operation.</param>
  /// <returns>A task producing the matching remote search results.</returns>
  public Task<IEnumerable<RemoteSearchResult>> GetSearchResults(BoxSetInfo idInfo, CancellationToken cancellationToken)
      => GetSearchResults(idInfo, "collection", cancellationToken);
  /// <summary>
  /// Searches for items of the given type that match the supplied lookup info.
  /// </summary>
  /// <remarks>
  /// The item name is parsed with the library manager, reduced to space separated words and searched in the
  /// item's metadata language, then in English if nothing was found. If that still yields nothing, a more
  /// aggressively cleaned name (bracketed parts and release tokens removed) is tried the same way.
  /// </remarks>
  /// <param name="idInfo">Lookup info supplying the name, the optional year and the metadata language.</param>
  /// <param name="searchType">Search type handed to the lower-level search ("tv", "movie" or "collection" from the callers in this class).</param>
  /// <param name="cancellationToken">Token used to cancel the operation.</param>
  /// <returns>
  /// The results found. When both the requested year and a result's production year are known,
  /// only results within one year of the requested year are kept.
  /// </returns>
  private async Task<IEnumerable<RemoteSearchResult>> GetSearchResults(ItemLookupInfo idInfo, string searchType, CancellationToken cancellationToken)
  {
      if (string.IsNullOrWhiteSpace(idInfo.Name))
      {
          return new List<RemoteSearchResult>();
      }

      // ParseName is required here.
      // Caller provides the filename with extension stripped and NOT the parsed filename
      var parsedName = _libraryManager.ParseName(idInfo.Name);
      var parsedTitle = parsedName.Name ?? string.Empty;

      var year = idInfo.Year;
      year ??= parsedName.Year;

      // Replace sequences of non-word characters with space
      // TMDB expects a space separated list of words make sure that is the case
      var name = _cleanNonWord.Replace(parsedTitle, " ").Trim();
      if (name.Length == 0)
      {
          // The name contained no word characters at all (for example "???"). The lower-level search
          // methods throw ArgumentException for a blank query, and the fallback name below is derived
          // from the same text, so there is nothing to search for.
          return new List<RemoteSearchResult>();
      }

      // A missing metadata language falls back to English, which is also the retry language used below.
      var language = string.IsNullOrWhiteSpace(idInfo.MetadataLanguage)
          ? "en"
          : idInfo.MetadataLanguage.ToLowerInvariant();

      var tmdbSettings = await TmdbMovieProvider.Current.GetTmdbSettings(cancellationToken).ConfigureAwait(false);
      var tmdbImageUrl = tmdbSettings.images.GetImageUrl("original");

      var results = await SearchWithEnglishFallback(name).ConfigureAwait(false);

      // TODO: retrying alternatives should be done outside the search
      // provider so that the retry logic can be common for all search
      // providers
      if (results.Count == 0)
      {
          // Remove things enclosed in []{}() etc
          var name2 = _cleanEnclosed.Replace(parsedTitle, string.Empty);

          // Replace sequences of non-word characters with space
          name2 = _cleanNonWord.Replace(name2, " ");

          // Clean based on common stop words / tokens, then trim whitespace
          name2 = _cleanStopWords.Replace(name2, string.Empty).Trim();

          // Search again if the new name is different
          if (!string.Equals(name2, name, StringComparison.Ordinal) && !string.IsNullOrWhiteSpace(name2))
          {
              results = await SearchWithEnglishFallback(name2).ConfigureAwait(false);
          }
      }

      // Allow one year tolerance when both years are known
      return results.Where(i =>
          !year.HasValue
          || !i.ProductionYear.HasValue
          || Math.Abs(year.Value - i.ProductionYear.Value) <= 1);

      // Searches in the item's language and, if that finds nothing, once more in English.
      async Task<List<RemoteSearchResult>> SearchWithEnglishFallback(string query)
      {
          _logger.LogInformation("TmdbSearch: Finding id for item: {0} ({1})", query, year);

          var found = await GetSearchResults(query, searchType, year, language, tmdbImageUrl, cancellationToken).ConfigureAwait(false);
          if (found.Count == 0 && !string.Equals(language, "en", StringComparison.OrdinalIgnoreCase))
          {
              found = await GetSearchResults(query, searchType, year, "en", tmdbImageUrl, cancellationToken).ConfigureAwait(false);
          }

          return found;
      }
  }
  /// <summary>
  /// Dispatches a search to the TV specific implementation when <paramref name="type"/> is "tv",
  /// and to the generic implementation for every other type.
  /// </summary>
  /// <param name="name">The query text.</param>
  /// <param name="type">The search type.</param>
  /// <param name="year">Optional year passed on to the selected implementation.</param>
  /// <param name="language">The language passed on to the selected implementation.</param>
  /// <param name="baseImageUrl">Base URL passed on to the selected implementation for building image URLs.</param>
  /// <param name="cancellationToken">Token used to cancel the operation.</param>
  /// <returns>A task producing the list of results.</returns>
  private Task<List<RemoteSearchResult>> GetSearchResults(string name, string type, int? year, string language, string baseImageUrl, CancellationToken cancellationToken)
      => string.Equals(type, "tv", StringComparison.Ordinal)
          ? GetSearchResultsTv(name, year, language, baseImageUrl, cancellationToken)
          : GetSearchResultsGeneric(name, type, year, language, baseImageUrl, cancellationToken);
  /// <summary>
  /// Performs a single search request of the given type and maps the response to remote search results.
  /// </summary>
  /// <param name="name">The query text; it is URL-encoded before being placed in the request URL.</param>
  /// <param name="type">The search type. The year filter is only applied when this is "movie" (case-insensitive).</param>
  /// <param name="year">Optional year used as the primary release year filter for movie searches.</param>
  /// <param name="language">The language placed in the request URL.</param>
  /// <param name="baseImageUrl">Base URL that poster paths are appended to.</param>
  /// <param name="cancellationToken">Token used to cancel the request.</param>
  /// <returns>The mapped results; empty when the response contains none.</returns>
  /// <exception cref="ArgumentException"><paramref name="name"/> is null, empty or whitespace.</exception>
  private async Task<List<RemoteSearchResult>> GetSearchResultsGeneric(string name, string type, int? year, string language, string baseImageUrl, CancellationToken cancellationToken)
  {
      if (string.IsNullOrWhiteSpace(name))
      {
          throw new ArgumentException("String can't be null or empty.", nameof(name));
      }

      var encodedName = WebUtility.UrlEncode(name);
      var useYearFilter = year != null && string.Equals(type, "movie", StringComparison.OrdinalIgnoreCase);
      var url3 = useYearFilter
          ? string.Format(CultureInfo.InvariantCulture, SearchUrlMovieWithYear, encodedName, TmdbUtils.ApiKey, language, year)
          : string.Format(CultureInfo.InvariantCulture, SearchUrl, encodedName, TmdbUtils.ApiKey, language, type);

      using var requestMessage = new HttpRequestMessage(HttpMethod.Get, url3);
      foreach (var header in TmdbUtils.AcceptHeaders)
      {
          requestMessage.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue(header));
      }

      using var response = await TmdbMovieProvider.Current.GetMovieDbResponse(requestMessage, cancellationToken).ConfigureAwait(false);
      await using var stream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false);

      // A response that deserializes to null is treated the same as one without results.
      var searchResults = await _json.DeserializeFromStreamAsync<TmdbSearchResult<MovieResult>>(stream).ConfigureAwait(false);
      var results = searchResults?.Results ?? new List<MovieResult>();

      return results
          .Select(i =>
          {
              var remoteResult = new RemoteSearchResult
              {
                  SearchProviderName = TmdbMovieProvider.Current.Name,
                  Name = i.Title ?? i.Name ?? i.Original_Title,
                  ImageUrl = string.IsNullOrWhiteSpace(i.Poster_Path) ? null : baseImageUrl + i.Poster_Path
              };

              // These dates are always in this exact format; null or blank values simply fail to parse.
              if (DateTime.TryParseExact(i.Release_Date, "yyyy-MM-dd", CultureInfo.InvariantCulture, DateTimeStyles.None, out var releaseDate))
              {
                  remoteResult.PremiereDate = releaseDate.ToUniversalTime();

                  // Use the year of the date as reported. The parsed value has no time zone, so
                  // ToUniversalTime() treats it as local time and can move January 1 into the
                  // previous year on machines ahead of UTC.
                  remoteResult.ProductionYear = releaseDate.Year;
              }

              remoteResult.SetProviderId(MetadataProvider.Tmdb, i.Id.ToString(CultureInfo.InvariantCulture));
              return remoteResult;
          })
          .ToList();
  }
  /// <summary>
  /// Performs a single TV search request and maps the response to remote search results.
  /// </summary>
  /// <param name="name">The query text; it is URL-encoded before being placed in the request URL.</param>
  /// <param name="year">Optional year; when present it is sent as the first air date year filter.</param>
  /// <param name="language">The language placed in the request URL.</param>
  /// <param name="baseImageUrl">Base URL that poster paths are appended to.</param>
  /// <param name="cancellationToken">Token used to cancel the request.</param>
  /// <returns>The mapped results; empty when the response contains none.</returns>
  /// <exception cref="ArgumentException"><paramref name="name"/> is null, empty or whitespace.</exception>
  private async Task<List<RemoteSearchResult>> GetSearchResultsTv(string name, int? year, string language, string baseImageUrl, CancellationToken cancellationToken)
  {
      if (string.IsNullOrWhiteSpace(name))
      {
          throw new ArgumentException("String can't be null or empty.", nameof(name));
      }

      var encodedName = WebUtility.UrlEncode(name);
      var url3 = year == null
          ? string.Format(CultureInfo.InvariantCulture, SearchUrl, encodedName, TmdbUtils.ApiKey, language, "tv")
          : string.Format(CultureInfo.InvariantCulture, SearchUrlTvWithYear, encodedName, TmdbUtils.ApiKey, language, year);

      using var requestMessage = new HttpRequestMessage(HttpMethod.Get, url3);
      foreach (var header in TmdbUtils.AcceptHeaders)
      {
          requestMessage.Headers.Accept.Add(new MediaTypeWithQualityHeaderValue(header));
      }

      using var response = await TmdbMovieProvider.Current.GetMovieDbResponse(requestMessage, cancellationToken).ConfigureAwait(false);
      await using var stream = await response.Content.ReadAsStreamAsync().ConfigureAwait(false);

      // A response that deserializes to null is treated the same as one without results.
      var searchResults = await _json.DeserializeFromStreamAsync<TmdbSearchResult<TvResult>>(stream).ConfigureAwait(false);
      var results = searchResults?.Results ?? new List<TvResult>();

      return results
          .Select(i =>
          {
              var remoteResult = new RemoteSearchResult
              {
                  SearchProviderName = TmdbMovieProvider.Current.Name,
                  Name = i.Name ?? i.Original_Name,
                  ImageUrl = string.IsNullOrWhiteSpace(i.Poster_Path) ? null : baseImageUrl + i.Poster_Path
              };

              // These dates are always in this exact format; null or blank values simply fail to parse.
              if (DateTime.TryParseExact(i.First_Air_Date, "yyyy-MM-dd", CultureInfo.InvariantCulture, DateTimeStyles.None, out var firstAirDate))
              {
                  remoteResult.PremiereDate = firstAirDate.ToUniversalTime();

                  // Use the year of the date as reported. The parsed value has no time zone, so
                  // ToUniversalTime() treats it as local time and can move January 1 into the
                  // previous year on machines ahead of UTC.
                  remoteResult.ProductionYear = firstAirDate.Year;
              }

              remoteResult.SetProviderId(MetadataProvider.Tmdb, i.Id.ToString(CultureInfo.InvariantCulture));
              return remoteResult;
          })
          .ToList();
  }
  255. }
  256. }