// BaseItemXmlParser.cs (25 KB)
  1. using MediaBrowser.Controller.Entities;
  2. using MediaBrowser.Controller.Entities.Audio;
  3. using MediaBrowser.Model.Entities;
  4. using MediaBrowser.Model.Logging;
  5. using System;
  6. using System.Collections.Generic;
  7. using System.Globalization;
  8. using System.IO;
  9. using System.Linq;
  10. using System.Text;
  11. using System.Threading;
  12. using System.Xml;
  13. namespace MediaBrowser.Controller.Providers
  14. {
  15. /// <summary>
  16. /// Provides a base class for parsing metadata xml
  17. /// </summary>
  18. /// <typeparam name="T"></typeparam>
  19. public class BaseItemXmlParser<T>
  20. where T : BaseItem, new()
  21. {
  /// <summary>
  /// Gets the logger available to this parser and to derived parsers.
  /// The value can only be assigned from inside this class.
  /// </summary>
  protected ILogger Logger
  {
      get;
      private set;
  }
  /// <summary>
  /// Creates a new <see cref="BaseItemXmlParser{T}" /> that reports through the supplied logger.
  /// </summary>
  /// <param name="logger">The logger stored in <see cref="Logger" />.</param>
  public BaseItemXmlParser(ILogger logger)
  {
      this.Logger = logger;
  }
  /// <summary>
  /// Fetches metadata for an item from one xml file.
  /// </summary>
  /// <remarks>
  /// The item's taglines, studios, genres and people are cleared first, then every element
  /// found below the document's root element is handed to <see cref="FetchDataFromXmlNode" />.
  /// </remarks>
  /// <param name="item">The item to populate.</param>
  /// <param name="metadataFile">Path of the metadata xml file.</param>
  /// <param name="cancellationToken">Checked once per node while reading.</param>
  /// <exception cref="System.ArgumentNullException">
  /// <paramref name="item" /> is null, or <paramref name="metadataFile" /> is null or empty.
  /// </exception>
  public void Fetch(T item, string metadataFile, CancellationToken cancellationToken)
  {
      if (item == null)
      {
          throw new ArgumentNullException("item");
      }

      if (string.IsNullOrEmpty(metadataFile))
      {
          throw new ArgumentNullException("metadataFile");
      }

      var settings = new XmlReaderSettings
      {
          CheckCharacters = false,
          IgnoreProcessingInstructions = true,
          IgnoreComments = true,
          ValidationType = ValidationType.None
      };

      item.Taglines.Clear();
      item.Studios.Clear();
      item.Genres.Clear();
      item.People.Clear();

      // Use european encoding as it will accept more characters
      using (var streamReader = new StreamReader(metadataFile, Encoding.GetEncoding("ISO-8859-1")))
      // Use XmlReader for best performance
      using (var reader = XmlReader.Create(streamReader, settings))
      {
          // Line information lets us detect a handler that did not move the reader at all.
          var lineInfo = reader as IXmlLineInfo;
          var canTrackPosition = lineInfo != null && lineInfo.HasLineInfo();

          // Step onto the root element, then onto its first child node.
          reader.MoveToContent();
          reader.Read();

          // Handlers such as ReadElementContentAsString/Skip already leave the reader on the node
          // that follows the element. Calling Read() unconditionally after them would skip a
          // sibling element whenever the file has no whitespace between elements, so the reader
          // is only advanced here when the current node is not an element.
          while (!reader.EOF && reader.ReadState == ReadState.Interactive)
          {
              cancellationToken.ThrowIfCancellationRequested();

              if (reader.NodeType != XmlNodeType.Element)
              {
                  reader.Read();
                  continue;
              }

              var startLine = canTrackPosition ? lineInfo.LineNumber : 0;
              var startColumn = canTrackPosition ? lineInfo.LinePosition : 0;

              FetchDataFromXmlNode(reader, item);

              // Progress guard: if the handler left the reader on the very same element
              // (for example an empty element read through a subtree reader), advance manually
              // so the loop cannot spin forever. Without line information we cannot tell a new
              // element from the old one, so fall back to always advancing.
              if (reader.NodeType == XmlNodeType.Element)
              {
                  var samePosition = !canTrackPosition
                      || (lineInfo.LineNumber == startLine && lineInfo.LinePosition == startColumn);

                  if (samePosition)
                  {
                      reader.Read();
                  }
              }
          }
      }
  }
  /// <summary>
  /// en-US culture used to parse numbers from metadata files. User overrides are disabled so
  /// regional settings customised on the host machine cannot change how the files are parsed.
  /// </summary>
  private readonly CultureInfo _usCulture = new CultureInfo("en-US", false);
  /// <summary>
  /// Fetches metadata from one Xml Element.
  /// </summary>
  /// <remarks>
  /// The element name selects which item member is filled. Elements that are not recognised
  /// are skipped. Branches that read a nested node (TagLines, Genres, Persons, ParentalRating,
  /// Studios) use a subtree reader that is disposed before returning, which leaves
  /// <paramref name="reader" /> on that node's end tag, or on the element itself when it is empty.
  /// </remarks>
  /// <param name="reader">The reader, positioned on the element to process.</param>
  /// <param name="item">The item.</param>
  protected virtual void FetchDataFromXmlNode(XmlReader reader, T item)
  {
      switch (reader.Name)
      {
          // DateCreated
          case "Added":
          {
              DateTime added;
              if (DateTime.TryParse(reader.ReadElementContentAsString() ?? string.Empty, out added))
              {
                  item.DateCreated = added.ToUniversalTime();
              }
              break;
          }

          case "LocalTitle":
              item.Name = reader.ReadElementContentAsString();
              break;

          case "Type":
          {
              var type = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(type) && !type.Equals("none", StringComparison.OrdinalIgnoreCase))
              {
                  item.DisplayMediaType = type;
              }
              break;
          }

          case "Budget":
          {
              var text = reader.ReadElementContentAsString();
              double value;
              if (double.TryParse(text, NumberStyles.Any, _usCulture, out value))
              {
                  item.Budget = value;
              }
              break;
          }

          case "Revenue":
          {
              var text = reader.ReadElementContentAsString();
              double value;
              if (double.TryParse(text, NumberStyles.Any, _usCulture, out value))
              {
                  item.Revenue = value;
              }
              break;
          }

          case "SortTitle":
              item.ForcedSortName = reader.ReadElementContentAsString();
              break;

          case "Overview":
          case "Description":
          {
              var val = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(val))
              {
                  item.Overview = val;
              }
              break;
          }

          case "TagLine":
          {
              var tagline = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(tagline))
              {
                  item.AddTagline(tagline);
              }
              break;
          }

          case "Website":
          {
              var val = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(val))
              {
                  item.HomePageUrl = val;
              }
              break;
          }

          case "TagLines":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchFromTaglinesNode(subtree, item);
              }
              break;
          }

          case "ContentRating":
          case "certification":
          case "MPAARating":
          {
              var rating = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(rating))
              {
                  item.OfficialRating = rating;
              }
              break;
          }

          case "MPAADescription":
          {
              var rating = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(rating))
              {
                  item.OfficialRatingDescription = rating;
              }
              break;
          }

          case "CustomRating":
          {
              var val = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(val))
              {
                  item.CustomRating = val;
              }
              break;
          }

          case "Runtime":
          case "RunningTime":
          {
              var text = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(text))
              {
                  // Only the first token is the number of minutes ("120 min" -> "120").
                  // Trim first so a leading space does not produce an empty first token.
                  int runtime;
                  if (int.TryParse(text.Trim().Split(' ')[0], NumberStyles.Integer, _usCulture, out runtime))
                  {
                      // For audio and video don't replace ffmpeg data
                      if (item is Video || item is Audio)
                      {
                          item.OriginalRunTimeTicks = TimeSpan.FromMinutes(runtime).Ticks;
                      }
                      else
                      {
                          item.RunTimeTicks = TimeSpan.FromMinutes(runtime).Ticks;
                      }
                  }
              }
              break;
          }

          case "Genre":
          {
              foreach (var name in GetTrimmedNames(reader.ReadElementContentAsString()))
              {
                  item.AddGenre(name);
              }
              break;
          }

          case "AspectRatio":
          {
              var val = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(val))
              {
                  item.AspectRatio = val;
              }
              break;
          }

          case "Network":
          {
              foreach (var name in GetTrimmedNames(reader.ReadElementContentAsString()))
              {
                  item.AddStudio(name);
              }
              break;
          }

          case "Director":
          {
              AddPeople(item, reader.ReadElementContentAsString(), name => new PersonInfo { Name = name, Type = PersonType.Director });
              break;
          }

          case "Writer":
          {
              AddPeople(item, reader.ReadElementContentAsString(), name => new PersonInfo { Name = name, Type = PersonType.Writer });
              break;
          }

          case "Actors":
          {
              var actors = reader.ReadInnerXml();

              if (actors.Contains("<"))
              {
                  // This is one of the mis-named "Actors" full nodes created by MB2
                  // Create a reader and pass it to the persons node processor
                  using (var personsReader = new XmlTextReader(new StringReader("<Persons>" + actors + "</Persons>")))
                  {
                      FetchDataFromPersonsNode(personsReader, item);
                  }
              }
              else
              {
                  // Old-style piped string. ReadInnerXml returns markup, so entity references
                  // such as &amp; are still escaped and must be decoded before use.
                  AddPeople(item, DecodeXmlText(actors), name => new PersonInfo { Name = name, Type = PersonType.Actor });
              }
              break;
          }

          case "GuestStars":
          {
              AddPeople(item, reader.ReadElementContentAsString(), name => new PersonInfo { Name = name, Type = PersonType.GuestStar });
              break;
          }

          case "Trailer":
          {
              // Trailer urls are currently not stored; the element is still read so the
              // reader moves past it.
              reader.ReadElementContentAsString();
              break;
          }

          case "ProductionYear":
          {
              var val = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(val))
              {
                  int productionYear;
                  if (int.TryParse(val, NumberStyles.Integer, _usCulture, out productionYear) && productionYear > 1850)
                  {
                      item.ProductionYear = productionYear;
                  }
              }
              break;
          }

          case "Rating":
          case "IMDBrating":
          {
              var rating = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(rating))
              {
                  float val;
                  // All external meta is saving this as '.' for decimal I believe...but just to be sure
                  if (float.TryParse(rating.Replace(',', '.'), NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out val))
                  {
                      item.CommunityRating = val;
                  }
              }
              break;
          }

          case "FirstAired":
          {
              var firstAired = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(firstAired))
              {
                  DateTime airDate;
                  if (DateTime.TryParse(firstAired, out airDate) && airDate.Year > 1850)
                  {
                      item.PremiereDate = airDate.ToUniversalTime();
                      item.ProductionYear = airDate.Year;
                  }
              }
              break;
          }

          case "TMDbId":
          {
              var tmdb = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(tmdb))
              {
                  item.SetProviderId(MetadataProviders.Tmdb, tmdb);
              }
              break;
          }

          case "CollectionNumber":
          {
              var tmdbCollection = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(tmdbCollection))
              {
                  item.SetProviderId(MetadataProviders.TmdbCollection, tmdbCollection);
              }
              break;
          }

          case "TVcomId":
          {
              var tvcomId = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(tvcomId))
              {
                  item.SetProviderId(MetadataProviders.Tvcom, tvcomId);
              }
              break;
          }

          case "IMDB_ID":
          case "IMDB":
          case "IMDbId":
          {
              var imDbId = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(imDbId))
              {
                  item.SetProviderId(MetadataProviders.Imdb, imDbId);
              }
              break;
          }

          case "Genres":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchFromGenresNode(subtree, item);
              }
              break;
          }

          case "Persons":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchDataFromPersonsNode(subtree, item);
              }
              break;
          }

          case "ParentalRating":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchFromParentalRatingNode(subtree, item);
              }
              break;
          }

          case "Studios":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchFromStudiosNode(subtree, item);
              }
              break;
          }

          default:
              reader.Skip();
              break;
      }
  }

  /// <summary>
  /// Splits a delimited list with <see cref="SplitNames" />, trims every entry and drops blank ones.
  /// </summary>
  /// <param name="delimitedNames">The raw element text; may be null.</param>
  /// <returns>The non-blank, trimmed names in their original order.</returns>
  private List<string> GetTrimmedNames(string delimitedNames)
  {
      return SplitNames(delimitedNames)
          .Select(name => (name ?? string.Empty).Trim())
          .Where(name => name.Length > 0)
          .ToList();
  }

  /// <summary>
  /// Adds one person to the item for every non-blank name in a delimited list.
  /// </summary>
  /// <param name="item">The item receiving the people.</param>
  /// <param name="delimitedNames">The raw element text; may be null.</param>
  /// <param name="createPerson">Builds the person entry for a trimmed name.</param>
  private void AddPeople(T item, string delimitedNames, Func<string, PersonInfo> createPerson)
  {
      foreach (var name in GetTrimmedNames(delimitedNames))
      {
          item.AddPerson(createPerson(name));
      }
  }

  /// <summary>
  /// Converts markup-free inner xml (text that may contain entity references) into plain text.
  /// </summary>
  /// <param name="escapedText">Text as returned by ReadInnerXml, without any child elements.</param>
  /// <returns>The text with entity references resolved.</returns>
  private static string DecodeXmlText(string escapedText)
  {
      // Nothing to decode when there is no entity reference.
      if (string.IsNullOrEmpty(escapedText) || escapedText.IndexOf('&') == -1)
      {
          return escapedText;
      }

      using (var textReader = new XmlTextReader(new StringReader("<Text>" + escapedText + "</Text>")))
      {
          textReader.MoveToContent();
          return textReader.ReadElementContentAsString();
      }
  }
  /// <summary>
  /// Fetches from taglines node: adds every non-blank "Tagline" child to the item.
  /// </summary>
  /// <param name="reader">A reader scoped to the taglines node; it is read to its end.</param>
  /// <param name="item">The item.</param>
  private void FetchFromTaglinesNode(XmlReader reader, T item)
  {
      // Step onto the container element, then onto its first child node.
      reader.MoveToContent();
      reader.Read();

      // ReadElementContentAsString and Skip already move to the node after the element, so the
      // reader is only advanced explicitly for non-element nodes. Advancing unconditionally
      // would skip a sibling that directly follows without whitespace.
      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          switch (reader.Name)
          {
              case "Tagline":
              {
                  var val = reader.ReadElementContentAsString();
                  if (!string.IsNullOrWhiteSpace(val))
                  {
                      item.AddTagline(val);
                  }
                  break;
              }

              default:
                  reader.Skip();
                  break;
          }
      }
  }
  /// <summary>
  /// Fetches from genres node: adds every non-blank "Genre" child to the item.
  /// </summary>
  /// <param name="reader">A reader scoped to the genres node; it is read to its end.</param>
  /// <param name="item">The item.</param>
  private void FetchFromGenresNode(XmlReader reader, T item)
  {
      // Step onto the container element, then onto its first child node.
      reader.MoveToContent();
      reader.Read();

      // ReadElementContentAsString and Skip already move to the node after the element, so the
      // reader is only advanced explicitly for non-element nodes. Advancing unconditionally
      // would skip a sibling that directly follows without whitespace.
      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          switch (reader.Name)
          {
              case "Genre":
              {
                  var genre = reader.ReadElementContentAsString();
                  if (!string.IsNullOrWhiteSpace(genre))
                  {
                      item.AddGenre(genre);
                  }
                  break;
              }

              default:
                  reader.Skip();
                  break;
          }
      }
  }
  /// <summary>
  /// Fetches the data from persons node: every "Person" or "Actor" child is parsed with
  /// <see cref="GetPersonsFromXmlNode" /> and the resulting people are added to the item.
  /// </summary>
  /// <param name="reader">A reader scoped to the persons node; it is read to its end.</param>
  /// <param name="item">The item.</param>
  private void FetchDataFromPersonsNode(XmlReader reader, T item)
  {
      // Step onto the container element, then onto its first child node.
      reader.MoveToContent();
      reader.Read();

      // Each branch below leaves the reader on the node after the element it handled, so the
      // reader is only advanced explicitly for non-element nodes. Advancing unconditionally
      // would skip a sibling that directly follows without whitespace.
      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          switch (reader.Name)
          {
              case "Person":
              case "Actor":
              {
                  using (var personReader = reader.ReadSubtree())
                  {
                      foreach (var person in GetPersonsFromXmlNode(personReader))
                      {
                          item.AddPerson(person);
                      }
                  }

                  // Once the subtree reader is closed the outer reader sits on the person's end
                  // tag (or on the element itself when it is empty); step past it.
                  reader.Read();
                  break;
              }

              default:
                  reader.Skip();
                  break;
          }
      }
  }
  /// <summary>
  /// Fetches from studios node: adds every non-blank "Studio" child to the item.
  /// </summary>
  /// <param name="reader">A reader scoped to the studios node; it is read to its end.</param>
  /// <param name="item">The item.</param>
  private void FetchFromStudiosNode(XmlReader reader, T item)
  {
      // Step onto the container element, then onto its first child node.
      reader.MoveToContent();
      reader.Read();

      // ReadElementContentAsString and Skip already move to the node after the element, so the
      // reader is only advanced explicitly for non-element nodes. Advancing unconditionally
      // would skip a sibling that directly follows without whitespace.
      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          switch (reader.Name)
          {
              case "Studio":
              {
                  var studio = reader.ReadElementContentAsString();
                  if (!string.IsNullOrWhiteSpace(studio))
                  {
                      item.AddStudio(studio);
                  }
                  break;
              }

              default:
                  reader.Skip();
                  break;
          }
      }
  }
  /// <summary>
  /// Walks the parental rating node without copying anything onto the item.
  /// </summary>
  /// <param name="reader">A reader scoped to the parental rating node; it is read to its end.</param>
  /// <param name="item">The item (currently left untouched).</param>
  private void FetchFromParentalRatingNode(XmlReader reader, T item)
  {
      reader.MoveToContent();

      // Removed support for "Value" tag as it conflicted with MPAA rating but leaving this function for possible
      // future support of "Description" -ebr
      while (reader.Read())
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              continue;
          }

          // No child element is recognised at the moment, so each one is skipped.
          reader.Skip();
      }
  }
  /// <summary>
  /// Gets the persons from XML node.
  /// </summary>
  /// <remarks>
  /// A "Name" child may hold several delimited names; one person is produced per non-blank
  /// name. All of them share the node's "Type" (defaults to "Actor") and "Role" (defaults to
  /// an empty string).
  /// </remarks>
  /// <param name="reader">A reader scoped to a single person node; it is read to its end.</param>
  /// <returns>IEnumerable{PersonInfo}.</returns>
  private IEnumerable<PersonInfo> GetPersonsFromXmlNode(XmlReader reader)
  {
      var names = new List<string>();
      var type = "Actor"; // If type is not specified assume actor
      var role = string.Empty;

      // Step onto the person element, then onto its first child node.
      reader.MoveToContent();
      reader.Read();

      // ReadElementContentAsString and Skip already move to the node after the element, so the
      // reader is only advanced explicitly for non-element nodes. Advancing unconditionally
      // would skip a sibling that directly follows without whitespace.
      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          switch (reader.Name)
          {
              case "Name":
              {
                  var parsedNames = SplitNames(reader.ReadElementContentAsString())
                      .Select(n => (n ?? string.Empty).Trim())
                      .Where(n => n.Length > 0);

                  names.AddRange(parsedNames);
                  break;
              }

              case "Type":
              {
                  var val = reader.ReadElementContentAsString();
                  if (!string.IsNullOrWhiteSpace(val))
                  {
                      type = val;
                  }
                  break;
              }

              case "Role":
              {
                  var val = reader.ReadElementContentAsString();
                  if (!string.IsNullOrWhiteSpace(val))
                  {
                      role = val;
                  }
                  break;
              }

              default:
                  reader.Skip();
                  break;
          }
      }

      // Materialise the result so repeated enumeration does not create new PersonInfo objects.
      return names.Select(n => new PersonInfo { Name = n, Role = role, Type = type }).ToList();
  }
  /// <summary>
  /// Used to split names of comma, pipe or semicolon delimited genres and people.
  /// </summary>
  /// <remarks>
  /// The value is split on commas only when it contains neither a pipe nor a semicolon;
  /// otherwise it is split on pipes and semicolons, so names such as "Matthew, Jr." survive
  /// in piped lists. Every entry is trimmed and blank entries are dropped.
  /// </remarks>
  /// <param name="value">The value; null is treated as empty.</param>
  /// <returns>IEnumerable{System.String}.</returns>
  private static IEnumerable<string> SplitNames(string value)
  {
      value = value ?? string.Empty;

      // Only split by comma if there is no pipe or semicolon in the string
      // We have to be careful to not split names like Matthew, Jr.
      var hasPipeOrSemicolon = value.IndexOf('|') != -1 || value.IndexOf(';') != -1;
      var separators = hasPipeOrSemicolon ? new[] { '|', ';' } : new[] { ',' };

      value = value.Trim().Trim(separators);

      if (string.IsNullOrWhiteSpace(value))
      {
          return new string[] { };
      }

      return value.Split(separators, StringSplitOptions.RemoveEmptyEntries)
          .Select(entry => entry.Trim())
          .Where(entry => entry.Length > 0)
          .ToArray();
  }
  /// <summary>
  /// Splits a string on any of the given separator characters, honouring the split options.
  /// </summary>
  /// <param name="val">The string to split.</param>
  /// <param name="separators">The characters that delimit the parts.</param>
  /// <param name="options">Whether empty parts are kept or removed.</param>
  /// <returns>The parts of <paramref name="val" /> as a string array.</returns>
  private static string[] Split(string val, char[] separators, StringSplitOptions options)
  {
      string[] parts = val.Split(separators, options);
      return parts;
  }
  622. }
  623. }