BaseItemXmlParser.cs 27 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795
  1. using MediaBrowser.Controller.Entities;
  2. using MediaBrowser.Controller.Entities.Audio;
  3. using MediaBrowser.Model.Entities;
  4. using MediaBrowser.Model.Logging;
  5. using System;
  6. using System.Collections.Generic;
  7. using System.Globalization;
  8. using System.IO;
  9. using System.Linq;
  10. using System.Text;
  11. using System.Threading;
  12. using System.Xml;
  13. namespace MediaBrowser.Controller.Providers
  14. {
  15. /// <summary>
  16. /// Provides a base class for parsing metadata xml
  17. /// </summary>
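  18. /// <typeparam name="T">The type of item to populate; must derive from <see cref="BaseItem" /> and have a public parameterless constructor.</typeparam>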
  19. public class BaseItemXmlParser<T>
  20. where T : BaseItem, new()
  21. {
  /// <summary>
  /// Gets the logger that was handed to the constructor.
  /// </summary>
  protected ILogger Logger { get; private set; }

  /// <summary>
  /// Initializes a new instance of the <see cref="BaseItemXmlParser{T}" /> class.
  /// </summary>
  /// <param name="logger">The logger exposed to derived parsers through <see cref="Logger" />.</param>
  public BaseItemXmlParser(ILogger logger)
  {
      this.Logger = logger;
  }
  /// <summary>
  /// Fetches metadata for an item from one xml file
  /// </summary>
  /// <remarks>
  /// The item's taglines, studios, genres, people and tags are cleared before the file is read,
  /// then every element below the document's root element is handed to
  /// <see cref="FetchDataFromXmlNode" />.
  /// </remarks>
  /// <param name="item">The item to populate.</param>
  /// <param name="metadataFile">Path of the metadata file to read.</param>
  /// <param name="cancellationToken">The cancellation token; checked before each node is processed.</param>
  /// <exception cref="System.ArgumentNullException">
  /// <paramref name="item" /> is null, or <paramref name="metadataFile" /> is null or empty.
  /// </exception>
  public void Fetch(T item, string metadataFile, CancellationToken cancellationToken)
  {
      if (item == null)
      {
          throw new ArgumentNullException("item");
      }

      if (string.IsNullOrEmpty(metadataFile))
      {
          throw new ArgumentNullException("metadataFile");
      }

      var settings = new XmlReaderSettings
      {
          CheckCharacters = false,
          IgnoreProcessingInstructions = true,
          IgnoreComments = true,
          ValidationType = ValidationType.None
      };

      item.Taglines.Clear();
      item.Studios.Clear();
      item.Genres.Clear();
      item.People.Clear();
      item.Tags.Clear();

      // Use european encoding as it will accept more characters
      // NOTE(review): this decodes every file as ISO-8859-1 regardless of its xml declaration,
      // so non-ASCII text in UTF-8 files will be garbled - confirm this is still wanted.
      using (var streamReader = new StreamReader(metadataFile, Encoding.GetEncoding("ISO-8859-1")))
      {
          // Use XmlReader for best performance
          using (var reader = XmlReader.Create(streamReader, settings))
          {
              // Used to detect whether a handler moved the reader (see below)
              var position = reader as IXmlLineInfo;
              var canTrackPosition = position != null && position.HasLineInfo();

              // Position on the root element, then step to its first child node
              reader.MoveToContent();
              reader.Read();

              // Loop through each element
              while (!reader.EOF && reader.ReadState == ReadState.Interactive)
              {
                  cancellationToken.ThrowIfCancellationRequested();

                  if (reader.NodeType != XmlNodeType.Element)
                  {
                      reader.Read();
                      continue;
                  }

                  var line = canTrackPosition ? position.LineNumber : 0;
                  var column = canTrackPosition ? position.LinePosition : 0;

                  FetchDataFromXmlNode(reader, item);

                  // A handler that consumed the element already left the reader on the following
                  // node. Reading again here would silently drop the next sibling whenever the
                  // elements are not separated by whitespace. Only advance when the handler left
                  // the reader where it was (e.g. an empty element read through a subtree), or
                  // when the position cannot be tracked at all.
                  var handlerAdvanced = canTrackPosition
                      && (position.LineNumber != line || position.LinePosition != column);

                  if (!handlerAdvanced)
                  {
                      reader.Read();
                  }
              }
          }
      }
  }
  private readonly CultureInfo _usCulture = new CultureInfo("en-US");

  /// <summary>
  /// Fetches metadata from one Xml Element
  /// </summary>
  /// <remarks>
  /// Dispatches on the element name. Recognised elements are read and copied onto the item;
  /// unrecognised elements are skipped together with their content.
  /// </remarks>
  /// <param name="reader">The reader, positioned on the element to process.</param>
  /// <param name="item">The item.</param>
  protected virtual void FetchDataFromXmlNode(XmlReader reader, T item)
  {
      switch (reader.Name)
      {
          // DateCreated
          case "Added":
          {
              DateTime added;
              if (DateTime.TryParse(reader.ReadElementContentAsString() ?? string.Empty, out added))
              {
                  item.DateCreated = added.ToUniversalTime();
              }
              break;
          }

          case "LocalTitle":
              item.Name = reader.ReadElementContentAsString();
              break;

          case "Type":
          {
              var type = ReadNonBlankElementText(reader);
              if (type != null && !type.Equals("none", StringComparison.OrdinalIgnoreCase))
              {
                  item.DisplayMediaType = type;
              }
              break;
          }

          case "CriticRating":
          {
              float value;
              if (float.TryParse(reader.ReadElementContentAsString(), NumberStyles.Any, _usCulture, out value))
              {
                  item.CriticRating = value;
              }
              break;
          }

          case "Budget":
          {
              double value;
              if (double.TryParse(reader.ReadElementContentAsString(), NumberStyles.Any, _usCulture, out value))
              {
                  item.Budget = value;
              }
              break;
          }

          case "Revenue":
          {
              double value;
              if (double.TryParse(reader.ReadElementContentAsString(), NumberStyles.Any, _usCulture, out value))
              {
                  item.Revenue = value;
              }
              break;
          }

          case "SortTitle":
              item.ForcedSortName = reader.ReadElementContentAsString();
              break;

          case "Overview":
          case "Description":
          {
              var val = ReadNonBlankElementText(reader);
              if (val != null)
              {
                  item.Overview = val;
              }
              break;
          }

          case "CriticRatingSummary":
          {
              var val = ReadNonBlankElementText(reader);
              if (val != null)
              {
                  item.CriticRatingSummary = val;
              }
              break;
          }

          case "TagLine":
          {
              var tagline = ReadNonBlankElementText(reader);
              if (tagline != null)
              {
                  item.AddTagline(tagline);
              }
              break;
          }

          case "Website":
          {
              var val = ReadNonBlankElementText(reader);
              if (val != null)
              {
                  item.HomePageUrl = val;
              }
              break;
          }

          case "TagLines":
          {
              // Close the subtree reader before the parent reader is used again
              using (var subtree = reader.ReadSubtree())
              {
                  FetchFromTaglinesNode(subtree, item);
              }
              break;
          }

          case "ContentRating":
          case "certification":
          case "MPAARating":
          {
              var rating = ReadNonBlankElementText(reader);
              if (rating != null)
              {
                  item.OfficialRating = rating;
              }
              break;
          }

          case "MPAADescription":
          {
              var rating = ReadNonBlankElementText(reader);
              if (rating != null)
              {
                  item.OfficialRatingDescription = rating;
              }
              break;
          }

          case "CustomRating":
          {
              var val = ReadNonBlankElementText(reader);
              if (val != null)
              {
                  item.CustomRating = val;
              }
              break;
          }

          case "Runtime":
          case "RunningTime":
          {
              var text = ReadNonBlankElementText(reader);
              if (text != null)
              {
                  // Only the first token is the number of minutes (e.g. "120 min").
                  // Trim first so leading whitespace does not produce an empty first token.
                  int runtime;
                  if (int.TryParse(text.Trim().Split(' ')[0], NumberStyles.Integer, _usCulture, out runtime))
                  {
                      // For audio and video don't replace ffmpeg data
                      if (item is Video || item is Audio)
                      {
                          item.OriginalRunTimeTicks = TimeSpan.FromMinutes(runtime).Ticks;
                      }
                      else
                      {
                          item.RunTimeTicks = TimeSpan.FromMinutes(runtime).Ticks;
                      }
                  }
              }
              break;
          }

          case "Genre":
          {
              foreach (var name in SplitNames(reader.ReadElementContentAsString()))
              {
                  if (string.IsNullOrWhiteSpace(name))
                  {
                      continue;
                  }

                  // Splitting "a, b" leaves a leading blank on "b"
                  item.AddGenre(name.Trim());
              }
              break;
          }

          case "AspectRatio":
          {
              var val = ReadNonBlankElementText(reader);
              if (val != null)
              {
                  item.AspectRatio = val;
              }
              break;
          }

          case "Network":
          {
              foreach (var name in SplitNames(reader.ReadElementContentAsString()))
              {
                  if (string.IsNullOrWhiteSpace(name))
                  {
                      continue;
                  }

                  item.AddStudio(name.Trim());
              }
              break;
          }

          case "Director":
              AddPeopleFromDelimitedNames(item, reader.ReadElementContentAsString(), PersonType.Director);
              break;

          case "Writer":
              AddPeopleFromDelimitedNames(item, reader.ReadElementContentAsString(), PersonType.Writer);
              break;

          case "Actors":
          {
              var actors = reader.ReadInnerXml();

              if (actors.Contains("<"))
              {
                  // This is one of the mis-named "Actors" full nodes created by MB2
                  // Create a reader and pass it to the persons node processor
                  using (var personsReader = new XmlTextReader(new StringReader("<Persons>" + actors + "</Persons>")))
                  {
                      FetchDataFromPersonsNode(personsReader, item);
                  }
              }
              else
              {
                  // Old-style piped string
                  AddPeopleFromDelimitedNames(item, actors, PersonType.Actor);
              }
              break;
          }

          case "GuestStars":
              AddPeopleFromDelimitedNames(item, reader.ReadElementContentAsString(), PersonType.GuestStar);
              break;

          case "Trailer":
              // Trailer urls are not stored; the content is read only to move past the element
              reader.ReadElementContentAsString();
              break;

          case "ProductionYear":
          {
              var val = ReadNonBlankElementText(reader);
              if (val != null)
              {
                  int productionYear;
                  if (int.TryParse(val, NumberStyles.Integer, _usCulture, out productionYear) && productionYear > 1850)
                  {
                      item.ProductionYear = productionYear;
                  }
              }
              break;
          }

          case "Rating":
          case "IMDBrating":
          {
              var rating = ReadNonBlankElementText(reader);
              if (rating != null)
              {
                  float val;
                  // All external meta is saving this as '.' for decimal I believe...but just to be sure
                  // AllowDecimalPoint rejects surrounding whitespace, so trim before parsing
                  if (float.TryParse(rating.Trim().Replace(',', '.'), NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out val))
                  {
                      item.CommunityRating = val;
                  }
              }
              break;
          }

          case "PremiereDate":
          case "FirstAired":
          {
              var firstAired = ReadNonBlankElementText(reader);
              if (firstAired != null)
              {
                  DateTime airDate;
                  if (DateTime.TryParse(firstAired, out airDate) && airDate.Year > 1850)
                  {
                      item.PremiereDate = airDate.ToUniversalTime();
                      item.ProductionYear = airDate.Year;
                  }
              }
              break;
          }

          case "MusicbrainzId":
          {
              var mbz = ReadNonBlankElementText(reader);
              if (mbz != null)
              {
                  item.SetProviderId(MetadataProviders.Musicbrainz, mbz);
              }
              break;
          }

          case "RottenTomatoesId":
          {
              var rtId = ReadNonBlankElementText(reader);
              if (rtId != null)
              {
                  item.SetProviderId(MetadataProviders.RottenTomatoes, rtId);
              }
              break;
          }

          case "TMDbId":
          {
              var tmdb = ReadNonBlankElementText(reader);
              if (tmdb != null)
              {
                  item.SetProviderId(MetadataProviders.Tmdb, tmdb);
              }
              break;
          }

          case "CollectionNumber":
          {
              var tmdbCollection = ReadNonBlankElementText(reader);
              if (tmdbCollection != null)
              {
                  item.SetProviderId(MetadataProviders.TmdbCollection, tmdbCollection);
              }
              break;
          }

          case "TVcomId":
          {
              var tvcomId = ReadNonBlankElementText(reader);
              if (tvcomId != null)
              {
                  item.SetProviderId(MetadataProviders.Tvcom, tvcomId);
              }
              break;
          }

          case "IMDB_ID":
          case "IMDB":
          case "IMDbId":
          {
              var imDbId = ReadNonBlankElementText(reader);
              if (imDbId != null)
              {
                  item.SetProviderId(MetadataProviders.Imdb, imDbId);
              }
              break;
          }

          case "Genres":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchFromGenresNode(subtree, item);
              }
              break;
          }

          case "Tags":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchFromTagsNode(subtree, item);
              }
              break;
          }

          case "Persons":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchDataFromPersonsNode(subtree, item);
              }
              break;
          }

          case "ParentalRating":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchFromParentalRatingNode(subtree, item);
              }
              break;
          }

          case "Studios":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchFromStudiosNode(subtree, item);
              }
              break;
          }

          default:
              reader.Skip();
              break;
      }
  }

  /// <summary>
  /// Reads the current element's text content, mapping null, empty and whitespace-only content to null.
  /// </summary>
  /// <param name="reader">The reader, positioned on the element to read.</param>
  /// <returns>The element text, or null when it is blank.</returns>
  private static string ReadNonBlankElementText(XmlReader reader)
  {
      var text = reader.ReadElementContentAsString();

      return string.IsNullOrWhiteSpace(text) ? null : text;
  }

  /// <summary>
  /// Adds one person of the given type to the item for every non-blank name in a delimited list.
  /// </summary>
  /// <param name="item">The item.</param>
  /// <param name="names">The comma, pipe or semicolon delimited names.</param>
  /// <param name="personType">The person type assigned to every name.</param>
  private void AddPeopleFromDelimitedNames(T item, string names, string personType)
  {
      foreach (var name in SplitNames(names))
      {
          if (string.IsNullOrWhiteSpace(name))
          {
              continue;
          }

          item.AddPerson(new PersonInfo { Name = name.Trim(), Type = personType });
      }
  }
  /// <summary>
  /// Fetches from taglines node.
  /// </summary>
  /// <remarks>
  /// Adds the content of every non-blank <c>Tagline</c> child element to the item and skips
  /// any other child element.
  /// </remarks>
  /// <param name="reader">The reader over the taglines element.</param>
  /// <param name="item">The item.</param>
  private void FetchFromTaglinesNode(XmlReader reader, T item)
  {
      // Position on the container element, then step to its first child node
      reader.MoveToContent();
      reader.Read();

      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          // Both branches below consume the element and leave the reader on the node that
          // follows it, so the loop must not read again or an adjacent sibling would be lost.
          switch (reader.Name)
          {
              case "Tagline":
              {
                  var val = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(val))
                  {
                      item.AddTagline(val);
                  }
                  break;
              }
              default:
                  reader.Skip();
                  break;
          }
      }
  }
  /// <summary>
  /// Fetches from genres node.
  /// </summary>
  /// <remarks>
  /// Adds the content of every non-blank <c>Genre</c> child element to the item and skips
  /// any other child element.
  /// </remarks>
  /// <param name="reader">The reader over the genres element.</param>
  /// <param name="item">The item.</param>
  private void FetchFromGenresNode(XmlReader reader, T item)
  {
      // Position on the container element, then step to its first child node
      reader.MoveToContent();
      reader.Read();

      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          // Both branches below consume the element and leave the reader on the node that
          // follows it, so the loop must not read again or an adjacent sibling would be lost.
          switch (reader.Name)
          {
              case "Genre":
              {
                  var genre = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(genre))
                  {
                      item.AddGenre(genre);
                  }
                  break;
              }
              default:
                  reader.Skip();
                  break;
          }
      }
  }
  /// <summary>
  /// Fetches from tags node.
  /// </summary>
  /// <remarks>
  /// Adds the content of every non-blank <c>Tag</c> child element to the item's tags and skips
  /// any other child element.
  /// </remarks>
  /// <param name="reader">The reader over the tags element.</param>
  /// <param name="item">The item.</param>
  private void FetchFromTagsNode(XmlReader reader, T item)
  {
      // Position on the container element, then step to its first child node
      reader.MoveToContent();
      reader.Read();

      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          // Both branches below consume the element and leave the reader on the node that
          // follows it, so the loop must not read again or an adjacent sibling would be lost.
          switch (reader.Name)
          {
              case "Tag":
              {
                  var tag = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(tag))
                  {
                      // Tags belong in the item's tag collection, not in its taglines.
                      // NOTE(review): assumes item.Tags is a string collection exposing Add - confirm,
                      // and switch to a dedicated add method on the item if one exists.
                      item.Tags.Add(tag);
                  }
                  break;
              }
              default:
                  reader.Skip();
                  break;
          }
      }
  }
  /// <summary>
  /// Fetches the data from persons node.
  /// </summary>
  /// <remarks>
  /// Every <c>Person</c> or <c>Actor</c> child element is parsed by
  /// <see cref="GetPersonsFromXmlNode" /> and the resulting people are added to the item.
  /// Any other child element is skipped.
  /// </remarks>
  /// <param name="reader">The reader over the persons element.</param>
  /// <param name="item">The item.</param>
  private void FetchDataFromPersonsNode(XmlReader reader, T item)
  {
      // Position on the container element, then step to its first child node
      reader.MoveToContent();
      reader.Read();

      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          switch (reader.Name)
          {
              case "Person":
              case "Actor":
              {
                  // Close the subtree reader before this reader is used again
                  using (var subtree = reader.ReadSubtree())
                  {
                      foreach (var person in GetPersonsFromXmlNode(subtree))
                      {
                          item.AddPerson(person);
                      }
                  }

                  // Closing the subtree leaves this reader on the element's end tag (or on the
                  // element itself when it is empty); step past it so the loop makes progress.
                  reader.Read();
                  break;
              }
              default:
                  // Skip leaves the reader on the node that follows the skipped element
                  reader.Skip();
                  break;
          }
      }
  }
  /// <summary>
  /// Fetches from studios node.
  /// </summary>
  /// <remarks>
  /// Adds the content of every non-blank <c>Studio</c> child element to the item and skips
  /// any other child element.
  /// </remarks>
  /// <param name="reader">The reader over the studios element.</param>
  /// <param name="item">The item.</param>
  private void FetchFromStudiosNode(XmlReader reader, T item)
  {
      // Position on the container element, then step to its first child node
      reader.MoveToContent();
      reader.Read();

      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          // Both branches below consume the element and leave the reader on the node that
          // follows it, so the loop must not read again or an adjacent sibling would be lost.
          switch (reader.Name)
          {
              case "Studio":
              {
                  var studio = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(studio))
                  {
                      item.AddStudio(studio);
                  }
                  break;
              }
              default:
                  reader.Skip();
                  break;
          }
      }
  }
  /// <summary>
  /// Walks the parental rating node without extracting anything from it.
  /// </summary>
  /// <param name="reader">The reader over the parental rating element.</param>
  /// <param name="item">The item; not modified.</param>
  private void FetchFromParentalRatingNode(XmlReader reader, T item)
  {
      reader.MoveToContent();

      // Removed support for "Value" tag as it conflicted with MPAA rating but leaving this function for possible
      // future support of "Description" -ebr
      for (var hasNode = reader.Read(); hasNode; hasNode = reader.Read())
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              continue;
          }

          // No child element is currently recognised, so each one is skipped
          reader.Skip();
      }
  }
  /// <summary>
  /// Gets the persons from XML node.
  /// </summary>
  /// <remarks>
  /// Reads the <c>Name</c>, <c>Type</c> and <c>Role</c> child elements. A <c>Name</c> may hold
  /// several delimited names; one person is produced per non-blank name, all sharing the same
  /// type and role. Any other child element is skipped.
  /// </remarks>
  /// <param name="reader">The reader over a single person element.</param>
  /// <returns>IEnumerable{PersonInfo}.</returns>
  private IEnumerable<PersonInfo> GetPersonsFromXmlNode(XmlReader reader)
  {
      var names = new List<string>();
      var type = "Actor"; // If type is not specified assume actor
      var role = string.Empty;

      // Position on the person element, then step to its first child node
      reader.MoveToContent();
      reader.Read();

      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          // Every branch below consumes the element and leaves the reader on the node that
          // follows it, so the loop must not read again or an adjacent sibling would be lost.
          switch (reader.Name)
          {
              case "Name":
                  names.AddRange(SplitNames(reader.ReadElementContentAsString()));
                  break;
              case "Type":
              {
                  var val = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(val))
                  {
                      type = val;
                  }
                  break;
              }
              case "Role":
              {
                  var val = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(val))
                  {
                      role = val;
                  }
                  break;
              }
              default:
                  reader.Skip();
                  break;
          }
      }

      // Drop blank names and surrounding whitespace, as the other people paths in this class do
      return names
          .Where(n => !string.IsNullOrWhiteSpace(n))
          .Select(n => new PersonInfo { Name = n.Trim(), Role = role, Type = type })
          .ToList();
  }
  /// <summary>
  /// Used to split names of comma, pipe or semicolon delimited genres and people
  /// </summary>
  /// <remarks>
  /// Each returned name has surrounding whitespace removed, and blank entries are dropped.
  /// </remarks>
  /// <param name="value">The delimited value; null is treated as empty.</param>
  /// <returns>The individual names; empty when the value holds none.</returns>
  private IEnumerable<string> SplitNames(string value)
  {
      value = value ?? string.Empty;

      // Only split by comma if there is no pipe in the string
      // We have to be careful to not split names like Matthew, Jr.
      var separator = value.IndexOf('|') == -1 && value.IndexOf(';') == -1 ? new[] { ',' } : new[] { '|', ';' };

      // Splitting "a, b" leaves a leading blank on "b", so trim every entry and drop the
      // ones that turn out to be blank.
      return Split(value, separator, StringSplitOptions.RemoveEmptyEntries)
          .Select(name => name.Trim())
          .Where(name => name.Length > 0)
          .ToArray();
  }
  /// <summary>
  /// Splits a string on any of the given separator characters.
  /// </summary>
  /// <param name="val">The string to split.</param>
  /// <param name="separators">The characters that delimit the parts.</param>
  /// <param name="options">Whether empty parts are kept or removed.</param>
  /// <returns>System.String[].</returns>
  private static string[] Split(string val, char[] separators, StringSplitOptions options)
  {
      var parts = val.Split(separators, options);

      return parts;
  }
  685. }
  686. }