// BaseItemXmlParser.cs (28 KB)
  1. using MediaBrowser.Controller.Entities;
  2. using MediaBrowser.Controller.Entities.Audio;
  3. using MediaBrowser.Model.Entities;
  4. using MediaBrowser.Model.Logging;
  5. using System;
  6. using System.Collections.Generic;
  7. using System.Globalization;
  8. using System.IO;
  9. using System.Linq;
  10. using System.Text;
  11. using System.Threading;
  12. using System.Xml;
  13. namespace MediaBrowser.Controller.Providers
  14. {
  15. /// <summary>
  16. /// Provides a base class for parsing metadata xml
  17. /// </summary>
  18. /// <typeparam name="T"></typeparam>
  19. public class BaseItemXmlParser<T>
  20. where T : BaseItem, new()
  21. {
  /// <summary>
  /// Gets the logger that was handed to this parser when it was constructed.
  /// </summary>
  protected ILogger Logger { get; private set; }

  /// <summary>
  /// Creates a new <see cref="BaseItemXmlParser{T}" /> and stores the supplied logger.
  /// </summary>
  /// <param name="logger">The logger exposed through <see cref="Logger" />.</param>
  public BaseItemXmlParser(ILogger logger)
  {
      this.Logger = logger;
  }
  /// <summary>
  /// Fetches metadata for an item from one xml file.
  /// </summary>
  /// <remarks>
  /// The item's taglines, studios, genres, people and tags are cleared before the file is
  /// opened, so they end up holding only what the file provides. The file is decoded with
  /// <see cref="Encoding.UTF8" />.
  /// </remarks>
  /// <param name="item">The item to populate.</param>
  /// <param name="metadataFile">The path of the metadata file.</param>
  /// <param name="cancellationToken">The cancellation token, checked once per xml node.</param>
  /// <exception cref="System.ArgumentNullException">
  /// <paramref name="item" /> is null, or <paramref name="metadataFile" /> is null or empty.
  /// </exception>
  public void Fetch(T item, string metadataFile, CancellationToken cancellationToken)
  {
      if (item == null)
      {
          // Name the offending argument so the failure is diagnosable from the message alone.
          throw new ArgumentNullException("item");
      }

      if (string.IsNullOrEmpty(metadataFile))
      {
          throw new ArgumentNullException("metadataFile");
      }

      var settings = new XmlReaderSettings
      {
          CheckCharacters = false,
          IgnoreProcessingInstructions = true,
          IgnoreComments = true,
          ValidationType = ValidationType.None
      };

      // These collections are rebuilt from the file, so start from empty ones.
      item.Taglines.Clear();
      item.Studios.Clear();
      item.Genres.Clear();
      item.People.Clear();
      item.Tags.Clear();

      Fetch(item, metadataFile, settings, Encoding.UTF8, cancellationToken);
  }
  /// <summary>
  /// Opens the metadata file and passes every element found below the root element to
  /// <see cref="FetchDataFromXmlNode" />.
  /// </summary>
  /// <remarks>
  /// Each element is handed over through its own subtree reader. The handlers consume
  /// elements with calls such as <c>ReadElementContentAsString</c> and <c>Skip</c>, which
  /// leave a reader positioned on the node that follows the element. If they operated
  /// on the main reader, the loop's next <c>Read</c> would step over that node, and in xml
  /// written without whitespace between elements it would be the next sibling element.
  /// With a subtree reader the handler cannot advance past the end of its own element, and
  /// disposing the subtree reader discards whatever the handler left unread.
  /// </remarks>
  /// <param name="item">The item to populate.</param>
  /// <param name="metadataFile">The path of the metadata file.</param>
  /// <param name="settings">The settings used to create the xml reader.</param>
  /// <param name="encoding">The encoding used to decode the file.</param>
  /// <param name="cancellationToken">The cancellation token, checked once per xml node.</param>
  private void Fetch(T item, string metadataFile, XmlReaderSettings settings, Encoding encoding, CancellationToken cancellationToken)
  {
      using (var streamReader = new StreamReader(metadataFile, encoding))
      {
          // Use XmlReader for best performance
          using (var reader = XmlReader.Create(streamReader, settings))
          {
              // Position on the root element; the loop below walks its content.
              reader.MoveToContent();

              while (reader.Read())
              {
                  cancellationToken.ThrowIfCancellationRequested();

                  if (reader.NodeType != XmlNodeType.Element)
                  {
                      continue;
                  }

                  using (var elementReader = reader.ReadSubtree())
                  {
                      // A subtree reader starts before its first node; move onto the element itself.
                      elementReader.MoveToContent();
                      FetchDataFromXmlNode(elementReader, item);
                  }

                  // The main reader now sits on the element's end tag (or on the element itself
                  // when it is empty), so the next Read() lands on the following node.
              }
          }
      }
  }
  94. private readonly CultureInfo _usCulture = new CultureInfo("en-US");
  /// <summary>
  /// Fetches metadata from one Xml Element.
  /// </summary>
  /// <remarks>
  /// The reader must be positioned on an element. Every branch consumes that element, either
  /// by reading its content, by reading it through a subtree reader, or by skipping it.
  /// Values that are blank or fail to parse leave the item untouched.
  /// </remarks>
  /// <param name="reader">The reader, positioned on the element to process.</param>
  /// <param name="item">The item to populate.</param>
  protected virtual void FetchDataFromXmlNode(XmlReader reader, T item)
  {
      switch (reader.Name)
      {
          // DateCreated
          case "Added":
              {
                  DateTime added;

                  // NOTE(review): parsed with the current culture, unlike the numeric fields below.
                  if (DateTime.TryParse(reader.ReadElementContentAsString(), out added))
                  {
                      item.DateCreated = added.ToUniversalTime();
                  }
                  break;
              }

          case "LocalTitle":
              {
                  item.Name = reader.ReadElementContentAsString();
                  break;
              }

          case "Type":
              {
                  var type = reader.ReadElementContentAsString();

                  // The literal value "none" is treated the same as a missing value.
                  if (!string.IsNullOrWhiteSpace(type) && !type.Equals("none", StringComparison.OrdinalIgnoreCase))
                  {
                      item.DisplayMediaType = type;
                  }
                  break;
              }

          case "CriticRating":
              {
                  var text = reader.ReadElementContentAsString();
                  float value;

                  if (float.TryParse(text, NumberStyles.Any, _usCulture, out value))
                  {
                      item.CriticRating = value;
                  }
                  break;
              }

          case "Budget":
              {
                  var text = reader.ReadElementContentAsString();
                  double value;

                  if (double.TryParse(text, NumberStyles.Any, _usCulture, out value))
                  {
                      item.Budget = value;
                  }
                  break;
              }

          case "Revenue":
              {
                  var text = reader.ReadElementContentAsString();
                  double value;

                  if (double.TryParse(text, NumberStyles.Any, _usCulture, out value))
                  {
                      item.Revenue = value;
                  }
                  break;
              }

          case "SortTitle":
              {
                  item.ForcedSortName = reader.ReadElementContentAsString();
                  break;
              }

          case "Overview":
          case "Description":
              {
                  var val = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(val))
                  {
                      item.Overview = val;
                  }
                  break;
              }

          case "CriticRatingSummary":
              {
                  var val = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(val))
                  {
                      item.CriticRatingSummary = val;
                  }
                  break;
              }

          case "TagLine":
              {
                  var tagline = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(tagline))
                  {
                      item.AddTagline(tagline);
                  }
                  break;
              }

          case "Website":
              {
                  var val = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(val))
                  {
                      item.HomePageUrl = val;
                  }
                  break;
              }

          case "TagLines":
              {
                  // Disposing the subtree reader leaves the parent reader on the end of this element.
                  using (var subtree = reader.ReadSubtree())
                  {
                      FetchFromTaglinesNode(subtree, item);
                  }
                  break;
              }

          case "ContentRating":
          case "certification":
          case "MPAARating":
              {
                  var rating = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(rating))
                  {
                      item.OfficialRating = rating;
                  }
                  break;
              }

          case "MPAADescription":
              {
                  var rating = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(rating))
                  {
                      item.OfficialRatingDescription = rating;
                  }
                  break;
              }

          case "CustomRating":
              {
                  var val = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(val))
                  {
                      item.CustomRating = val;
                  }
                  break;
              }

          case "Runtime":
          case "RunningTime":
              {
                  var text = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(text))
                  {
                      // Only the first space-separated token is parsed, so a value such as
                      // "120 min" works. Trim first: a leading space would otherwise make
                      // that token empty.
                      var firstToken = text.Trim().Split(' ')[0];
                      int runtime;

                      if (int.TryParse(firstToken, NumberStyles.Integer, _usCulture, out runtime))
                      {
                          // The value is interpreted as minutes.
                          var ticks = TimeSpan.FromMinutes(runtime).Ticks;

                          // For audio and video don't replace ffmpeg data
                          if (item is Video || item is Audio)
                          {
                              item.OriginalRunTimeTicks = ticks;
                          }
                          else
                          {
                              item.RunTimeTicks = ticks;
                          }
                      }
                  }
                  break;
              }

          case "Genre":
              {
                  foreach (var name in SplitNames(reader.ReadElementContentAsString()))
                  {
                      if (string.IsNullOrWhiteSpace(name))
                      {
                          continue;
                      }

                      item.AddGenre(name.Trim());
                  }
                  break;
              }

          case "AspectRatio":
              {
                  var val = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(val))
                  {
                      item.AspectRatio = val;
                  }
                  break;
              }

          case "Network":
              {
                  foreach (var name in SplitNames(reader.ReadElementContentAsString()))
                  {
                      if (string.IsNullOrWhiteSpace(name))
                      {
                          continue;
                      }

                      item.AddStudio(name.Trim());
                  }
                  break;
              }

          case "Director":
              {
                  AddPeople(item, reader.ReadElementContentAsString(), PersonType.Director);
                  break;
              }

          case "Writer":
              {
                  AddPeople(item, reader.ReadElementContentAsString(), PersonType.Writer);
                  break;
              }

          case "Actors":
              {
                  var actors = reader.ReadInnerXml();

                  if (actors.Contains("<"))
                  {
                      // This is one of the mis-named "Actors" full nodes created by MB2.
                      // Wrap the markup in a Persons element and pass it to the persons node processor.
                      using (var personsReader = new XmlTextReader(new StringReader("<Persons>" + actors + "</Persons>")))
                      {
                          FetchDataFromPersonsNode(personsReader, item);
                      }
                  }
                  else
                  {
                      // Old-style piped string
                      AddPeople(item, actors, PersonType.Actor);
                  }
                  break;
              }

          case "GuestStars":
              {
                  AddPeople(item, reader.ReadElementContentAsString(), PersonType.GuestStar);
                  break;
              }

          case "Trailer":
              {
                  // Trailer urls are currently not stored; the element is consumed and ignored.
                  reader.Skip();
                  break;
              }

          case "ProductionYear":
              {
                  var val = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(val))
                  {
                      int productionYear;

                      // Years up to and including 1850 are rejected.
                      if (int.TryParse(val, NumberStyles.Integer, _usCulture, out productionYear) && productionYear > 1850)
                      {
                          item.ProductionYear = productionYear;
                      }
                  }
                  break;
              }

          case "Rating":
          case "IMDBrating":
              {
                  var rating = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(rating))
                  {
                      float val;

                      // All external meta is saving this as '.' for decimal I believe...but just to be sure
                      if (float.TryParse(rating.Replace(',', '.'), NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out val))
                      {
                          item.CommunityRating = val;
                      }
                  }
                  break;
              }

          case "PremiereDate":
          case "FirstAired":
              {
                  var firstAired = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(firstAired))
                  {
                      DateTime airDate;

                      // NOTE(review): parsed with the current culture. Dates from 1850 or earlier are rejected.
                      if (DateTime.TryParse(firstAired, out airDate) && airDate.Year > 1850)
                      {
                          item.PremiereDate = airDate.ToUniversalTime();

                          // The production year is taken from the parsed date before conversion to UTC.
                          item.ProductionYear = airDate.Year;
                      }
                  }
                  break;
              }

          case "MusicbrainzId":
              {
                  var mbz = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(mbz))
                  {
                      item.SetProviderId(MetadataProviders.Musicbrainz, mbz);
                  }
                  break;
              }

          case "RottenTomatoesId":
              {
                  var rtId = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(rtId))
                  {
                      item.SetProviderId(MetadataProviders.RottenTomatoes, rtId);
                  }
                  break;
              }

          case "TMDbId":
              {
                  var tmdb = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(tmdb))
                  {
                      item.SetProviderId(MetadataProviders.Tmdb, tmdb);
                  }
                  break;
              }

          case "CollectionNumber":
              {
                  var tmdbCollection = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(tmdbCollection))
                  {
                      item.SetProviderId(MetadataProviders.TmdbCollection, tmdbCollection);
                  }
                  break;
              }

          case "TVcomId":
              {
                  var tvcomId = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(tvcomId))
                  {
                      item.SetProviderId(MetadataProviders.Tvcom, tvcomId);
                  }
                  break;
              }

          case "IMDB_ID":
          case "IMDB":
          case "IMDbId":
              {
                  var imDbId = reader.ReadElementContentAsString();

                  if (!string.IsNullOrWhiteSpace(imDbId))
                  {
                      item.SetProviderId(MetadataProviders.Imdb, imDbId);
                  }
                  break;
              }

          case "Genres":
              {
                  using (var subtree = reader.ReadSubtree())
                  {
                      FetchFromGenresNode(subtree, item);
                  }
                  break;
              }

          case "Tags":
              {
                  using (var subtree = reader.ReadSubtree())
                  {
                      FetchFromTagsNode(subtree, item);
                  }
                  break;
              }

          case "Persons":
              {
                  using (var subtree = reader.ReadSubtree())
                  {
                      FetchDataFromPersonsNode(subtree, item);
                  }
                  break;
              }

          case "ParentalRating":
              {
                  using (var subtree = reader.ReadSubtree())
                  {
                      FetchFromParentalRatingNode(subtree, item);
                  }
                  break;
              }

          case "Studios":
              {
                  using (var subtree = reader.ReadSubtree())
                  {
                      FetchFromStudiosNode(subtree, item);
                  }
                  break;
              }

          default:
              {
                  // Unknown element: skip it together with all of its children.
                  reader.Skip();
                  break;
              }
      }
  }

  /// <summary>
  /// Splits a delimited list of names and adds every non-blank name, trimmed, to the item
  /// as a person of the given type.
  /// </summary>
  /// <param name="item">The item to add people to.</param>
  /// <param name="names">The delimited names, as accepted by <see cref="SplitNames" />.</param>
  /// <param name="personType">The value stored in <see cref="PersonInfo.Type" />.</param>
  private void AddPeople(T item, string names, string personType)
  {
      foreach (var name in SplitNames(names))
      {
          if (string.IsNullOrWhiteSpace(name))
          {
              continue;
          }

          item.AddPerson(new PersonInfo { Name = name.Trim(), Type = personType });
      }
  }
  /// <summary>
  /// Reads the Tagline children of a taglines node and adds each non-blank value to the item.
  /// </summary>
  /// <remarks>
  /// Reading or skipping an element already leaves the reader on the node that follows it,
  /// so the loop only calls <c>Read</c> for nodes that are not elements. Advancing
  /// unconditionally would step over the next sibling whenever no whitespace separates the
  /// elements.
  /// </remarks>
  /// <param name="reader">A reader scoped to the taglines node.</param>
  /// <param name="item">The item to add taglines to.</param>
  private void FetchFromTaglinesNode(XmlReader reader, T item)
  {
      reader.MoveToContent();

      // Step from the container element onto its first child node.
      reader.Read();

      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          if (reader.Name == "Tagline")
          {
              var val = reader.ReadElementContentAsString();

              if (!string.IsNullOrWhiteSpace(val))
              {
                  item.AddTagline(val);
              }
          }
          else
          {
              reader.Skip();
          }
      }
  }
  /// <summary>
  /// Reads the Genre children of a genres node and adds each non-blank value to the item.
  /// </summary>
  /// <remarks>
  /// Reading or skipping an element already leaves the reader on the node that follows it,
  /// so the loop only calls <c>Read</c> for nodes that are not elements. Advancing
  /// unconditionally would step over the next sibling whenever no whitespace separates the
  /// elements.
  /// </remarks>
  /// <param name="reader">A reader scoped to the genres node.</param>
  /// <param name="item">The item to add genres to.</param>
  private void FetchFromGenresNode(XmlReader reader, T item)
  {
      reader.MoveToContent();

      // Step from the container element onto its first child node.
      reader.Read();

      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          if (reader.Name == "Genre")
          {
              var genre = reader.ReadElementContentAsString();

              if (!string.IsNullOrWhiteSpace(genre))
              {
                  item.AddGenre(genre);
              }
          }
          else
          {
              reader.Skip();
          }
      }
  }
  /// <summary>
  /// Reads the Tag children of a tags node and adds each non-blank value to the item's tags.
  /// </summary>
  /// <remarks>
  /// Reading or skipping an element already leaves the reader on the node that follows it,
  /// so the loop only calls <c>Read</c> for nodes that are not elements. Advancing
  /// unconditionally would step over the next sibling whenever no whitespace separates the
  /// elements.
  /// </remarks>
  /// <param name="reader">A reader scoped to the tags node.</param>
  /// <param name="item">The item to add tags to.</param>
  private void FetchFromTagsNode(XmlReader reader, T item)
  {
      reader.MoveToContent();

      // Step from the container element onto its first child node.
      reader.Read();

      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          if (reader.Name == "Tag")
          {
              var tag = reader.ReadElementContentAsString();

              // Tags belong in item.Tags, not in the taglines. Each tag is stored once,
              // compared without regard to case.
              // NOTE(review): assumes item.Tags is a collection of strings - confirm against BaseItem.
              if (!string.IsNullOrWhiteSpace(tag) && !item.Tags.Contains(tag, StringComparer.OrdinalIgnoreCase))
              {
                  item.Tags.Add(tag);
              }
          }
          else
          {
              reader.Skip();
          }
      }
  }
  /// <summary>
  /// Reads the Person and Actor children of a persons node and adds the people they describe
  /// to the item.
  /// </summary>
  /// <remarks>
  /// The loop only calls <c>Read</c> when the current node has not already been consumed.
  /// Skipping an element leaves the reader on the node that follows it, so advancing again
  /// would step over the next sibling whenever no whitespace separates the elements.
  /// </remarks>
  /// <param name="reader">A reader whose content starts at the persons node.</param>
  /// <param name="item">The item to add people to.</param>
  private void FetchDataFromPersonsNode(XmlReader reader, T item)
  {
      reader.MoveToContent();

      // Step from the container element onto its first child node.
      reader.Read();

      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          switch (reader.Name)
          {
              case "Person":
              case "Actor":
                  {
                      using (var subtree = reader.ReadSubtree())
                      {
                          foreach (var person in GetPersonsFromXmlNode(subtree))
                          {
                              item.AddPerson(person);
                          }
                      }

                      // Closing the subtree reader leaves this reader on the element's end tag
                      // (or on the element itself when it is empty); step past it.
                      reader.Read();
                      break;
                  }

              default:
                  {
                      reader.Skip();
                      break;
                  }
          }
      }
  }
  /// <summary>
  /// Reads the Studio children of a studios node and adds each non-blank value to the item.
  /// </summary>
  /// <remarks>
  /// Reading or skipping an element already leaves the reader on the node that follows it,
  /// so the loop only calls <c>Read</c> for nodes that are not elements. Advancing
  /// unconditionally would step over the next sibling whenever no whitespace separates the
  /// elements.
  /// </remarks>
  /// <param name="reader">A reader scoped to the studios node.</param>
  /// <param name="item">The item to add studios to.</param>
  private void FetchFromStudiosNode(XmlReader reader, T item)
  {
      reader.MoveToContent();

      // Step from the container element onto its first child node.
      reader.Read();

      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          if (reader.Name == "Studio")
          {
              var studio = reader.ReadElementContentAsString();

              if (!string.IsNullOrWhiteSpace(studio))
              {
                  item.AddStudio(studio);
              }
          }
          else
          {
              reader.Skip();
          }
      }
  }
  /// <summary>
  /// Walks a parental rating node without taking any value from it.
  /// </summary>
  /// <param name="reader">A reader scoped to the parental rating node.</param>
  /// <param name="item">The item; currently left unchanged.</param>
  private void FetchFromParentalRatingNode(XmlReader reader, T item)
  {
      reader.MoveToContent();

      while (reader.Read())
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              continue;
          }

          // Removed support for "Value" tag as it conflicted with MPAA rating but leaving this function for possible
          // future support of "Description" -ebr
          reader.Skip();
      }
  }
  /// <summary>
  /// Builds the people described by a single person node.
  /// </summary>
  /// <remarks>
  /// A Name element may hold several delimited names. The node's Type and Role apply to every
  /// one of them, wherever those elements appear relative to Name. Blank names are dropped and
  /// the rest are trimmed.
  /// Reading or skipping an element already leaves the reader on the node that follows it,
  /// so the loop only calls <c>Read</c> for nodes that are not elements. Advancing
  /// unconditionally would step over the next sibling whenever no whitespace separates the
  /// elements.
  /// </remarks>
  /// <param name="reader">A reader scoped to the person node.</param>
  /// <returns>IEnumerable{PersonInfo}.</returns>
  private IEnumerable<PersonInfo> GetPersonsFromXmlNode(XmlReader reader)
  {
      var names = new List<string>();
      var type = "Actor";  // If type is not specified assume actor
      var role = string.Empty;

      reader.MoveToContent();

      // Step from the person element onto its first child node.
      reader.Read();

      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          switch (reader.Name)
          {
              case "Name":
                  {
                      var found = SplitNames(reader.ReadElementContentAsString())
                          .Select(n => n.Trim())
                          .Where(n => n.Length > 0);

                      names.AddRange(found);
                      break;
                  }

              case "Type":
                  {
                      var val = reader.ReadElementContentAsString();

                      if (!string.IsNullOrWhiteSpace(val))
                      {
                          type = val;
                      }
                      break;
                  }

              case "Role":
                  {
                      var val = reader.ReadElementContentAsString();

                      if (!string.IsNullOrWhiteSpace(val))
                      {
                          role = val;
                      }
                      break;
                  }

              default:
                  {
                      reader.Skip();
                      break;
                  }
          }
      }

      return names.Select(n => new PersonInfo { Name = n, Role = role, Type = type });
  }
  /// <summary>
  /// Used to split names of comma, pipe or semicolon delimited genres and people.
  /// </summary>
  /// <remarks>
  /// Each returned entry is trimmed, and entries that are empty after trimming are dropped.
  /// A null or blank value yields an empty result.
  /// </remarks>
  /// <param name="value">The delimited value; may be null.</param>
  /// <returns>IEnumerable{System.String}.</returns>
  private IEnumerable<string> SplitNames(string value)
  {
      value = value ?? string.Empty;

      // Only split by comma if there is no pipe or semicolon in the string.
      // When one of those is present, commas are kept as part of the name (e.g. "Matthew, Jr.").
      var hasPipeOrSemicolon = value.IndexOf('|') != -1 || value.IndexOf(';') != -1;
      var separator = hasPipeOrSemicolon ? new[] { '|', ';' } : new[] { ',' };

      // Drop surrounding whitespace first, then any leading or trailing separators.
      value = value.Trim().Trim(separator);

      if (string.IsNullOrWhiteSpace(value))
      {
          return new string[] { };
      }

      // Trim every entry: "A, B" must yield "B", not " B".
      return Split(value, separator, StringSplitOptions.RemoveEmptyEntries)
          .Select(entry => entry.Trim())
          .Where(entry => entry.Length > 0)
          .ToArray();
  }
  /// <summary>
  /// Thin wrapper over <see cref="string.Split(char[], StringSplitOptions)" />.
  /// </summary>
  /// <param name="val">The string to split.</param>
  /// <param name="separators">The characters to split on.</param>
  /// <param name="options">Whether empty entries are kept or removed.</param>
  /// <returns>System.String[].</returns>
  private static string[] Split(string val, char[] separators, StringSplitOptions options)
  {
      string[] parts = val.Split(separators, options);
      return parts;
  }
  697. }
  698. }