BaseItemXmlParser.cs 26 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756
  1. using MediaBrowser.Controller.Entities;
  2. using MediaBrowser.Controller.Entities.Audio;
  3. using MediaBrowser.Model.Entities;
  4. using MediaBrowser.Model.Logging;
  5. using System;
  6. using System.Collections.Generic;
  7. using System.Globalization;
  8. using System.IO;
  9. using System.Linq;
  10. using System.Text;
  11. using System.Threading;
  12. using System.Xml;
  13. namespace MediaBrowser.Controller.Providers
  14. {
  15. /// <summary>
  16. /// Provides a base class for parsing metadata xml
  17. /// </summary>
  18. /// <typeparam name="T"></typeparam>
  19. public class BaseItemXmlParser<T>
  20. where T : BaseItem, new()
  21. {
  /// <summary>
  /// Gets the logger that was supplied when the parser was constructed.
  /// </summary>
  protected ILogger Logger { get; private set; }

  /// <summary>
  /// Initializes a new instance of the <see cref="BaseItemXmlParser{T}" /> class.
  /// </summary>
  /// <param name="logger">The logger stored in <see cref="Logger" />.</param>
  public BaseItemXmlParser(ILogger logger)
  {
      this.Logger = logger;
  }
  /// <summary>
  /// Fetches metadata for an item from one xml file.
  /// </summary>
  /// <remarks>
  /// The item's taglines, studios, genres, people and tags are cleared before the file is opened.
  /// Every element node the read loop lands on is passed to <see cref="FetchDataFromXmlNode" />.
  /// </remarks>
  /// <param name="item">The item to populate.</param>
  /// <param name="metadataFile">Path of the metadata xml file.</param>
  /// <param name="cancellationToken">The cancellation token, checked once per node read.</param>
  /// <exception cref="System.ArgumentNullException">
  /// <paramref name="item" /> is null, or <paramref name="metadataFile" /> is null or empty.
  /// </exception>
  public void Fetch(T item, string metadataFile, CancellationToken cancellationToken)
  {
      if (item == null)
      {
          throw new ArgumentNullException("item");
      }

      if (string.IsNullOrEmpty(metadataFile))
      {
          throw new ArgumentNullException("metadataFile");
      }

      var settings = new XmlReaderSettings
      {
          CheckCharacters = false,
          IgnoreProcessingInstructions = true,
          IgnoreComments = true,
          ValidationType = ValidationType.None
      };

      // Start from a clean slate so values from a previous read are not duplicated
      item.Taglines.Clear();
      item.Studios.Clear();
      item.Genres.Clear();
      item.People.Clear();
      item.Tags.Clear();

      // Use european encoding as it will accept more characters
      using (var streamReader = new StreamReader(metadataFile, Encoding.GetEncoding("ISO-8859-1")))
      {
          // Use XmlReader for best performance
          using (var reader = XmlReader.Create(streamReader, settings))
          {
              reader.MoveToContent();

              // Loop through each element.
              // NOTE(review): the element handlers leave the reader on the node that follows the
              // element they consumed, and the Read() below then steps past that node. This looks
              // like it relies on a whitespace (or other non-element) node separating sibling
              // elements; with compact XML an element directly following another may be skipped.
              // Confirm before feeding non-indented files through this parser.
              while (reader.Read())
              {
                  cancellationToken.ThrowIfCancellationRequested();

                  if (reader.NodeType == XmlNodeType.Element)
                  {
                      FetchDataFromXmlNode(reader, item);
                  }
              }
          }
      }
  }
  /// <summary>
  /// Culture used when parsing the Budget, Revenue and Runtime values.
  /// </summary>
  private readonly CultureInfo _usCulture = new CultureInfo("en-US");

  /// <summary>
  /// Fetches metadata from one Xml Element.
  /// </summary>
  /// <remarks>
  /// Dispatches on the element name. Every branch consumes the current element: it reads the
  /// element content, reads its subtree, or skips it. Readers created for a subtree are closed
  /// before this method returns, so the parent reader is never used while a sub-reader is open.
  /// </remarks>
  /// <param name="reader">The reader, positioned on an element node.</param>
  /// <param name="item">The item to populate.</param>
  protected virtual void FetchDataFromXmlNode(XmlReader reader, T item)
  {
      switch (reader.Name)
      {
          // DateCreated
          case "Added":
          {
              DateTime added;
              if (DateTime.TryParse(reader.ReadElementContentAsString() ?? string.Empty, out added))
              {
                  item.DateCreated = added.ToUniversalTime();
              }
              break;
          }

          case "LocalTitle":
          {
              item.Name = reader.ReadElementContentAsString();
              break;
          }

          case "Type":
          {
              var type = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(type) && !type.Equals("none", StringComparison.OrdinalIgnoreCase))
              {
                  item.DisplayMediaType = type;
              }
              break;
          }

          case "Budget":
          {
              var text = reader.ReadElementContentAsString();
              double value;
              if (double.TryParse(text, NumberStyles.Any, _usCulture, out value))
              {
                  item.Budget = value;
              }
              break;
          }

          case "Revenue":
          {
              var text = reader.ReadElementContentAsString();
              double value;
              if (double.TryParse(text, NumberStyles.Any, _usCulture, out value))
              {
                  item.Revenue = value;
              }
              break;
          }

          case "SortTitle":
          {
              item.ForcedSortName = reader.ReadElementContentAsString();
              break;
          }

          case "Overview":
          case "Description":
          {
              var val = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(val))
              {
                  item.Overview = val;
              }
              break;
          }

          case "TagLine":
          {
              var tagline = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(tagline))
              {
                  item.AddTagline(tagline);
              }
              break;
          }

          case "Website":
          {
              var val = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(val))
              {
                  item.HomePageUrl = val;
              }
              break;
          }

          case "TagLines":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchFromTaglinesNode(subtree, item);
              }
              break;
          }

          case "ContentRating":
          case "certification":
          case "MPAARating":
          {
              var rating = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(rating))
              {
                  item.OfficialRating = rating;
              }
              break;
          }

          case "MPAADescription":
          {
              var rating = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(rating))
              {
                  item.OfficialRatingDescription = rating;
              }
              break;
          }

          case "CustomRating":
          {
              var val = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(val))
              {
                  item.CustomRating = val;
              }
              break;
          }

          case "Runtime":
          case "RunningTime":
          {
              var text = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(text))
              {
                  // Only the part before the first space is parsed; the number is treated as minutes
                  int runtime;
                  if (int.TryParse(text.Split(' ')[0], NumberStyles.Integer, _usCulture, out runtime))
                  {
                      // For audio and video don't replace ffmpeg data
                      if (item is Video || item is Audio)
                      {
                          item.OriginalRunTimeTicks = TimeSpan.FromMinutes(runtime).Ticks;
                      }
                      else
                      {
                          item.RunTimeTicks = TimeSpan.FromMinutes(runtime).Ticks;
                      }
                  }
              }
              break;
          }

          case "Genre":
          {
              foreach (var name in SplitNames(reader.ReadElementContentAsString()))
              {
                  if (!string.IsNullOrWhiteSpace(name))
                  {
                      item.AddGenre(name);
                  }
              }
              break;
          }

          case "AspectRatio":
          {
              var val = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(val))
              {
                  item.AspectRatio = val;
              }
              break;
          }

          case "Network":
          {
              foreach (var name in SplitNames(reader.ReadElementContentAsString()))
              {
                  if (!string.IsNullOrWhiteSpace(name))
                  {
                      item.AddStudio(name);
                  }
              }
              break;
          }

          case "Director":
          {
              foreach (var name in SplitNames(reader.ReadElementContentAsString()))
              {
                  if (!string.IsNullOrWhiteSpace(name))
                  {
                      // Trimmed like Actors/GuestStars so "A, B" does not yield a person named " B"
                      item.AddPerson(new PersonInfo { Name = name.Trim(), Type = PersonType.Director });
                  }
              }
              break;
          }

          case "Writer":
          {
              foreach (var name in SplitNames(reader.ReadElementContentAsString()))
              {
                  if (!string.IsNullOrWhiteSpace(name))
                  {
                      // Trimmed like Actors/GuestStars so "A, B" does not yield a person named " B"
                      item.AddPerson(new PersonInfo { Name = name.Trim(), Type = PersonType.Writer });
                  }
              }
              break;
          }

          case "Actors":
          {
              var actors = reader.ReadInnerXml();

              if (actors.Contains("<"))
              {
                  // This is one of the mis-named "Actors" full nodes created by MB2
                  // Create a reader and pass it to the persons node processor
                  using (var personsReader = new XmlTextReader(new StringReader("<Persons>" + actors + "</Persons>")))
                  {
                      FetchDataFromPersonsNode(personsReader, item);
                  }
              }
              else
              {
                  // Old-style piped string
                  foreach (var name in SplitNames(actors))
                  {
                      if (!string.IsNullOrWhiteSpace(name))
                      {
                          item.AddPerson(new PersonInfo { Name = name.Trim(), Type = PersonType.Actor });
                      }
                  }
              }
              break;
          }

          case "GuestStars":
          {
              foreach (var name in SplitNames(reader.ReadElementContentAsString()))
              {
                  if (!string.IsNullOrWhiteSpace(name))
                  {
                      item.AddPerson(new PersonInfo { Name = name.Trim(), Type = PersonType.GuestStar });
                  }
              }
              break;
          }

          case "Trailer":
          {
              // Trailer urls are not imported at the moment (the AddTrailerUrl call was disabled);
              // the content is still read so the reader moves past the element.
              reader.ReadElementContentAsString();
              break;
          }

          case "ProductionYear":
          {
              var val = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(val))
              {
                  int productionYear;
                  if (int.TryParse(val, out productionYear) && productionYear > 1850)
                  {
                      item.ProductionYear = productionYear;
                  }
              }
              break;
          }

          case "Rating":
          case "IMDBrating":
          {
              var rating = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(rating))
              {
                  float val;
                  // All external meta is saving this as '.' for decimal I believe...but just to be sure
                  if (float.TryParse(rating.Replace(',', '.'), NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out val))
                  {
                      item.CommunityRating = val;
                  }
              }
              break;
          }

          case "PremiereDate":
          case "FirstAired":
          {
              var firstAired = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(firstAired))
              {
                  DateTime airDate;
                  if (DateTime.TryParse(firstAired, out airDate) && airDate.Year > 1850)
                  {
                      item.PremiereDate = airDate.ToUniversalTime();
                      item.ProductionYear = airDate.Year;
                  }
              }
              break;
          }

          case "TMDbId":
          {
              var tmdb = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(tmdb))
              {
                  item.SetProviderId(MetadataProviders.Tmdb, tmdb);
              }
              break;
          }

          case "CollectionNumber":
          {
              var tmdbCollection = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(tmdbCollection))
              {
                  item.SetProviderId(MetadataProviders.TmdbCollection, tmdbCollection);
              }
              break;
          }

          case "TVcomId":
          {
              var tvComId = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(tvComId))
              {
                  item.SetProviderId(MetadataProviders.Tvcom, tvComId);
              }
              break;
          }

          case "IMDB_ID":
          case "IMDB":
          case "IMDbId":
          {
              var imDbId = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(imDbId))
              {
                  item.SetProviderId(MetadataProviders.Imdb, imDbId);
              }
              break;
          }

          case "Genres":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchFromGenresNode(subtree, item);
              }
              break;
          }

          case "Tags":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchFromTagsNode(subtree, item);
              }
              break;
          }

          case "Persons":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchDataFromPersonsNode(subtree, item);
              }
              break;
          }

          case "ParentalRating":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchFromParentalRatingNode(subtree, item);
              }
              break;
          }

          case "Studios":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchFromStudiosNode(subtree, item);
              }
              break;
          }

          default:
          {
              // Unknown element: skip it together with its children
              reader.Skip();
              break;
          }
      }
  }
  /// <summary>
  /// Reads the children of a taglines node and adds every non-blank "Tagline" value to the item.
  /// Any other child element is skipped.
  /// </summary>
  /// <param name="reader">The reader scoped to the taglines node.</param>
  /// <param name="item">The item receiving the taglines.</param>
  private void FetchFromTaglinesNode(XmlReader reader, T item)
  {
      reader.MoveToContent();

      while (reader.Read())
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              continue;
          }

          if (reader.Name != "Tagline")
          {
              reader.Skip();
              continue;
          }

          var tagline = reader.ReadElementContentAsString();
          if (!string.IsNullOrWhiteSpace(tagline))
          {
              item.AddTagline(tagline);
          }
      }
  }
  /// <summary>
  /// Reads the children of a genres node and adds every non-blank "Genre" value to the item.
  /// Any other child element is skipped.
  /// </summary>
  /// <param name="reader">The reader scoped to the genres node.</param>
  /// <param name="item">The item receiving the genres.</param>
  private void FetchFromGenresNode(XmlReader reader, T item)
  {
      reader.MoveToContent();

      while (reader.Read())
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              continue;
          }

          if (reader.Name != "Genre")
          {
              reader.Skip();
              continue;
          }

          var genreName = reader.ReadElementContentAsString();
          if (!string.IsNullOrWhiteSpace(genreName))
          {
              item.AddGenre(genreName);
          }
      }
  }
  /// <summary>
  /// Reads the children of a tags node and adds every non-blank "Tag" value to the item's tags.
  /// Any other child element is skipped.
  /// </summary>
  /// <param name="reader">The reader scoped to the tags node.</param>
  /// <param name="item">The item receiving the tags.</param>
  private void FetchFromTagsNode(XmlReader reader, T item)
  {
      reader.MoveToContent();

      while (reader.Read())
      {
          if (reader.NodeType == XmlNodeType.Element)
          {
              switch (reader.Name)
              {
                  case "Tag":
                  {
                      var tag = reader.ReadElementContentAsString();

                      if (!string.IsNullOrWhiteSpace(tag))
                      {
                          // Tags belong in item.Tags; they were previously added as taglines.
                          // NOTE(review): assumes item.Tags is a string collection exposing Add
                          // (Fetch already calls item.Tags.Clear()) - confirm against BaseItem.
                          if (!item.Tags.Contains(tag, StringComparer.OrdinalIgnoreCase))
                          {
                              item.Tags.Add(tag);
                          }
                      }
                      break;
                  }

                  default:
                      reader.Skip();
                      break;
              }
          }
      }
  }
  /// <summary>
  /// Reads the children of a persons node and adds the people described by each
  /// "Person" or "Actor" element to the item. Any other child element is skipped.
  /// </summary>
  /// <param name="reader">The reader scoped to the persons node.</param>
  /// <param name="item">The item receiving the people.</param>
  private void FetchDataFromPersonsNode(XmlReader reader, T item)
  {
      reader.MoveToContent();

      while (reader.Read())
      {
          if (reader.NodeType == XmlNodeType.Element)
          {
              switch (reader.Name)
              {
                  case "Person":
                  case "Actor":
                  {
                      // Close the sub-reader before the outer reader is used again
                      using (var personReader = reader.ReadSubtree())
                      {
                          foreach (var person in GetPersonsFromXmlNode(personReader))
                          {
                              item.AddPerson(person);
                          }
                      }
                      break;
                  }

                  default:
                      reader.Skip();
                      break;
              }
          }
      }
  }
  /// <summary>
  /// Reads the children of a studios node and adds every non-blank "Studio" value to the item.
  /// Any other child element is skipped.
  /// </summary>
  /// <param name="reader">The reader scoped to the studios node.</param>
  /// <param name="item">The item receiving the studios.</param>
  private void FetchFromStudiosNode(XmlReader reader, T item)
  {
      reader.MoveToContent();

      while (reader.Read())
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              continue;
          }

          if (reader.Name != "Studio")
          {
              reader.Skip();
              continue;
          }

          var studioName = reader.ReadElementContentAsString();
          if (!string.IsNullOrWhiteSpace(studioName))
          {
              item.AddStudio(studioName);
          }
      }
  }
  /// <summary>
  /// Walks a parental rating node, skipping every child element. Nothing is written to the item.
  /// </summary>
  /// <param name="reader">The reader scoped to the parental rating node.</param>
  /// <param name="item">The item (currently not modified).</param>
  private void FetchFromParentalRatingNode(XmlReader reader, T item)
  {
      reader.MoveToContent();

      while (reader.Read())
      {
          // Removed support for "Value" tag as it conflicted with MPAA rating but leaving this function for possible
          // future support of "Description" -ebr
          if (reader.NodeType == XmlNodeType.Element)
          {
              reader.Skip();
          }
      }
  }
  /// <summary>
  /// Gets the persons from XML node.
  /// </summary>
  /// <remarks>
  /// A "Name" element may hold several delimited names; each becomes its own person sharing
  /// the node's "Type" (defaults to "Actor") and "Role" (defaults to empty). Names are trimmed
  /// and blank names are dropped.
  /// </remarks>
  /// <param name="reader">The reader scoped to a single person node.</param>
  /// <returns>IEnumerable{PersonInfo}.</returns>
  private IEnumerable<PersonInfo> GetPersonsFromXmlNode(XmlReader reader)
  {
      var names = new List<string>();
      var type = "Actor"; // If type is not specified assume actor
      var role = string.Empty;

      reader.MoveToContent();

      while (reader.Read())
      {
          if (reader.NodeType == XmlNodeType.Element)
          {
              switch (reader.Name)
              {
                  case "Name":
                  {
                      foreach (var name in SplitNames(reader.ReadElementContentAsString()))
                      {
                          // Split entries can carry the whitespace that surrounded the delimiter
                          var trimmed = name.Trim();
                          if (trimmed.Length > 0)
                          {
                              names.Add(trimmed);
                          }
                      }
                      break;
                  }

                  case "Type":
                  {
                      var val = reader.ReadElementContentAsString();
                      if (!string.IsNullOrWhiteSpace(val))
                      {
                          type = val;
                      }
                      break;
                  }

                  case "Role":
                  {
                      var val = reader.ReadElementContentAsString();
                      if (!string.IsNullOrWhiteSpace(val))
                      {
                          role = val;
                      }
                      break;
                  }

                  default:
                      reader.Skip();
                      break;
              }
          }
      }

      // Materialized so repeated enumeration does not create new PersonInfo instances
      return names.Select(n => new PersonInfo { Name = n, Role = role, Type = type }).ToList();
  }
  /// <summary>
  /// Used to split names of comma, pipe or semicolon delimited genres and people.
  /// </summary>
  /// <remarks>
  /// Each returned entry is trimmed and empty entries are dropped, so "A, B" yields "A" and "B".
  /// </remarks>
  /// <param name="value">The delimited value; null is treated as empty.</param>
  /// <returns>The individual names, or an empty array when there are none.</returns>
  private IEnumerable<string> SplitNames(string value)
  {
      if (string.IsNullOrWhiteSpace(value))
      {
          return new string[] { };
      }

      // Only split by comma if there is no pipe or semicolon in the string
      // We have to be careful to not split names like Matthew, Jr.
      var separators = value.IndexOf('|') == -1 && value.IndexOf(';') == -1
          ? new[] { ',' }
          : new[] { '|', ';' };

      return value
          .Split(separators, StringSplitOptions.RemoveEmptyEntries)
          .Select(name => name.Trim())
          .Where(name => name.Length > 0)
          .ToArray();
  }
  /// <summary>
  /// Splits a string on the given separator characters by forwarding to string.Split.
  /// </summary>
  /// <param name="val">The string to split.</param>
  /// <param name="separators">The characters that delimit the parts.</param>
  /// <param name="options">The split options passed through to string.Split.</param>
  /// <returns>The parts of <paramref name="val" />.</returns>
  private static string[] Split(string val, char[] separators, StringSplitOptions options)
  {
      string[] parts = val.Split(separators, options);
      return parts;
  }
  652. }
  653. }