BaseItemXmlParser.cs 25 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723
  1. using System.Globalization;
  2. using System.IO;
  3. using System.Text.RegularExpressions;
  4. using MediaBrowser.Controller.Entities;
  5. using MediaBrowser.Model.Entities;
  6. using MediaBrowser.Model.Logging;
  7. using System;
  8. using System.Collections.Generic;
  9. using System.Linq;
  10. using System.Threading;
  11. using System.Xml;
  12. namespace MediaBrowser.Controller.Providers
  13. {
  14. /// <summary>
  15. /// Provides a base class for parsing metadata xml
  16. /// </summary>
  17. /// <typeparam name="T"></typeparam>
  18. public class BaseItemXmlParser<T>
  19. where T : BaseItem, new()
  20. {
  /// <summary>
  /// Gets the logger that was handed to the constructor.
  /// </summary>
  protected ILogger Logger { get; private set; }

  /// <summary>
  /// Initializes a new instance of the <see cref="BaseItemXmlParser{T}" /> class.
  /// </summary>
  /// <param name="logger">The logger exposed through <see cref="Logger" />.</param>
  public BaseItemXmlParser(ILogger logger)
  {
      this.Logger = logger;
  }
  /// <summary>
  /// Fetches metadata for an item from one xml file.
  /// The item's taglines, studios, genres and people are cleared first, then every
  /// element encountered in the file is passed to <see cref="FetchDataFromXmlNode" />.
  /// </summary>
  /// <param name="item">The item to populate.</param>
  /// <param name="metadataFile">The path of the metadata xml file.</param>
  /// <param name="cancellationToken">The cancellation token, checked once per node read.</param>
  /// <exception cref="System.ArgumentNullException">
  /// <paramref name="item" /> is null, or <paramref name="metadataFile" /> is null or empty.
  /// </exception>
  public void Fetch(T item, string metadataFile, CancellationToken cancellationToken)
  {
      if (item == null)
      {
          throw new ArgumentNullException("item");
      }

      // The exception type is kept for empty strings too, so existing catch blocks still apply.
      if (string.IsNullOrEmpty(metadataFile))
      {
          throw new ArgumentNullException("metadataFile");
      }

      var settings = new XmlReaderSettings
      {
          CheckCharacters = false,
          IgnoreProcessingInstructions = true,
          IgnoreComments = true,
          ValidationType = ValidationType.None
      };

      // Start from a clean slate so values from a previous read are not duplicated.
      item.Taglines.Clear();
      item.Studios.Clear();
      item.Genres.Clear();
      item.People.Clear();

      // Use XmlReader for best performance
      using (var reader = XmlReader.Create(metadataFile, settings))
      {
          reader.MoveToContent();

          // NOTE(review): handlers that consume an element leave the reader on the node
          // that follows it, and the Read() below then steps past that node. Sibling
          // elements written with no whitespace between them could therefore be missed.
          // The loop shape is kept because FetchDataFromXmlNode is virtual and overrides
          // may rely on it - confirm before changing.
          while (reader.Read())
          {
              cancellationToken.ThrowIfCancellationRequested();

              if (reader.NodeType == XmlNodeType.Element)
              {
                  FetchDataFromXmlNode(reader, item);
              }
          }
      }
  }
  76. private readonly CultureInfo _usCulture = new CultureInfo("en-US");
  /// <summary>
  /// Fetches metadata from one Xml Element.
  /// The element name selects which property of the item is populated; unknown
  /// elements are skipped. Every branch consumes the current element.
  /// </summary>
  /// <param name="reader">The reader, positioned on the element to process.</param>
  /// <param name="item">The item to populate.</param>
  protected virtual void FetchDataFromXmlNode(XmlReader reader, T item)
  {
      switch (reader.Name)
      {
          // DateCreated
          case "Added":
          {
              DateTime added;
              if (DateTime.TryParse(reader.ReadElementContentAsString() ?? string.Empty, out added))
              {
                  item.DateCreated = added.ToUniversalTime();
              }
              break;
          }

          case "LocalTitle":
          {
              item.Name = reader.ReadElementContentAsString();
              break;
          }

          case "Type":
          {
              var type = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(type) && !type.Equals("none", StringComparison.OrdinalIgnoreCase))
              {
                  item.DisplayMediaType = type;
              }
              break;
          }

          case "Budget":
          {
              var text = reader.ReadElementContentAsString();
              double value;
              if (double.TryParse(text, NumberStyles.Any, _usCulture, out value))
              {
                  item.Budget = value;
              }
              break;
          }

          case "Revenue":
          {
              var text = reader.ReadElementContentAsString();
              double value;
              if (double.TryParse(text, NumberStyles.Any, _usCulture, out value))
              {
                  item.Revenue = value;
              }
              break;
          }

          case "SortTitle":
          {
              item.ForcedSortName = reader.ReadElementContentAsString();
              break;
          }

          case "Overview":
          case "Description":
          {
              var overview = reader.ReadInnerXml();

              // Singleline lets '.' match line breaks, so a CDATA section that spans
              // several lines is unwrapped as well. The pattern is evaluated once.
              var cdata = Regex.Match(overview, @"\<\!\[CDATA\[(?<text>.*)\]\]\>", RegexOptions.Singleline);
              if (cdata.Success)
              {
                  overview = cdata.Groups["text"].Value;
              }

              item.Overview = overview;
              break;
          }

          case "TagLine":
          {
              var tagline = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(tagline))
              {
                  item.AddTagline(tagline);
              }
              break;
          }

          case "Website":
          {
              var val = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(val))
              {
                  item.HomePageUrl = val;
              }
              break;
          }

          case "TagLines":
          {
              // Dispose the subtree reader so the parent is left on a well-defined node.
              using (var subtree = reader.ReadSubtree())
              {
                  FetchFromTaglinesNode(subtree, item);
              }
              break;
          }

          case "ContentRating":
          case "certification":
          case "MPAARating":
          {
              var rating = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(rating))
              {
                  item.OfficialRating = rating;
              }
              break;
          }

          case "CustomRating":
          {
              var val = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(val))
              {
                  item.CustomRating = val;
              }
              break;
          }

          case "Runtime":
          case "RunningTime":
          {
              var text = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(text))
              {
                  // Only the first space-separated token is parsed, as a number of minutes.
                  int runtime;
                  if (int.TryParse(text.Split(' ')[0], out runtime))
                  {
                      item.RunTimeTicks = TimeSpan.FromMinutes(runtime).Ticks;
                  }
              }
              break;
          }

          case "Genre":
          {
              foreach (var name in SplitNames(reader.ReadElementContentAsString()))
              {
                  if (!string.IsNullOrWhiteSpace(name))
                  {
                      item.AddGenre(name);
                  }
              }
              break;
          }

          case "AspectRatio":
          {
              var val = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(val))
              {
                  item.AspectRatio = val;
              }
              break;
          }

          case "Network":
          {
              foreach (var name in SplitNames(reader.ReadElementContentAsString()))
              {
                  if (!string.IsNullOrWhiteSpace(name))
                  {
                      item.AddStudio(name);
                  }
              }
              break;
          }

          case "Director":
          {
              foreach (var name in SplitNames(reader.ReadElementContentAsString()))
              {
                  if (string.IsNullOrWhiteSpace(name))
                  {
                      continue;
                  }
                  item.AddPerson(new PersonInfo { Name = name, Type = PersonType.Director });
              }
              break;
          }

          case "Writer":
          {
              foreach (var name in SplitNames(reader.ReadElementContentAsString()))
              {
                  if (string.IsNullOrWhiteSpace(name))
                  {
                      continue;
                  }
                  item.AddPerson(new PersonInfo { Name = name, Type = PersonType.Writer });
              }
              break;
          }

          case "Actors":
          {
              var actors = reader.ReadInnerXml();
              if (actors.Contains("<"))
              {
                  // This is one of the mis-named "Actors" full nodes created by MB2
                  // Wrap the markup in a Persons element and hand it to the persons node processor
                  using (var text = new StringReader("<Persons>" + actors + "</Persons>"))
                  using (var personsReader = new XmlTextReader(text))
                  {
                      FetchDataFromPersonsNode(personsReader, item);
                  }
              }
              else
              {
                  // Old-style piped string
                  foreach (var name in SplitNames(actors))
                  {
                      if (string.IsNullOrWhiteSpace(name))
                      {
                          continue;
                      }
                      item.AddPerson(new PersonInfo { Name = name.Trim(), Type = PersonType.Actor });
                  }
              }
              break;
          }

          case "GuestStars":
          {
              foreach (var name in SplitNames(reader.ReadElementContentAsString()))
              {
                  if (string.IsNullOrWhiteSpace(name))
                  {
                      continue;
                  }
                  item.AddPerson(new PersonInfo { Name = name.Trim(), Type = PersonType.GuestStar });
              }
              break;
          }

          case "Trailer":
          {
              // The element is consumed so the reader advances, but the value is not
              // stored anywhere (the call that used it is disabled).
              reader.ReadElementContentAsString();
              break;
          }

          case "ProductionYear":
          {
              var val = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(val))
              {
                  int productionYear;
                  if (int.TryParse(val, out productionYear) && productionYear > 1850)
                  {
                      item.ProductionYear = productionYear;
                  }
              }
              break;
          }

          case "Rating":
          case "IMDBrating":
          {
              var rating = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(rating))
              {
                  // Parse with the same fixed culture as Budget/Revenue so that "7.5" is
                  // read the same way regardless of the machine's regional settings.
                  float val;
                  if (float.TryParse(rating, NumberStyles.Float | NumberStyles.AllowThousands, _usCulture, out val))
                  {
                      item.CommunityRating = val;
                  }
              }
              break;
          }

          case "FirstAired":
          {
              var firstAired = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(firstAired))
              {
                  DateTime airDate;
                  if (DateTime.TryParse(firstAired, out airDate) && airDate.Year > 1850)
                  {
                      item.PremiereDate = airDate.ToUniversalTime();
                      item.ProductionYear = airDate.Year;
                  }
              }
              break;
          }

          case "TMDbId":
          {
              var tmdbId = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(tmdbId))
              {
                  item.SetProviderId(MetadataProviders.Tmdb, tmdbId);
              }
              break;
          }

          case "TVcomId":
          {
              var tvcomId = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(tvcomId))
              {
                  item.SetProviderId(MetadataProviders.Tvcom, tvcomId);
              }
              break;
          }

          case "IMDB_ID":
          case "IMDB":
          case "IMDbId":
          {
              var imdbId = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(imdbId))
              {
                  item.SetProviderId(MetadataProviders.Imdb, imdbId);
              }
              break;
          }

          case "Genres":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchFromGenresNode(subtree, item);
              }
              break;
          }

          case "Persons":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchDataFromPersonsNode(subtree, item);
              }
              break;
          }

          case "ParentalRating":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchFromParentalRatingNode(subtree, item);
              }
              break;
          }

          case "Studios":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchFromStudiosNode(subtree, item);
              }
              break;
          }

          default:
          {
              reader.Skip();
              break;
          }
      }
  }
  /// <summary>
  /// Fetches from taglines node: adds every non-blank Tagline child to the item
  /// and skips any other child element.
  /// </summary>
  /// <param name="reader">The reader over the taglines node.</param>
  /// <param name="item">The item.</param>
  private void FetchFromTaglinesNode(XmlReader reader, T item)
  {
      reader.MoveToContent();
      reader.Read();

      // ReadElementContentAsString and Skip already leave the reader on the node after
      // the element, so the reader is only advanced explicitly for non-element nodes.
      // Advancing unconditionally would step over a sibling that directly follows.
      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          switch (reader.Name)
          {
              case "Tagline":
              {
                  var val = reader.ReadElementContentAsString();
                  if (!string.IsNullOrWhiteSpace(val))
                  {
                      item.AddTagline(val);
                  }
                  break;
              }
              default:
                  reader.Skip();
                  break;
          }
      }
  }
  /// <summary>
  /// Fetches from genres node: adds every non-blank Genre child to the item
  /// and skips any other child element.
  /// </summary>
  /// <param name="reader">The reader over the genres node.</param>
  /// <param name="item">The item.</param>
  private void FetchFromGenresNode(XmlReader reader, T item)
  {
      reader.MoveToContent();
      reader.Read();

      // ReadElementContentAsString and Skip already leave the reader on the node after
      // the element, so the reader is only advanced explicitly for non-element nodes.
      // Advancing unconditionally would step over a sibling that directly follows.
      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          switch (reader.Name)
          {
              case "Genre":
              {
                  var genre = reader.ReadElementContentAsString();
                  if (!string.IsNullOrWhiteSpace(genre))
                  {
                      item.AddGenre(genre);
                  }
                  break;
              }
              default:
                  reader.Skip();
                  break;
          }
      }
  }
  /// <summary>
  /// Fetches the data from persons node: every Person or Actor child is parsed with
  /// <see cref="GetPersonsFromXmlNode" /> and the resulting people are added to the item.
  /// Any other child element is skipped.
  /// </summary>
  /// <param name="reader">The reader over the persons node.</param>
  /// <param name="item">The item.</param>
  private void FetchDataFromPersonsNode(XmlReader reader, T item)
  {
      reader.MoveToContent();
      reader.Read();

      // Skip already leaves the reader on the node after the element, so the reader is
      // only advanced explicitly for non-element nodes. Advancing unconditionally would
      // step over a sibling that directly follows a skipped element.
      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          switch (reader.Name)
          {
              case "Person":
              case "Actor":
              {
                  if (reader.IsEmptyElement)
                  {
                      // Nothing to parse; step off the element so the loop cannot revisit it.
                      reader.Read();
                      break;
                  }

                  // Closing the subtree reader leaves this reader on the element's end tag,
                  // which the loop then steps past as a non-element node.
                  using (var subtree = reader.ReadSubtree())
                  {
                      foreach (var person in GetPersonsFromXmlNode(subtree))
                      {
                          item.AddPerson(person);
                      }
                  }
                  break;
              }
              default:
                  reader.Skip();
                  break;
          }
      }
  }
  /// <summary>
  /// Fetches from studios node: adds every non-blank Studio child to the item
  /// and skips any other child element.
  /// </summary>
  /// <param name="reader">The reader over the studios node.</param>
  /// <param name="item">The item.</param>
  private void FetchFromStudiosNode(XmlReader reader, T item)
  {
      reader.MoveToContent();
      reader.Read();

      // ReadElementContentAsString and Skip already leave the reader on the node after
      // the element, so the reader is only advanced explicitly for non-element nodes.
      // Advancing unconditionally would step over a sibling that directly follows.
      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          switch (reader.Name)
          {
              case "Studio":
              {
                  var studio = reader.ReadElementContentAsString();
                  if (!string.IsNullOrWhiteSpace(studio))
                  {
                      item.AddStudio(studio);
                  }
                  break;
              }
              default:
                  reader.Skip();
                  break;
          }
      }
  }
  /// <summary>
  /// Fetches from parental rating node: reads the numeric Value child and, when it is
  /// one of the known codes, stores the matching label in the item's OfficialRating.
  /// Blank, unparsable or unmapped values leave OfficialRating untouched.
  /// </summary>
  /// <param name="reader">The reader over the parental rating node.</param>
  /// <param name="item">The item.</param>
  private void FetchFromParentalRatingNode(XmlReader reader, T item)
  {
      reader.MoveToContent();
      reader.Read();

      // ReadElementContentAsString and Skip already leave the reader on the node after
      // the element, so the reader is only advanced explicitly for non-element nodes.
      // Advancing unconditionally would step over a sibling that directly follows.
      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          switch (reader.Name)
          {
              case "Value":
              {
                  var ratingString = reader.ReadElementContentAsString();

                  // int.TryParse writes 0 to its out argument on failure, so the result
                  // must be checked; otherwise unparsable text would be treated as code 0.
                  int rating;
                  if (string.IsNullOrWhiteSpace(ratingString) || !int.TryParse(ratingString, out rating))
                  {
                      break;
                  }

                  switch (rating)
                  {
                      case -1:
                          item.OfficialRating = "NR";
                          break;
                      case 0:
                          item.OfficialRating = "UR";
                          break;
                      case 1:
                          item.OfficialRating = "G";
                          break;
                      case 3:
                          item.OfficialRating = "PG";
                          break;
                      case 4:
                          item.OfficialRating = "PG-13";
                          break;
                      case 5:
                          item.OfficialRating = "NC-17";
                          break;
                      case 6:
                          item.OfficialRating = "R";
                          break;
                  }
                  break;
              }
              default:
                  reader.Skip();
                  break;
          }
      }
  }
  /// <summary>
  /// Gets the persons from XML node. The Name child may hold several delimited names;
  /// one <see cref="PersonInfo" /> is produced per name, all sharing the Type and Role
  /// read from the same node.
  /// </summary>
  /// <param name="reader">The reader over a single person node.</param>
  /// <returns>IEnumerable{PersonInfo}.</returns>
  private IEnumerable<PersonInfo> GetPersonsFromXmlNode(XmlReader reader)
  {
      var names = new List<string>();
      var type = "Actor"; // If type is not specified assume actor
      var role = string.Empty;

      reader.MoveToContent();
      reader.Read();

      // ReadElementContentAsString and Skip already leave the reader on the node after
      // the element, so the reader is only advanced explicitly for non-element nodes.
      // Advancing unconditionally would step over a sibling that directly follows
      // (for example a Type element written right after the Name element).
      while (!reader.EOF && reader.ReadState == ReadState.Interactive)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          switch (reader.Name)
          {
              case "Name":
                  names.AddRange(SplitNames(reader.ReadElementContentAsString()));
                  break;
              case "Type":
              {
                  var val = reader.ReadElementContentAsString();
                  if (!string.IsNullOrWhiteSpace(val))
                  {
                      type = val;
                  }
                  break;
              }
              case "Role":
              {
                  var val = reader.ReadElementContentAsString();
                  if (!string.IsNullOrWhiteSpace(val))
                  {
                      role = val;
                  }
                  break;
              }
              default:
                  reader.Skip();
                  break;
          }
      }

      return names.Select(n => new PersonInfo { Name = n, Role = role, Type = type });
  }
  /// <summary>
  /// Splits a comma- or pipe-delimited list of genres or people into its parts.
  /// </summary>
  /// <param name="value">The delimited text; null is treated as empty.</param>
  /// <returns>The non-empty parts, or an empty array when nothing is left after trimming.</returns>
  private IEnumerable<string> SplitNames(string value)
  {
      var text = value ?? string.Empty;

      // A pipe anywhere in the text wins; commas are only used as the delimiter when
      // no pipe is present, so names such as "Matthew, Jr." survive in piped lists.
      char delimiter;
      if (text.IndexOf('|') >= 0)
      {
          delimiter = '|';
      }
      else
      {
          delimiter = ',';
      }

      // Drop surrounding whitespace first, then any leading/trailing delimiters.
      text = text.Trim().Trim(delimiter);

      if (string.IsNullOrWhiteSpace(text))
      {
          return new string[] { };
      }

      return Split(text, delimiter, StringSplitOptions.RemoveEmptyEntries);
  }
  /// <summary>
  /// Convenience wrapper around string.Split that takes a single separator character
  /// together with split options.
  /// </summary>
  /// <param name="val">The text to split.</param>
  /// <param name="separator">The separator character.</param>
  /// <param name="options">The split options passed through to string.Split.</param>
  /// <returns>The resulting parts as a string array.</returns>
  private static string[] Split(string val, char separator, StringSplitOptions options)
  {
      var separators = new char[] { separator };
      return val.Split(separators, options);
  }
  625. }
  626. }