// BaseItemXmlParser.cs (25 KB)
  1. using System.Globalization;
  2. using System.IO;
  3. using System.Text;
  4. using System.Text.RegularExpressions;
  5. using MediaBrowser.Controller.Entities;
  6. using MediaBrowser.Model.Entities;
  7. using MediaBrowser.Model.Logging;
  8. using System;
  9. using System.Collections.Generic;
  10. using System.Linq;
  11. using System.Threading;
  12. using System.Xml;
  13. namespace MediaBrowser.Controller.Providers
  14. {
  15. /// <summary>
  16. /// Provides a base class for parsing metadata xml
  17. /// </summary>
  18. /// <typeparam name="T"></typeparam>
  19. public class BaseItemXmlParser<T>
  20. where T : BaseItem, new()
  21. {
  /// <summary>
  /// Gets the logger that was supplied to the constructor.
  /// </summary>
  /// <value>The logger.</value>
  protected ILogger Logger
  {
      get;
      private set;
  }
  /// <summary>
  /// Initializes a new instance of the <see cref="BaseItemXmlParser{T}" /> class
  /// and stores the supplied logger in <see cref="Logger" />.
  /// </summary>
  /// <param name="logger">The logger to store.</param>
  public BaseItemXmlParser(ILogger logger)
  {
      this.Logger = logger;
  }
  /// <summary>
  /// Fetches metadata for an item from one xml file.
  /// </summary>
  /// <remarks>
  /// The item's taglines, studios, genres and people are cleared before the file is
  /// opened, so those lists are replaced by whatever the file contains.
  /// Every element below the root is handed to <see cref="FetchDataFromXmlNode" />.
  /// </remarks>
  /// <param name="item">The item to populate.</param>
  /// <param name="metadataFile">The path of the metadata file.</param>
  /// <param name="cancellationToken">The cancellation token; checked once per node visited.</param>
  /// <exception cref="System.ArgumentNullException">
  /// <paramref name="item" /> is null, or <paramref name="metadataFile" /> is null or empty.
  /// </exception>
  public void Fetch(T item, string metadataFile, CancellationToken cancellationToken)
  {
      if (item == null)
      {
          throw new ArgumentNullException("item");
      }

      if (string.IsNullOrEmpty(metadataFile))
      {
          throw new ArgumentNullException("metadataFile");
      }

      var settings = new XmlReaderSettings
      {
          CheckCharacters = false,
          IgnoreProcessingInstructions = true,
          IgnoreComments = true,
          ValidationType = ValidationType.None
      };

      item.Taglines.Clear();
      item.Studios.Clear();
      item.Genres.Clear();
      item.People.Clear();

      // Use european encoding as it will accept more characters
      using (var streamReader = new StreamReader(metadataFile, Encoding.GetEncoding("ISO-8859-1")))
      {
          // Use XmlReader for best performance
          using (var reader = XmlReader.Create(streamReader, settings))
          {
              // Line/column information is used below to tell whether a handler moved the reader.
              var position = reader as IXmlLineInfo;
              var canTrackPosition = position != null && position.HasLineInfo();

              reader.MoveToContent();
              reader.Read();

              // Loop through each element
              while (!reader.EOF)
              {
                  cancellationToken.ThrowIfCancellationRequested();

                  if (reader.NodeType != XmlNodeType.Element)
                  {
                      reader.Read();
                      continue;
                  }

                  var line = canTrackPosition ? position.LineNumber : 0;
                  var column = canTrackPosition ? position.LinePosition : 0;

                  FetchDataFromXmlNode(reader, item);

                  // Handlers that call ReadElementContentAsString/Skip already leave the reader on the
                  // node after the element. Calling Read() again would jump over an element that
                  // follows immediately (xml written without whitespace between elements).
                  // Only advance when the handler left the reader on the element it was given;
                  // if the position cannot be tracked, always advance as the previous loop did.
                  var handlerDidNotAdvance = !canTrackPosition ||
                      (reader.NodeType == XmlNodeType.Element &&
                       position.LineNumber == line &&
                       position.LinePosition == column);

                  if (handlerDidNotAdvance)
                  {
                      reader.Read();
                  }
              }
          }
      }
  }
  /// <summary>
  /// The en-US culture used to parse the Budget and Revenue values.
  /// </summary>
  private readonly CultureInfo _usCulture = new CultureInfo("en-US");

  /// <summary>
  /// Fetches metadata from one Xml Element.
  /// </summary>
  /// <remarks>
  /// The element is selected by <c>reader.Name</c>. Simple elements are consumed with
  /// <c>ReadElementContentAsString</c>, unknown elements are skipped, and container
  /// elements (TagLines, Genres, Persons, ParentalRating, Studios) are parsed through a
  /// subtree reader. Disposing that subtree reader leaves <paramref name="reader" /> on the
  /// container's end tag, or on the element itself when it is empty.
  /// </remarks>
  /// <param name="reader">The reader, positioned on the element to process.</param>
  /// <param name="item">The item that receives the parsed values.</param>
  protected virtual void FetchDataFromXmlNode(XmlReader reader, T item)
  {
      switch (reader.Name)
      {
          // DateCreated
          case "Added":
          {
              // NOTE(review): parsed with the current culture, as before - confirm how writers format this value.
              DateTime added;
              if (DateTime.TryParse(reader.ReadElementContentAsString() ?? string.Empty, out added))
              {
                  item.DateCreated = added.ToUniversalTime();
              }
              break;
          }

          case "LocalTitle":
              item.Name = reader.ReadElementContentAsString();
              break;

          case "Type":
          {
              var type = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(type) && !type.Equals("none", StringComparison.OrdinalIgnoreCase))
              {
                  item.DisplayMediaType = type;
              }
              break;
          }

          case "Budget":
          {
              var text = reader.ReadElementContentAsString();
              double value;
              if (double.TryParse(text, NumberStyles.Any, _usCulture, out value))
              {
                  item.Budget = value;
              }
              break;
          }

          case "Revenue":
          {
              var text = reader.ReadElementContentAsString();
              double value;
              if (double.TryParse(text, NumberStyles.Any, _usCulture, out value))
              {
                  item.Revenue = value;
              }
              break;
          }

          case "SortTitle":
              item.ForcedSortName = reader.ReadElementContentAsString();
              break;

          case "Overview":
          case "Description":
          {
              var val = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(val))
              {
                  item.Overview = val;
              }
              break;
          }

          case "TagLine":
          {
              var tagline = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(tagline))
              {
                  item.AddTagline(tagline);
              }
              break;
          }

          case "Website":
          {
              var val = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(val))
              {
                  item.HomePageUrl = val;
              }
              break;
          }

          case "TagLines":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchFromTaglinesNode(subtree, item);
              }
              break;
          }

          case "ContentRating":
          case "certification":
          case "MPAARating":
          {
              var rating = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(rating))
              {
                  item.OfficialRating = rating;
              }
              break;
          }

          case "CustomRating":
          {
              var val = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(val))
              {
                  item.CustomRating = val;
              }
              break;
          }

          case "Runtime":
          case "RunningTime":
          {
              var text = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(text))
              {
                  // Only the text before the first space is parsed, so anything after the number is ignored.
                  // Trimming first keeps a leading space from producing an empty first token.
                  var minutesText = text.Trim().Split(' ')[0];
                  int runtime;
                  if (int.TryParse(minutesText, NumberStyles.Integer, CultureInfo.InvariantCulture, out runtime))
                  {
                      item.RunTimeTicks = TimeSpan.FromMinutes(runtime).Ticks;
                  }
              }
              break;
          }

          case "Genre":
          {
              foreach (var name in SplitNames(reader.ReadElementContentAsString()))
              {
                  if (!string.IsNullOrWhiteSpace(name))
                  {
                      item.AddGenre(name.Trim());
                  }
              }
              break;
          }

          case "AspectRatio":
          {
              var val = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(val))
              {
                  item.AspectRatio = val;
              }
              break;
          }

          case "Network":
          {
              foreach (var name in SplitNames(reader.ReadElementContentAsString()))
              {
                  if (!string.IsNullOrWhiteSpace(name))
                  {
                      item.AddStudio(name.Trim());
                  }
              }
              break;
          }

          case "Director":
              AddPeople(item, SplitNames(reader.ReadElementContentAsString()), PersonType.Director);
              break;

          case "Writer":
              AddPeople(item, SplitNames(reader.ReadElementContentAsString()), PersonType.Writer);
              break;

          case "Actors":
          {
              var actors = reader.ReadInnerXml();
              if (actors.Contains("<"))
              {
                  // This is one of the mis-named "Actors" full nodes created by MB2
                  // Create a reader and pass it to the persons node processor
                  using (var personsReader = new XmlTextReader(new StringReader("<Persons>" + actors + "</Persons>")))
                  {
                      FetchDataFromPersonsNode(personsReader, item);
                  }
              }
              else
              {
                  // Old-style piped string
                  AddPeople(item, SplitNames(actors), PersonType.Actor);
              }
              break;
          }

          case "GuestStars":
              AddPeople(item, SplitNames(reader.ReadElementContentAsString()), PersonType.GuestStar);
              break;

          case "Trailer":
          {
              // The value is read so the element is consumed, but it is not stored:
              // the call that added it as a trailer url (item.AddTrailerUrl) is disabled.
              reader.ReadElementContentAsString();
              break;
          }

          case "ProductionYear":
          {
              var val = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(val))
              {
                  int productionYear;
                  if (int.TryParse(val, NumberStyles.Integer, CultureInfo.InvariantCulture, out productionYear) && productionYear > 1850)
                  {
                      item.ProductionYear = productionYear;
                  }
              }
              break;
          }

          case "Rating":
          case "IMDBrating":
          {
              var rating = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(rating))
              {
                  float val;
                  // All external meta is saving this as '.' for decimal I believe...but just to be sure
                  // AllowDecimalPoint does not accept surrounding whitespace, hence the Trim.
                  if (float.TryParse(rating.Trim().Replace(',', '.'), NumberStyles.AllowDecimalPoint, CultureInfo.InvariantCulture, out val))
                  {
                      item.CommunityRating = val;
                  }
              }
              break;
          }

          case "FirstAired":
          {
              var firstAired = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(firstAired))
              {
                  // NOTE(review): parsed with the current culture, as before - confirm the expected format.
                  DateTime airDate;
                  if (DateTime.TryParse(firstAired, out airDate) && airDate.Year > 1850)
                  {
                      item.PremiereDate = airDate.ToUniversalTime();
                      item.ProductionYear = airDate.Year;
                  }
              }
              break;
          }

          case "TMDbId":
          {
              var tmdb = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(tmdb))
              {
                  item.SetProviderId(MetadataProviders.Tmdb, tmdb);
              }
              break;
          }

          case "TVcomId":
          {
              var tvcomId = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(tvcomId))
              {
                  item.SetProviderId(MetadataProviders.Tvcom, tvcomId);
              }
              break;
          }

          case "IMDB_ID":
          case "IMDB":
          case "IMDbId":
          {
              var imdbId = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(imdbId))
              {
                  item.SetProviderId(MetadataProviders.Imdb, imdbId);
              }
              break;
          }

          case "Genres":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchFromGenresNode(subtree, item);
              }
              break;
          }

          case "Persons":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchDataFromPersonsNode(subtree, item);
              }
              break;
          }

          case "ParentalRating":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchFromParentalRatingNode(subtree, item);
              }
              break;
          }

          case "Studios":
          {
              using (var subtree = reader.ReadSubtree())
              {
                  FetchFromStudiosNode(subtree, item);
              }
              break;
          }

          default:
              reader.Skip();
              break;
      }
  }

  /// <summary>
  /// Adds one person of the given type to the item for every non-blank name, trimming each name.
  /// </summary>
  /// <param name="item">The item that receives the people.</param>
  /// <param name="names">The candidate names.</param>
  /// <param name="personType">The value stored in <c>PersonInfo.Type</c>.</param>
  private static void AddPeople(T item, IEnumerable<string> names, string personType)
  {
      foreach (var name in names)
      {
          if (string.IsNullOrWhiteSpace(name))
          {
              continue;
          }

          item.AddPerson(new PersonInfo { Name = name.Trim(), Type = personType });
      }
  }
  /// <summary>
  /// Fetches from taglines node: adds every non-blank <c>Tagline</c> child as a tagline
  /// and skips any other child element.
  /// </summary>
  /// <param name="reader">A reader over the taglines container; it is read to its end.</param>
  /// <param name="item">The item.</param>
  private void FetchFromTaglinesNode(XmlReader reader, T item)
  {
      // Move onto the container element, then onto its first child node.
      reader.MoveToContent();
      reader.Read();

      while (!reader.EOF)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          // Both branches below leave the reader on the node after the element, so the loop must
          // not call Read() again - doing so skipped an element that followed without whitespace.
          if (reader.Name == "Tagline")
          {
              var val = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(val))
              {
                  item.AddTagline(val);
              }
          }
          else
          {
              reader.Skip();
          }
      }
  }
  /// <summary>
  /// Fetches from genres node: adds every non-blank <c>Genre</c> child as a genre
  /// and skips any other child element.
  /// </summary>
  /// <param name="reader">A reader over the genres container; it is read to its end.</param>
  /// <param name="item">The item.</param>
  private void FetchFromGenresNode(XmlReader reader, T item)
  {
      // Move onto the container element, then onto its first child node.
      reader.MoveToContent();
      reader.Read();

      while (!reader.EOF)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          // Both branches below leave the reader on the node after the element, so the loop must
          // not call Read() again - doing so skipped an element that followed without whitespace.
          if (reader.Name == "Genre")
          {
              var genre = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(genre))
              {
                  item.AddGenre(genre);
              }
          }
          else
          {
              reader.Skip();
          }
      }
  }
  /// <summary>
  /// Fetches the data from persons node: every <c>Person</c> or <c>Actor</c> child is parsed
  /// by <see cref="GetPersonsFromXmlNode" /> and the resulting people are added to the item.
  /// Other child elements are skipped.
  /// </summary>
  /// <param name="reader">A reader over the persons container; it is read to its end.</param>
  /// <param name="item">The item.</param>
  private void FetchDataFromPersonsNode(XmlReader reader, T item)
  {
      // Move onto the container element, then onto its first child node.
      reader.MoveToContent();
      reader.Read();

      while (!reader.EOF)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          switch (reader.Name)
          {
              case "Person":
              case "Actor":
              {
                  using (var personReader = reader.ReadSubtree())
                  {
                      foreach (var person in GetPersonsFromXmlNode(personReader))
                      {
                          item.AddPerson(person);
                      }
                  }

                  // Closing the subtree leaves the reader on the element's end tag (or on the
                  // element itself when it is empty); step past it so the loop makes progress.
                  reader.Read();
                  break;
              }

              default:
                  // Skip() already positions the reader on the node after the element.
                  reader.Skip();
                  break;
          }
      }
  }
  /// <summary>
  /// Fetches from studios node: adds every non-blank <c>Studio</c> child as a studio
  /// and skips any other child element.
  /// </summary>
  /// <param name="reader">A reader over the studios container; it is read to its end.</param>
  /// <param name="item">The item.</param>
  private void FetchFromStudiosNode(XmlReader reader, T item)
  {
      // Move onto the container element, then onto its first child node.
      reader.MoveToContent();
      reader.Read();

      while (!reader.EOF)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          // Both branches below leave the reader on the node after the element, so the loop must
          // not call Read() again - doing so skipped an element that followed without whitespace.
          if (reader.Name == "Studio")
          {
              var studio = reader.ReadElementContentAsString();
              if (!string.IsNullOrWhiteSpace(studio))
              {
                  item.AddStudio(studio);
              }
          }
          else
          {
              reader.Skip();
          }
      }
  }
  /// <summary>
  /// Fetches from parental rating node: maps the numeric <c>Value</c> child to an official rating.
  /// </summary>
  /// <remarks>
  /// Mapping: -1 = NR, 0 = UR, 1 = G, 3 = PG, 4 = PG-13, 5 = NC-17, 6 = R.
  /// Any other number, and any text that is not an integer, leaves the rating untouched.
  /// </remarks>
  /// <param name="reader">A reader over the parental rating container; it is read to its end.</param>
  /// <param name="item">The item.</param>
  private void FetchFromParentalRatingNode(XmlReader reader, T item)
  {
      // Move onto the container element, then onto its first child node.
      reader.MoveToContent();
      reader.Read();

      while (!reader.EOF)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          // Both branches below leave the reader on the node after the element, so the loop must
          // not call Read() again - doing so skipped an element that followed without whitespace.
          if (reader.Name != "Value")
          {
              reader.Skip();
              continue;
          }

          var ratingString = reader.ReadElementContentAsString();

          // A failed TryParse writes 0 to its out argument, which previously turned unparsable
          // text into "UR"; only map the value when parsing actually succeeded.
          int rating;
          if (!int.TryParse(ratingString, NumberStyles.Integer, CultureInfo.InvariantCulture, out rating))
          {
              continue;
          }

          switch (rating)
          {
              case -1:
                  item.OfficialRating = "NR";
                  break;
              case 0:
                  item.OfficialRating = "UR";
                  break;
              case 1:
                  item.OfficialRating = "G";
                  break;
              case 3:
                  item.OfficialRating = "PG";
                  break;
              case 4:
                  item.OfficialRating = "PG-13";
                  break;
              case 5:
                  item.OfficialRating = "NC-17";
                  break;
              case 6:
                  item.OfficialRating = "R";
                  break;
          }
      }
  }
  /// <summary>
  /// Gets the persons from XML node.
  /// </summary>
  /// <remarks>
  /// Reads the <c>Name</c>, <c>Type</c> and <c>Role</c> children and skips all others.
  /// A <c>Name</c> may hold several delimited names; every resulting person receives the
  /// same type and role. The type defaults to "Actor" and the role to an empty string.
  /// </remarks>
  /// <param name="reader">A reader over a single person element; it is read to its end.</param>
  /// <returns>IEnumerable{PersonInfo}.</returns>
  private IEnumerable<PersonInfo> GetPersonsFromXmlNode(XmlReader reader)
  {
      var names = new List<string>();
      var type = "Actor"; // If type is not specified assume actor
      var role = string.Empty;

      // Move onto the person element, then onto its first child node.
      reader.MoveToContent();
      reader.Read();

      while (!reader.EOF)
      {
          if (reader.NodeType != XmlNodeType.Element)
          {
              reader.Read();
              continue;
          }

          // Every branch below leaves the reader on the node after the element, so the loop must
          // not call Read() again - doing so skipped an element that followed without whitespace.
          switch (reader.Name)
          {
              case "Name":
                  names.AddRange(SplitNames(reader.ReadElementContentAsString()));
                  break;

              case "Type":
              {
                  var val = reader.ReadElementContentAsString();
                  if (!string.IsNullOrWhiteSpace(val))
                  {
                      type = val;
                  }
                  break;
              }

              case "Role":
              {
                  var val = reader.ReadElementContentAsString();
                  if (!string.IsNullOrWhiteSpace(val))
                  {
                      role = val;
                  }
                  break;
              }

              default:
                  reader.Skip();
                  break;
          }
      }

      // Blank names are dropped and the rest trimmed; the list is materialised so repeated
      // enumeration returns the same PersonInfo instances.
      return names
          .Where(n => !string.IsNullOrWhiteSpace(n))
          .Select(n => new PersonInfo { Name = n.Trim(), Role = role, Type = type })
          .ToList();
  }
  /// <summary>
  /// Used to split names of comma or pipe delimited genres and people.
  /// </summary>
  /// <remarks>
  /// Each entry is trimmed, and entries that are empty or whitespace-only are dropped,
  /// so "Action, Drama" yields "Action" and "Drama" rather than "Action" and " Drama".
  /// </remarks>
  /// <param name="value">The delimited value; null is treated as empty.</param>
  /// <returns>The trimmed, non-empty entries; an empty array when there are none.</returns>
  private static IEnumerable<string> SplitNames(string value)
  {
      value = value ?? string.Empty;

      // Only split by comma if there is no pipe in the string
      // We have to be careful to not split names like Matthew, Jr.
      var separator = value.IndexOf('|') == -1 ? ',' : '|';

      return value
          .Split(new[] { separator }, StringSplitOptions.RemoveEmptyEntries)
          .Select(name => name.Trim())
          .Where(name => name.Length > 0)
          .ToArray();
  }
  /// <summary>
  /// Splits a string on a single separator character, filling the gap left by
  /// <c>string.Split</c> not offering a (char, options) overload here.
  /// </summary>
  /// <param name="val">The string to split.</param>
  /// <param name="separator">The separator character.</param>
  /// <param name="options">Whether empty entries are kept or removed.</param>
  /// <returns>The substrings of <paramref name="val" /> delimited by <paramref name="separator" />.</returns>
  private static string[] Split(string val, char separator, StringSplitOptions options)
  {
      var separators = new char[] { separator };
      string[] parts = val.Split(separators, options);
      return parts;
  }
  630. }
  631. }