kika.py 3.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. from .common import InfoExtractor
  4. from ..utils import ExtractorError
  5. class KikaIE(InfoExtractor):
  6. _VALID_URL = r'https?://(?:www\.)?kika\.de/(?:[a-z-]+/)*(?:video|(?:einzel)?sendung)(?P<id>\d+).*'
  7. _TESTS = [
  8. {
  9. 'url': 'http://www.kika.de/baumhaus/videos/video19636.html',
  10. 'md5': '4930515e36b06c111213e80d1e4aad0e',
  11. 'info_dict': {
  12. 'id': '19636',
  13. 'ext': 'mp4',
  14. 'title': 'Baumhaus vom 30. Oktober 2015',
  15. 'description': None,
  16. },
  17. },
  18. {
  19. 'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/videos/video8182.html',
  20. 'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
  21. 'info_dict': {
  22. 'id': '8182',
  23. 'ext': 'mp4',
  24. 'title': 'Beutolomäus und der geheime Weihnachtswunsch',
  25. 'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
  26. },
  27. },
  28. {
  29. 'url': 'http://www.kika.de/baumhaus/sendungen/video19636_zc-fea7f8a0_zs-4bf89c60.html',
  30. 'md5': '4930515e36b06c111213e80d1e4aad0e',
  31. 'info_dict': {
  32. 'id': '19636',
  33. 'ext': 'mp4',
  34. 'title': 'Baumhaus vom 30. Oktober 2015',
  35. 'description': None,
  36. },
  37. },
  38. {
  39. 'url': 'http://www.kika.de/sendungen/einzelsendungen/weihnachtsprogramm/einzelsendung2534.html',
  40. 'md5': '5fe9c4dd7d71e3b238f04b8fdd588357',
  41. 'info_dict': {
  42. 'id': '8182',
  43. 'ext': 'mp4',
  44. 'title': 'Beutolomäus und der geheime Weihnachtswunsch',
  45. 'description': 'md5:b69d32d7b2c55cbe86945ab309d39bbd',
  46. },
  47. },
  48. ]
  49. def _real_extract(self, url):
  50. # broadcast_id may be the same as the video_id
  51. broadcast_id = self._match_id(url)
  52. webpage = self._download_webpage(url, broadcast_id)
  53. xml_re = r'sectionArticle[ "](?:(?!sectionA[ "])(?:.|\n))*?dataURL:\'(?:/[a-z-]+?)*?/video(\d+)-avCustom\.xml'
  54. video_id = self._search_regex(xml_re, webpage, "xml_url", default=None)
  55. if not video_id:
  56. err_msg = 'Video %s is not available online' % broadcast_id
  57. raise ExtractorError(err_msg, expected=True)
  58. xml_url = 'http://www.kika.de/video%s-avCustom.xml' % (video_id)
  59. xml_tree = self._download_xml(xml_url, video_id)
  60. title = xml_tree.find('title').text
  61. webpage_url = xml_tree.find('htmlUrl').text
  62. # Try to get the description, not available for all videos
  63. try:
  64. broadcast_elem = xml_tree.find('broadcast')
  65. description = broadcast_elem.find('broadcastDescription').text
  66. except AttributeError:
  67. description = None
  68. # duration string format is mm:ss (even if it is >= 1 hour, e.g. 78:42)
  69. tmp = xml_tree.find('duration').text.split(':')
  70. duration = int(tmp[0]) * 60 + int(tmp[1])
  71. formats = [{
  72. 'url': elem.find('progressiveDownloadUrl').text,
  73. 'ext': elem.find('mediaType').text.lower(),
  74. 'format': elem.find('profileName').text,
  75. 'width': int(elem.find('frameWidth').text),
  76. 'height': int(elem.find('frameHeight').text),
  77. 'abr': int(elem.find('bitrateAudio').text),
  78. 'vbr': int(elem.find('bitrateVideo').text),
  79. 'filesize': int(elem.find('fileSize').text),
  80. } for elem in xml_tree.find('assets')]
  81. self._sort_formats(formats)
  82. return {
  83. 'id': video_id,
  84. 'title': title,
  85. 'description': description,
  86. 'formats': formats,
  87. 'duration': duration,
  88. 'webpage_url': webpage_url,
  89. }