rtlnow.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165
  1. # encoding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. from .common import InfoExtractor
  5. from ..utils import (
  6. ExtractorError,
  7. clean_html,
  8. unified_strdate,
  9. parse_duration,
  10. int_or_none,
  11. )
  12. class RTLnowIE(InfoExtractor):
  13. """Information Extractor for RTL NOW, RTL2 NOW, RTL NITRO, SUPER RTL NOW, VOX NOW and n-tv NOW"""
  14. _VALID_URL = r'''(?x)
  15. (?:https?://)?
  16. (?P<url>
  17. (?P<domain>
  18. rtl-now\.rtl\.de|
  19. rtl2now\.rtl2\.de|
  20. (?:www\.)?voxnow\.de|
  21. (?:www\.)?rtlnitronow\.de|
  22. (?:www\.)?superrtlnow\.de|
  23. (?:www\.)?n-tvnow\.de)
  24. /+[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?
  25. (?:container_id|film_id)=(?P<video_id>[0-9]+)&
  26. player=1(?:&season=[0-9]+)?(?:&.*)?
  27. )'''
  28. _TESTS = [
  29. {
  30. 'url': 'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
  31. 'info_dict': {
  32. 'id': '90419',
  33. 'ext': 'flv',
  34. 'title': 'Ahornallee - Folge 1 - Der Einzug',
  35. 'description': 'md5:ce843b6b5901d9a7f7d04d1bbcdb12de',
  36. 'upload_date': '20070416',
  37. 'duration': 1685,
  38. },
  39. 'params': {
  40. 'skip_download': True,
  41. },
  42. 'skip': 'Only works from Germany',
  43. },
  44. {
  45. 'url': 'http://rtl2now.rtl2.de/aerger-im-revier/episode-15-teil-1.php?film_id=69756&player=1&season=2&index=5',
  46. 'info_dict': {
  47. 'id': '69756',
  48. 'ext': 'flv',
  49. 'title': 'Ärger im Revier - Ein junger Ladendieb, ein handfester Streit u.a.',
  50. 'description': 'md5:3fb247005ed21a935ffc82b7dfa70cf0',
  51. 'thumbnail': 'http://autoimg.static-fra.de/rtl2now/219850/1500x1500/image2.jpg',
  52. 'upload_date': '20120519',
  53. 'duration': 1245,
  54. },
  55. 'params': {
  56. 'skip_download': True,
  57. },
  58. 'skip': 'Only works from Germany',
  59. },
  60. {
  61. 'url': 'http://www.voxnow.de/voxtours/suedafrika-reporter-ii.php?film_id=13883&player=1&season=17',
  62. 'info_dict': {
  63. 'id': '13883',
  64. 'ext': 'flv',
  65. 'title': 'Voxtours - Südafrika-Reporter II',
  66. 'description': 'md5:de7f8d56be6fd4fed10f10f57786db00',
  67. 'upload_date': '20090627',
  68. 'duration': 1800,
  69. },
  70. 'params': {
  71. 'skip_download': True,
  72. },
  73. },
  74. {
  75. 'url': 'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
  76. 'info_dict': {
  77. 'id': '99205',
  78. 'ext': 'flv',
  79. 'title': 'Medicopter 117 - Angst!',
  80. 'description': 'md5:895b1df01639b5f61a04fc305a5cb94d',
  81. 'thumbnail': 'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg',
  82. 'upload_date': '20080928',
  83. 'duration': 2691,
  84. },
  85. 'params': {
  86. 'skip_download': True,
  87. },
  88. },
  89. {
  90. 'url': 'http://www.n-tvnow.de/top-gear/episode-1-2013-01-01-00-00-00.php?film_id=124903&player=1&season=10',
  91. 'info_dict': {
  92. 'id': '124903',
  93. 'ext': 'flv',
  94. 'title': 'Top Gear vom 01.01.2013',
  95. 'description': 'Episode 1',
  96. 'upload_date': '20130101',
  97. },
  98. 'params': {
  99. 'skip_download': True,
  100. },
  101. 'skip': 'Only works from Germany',
  102. },
  103. ]
  104. def _real_extract(self, url):
  105. mobj = re.match(self._VALID_URL, url)
  106. video_page_url = 'http://%s/' % mobj.group('domain')
  107. video_id = mobj.group('video_id')
  108. webpage = self._download_webpage('http://' + mobj.group('url'), video_id)
  109. mobj = re.search(r'(?s)<div style="margin-left: 20px; font-size: 13px;">(.*?)<div id="playerteaser">', webpage)
  110. if mobj:
  111. raise ExtractorError(clean_html(mobj.group(1)), expected=True)
  112. title = self._og_search_title(webpage)
  113. description = self._og_search_description(webpage)
  114. thumbnail = self._og_search_thumbnail(webpage, default=None)
  115. upload_date = unified_strdate(self._html_search_meta('uploadDate', webpage, 'upload date'))
  116. playerdata_url = self._html_search_regex(
  117. r"'playerdata': '(?P<playerdata_url>[^']+)'", webpage, 'playerdata_url')
  118. playerdata = self._download_xml(playerdata_url, video_id, 'Downloading player data XML')
  119. videoinfo = playerdata.find('./playlist/videoinfo')
  120. duration = parse_duration(videoinfo.find('duration').text)
  121. formats = []
  122. for filename in videoinfo.findall('filename'):
  123. mobj = re.search(r'(?P<url>rtmpe://(?:[^/]+/){2})(?P<play_path>.+)', filename.text)
  124. if mobj:
  125. fmt = {
  126. 'url': mobj.group('url'),
  127. 'play_path': 'mp4:' + mobj.group('play_path'),
  128. 'page_url': video_page_url,
  129. 'player_url': video_page_url + 'includes/vodplayer.swf',
  130. }
  131. else:
  132. fmt = {
  133. 'url': filename.text,
  134. }
  135. fmt.update({
  136. 'width': int_or_none(filename.get('width')),
  137. 'height': int_or_none(filename.get('height')),
  138. 'vbr': int_or_none(filename.get('bitrate')),
  139. 'ext': 'flv',
  140. })
  141. formats.append(fmt)
  142. return {
  143. 'id': video_id,
  144. 'title': title,
  145. 'description': description,
  146. 'thumbnail': thumbnail,
  147. 'upload_date': upload_date,
  148. 'duration': duration,
  149. 'formats': formats,
  150. }