# myvideo.py
import base64
import binascii
import hashlib
import re

from .common import InfoExtractor
from ..utils import (
    compat_ord,
    compat_urllib_parse,
    ExtractorError,
)
  11. class MyVideoIE(InfoExtractor):
  12. """Information Extractor for myvideo.de."""
  13. _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
  14. IE_NAME = u'myvideo'
  15. # Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
  16. # Released into the Public Domain by Tristan Fischer on 2013-05-19
  17. # https://github.com/rg3/youtube-dl/pull/842
  18. def __rc4crypt(self,data, key):
  19. x = 0
  20. box = list(range(256))
  21. for i in list(range(256)):
  22. x = (x + box[i] + compat_ord(key[i % len(key)])) % 256
  23. box[i], box[x] = box[x], box[i]
  24. x = 0
  25. y = 0
  26. out = ''
  27. for char in data:
  28. x = (x + 1) % 256
  29. y = (y + box[x]) % 256
  30. box[x], box[y] = box[y], box[x]
  31. out += chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256])
  32. return out
  33. def __md5(self,s):
  34. return hashlib.md5(s).hexdigest().encode()
  35. def _real_extract(self,url):
  36. mobj = re.match(self._VALID_URL, url)
  37. if mobj is None:
  38. raise ExtractorError(u'invalid URL: %s' % url)
  39. video_id = mobj.group(1)
  40. GK = (
  41. b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
  42. b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
  43. b'TnpsbA0KTVRkbU1tSTRNdz09'
  44. )
  45. # Get video webpage
  46. webpage_url = 'http://www.myvideo.de/watch/%s' % video_id
  47. webpage = self._download_webpage(webpage_url, video_id)
  48. mobj = re.search('source src=\'(.+?)[.]([^.]+)\'', webpage)
  49. if mobj is not None:
  50. self.report_extraction(video_id)
  51. video_url = mobj.group(1) + '.flv'
  52. video_title = self._html_search_regex('<title>([^<]+)</title>',
  53. webpage, u'title')
  54. video_ext = self._search_regex('[.](.+?)$', video_url, u'extension')
  55. return [{
  56. 'id': video_id,
  57. 'url': video_url,
  58. 'uploader': None,
  59. 'upload_date': None,
  60. 'title': video_title,
  61. 'ext': video_ext,
  62. }]
  63. # try encxml
  64. mobj = re.search('var flashvars={(.+?)}', webpage)
  65. if mobj is None:
  66. raise ExtractorError(u'Unable to extract video')
  67. params = {}
  68. encxml = ''
  69. sec = mobj.group(1)
  70. for (a, b) in re.findall('(.+?):\'(.+?)\',?', sec):
  71. if not a == '_encxml':
  72. params[a] = b
  73. else:
  74. encxml = compat_urllib_parse.unquote(b)
  75. if not params.get('domain'):
  76. params['domain'] = 'www.myvideo.de'
  77. xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
  78. if 'flash_playertype=MTV' in xmldata_url:
  79. self._downloader.report_warning(u'avoiding MTV player')
  80. xmldata_url = (
  81. 'http://www.myvideo.de/dynamic/get_player_video_xml.php'
  82. '?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
  83. ) % video_id
  84. # get enc data
  85. enc_data = self._download_webpage(xmldata_url, video_id).split('=')[1]
  86. enc_data_b = binascii.unhexlify(enc_data)
  87. sk = self.__md5(
  88. base64.b64decode(base64.b64decode(GK)) +
  89. self.__md5(
  90. str(video_id).encode('utf-8')
  91. )
  92. )
  93. dec_data = self.__rc4crypt(enc_data_b, sk)
  94. # extracting infos
  95. self.report_extraction(video_id)
  96. video_url = None
  97. mobj = re.search('connectionurl=\'(.*?)\'', dec_data)
  98. if mobj:
  99. video_url = compat_urllib_parse.unquote(mobj.group(1))
  100. if 'myvideo2flash' in video_url:
  101. self._downloader.report_warning(u'forcing RTMPT ...')
  102. video_url = video_url.replace('rtmpe://', 'rtmpt://')
  103. if not video_url:
  104. # extract non rtmp videos
  105. mobj = re.search('path=\'(http.*?)\' source=\'(.*?)\'', dec_data)
  106. if mobj is None:
  107. raise ExtractorError(u'unable to extract url')
  108. video_url = compat_urllib_parse.unquote(mobj.group(1)) + compat_urllib_parse.unquote(mobj.group(2))
  109. video_file = self._search_regex('source=\'(.*?)\'', dec_data, u'video file')
  110. video_file = compat_urllib_parse.unquote(video_file)
  111. if not video_file.endswith('f4m'):
  112. ppath, prefix = video_file.split('.')
  113. video_playpath = '%s:%s' % (prefix, ppath)
  114. video_hls_playlist = ''
  115. else:
  116. video_playpath = ''
  117. video_hls_playlist = (
  118. video_file
  119. ).replace('.f4m', '.m3u8')
  120. video_swfobj = self._search_regex('swfobject.embedSWF\(\'(.+?)\'', webpage, u'swfobj')
  121. video_swfobj = compat_urllib_parse.unquote(video_swfobj)
  122. video_title = self._html_search_regex("<h1(?: class='globalHd')?>(.*?)</h1>",
  123. webpage, u'title')
  124. return [{
  125. 'id': video_id,
  126. 'url': video_url,
  127. 'tc_url': video_url,
  128. 'uploader': None,
  129. 'upload_date': None,
  130. 'title': video_title,
  131. 'ext': u'flv',
  132. 'play_path': video_playpath,
  133. 'video_file': video_file,
  134. 'video_hls_playlist': video_hls_playlist,
  135. 'player_url': video_swfobj,
  136. }]