myvideo.py 5.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172
  1. import binascii
  2. import base64
  3. import hashlib
  4. import re
  5. from .common import InfoExtractor
  6. from ..utils import (
  7. compat_ord,
  8. compat_urllib_parse,
  9. ExtractorError,
  10. )
  class MyVideoIE(InfoExtractor):
      """Information Extractor for myvideo.de."""

      _VALID_URL = r'(?:http://)?(?:www\.)?myvideo\.de/watch/([0-9]+)/([^?/]+).*'
      IE_NAME = u'myvideo'
      _TEST = {
          u'url': u'http://www.myvideo.de/watch/8229274/bowling_fail_or_win',
          u'file': u'8229274.flv',
          u'md5': u'2d2753e8130479ba2cb7e0a37002053e',
          u'info_dict': {
              u"title": u"bowling-fail-or-win"
          }
      }

      # Original Code from: https://github.com/dersphere/plugin.video.myvideo_de.git
      # Released into the Public Domain by Tristan Fischer on 2013-05-19
      # https://github.com/rg3/youtube-dl/pull/842
      def __rc4crypt(self, data, key):
          """Run the RC4 stream cipher over ``data`` with ``key``.

          RC4 is symmetric, so the same routine encrypts and decrypts.

          :param data: iterable of input elements; each one is converted to
              an integer with ``compat_ord`` before being XORed.
          :param key: non-empty indexable key; its elements are converted
              with ``compat_ord`` as well.
          :returns: a ``str`` built from ``chr()`` of every output value.
          """
          # Key scheduling: permute the 256-entry state box using the key.
          x = 0
          box = list(range(256))
          key_len = len(key)
          for i in range(256):
              x = (x + box[i] + compat_ord(key[i % key_len])) % 256
              box[i], box[x] = box[x], box[i]

          # Keystream generation: one keystream value per input element.
          x = 0
          y = 0
          # Collect the pieces and join once instead of growing a string
          # with ``+=`` on every iteration.
          out = []
          for char in data:
              x = (x + 1) % 256
              y = (y + box[x]) % 256
              box[x], box[y] = box[y], box[x]
              out.append(chr(compat_ord(char) ^ box[(box[x] + box[y]) % 256]))
          return ''.join(out)

      def __md5(self, s):
          """Return the hexadecimal MD5 digest of ``s`` encoded to bytes."""
          return hashlib.md5(s).hexdigest().encode()

      def _real_extract(self, url):
          """Extract the video information for a myvideo.de watch URL.

          Two strategies are tried in order:

          1. a plain ``source src='...'`` entry in the watch page, which is
             turned into a direct ``.flv`` URL;
          2. the ``flashvars`` block of the page, from which the encrypted
             player XML is downloaded, decrypted with RC4 and searched for
             the stream URL and file name.

          :param url: URL that is matched against ``_VALID_URL``.
          :returns: a list holding a single info dictionary.
          :raises ExtractorError: if the URL does not match, or if any of
              the required pieces cannot be found.
          """
          mobj = re.match(self._VALID_URL, url)
          if mobj is None:
              raise ExtractorError(u'invalid URL: %s' % url)

          video_id = mobj.group(1)

          # Key material; it is base64-decoded twice before use below.
          GK = (
              b'WXpnME1EZGhNRGhpTTJNM01XVmhOREU0WldNNVpHTTJOakpt'
              b'TW1FMU5tVTBNR05pWkRaa05XRXhNVFJoWVRVd1ptSXhaVEV3'
              b'TnpsbA0KTVRkbU1tSTRNdz09'
          )

          # Get video webpage
          webpage_url = 'http://www.myvideo.de/watch/%s' % video_id
          webpage = self._download_webpage(webpage_url, video_id)

          mobj = re.search(r"source src='(.+?)[.]([^.]+)'", webpage)
          if mobj is not None:
              self.report_extraction(video_id)
              video_url = mobj.group(1) + '.flv'

              video_title = self._html_search_regex(
                  '<title>([^<]+)</title>', webpage, u'title')

              # Anchor on the LAST dot of the URL. A lazy ``(.+?)$`` would
              # start at the first dot (inside the host name) and capture
              # most of the URL instead of the extension.
              video_ext = self._search_regex(
                  r'[.]([^./]+)$', video_url, u'extension')

              return [{
                  'id': video_id,
                  'url': video_url,
                  'uploader': None,
                  'upload_date': None,
                  'title': video_title,
                  'ext': video_ext,
              }]

          # try encxml
          mobj = re.search('var flashvars={(.+?)}', webpage)
          if mobj is None:
              raise ExtractorError(u'Unable to extract video')

          # Every flashvar becomes a query parameter, except ``_encxml``
          # which carries the (URL-quoted) address of the player XML.
          params = {}
          encxml = ''
          sec = mobj.group(1)
          for (a, b) in re.findall(r"(.+?):'(.+?)',?", sec):
              if not a == '_encxml':
                  params[a] = b
              else:
                  encxml = compat_urllib_parse.unquote(b)
          if not params.get('domain'):
              params['domain'] = 'www.myvideo.de'
          xmldata_url = '%s?%s' % (encxml, compat_urllib_parse.urlencode(params))
          if 'flash_playertype=MTV' in xmldata_url:
              self._downloader.report_warning(u'avoiding MTV player')
              xmldata_url = (
                  'http://www.myvideo.de/dynamic/get_player_video_xml.php'
                  '?flash_playertype=D&ID=%s&_countlimit=4&autorun=yes'
              ) % video_id

          # get enc data
          # The hex payload is the second '='-separated field of the reply.
          xmldata_parts = self._download_webpage(xmldata_url, video_id).split('=')
          if len(xmldata_parts) < 2:
              # Fail with a readable message instead of a bare IndexError.
              raise ExtractorError(u'unable to extract encrypted data')
          enc_data = xmldata_parts[1]
          enc_data_b = binascii.unhexlify(enc_data)
          # RC4 key: md5(decoded GK + md5(video id)), as hex digest bytes.
          sk = self.__md5(
              base64.b64decode(base64.b64decode(GK)) +
              self.__md5(
                  str(video_id).encode('utf-8')
              )
          )
          dec_data = self.__rc4crypt(enc_data_b, sk)

          # extracting infos
          self.report_extraction(video_id)

          video_url = None
          mobj = re.search(r"connectionurl='(.*?)'", dec_data)
          if mobj:
              video_url = compat_urllib_parse.unquote(mobj.group(1))
              if 'myvideo2flash' in video_url:
                  self._downloader.report_warning(u'forcing RTMPT ...')
                  video_url = video_url.replace('rtmpe://', 'rtmpt://')

          if not video_url:
              # extract non rtmp videos
              mobj = re.search(r"path='(http.*?)' source='(.*?)'", dec_data)
              if mobj is None:
                  raise ExtractorError(u'unable to extract url')
              video_url = (
                  compat_urllib_parse.unquote(mobj.group(1)) +
                  compat_urllib_parse.unquote(mobj.group(2))
              )

          video_file = self._search_regex(r"source='(.*?)'", dec_data, u'video file')
          video_file = compat_urllib_parse.unquote(video_file)

          if not video_file.endswith('f4m'):
              # Split on the last dot only, so names containing several
              # dots no longer break the two-value unpacking.
              ppath, dot, prefix = video_file.rpartition('.')
              if not dot:
                  raise ExtractorError(u'unable to extract play path')
              video_playpath = '%s:%s' % (prefix, ppath)
              video_hls_playlist = ''
          else:
              video_playpath = ''
              video_hls_playlist = (
                  video_file
              ).replace('.f4m', '.m3u8')

          video_swfobj = self._search_regex(
              r"swfobject.embedSWF\('(.+?)'", webpage, u'swfobj')
          video_swfobj = compat_urllib_parse.unquote(video_swfobj)

          video_title = self._html_search_regex(
              "<h1(?: class='globalHd')?>(.*?)</h1>", webpage, u'title')

          return [{
              'id': video_id,
              'url': video_url,
              'tc_url': video_url,
              'uploader': None,
              'upload_date': None,
              'title': video_title,
              'ext': u'flv',
              'play_path': video_playpath,
              'video_file': video_file,
              'video_hls_playlist': video_hls_playlist,
              'player_url': video_swfobj,
          }]
  144. }]