vrv.py 6.1 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import base64
  4. import json
  5. import hashlib
  6. import hmac
  7. import random
  8. import string
  9. import time
  10. from .common import InfoExtractor
  11. from ..compat import (
  12. compat_urllib_parse_urlencode,
  13. compat_urlparse,
  14. )
  15. from ..utils import (
  16. float_or_none,
  17. int_or_none,
  18. )
  19. class VRVIE(InfoExtractor):
  20. _VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P<id>[A-Z0-9]+)'
  21. _TEST = {
  22. 'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT',
  23. 'info_dict': {
  24. 'id': 'GR9PNZ396',
  25. 'ext': 'mp4',
  26. 'title': 'BOSTON: WHERE THE PAST IS THE PRESENT',
  27. 'description': 'md5:4ec8844ac262ca2df9e67c0983c6b83f',
  28. 'uploader_id': 'seeso',
  29. },
  30. 'params': {
  31. # m3u8 download
  32. 'skip_download': True,
  33. },
  34. }
  35. _API_DOMAIN = None
  36. _API_PARAMS = {}
  37. _CMS_SIGNING = {}
  38. def _call_api(self, path, video_id, note, data=None):
  39. base_url = self._API_DOMAIN + '/core/' + path
  40. encoded_query = compat_urllib_parse_urlencode({
  41. 'oauth_consumer_key': self._API_PARAMS['oAuthKey'],
  42. 'oauth_nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
  43. 'oauth_signature_method': 'HMAC-SHA1',
  44. 'oauth_timestamp': int(time.time()),
  45. 'oauth_version': '1.0',
  46. })
  47. headers = self.geo_verification_headers()
  48. if data:
  49. data = json.dumps(data).encode()
  50. headers['Content-Type'] = 'application/json'
  51. method = 'POST' if data else 'GET'
  52. base_string = '&'.join([method, compat_urlparse.quote(base_url, ''), compat_urlparse.quote(encoded_query, '')])
  53. oauth_signature = base64.b64encode(hmac.new(
  54. (self._API_PARAMS['oAuthSecret'] + '&').encode('ascii'),
  55. base_string.encode(), hashlib.sha1).digest()).decode()
  56. encoded_query += '&oauth_signature=' + compat_urlparse.quote(oauth_signature, '')
  57. return self._download_json(
  58. '?'.join([base_url, encoded_query]), video_id,
  59. note='Downloading %s JSON metadata' % note, headers=headers, data=data)
  60. def _call_cms(self, path, video_id, note):
  61. return self._download_json(
  62. self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING,
  63. note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers())
  64. def _set_api_params(self, webpage, video_id):
  65. if not self._API_PARAMS:
  66. self._API_PARAMS = self._parse_json(self._search_regex(
  67. r'window\.__APP_CONFIG__\s*=\s*({.+?})</script>',
  68. webpage, 'api config'), video_id)['cxApiParams']
  69. self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co')
  70. def _set_cms_signing(self, video_id):
  71. if not self._CMS_SIGNING:
  72. self._CMS_SIGNING = self._call_api('index', video_id, 'CMS Signing')['cms_signing']
  73. def _real_extract(self, url):
  74. video_id = self._match_id(url)
  75. webpage = self._download_webpage(
  76. url, video_id,
  77. headers=self.geo_verification_headers())
  78. media_resource = self._parse_json(self._search_regex(
  79. r'window\.__INITIAL_STATE__\s*=\s*({.+?})</script>',
  80. webpage, 'inital state'), video_id).get('watch', {}).get('mediaResource') or {}
  81. video_data = media_resource.get('json')
  82. if not video_data:
  83. self._set_api_params(webpage, video_id)
  84. episode_path = self._call_api('cms_resource', video_id, 'episode resource path', data={
  85. 'resource_key': 'cms:/episodes/' + video_id,
  86. })['__links__']['cms_resource']['href']
  87. self._set_cms_signing(video_id)
  88. video_data = self._call_cms(episode_path, video_id, 'video')
  89. title = video_data['title']
  90. streams_json = media_resource.get('streams', {}).get('json', {})
  91. if not streams_json:
  92. self._set_api_params(webpage, video_id)
  93. streams_path = video_data['__links__']['streams']['href']
  94. self._set_cms_signing(video_id)
  95. streams_json = self._call_cms(streams_path, video_id, 'streams')
  96. audio_locale = streams_json.get('audio_locale')
  97. formats = []
  98. for stream_id, stream in streams_json.get('streams', {}).get('adaptive_hls', {}).items():
  99. stream_url = stream.get('url')
  100. if not stream_url:
  101. continue
  102. stream_id = stream_id or audio_locale
  103. m3u8_formats = self._extract_m3u8_formats(
  104. stream_url, video_id, 'mp4', m3u8_id=stream_id,
  105. note='Downloading %s m3u8 information' % stream_id,
  106. fatal=False)
  107. if audio_locale:
  108. for f in m3u8_formats:
  109. f['language'] = audio_locale
  110. formats.extend(m3u8_formats)
  111. self._sort_formats(formats)
  112. thumbnails = []
  113. for thumbnail in video_data.get('images', {}).get('thumbnails', []):
  114. thumbnail_url = thumbnail.get('source')
  115. if not thumbnail_url:
  116. continue
  117. thumbnails.append({
  118. 'url': thumbnail_url,
  119. 'width': int_or_none(thumbnail.get('width')),
  120. 'height': int_or_none(thumbnail.get('height')),
  121. })
  122. return {
  123. 'id': video_id,
  124. 'title': title,
  125. 'formats': formats,
  126. 'thumbnails': thumbnails,
  127. 'description': video_data.get('description'),
  128. 'duration': float_or_none(video_data.get('duration_ms'), 1000),
  129. 'uploader_id': video_data.get('channel_id'),
  130. 'series': video_data.get('series_title'),
  131. 'season': video_data.get('season_title'),
  132. 'season_number': int_or_none(video_data.get('season_number')),
  133. 'season_id': video_data.get('season_id'),
  134. 'episode': title,
  135. 'episode_number': int_or_none(video_data.get('episode_number')),
  136. 'episode_id': video_data.get('production_episode_id'),
  137. }