vrv.py 8.8 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import base64
  4. import json
  5. import hashlib
  6. import hmac
  7. import random
  8. import string
  9. import time
  10. from .common import InfoExtractor
  11. from ..compat import (
  12. compat_urllib_parse_urlencode,
  13. compat_urllib_parse,
  14. )
  15. from ..utils import (
  16. float_or_none,
  17. int_or_none,
  18. )
  19. class VRVBaseIE(InfoExtractor):
  20. _API_DOMAIN = None
  21. _API_PARAMS = {}
  22. _CMS_SIGNING = {}
  23. def _call_api(self, path, video_id, note, data=None):
  24. base_url = self._API_DOMAIN + '/core/' + path
  25. encoded_query = compat_urllib_parse_urlencode({
  26. 'oauth_consumer_key': self._API_PARAMS['oAuthKey'],
  27. 'oauth_nonce': ''.join([random.choice(string.ascii_letters) for _ in range(32)]),
  28. 'oauth_signature_method': 'HMAC-SHA1',
  29. 'oauth_timestamp': int(time.time()),
  30. 'oauth_version': '1.0',
  31. })
  32. headers = self.geo_verification_headers()
  33. if data:
  34. data = json.dumps(data).encode()
  35. headers['Content-Type'] = 'application/json'
  36. method = 'POST' if data else 'GET'
  37. base_string = '&'.join([method, compat_urllib_parse.quote(base_url, ''), compat_urllib_parse.quote(encoded_query, '')])
  38. oauth_signature = base64.b64encode(hmac.new(
  39. (self._API_PARAMS['oAuthSecret'] + '&').encode('ascii'),
  40. base_string.encode(), hashlib.sha1).digest()).decode()
  41. encoded_query += '&oauth_signature=' + compat_urllib_parse.quote(oauth_signature, '')
  42. return self._download_json(
  43. '?'.join([base_url, encoded_query]), video_id,
  44. note='Downloading %s JSON metadata' % note, headers=headers, data=data)
  45. def _call_cms(self, path, video_id, note):
  46. if not self._CMS_SIGNING:
  47. self._CMS_SIGNING = self._call_api('index', video_id, 'CMS Signing')['cms_signing']
  48. return self._download_json(
  49. self._API_DOMAIN + path, video_id, query=self._CMS_SIGNING,
  50. note='Downloading %s JSON metadata' % note, headers=self.geo_verification_headers())
  51. def _set_api_params(self, webpage, video_id):
  52. if not self._API_PARAMS:
  53. self._API_PARAMS = self._parse_json(self._search_regex(
  54. r'window\.__APP_CONFIG__\s*=\s*({.+?})</script>',
  55. webpage, 'api config'), video_id)['cxApiParams']
  56. self._API_DOMAIN = self._API_PARAMS.get('apiDomain', 'https://api.vrv.co')
  57. def _get_cms_resource(self, resource_key, video_id):
  58. return self._call_api(
  59. 'cms_resource', video_id, 'resource path', data={
  60. 'resource_key': resource_key,
  61. })['__links__']['cms_resource']['href']
  62. class VRVIE(VRVBaseIE):
  63. IE_NAME = 'vrv'
  64. _VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P<id>[A-Z0-9]+)'
  65. _TESTS = [{
  66. 'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT',
  67. 'info_dict': {
  68. 'id': 'GR9PNZ396',
  69. 'ext': 'mp4',
  70. 'title': 'BOSTON: WHERE THE PAST IS THE PRESENT',
  71. 'description': 'md5:4ec8844ac262ca2df9e67c0983c6b83f',
  72. 'uploader_id': 'seeso',
  73. },
  74. 'params': {
  75. # m3u8 download
  76. 'skip_download': True,
  77. },
  78. }]
  79. def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
  80. if not url or stream_format not in ('hls', 'dash'):
  81. return []
  82. assert audio_lang or hardsub_lang
  83. stream_id_list = []
  84. if audio_lang:
  85. stream_id_list.append('audio-%s' % audio_lang)
  86. if hardsub_lang:
  87. stream_id_list.append('hardsub-%s' % hardsub_lang)
  88. stream_id = '-'.join(stream_id_list)
  89. format_id = '%s-%s' % (stream_format, stream_id)
  90. if stream_format == 'hls':
  91. adaptive_formats = self._extract_m3u8_formats(
  92. url, video_id, 'mp4', m3u8_id=format_id,
  93. note='Downloading %s m3u8 information' % stream_id,
  94. fatal=False)
  95. elif stream_format == 'dash':
  96. adaptive_formats = self._extract_mpd_formats(
  97. url, video_id, mpd_id=format_id,
  98. note='Downloading %s MPD information' % stream_id,
  99. fatal=False)
  100. if audio_lang:
  101. for f in adaptive_formats:
  102. if f.get('acodec') != 'none':
  103. f['language'] = audio_lang
  104. return adaptive_formats
  105. def _real_extract(self, url):
  106. video_id = self._match_id(url)
  107. webpage = self._download_webpage(
  108. url, video_id,
  109. headers=self.geo_verification_headers())
  110. media_resource = self._parse_json(self._search_regex(
  111. r'window\.__INITIAL_STATE__\s*=\s*({.+?})</script>',
  112. webpage, 'inital state'), video_id).get('watch', {}).get('mediaResource') or {}
  113. video_data = media_resource.get('json')
  114. if not video_data:
  115. self._set_api_params(webpage, video_id)
  116. episode_path = self._get_cms_resource(
  117. 'cms:/episodes/' + video_id, video_id)
  118. video_data = self._call_cms(episode_path, video_id, 'video')
  119. title = video_data['title']
  120. streams_json = media_resource.get('streams', {}).get('json', {})
  121. if not streams_json:
  122. self._set_api_params(webpage, video_id)
  123. streams_path = video_data['__links__']['streams']['href']
  124. streams_json = self._call_cms(streams_path, video_id, 'streams')
  125. audio_locale = streams_json.get('audio_locale')
  126. formats = []
  127. for stream_type, streams in streams_json.get('streams', {}).items():
  128. if stream_type in ('adaptive_hls', 'adaptive_dash'):
  129. for stream in streams.values():
  130. formats.extend(self._extract_vrv_formats(
  131. stream.get('url'), video_id, stream_type.split('_')[1],
  132. audio_locale, stream.get('hardsub_locale')))
  133. self._sort_formats(formats)
  134. subtitles = {}
  135. for subtitle in streams_json.get('subtitles', {}).values():
  136. subtitle_url = subtitle.get('url')
  137. if not subtitle_url:
  138. continue
  139. subtitles.setdefault(subtitle.get('locale', 'en-US'), []).append({
  140. 'url': subtitle_url,
  141. 'ext': subtitle.get('format', 'ass'),
  142. })
  143. thumbnails = []
  144. for thumbnail in video_data.get('images', {}).get('thumbnails', []):
  145. thumbnail_url = thumbnail.get('source')
  146. if not thumbnail_url:
  147. continue
  148. thumbnails.append({
  149. 'url': thumbnail_url,
  150. 'width': int_or_none(thumbnail.get('width')),
  151. 'height': int_or_none(thumbnail.get('height')),
  152. })
  153. return {
  154. 'id': video_id,
  155. 'title': title,
  156. 'formats': formats,
  157. 'subtitles': subtitles,
  158. 'thumbnails': thumbnails,
  159. 'description': video_data.get('description'),
  160. 'duration': float_or_none(video_data.get('duration_ms'), 1000),
  161. 'uploader_id': video_data.get('channel_id'),
  162. 'series': video_data.get('series_title'),
  163. 'season': video_data.get('season_title'),
  164. 'season_number': int_or_none(video_data.get('season_number')),
  165. 'season_id': video_data.get('season_id'),
  166. 'episode': title,
  167. 'episode_number': int_or_none(video_data.get('episode_number')),
  168. 'episode_id': video_data.get('production_episode_id'),
  169. }
  170. class VRVSeriesIE(VRVBaseIE):
  171. IE_NAME = 'vrv:series'
  172. _VALID_URL = r'https?://(?:www\.)?vrv\.co/series/(?P<id>[A-Z0-9]+)'
  173. _TEST = {
  174. 'url': 'https://vrv.co/series/G68VXG3G6/The-Perfect-Insider',
  175. 'info_dict': {
  176. 'id': 'G68VXG3G6',
  177. },
  178. 'playlist_mincount': 11,
  179. }
  180. def _real_extract(self, url):
  181. series_id = self._match_id(url)
  182. webpage = self._download_webpage(
  183. url, series_id,
  184. headers=self.geo_verification_headers())
  185. self._set_api_params(webpage, series_id)
  186. seasons_path = self._get_cms_resource(
  187. 'cms:/seasons?series_id=' + series_id, series_id)
  188. seasons_data = self._call_cms(seasons_path, series_id, 'seasons')
  189. entries = []
  190. for season in seasons_data.get('items', []):
  191. episodes_path = season['__links__']['season/episodes']['href']
  192. episodes = self._call_cms(episodes_path, series_id, 'episodes')
  193. for episode in episodes.get('items', []):
  194. episode_id = episode['id']
  195. entries.append(self.url_result(
  196. 'https://vrv.co/watch/' + episode_id,
  197. 'VRV', episode_id, episode.get('title')))
  198. return self.playlist_result(entries, series_id)