viki.py 15 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import base64
  4. import hashlib
  5. import hmac
  6. import itertools
  7. import json
  8. import re
  9. import time
  10. from .common import InfoExtractor
  11. from ..compat import (
  12. compat_parse_qs,
  13. compat_urllib_parse_urlparse,
  14. )
  15. from ..utils import (
  16. ExtractorError,
  17. int_or_none,
  18. parse_age_limit,
  19. parse_iso8601,
  20. sanitized_Request,
  21. std_headers,
  22. try_get,
  23. )
  24. class VikiBaseIE(InfoExtractor):
  25. _VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
  26. _API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
  27. _API_URL_TEMPLATE = 'https://api.viki.io%s&sig=%s'
  28. _APP = '100005a'
  29. _APP_VERSION = '6.0.0'
  30. _APP_SECRET = 'MM_d*yP@`&1@]@!AVrXf_o-HVEnoTnm$O-ti4[G~$JDI/Dc-&piU&z&5.;:}95=Iad'
  31. _GEO_BYPASS = False
  32. _NETRC_MACHINE = 'viki'
  33. _token = None
  34. _ERRORS = {
  35. 'geo': 'Sorry, this content is not available in your region.',
  36. 'upcoming': 'Sorry, this content is not yet available.',
  37. 'paywall': 'Sorry, this content is only available to Viki Pass Plus subscribers',
  38. }
  39. def _prepare_call(self, path, timestamp=None, post_data=None):
  40. path += '?' if '?' not in path else '&'
  41. if not timestamp:
  42. timestamp = int(time.time())
  43. query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp)
  44. if self._token:
  45. query += '&token=%s' % self._token
  46. sig = hmac.new(
  47. self._APP_SECRET.encode('ascii'),
  48. query.encode('ascii'),
  49. hashlib.sha1
  50. ).hexdigest()
  51. url = self._API_URL_TEMPLATE % (query, sig)
  52. return sanitized_Request(
  53. url, json.dumps(post_data).encode('utf-8')) if post_data else url
  54. def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
  55. resp = self._download_json(
  56. self._prepare_call(path, timestamp, post_data), video_id, note,
  57. headers={'x-viki-app-ver': self._APP_VERSION})
  58. error = resp.get('error')
  59. if error:
  60. if error == 'invalid timestamp':
  61. resp = self._download_json(
  62. self._prepare_call(path, int(resp['current_timestamp']), post_data),
  63. video_id, '%s (retry)' % note)
  64. error = resp.get('error')
  65. if error:
  66. self._raise_error(resp['error'])
  67. return resp
  68. def _raise_error(self, error):
  69. raise ExtractorError(
  70. '%s returned error: %s' % (self.IE_NAME, error),
  71. expected=True)
  72. def _check_errors(self, data):
  73. for reason, status in (data.get('blocking') or {}).items():
  74. if status and reason in self._ERRORS:
  75. message = self._ERRORS[reason]
  76. if reason == 'geo':
  77. self.raise_geo_restricted(msg=message)
  78. elif reason == 'paywall':
  79. self.raise_login_required(message)
  80. raise ExtractorError('%s said: %s' % (
  81. self.IE_NAME, message), expected=True)
  82. def _real_initialize(self):
  83. self._login()
  84. def _login(self):
  85. username, password = self._get_login_info()
  86. if username is None:
  87. return
  88. login_form = {
  89. 'login_id': username,
  90. 'password': password,
  91. }
  92. login = self._call_api(
  93. 'sessions.json', None,
  94. 'Logging in', post_data=login_form)
  95. self._token = login.get('token')
  96. if not self._token:
  97. self.report_warning('Unable to get session token, login has probably failed')
  98. @staticmethod
  99. def dict_selection(dict_obj, preferred_key, allow_fallback=True):
  100. if preferred_key in dict_obj:
  101. return dict_obj.get(preferred_key)
  102. if not allow_fallback:
  103. return
  104. filtered_dict = list(filter(None, [dict_obj.get(k) for k in dict_obj.keys()]))
  105. return filtered_dict[0] if filtered_dict else None
  106. class VikiIE(VikiBaseIE):
  107. IE_NAME = 'viki'
  108. _VALID_URL = r'%s(?:videos|player)/(?P<id>[0-9]+v)' % VikiBaseIE._VALID_URL_BASE
  109. _TESTS = [{
  110. 'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
  111. 'info_dict': {
  112. 'id': '1023585v',
  113. 'ext': 'mp4',
  114. 'title': 'Heirs - Episode 14',
  115. 'uploader': 'SBS Contents Hub',
  116. 'timestamp': 1385047627,
  117. 'upload_date': '20131121',
  118. 'age_limit': 13,
  119. 'duration': 3570,
  120. 'episode_number': 14,
  121. },
  122. 'params': {
  123. 'format': 'bestvideo',
  124. },
  125. 'skip': 'Blocked in the US',
  126. 'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
  127. }, {
  128. # clip
  129. 'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
  130. 'md5': '86c0b5dbd4d83a6611a79987cc7a1989',
  131. 'info_dict': {
  132. 'id': '1067139v',
  133. 'ext': 'mp4',
  134. 'title': "'The Avengers: Age of Ultron' Press Conference",
  135. 'description': 'md5:d70b2f9428f5488321bfe1db10d612ea',
  136. 'duration': 352,
  137. 'timestamp': 1430380829,
  138. 'upload_date': '20150430',
  139. 'uploader': 'Arirang TV',
  140. 'like_count': int,
  141. 'age_limit': 0,
  142. },
  143. 'skip': 'Sorry. There was an error loading this video',
  144. }, {
  145. 'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
  146. 'info_dict': {
  147. 'id': '1048879v',
  148. 'ext': 'mp4',
  149. 'title': 'Ankhon Dekhi',
  150. 'duration': 6512,
  151. 'timestamp': 1408532356,
  152. 'upload_date': '20140820',
  153. 'uploader': 'Spuul',
  154. 'like_count': int,
  155. 'age_limit': 13,
  156. },
  157. 'skip': 'Blocked in the US',
  158. }, {
  159. # episode
  160. 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
  161. 'md5': '0a53dc252e6e690feccd756861495a8c',
  162. 'info_dict': {
  163. 'id': '44699v',
  164. 'ext': 'mp4',
  165. 'title': 'Boys Over Flowers - Episode 1',
  166. 'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
  167. 'duration': 4172,
  168. 'timestamp': 1270496524,
  169. 'upload_date': '20100405',
  170. 'uploader': 'group8',
  171. 'like_count': int,
  172. 'age_limit': 13,
  173. 'episode_number': 1,
  174. },
  175. 'params': {
  176. 'format': 'bestvideo',
  177. },
  178. 'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
  179. }, {
  180. # youtube external
  181. 'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
  182. 'md5': '63f8600c1da6f01b7640eee7eca4f1da',
  183. 'info_dict': {
  184. 'id': '50562v',
  185. 'ext': 'webm',
  186. 'title': 'Poor Nastya [COMPLETE] - Episode 1',
  187. 'description': '',
  188. 'duration': 606,
  189. 'timestamp': 1274949505,
  190. 'upload_date': '20101213',
  191. 'uploader': 'ad14065n',
  192. 'uploader_id': 'ad14065n',
  193. 'like_count': int,
  194. 'age_limit': 13,
  195. },
  196. 'skip': 'Page not found!',
  197. }, {
  198. 'url': 'http://www.viki.com/player/44699v',
  199. 'only_matching': True,
  200. }, {
  201. # non-English description
  202. 'url': 'http://www.viki.com/videos/158036v-love-in-magic',
  203. 'md5': '41faaba0de90483fb4848952af7c7d0d',
  204. 'info_dict': {
  205. 'id': '158036v',
  206. 'ext': 'mp4',
  207. 'uploader': 'I Planet Entertainment',
  208. 'upload_date': '20111122',
  209. 'timestamp': 1321985454,
  210. 'description': 'md5:44b1e46619df3a072294645c770cef36',
  211. 'title': 'Love In Magic',
  212. 'age_limit': 13,
  213. },
  214. 'params': {
  215. 'format': 'bestvideo',
  216. },
  217. 'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
  218. }]
  219. def _real_extract(self, url):
  220. video_id = self._match_id(url)
  221. resp = self._download_json(
  222. 'https://www.viki.com/api/videos/' + video_id,
  223. video_id, 'Downloading video JSON', headers={
  224. 'x-client-user-agent': std_headers['User-Agent'],
  225. 'x-viki-app-ver': '3.0.0',
  226. })
  227. video = resp['video']
  228. self._check_errors(video)
  229. title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False)
  230. episode_number = int_or_none(video.get('number'))
  231. if not title:
  232. title = 'Episode %d' % episode_number if video.get('type') == 'episode' else video.get('id') or video_id
  233. container_titles = try_get(video, lambda x: x['container']['titles'], dict) or {}
  234. container_title = self.dict_selection(container_titles, 'en')
  235. title = '%s - %s' % (container_title, title)
  236. description = self.dict_selection(video.get('descriptions', {}), 'en')
  237. like_count = int_or_none(try_get(video, lambda x: x['likes']['count']))
  238. thumbnails = []
  239. for thumbnail_id, thumbnail in (video.get('images') or {}).items():
  240. thumbnails.append({
  241. 'id': thumbnail_id,
  242. 'url': thumbnail.get('url'),
  243. })
  244. subtitles = {}
  245. for subtitle_lang, _ in (video.get('subtitle_completions') or {}).items():
  246. subtitles[subtitle_lang] = [{
  247. 'ext': subtitles_format,
  248. 'url': self._prepare_call(
  249. 'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
  250. } for subtitles_format in ('srt', 'vtt')]
  251. result = {
  252. 'id': video_id,
  253. 'title': title,
  254. 'description': description,
  255. 'duration': int_or_none(video.get('duration')),
  256. 'timestamp': parse_iso8601(video.get('created_at')),
  257. 'uploader': video.get('author'),
  258. 'uploader_url': video.get('author_url'),
  259. 'like_count': like_count,
  260. 'age_limit': parse_age_limit(video.get('rating')),
  261. 'thumbnails': thumbnails,
  262. 'subtitles': subtitles,
  263. 'episode_number': episode_number,
  264. }
  265. formats = []
  266. def add_format(format_id, format_dict, protocol='http'):
  267. # rtmps URLs does not seem to work
  268. if protocol == 'rtmps':
  269. return
  270. format_url = format_dict.get('url')
  271. if not format_url:
  272. return
  273. qs = compat_parse_qs(compat_urllib_parse_urlparse(format_url).query)
  274. stream = qs.get('stream', [None])[0]
  275. if stream:
  276. format_url = base64.b64decode(stream).decode()
  277. if format_id in ('m3u8', 'hls'):
  278. m3u8_formats = self._extract_m3u8_formats(
  279. format_url, video_id, 'mp4',
  280. entry_protocol='m3u8_native',
  281. m3u8_id='m3u8-%s' % protocol, fatal=False)
  282. # Despite CODECS metadata in m3u8 all video-only formats
  283. # are actually video+audio
  284. for f in m3u8_formats:
  285. if '_drm/index_' in f['url']:
  286. continue
  287. if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
  288. f['acodec'] = None
  289. formats.append(f)
  290. elif format_id in ('mpd', 'dash'):
  291. formats.extend(self._extract_mpd_formats(
  292. format_url, video_id, 'mpd-%s' % protocol, fatal=False))
  293. elif format_url.startswith('rtmp'):
  294. mobj = re.search(
  295. r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
  296. format_url)
  297. if not mobj:
  298. return
  299. formats.append({
  300. 'format_id': 'rtmp-%s' % format_id,
  301. 'ext': 'flv',
  302. 'url': mobj.group('url'),
  303. 'play_path': mobj.group('playpath'),
  304. 'app': mobj.group('app'),
  305. 'page_url': url,
  306. })
  307. else:
  308. formats.append({
  309. 'url': format_url,
  310. 'format_id': '%s-%s' % (format_id, protocol),
  311. 'height': int_or_none(self._search_regex(
  312. r'^(\d+)[pP]$', format_id, 'height', default=None)),
  313. })
  314. for format_id, format_dict in (resp.get('streams') or {}).items():
  315. add_format(format_id, format_dict)
  316. if not formats:
  317. streams = self._call_api(
  318. 'videos/%s/streams.json' % video_id, video_id,
  319. 'Downloading video streams JSON')
  320. if 'external' in streams:
  321. result.update({
  322. '_type': 'url_transparent',
  323. 'url': streams['external']['url'],
  324. })
  325. return result
  326. for format_id, stream_dict in streams.items():
  327. for protocol, format_dict in stream_dict.items():
  328. add_format(format_id, format_dict, protocol)
  329. self._sort_formats(formats)
  330. result['formats'] = formats
  331. return result
  332. class VikiChannelIE(VikiBaseIE):
  333. IE_NAME = 'viki:channel'
  334. _VALID_URL = r'%s(?:tv|news|movies|artists)/(?P<id>[0-9]+c)' % VikiBaseIE._VALID_URL_BASE
  335. _TESTS = [{
  336. 'url': 'http://www.viki.com/tv/50c-boys-over-flowers',
  337. 'info_dict': {
  338. 'id': '50c',
  339. 'title': 'Boys Over Flowers',
  340. 'description': 'md5:804ce6e7837e1fd527ad2f25420f4d59',
  341. },
  342. 'playlist_mincount': 71,
  343. }, {
  344. 'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete',
  345. 'info_dict': {
  346. 'id': '1354c',
  347. 'title': 'Poor Nastya [COMPLETE]',
  348. 'description': 'md5:05bf5471385aa8b21c18ad450e350525',
  349. },
  350. 'playlist_count': 127,
  351. 'skip': 'Page not found',
  352. }, {
  353. 'url': 'http://www.viki.com/news/24569c-showbiz-korea',
  354. 'only_matching': True,
  355. }, {
  356. 'url': 'http://www.viki.com/movies/22047c-pride-and-prejudice-2005',
  357. 'only_matching': True,
  358. }, {
  359. 'url': 'http://www.viki.com/artists/2141c-shinee',
  360. 'only_matching': True,
  361. }]
  362. _PER_PAGE = 25
  363. def _real_extract(self, url):
  364. channel_id = self._match_id(url)
  365. channel = self._call_api(
  366. 'containers/%s.json' % channel_id, channel_id,
  367. 'Downloading channel JSON')
  368. self._check_errors(channel)
  369. title = self.dict_selection(channel['titles'], 'en')
  370. description = self.dict_selection(channel['descriptions'], 'en')
  371. entries = []
  372. for video_type in ('episodes', 'clips', 'movies'):
  373. for page_num in itertools.count(1):
  374. page = self._call_api(
  375. 'containers/%s/%s.json?per_page=%d&sort=number&direction=asc&with_paging=true&page=%d'
  376. % (channel_id, video_type, self._PER_PAGE, page_num), channel_id,
  377. 'Downloading %s JSON page #%d' % (video_type, page_num))
  378. for video in page['response']:
  379. video_id = video['id']
  380. entries.append(self.url_result(
  381. 'https://www.viki.com/videos/%s' % video_id, 'Viki'))
  382. if not page['pagination']['next']:
  383. break
  384. return self.playlist_result(entries, channel_id, title, description)