viki.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import base64
  4. import hashlib
  5. import hmac
  6. import itertools
  7. import json
  8. import re
  9. import time
  10. from .common import InfoExtractor
  11. from ..compat import (
  12. compat_parse_qs,
  13. compat_urllib_parse_urlparse,
  14. )
  15. from ..utils import (
  16. ExtractorError,
  17. int_or_none,
  18. parse_age_limit,
  19. parse_iso8601,
  20. sanitized_Request,
  21. std_headers,
  22. )
  23. class VikiBaseIE(InfoExtractor):
  24. _VALID_URL_BASE = r'https?://(?:www\.)?viki\.(?:com|net|mx|jp|fr)/'
  25. _API_QUERY_TEMPLATE = '/v4/%sapp=%s&t=%s&site=www.viki.com'
  26. _API_URL_TEMPLATE = 'https://api.viki.io%s&sig=%s'
  27. _APP = '100005a'
  28. _APP_VERSION = '2.2.5.1428709186'
  29. _APP_SECRET = 'MM_d*yP@`&1@]@!AVrXf_o-HVEnoTnm$O-ti4[G~$JDI/Dc-&piU&z&5.;:}95=Iad'
  30. _GEO_BYPASS = False
  31. _NETRC_MACHINE = 'viki'
  32. _token = None
  33. _ERRORS = {
  34. 'geo': 'Sorry, this content is not available in your region.',
  35. 'upcoming': 'Sorry, this content is not yet available.',
  36. # 'paywall': 'paywall',
  37. }
  38. def _prepare_call(self, path, timestamp=None, post_data=None):
  39. path += '?' if '?' not in path else '&'
  40. if not timestamp:
  41. timestamp = int(time.time())
  42. query = self._API_QUERY_TEMPLATE % (path, self._APP, timestamp)
  43. if self._token:
  44. query += '&token=%s' % self._token
  45. sig = hmac.new(
  46. self._APP_SECRET.encode('ascii'),
  47. query.encode('ascii'),
  48. hashlib.sha1
  49. ).hexdigest()
  50. url = self._API_URL_TEMPLATE % (query, sig)
  51. return sanitized_Request(
  52. url, json.dumps(post_data).encode('utf-8')) if post_data else url
  53. def _call_api(self, path, video_id, note, timestamp=None, post_data=None):
  54. resp = self._download_json(
  55. self._prepare_call(path, timestamp, post_data), video_id, note)
  56. error = resp.get('error')
  57. if error:
  58. if error == 'invalid timestamp':
  59. resp = self._download_json(
  60. self._prepare_call(path, int(resp['current_timestamp']), post_data),
  61. video_id, '%s (retry)' % note)
  62. error = resp.get('error')
  63. if error:
  64. self._raise_error(resp['error'])
  65. return resp
  66. def _raise_error(self, error):
  67. raise ExtractorError(
  68. '%s returned error: %s' % (self.IE_NAME, error),
  69. expected=True)
  70. def _check_errors(self, data):
  71. for reason, status in data.get('blocking', {}).items():
  72. if status and reason in self._ERRORS:
  73. message = self._ERRORS[reason]
  74. if reason == 'geo':
  75. self.raise_geo_restricted(msg=message)
  76. raise ExtractorError('%s said: %s' % (
  77. self.IE_NAME, message), expected=True)
  78. def _real_initialize(self):
  79. self._login()
  80. def _login(self):
  81. username, password = self._get_login_info()
  82. if username is None:
  83. return
  84. login_form = {
  85. 'login_id': username,
  86. 'password': password,
  87. }
  88. login = self._call_api(
  89. 'sessions.json', None,
  90. 'Logging in', post_data=login_form)
  91. self._token = login.get('token')
  92. if not self._token:
  93. self.report_warning('Unable to get session token, login has probably failed')
  94. @staticmethod
  95. def dict_selection(dict_obj, preferred_key, allow_fallback=True):
  96. if preferred_key in dict_obj:
  97. return dict_obj.get(preferred_key)
  98. if not allow_fallback:
  99. return
  100. filtered_dict = list(filter(None, [dict_obj.get(k) for k in dict_obj.keys()]))
  101. return filtered_dict[0] if filtered_dict else None
  102. class VikiIE(VikiBaseIE):
  103. IE_NAME = 'viki'
  104. _VALID_URL = r'%s(?:videos|player)/(?P<id>[0-9]+v)' % VikiBaseIE._VALID_URL_BASE
  105. _TESTS = [{
  106. 'url': 'http://www.viki.com/videos/1023585v-heirs-episode-14',
  107. 'info_dict': {
  108. 'id': '1023585v',
  109. 'ext': 'mp4',
  110. 'title': 'Heirs Episode 14',
  111. 'uploader': 'SBS',
  112. 'description': 'md5:c4b17b9626dd4b143dcc4d855ba3474e',
  113. 'upload_date': '20131121',
  114. 'age_limit': 13,
  115. },
  116. 'skip': 'Blocked in the US',
  117. }, {
  118. # clip
  119. 'url': 'http://www.viki.com/videos/1067139v-the-avengers-age-of-ultron-press-conference',
  120. 'md5': '86c0b5dbd4d83a6611a79987cc7a1989',
  121. 'info_dict': {
  122. 'id': '1067139v',
  123. 'ext': 'mp4',
  124. 'title': "'The Avengers: Age of Ultron' Press Conference",
  125. 'description': 'md5:d70b2f9428f5488321bfe1db10d612ea',
  126. 'duration': 352,
  127. 'timestamp': 1430380829,
  128. 'upload_date': '20150430',
  129. 'uploader': 'Arirang TV',
  130. 'like_count': int,
  131. 'age_limit': 0,
  132. }
  133. }, {
  134. 'url': 'http://www.viki.com/videos/1048879v-ankhon-dekhi',
  135. 'info_dict': {
  136. 'id': '1048879v',
  137. 'ext': 'mp4',
  138. 'title': 'Ankhon Dekhi',
  139. 'duration': 6512,
  140. 'timestamp': 1408532356,
  141. 'upload_date': '20140820',
  142. 'uploader': 'Spuul',
  143. 'like_count': int,
  144. 'age_limit': 13,
  145. },
  146. 'skip': 'Blocked in the US',
  147. }, {
  148. # episode
  149. 'url': 'http://www.viki.com/videos/44699v-boys-over-flowers-episode-1',
  150. 'md5': '94e0e34fd58f169f40c184f232356cfe',
  151. 'info_dict': {
  152. 'id': '44699v',
  153. 'ext': 'mp4',
  154. 'title': 'Boys Over Flowers - Episode 1',
  155. 'description': 'md5:b89cf50038b480b88b5b3c93589a9076',
  156. 'duration': 4172,
  157. 'timestamp': 1270496524,
  158. 'upload_date': '20100405',
  159. 'uploader': 'group8',
  160. 'like_count': int,
  161. 'age_limit': 13,
  162. },
  163. 'expected_warnings': ['Unknown MIME type image/jpeg in DASH manifest'],
  164. }, {
  165. # youtube external
  166. 'url': 'http://www.viki.com/videos/50562v-poor-nastya-complete-episode-1',
  167. 'md5': '63f8600c1da6f01b7640eee7eca4f1da',
  168. 'info_dict': {
  169. 'id': '50562v',
  170. 'ext': 'webm',
  171. 'title': 'Poor Nastya [COMPLETE] - Episode 1',
  172. 'description': '',
  173. 'duration': 606,
  174. 'timestamp': 1274949505,
  175. 'upload_date': '20101213',
  176. 'uploader': 'ad14065n',
  177. 'uploader_id': 'ad14065n',
  178. 'like_count': int,
  179. 'age_limit': 13,
  180. },
  181. 'skip': 'Page not found!',
  182. }, {
  183. 'url': 'http://www.viki.com/player/44699v',
  184. 'only_matching': True,
  185. }, {
  186. # non-English description
  187. 'url': 'http://www.viki.com/videos/158036v-love-in-magic',
  188. 'md5': 'adf9e321a0ae5d0aace349efaaff7691',
  189. 'info_dict': {
  190. 'id': '158036v',
  191. 'ext': 'mp4',
  192. 'uploader': 'I Planet Entertainment',
  193. 'upload_date': '20111122',
  194. 'timestamp': 1321985454,
  195. 'description': 'md5:44b1e46619df3a072294645c770cef36',
  196. 'title': 'Love In Magic',
  197. 'age_limit': 13,
  198. },
  199. }]
  200. def _real_extract(self, url):
  201. video_id = self._match_id(url)
  202. resp = self._download_json(
  203. 'https://www.viki.com/api/videos/' + video_id,
  204. video_id, 'Downloading video JSON', headers={
  205. 'x-client-user-agent': std_headers['User-Agent'],
  206. 'x-viki-app-ver': '4.0.57',
  207. })
  208. video = resp['video']
  209. self._check_errors(video)
  210. title = self.dict_selection(video.get('titles', {}), 'en', allow_fallback=False)
  211. if not title:
  212. title = 'Episode %d' % video.get('number') if video.get('type') == 'episode' else video.get('id') or video_id
  213. container_titles = video.get('container', {}).get('titles', {})
  214. container_title = self.dict_selection(container_titles, 'en')
  215. title = '%s - %s' % (container_title, title)
  216. description = self.dict_selection(video.get('descriptions', {}), 'en')
  217. duration = int_or_none(video.get('duration'))
  218. timestamp = parse_iso8601(video.get('created_at'))
  219. uploader = video.get('author')
  220. like_count = int_or_none(video.get('likes', {}).get('count'))
  221. age_limit = parse_age_limit(video.get('rating'))
  222. thumbnails = []
  223. for thumbnail_id, thumbnail in video.get('images', {}).items():
  224. thumbnails.append({
  225. 'id': thumbnail_id,
  226. 'url': thumbnail.get('url'),
  227. })
  228. subtitles = {}
  229. for subtitle_lang, _ in video.get('subtitle_completions', {}).items():
  230. subtitles[subtitle_lang] = [{
  231. 'ext': subtitles_format,
  232. 'url': self._prepare_call(
  233. 'videos/%s/subtitles/%s.%s' % (video_id, subtitle_lang, subtitles_format)),
  234. } for subtitles_format in ('srt', 'vtt')]
  235. result = {
  236. 'id': video_id,
  237. 'title': title,
  238. 'description': description,
  239. 'duration': duration,
  240. 'timestamp': timestamp,
  241. 'uploader': uploader,
  242. 'like_count': like_count,
  243. 'age_limit': age_limit,
  244. 'thumbnails': thumbnails,
  245. 'subtitles': subtitles,
  246. }
  247. formats = []
  248. def add_format(format_id, format_dict, protocol='http'):
  249. # rtmps URLs does not seem to work
  250. if protocol == 'rtmps':
  251. return
  252. format_url = format_dict.get('url')
  253. if not format_url:
  254. return
  255. qs = compat_parse_qs(compat_urllib_parse_urlparse(format_url).query)
  256. stream = qs.get('stream', [None])[0]
  257. if stream:
  258. format_url = base64.b64decode(stream).decode()
  259. if format_id in ('m3u8', 'hls'):
  260. m3u8_formats = self._extract_m3u8_formats(
  261. format_url, video_id, 'mp4',
  262. entry_protocol='m3u8_native',
  263. m3u8_id='m3u8-%s' % protocol, fatal=False)
  264. # Despite CODECS metadata in m3u8 all video-only formats
  265. # are actually video+audio
  266. for f in m3u8_formats:
  267. if '_drm/index_' in f['url']:
  268. continue
  269. if f.get('acodec') == 'none' and f.get('vcodec') != 'none':
  270. f['acodec'] = None
  271. formats.append(f)
  272. elif format_id in ('mpd', 'dash'):
  273. formats.extend(self._extract_mpd_formats(
  274. format_url, video_id, 'mpd-%s' % protocol, fatal=False))
  275. elif format_url.startswith('rtmp'):
  276. mobj = re.search(
  277. r'^(?P<url>rtmp://[^/]+/(?P<app>.+?))/(?P<playpath>mp4:.+)$',
  278. format_url)
  279. if not mobj:
  280. return
  281. formats.append({
  282. 'format_id': 'rtmp-%s' % format_id,
  283. 'ext': 'flv',
  284. 'url': mobj.group('url'),
  285. 'play_path': mobj.group('playpath'),
  286. 'app': mobj.group('app'),
  287. 'page_url': url,
  288. })
  289. else:
  290. formats.append({
  291. 'url': format_url,
  292. 'format_id': '%s-%s' % (format_id, protocol),
  293. 'height': int_or_none(self._search_regex(
  294. r'^(\d+)[pP]$', format_id, 'height', default=None)),
  295. })
  296. for format_id, format_dict in (resp.get('streams') or {}).items():
  297. add_format(format_id, format_dict)
  298. if not formats:
  299. streams = self._call_api(
  300. 'videos/%s/streams.json' % video_id, video_id,
  301. 'Downloading video streams JSON')
  302. if 'external' in streams:
  303. result.update({
  304. '_type': 'url_transparent',
  305. 'url': streams['external']['url'],
  306. })
  307. return result
  308. for format_id, stream_dict in streams.items():
  309. for protocol, format_dict in stream_dict.items():
  310. add_format(format_id, format_dict, protocol)
  311. self._sort_formats(formats)
  312. result['formats'] = formats
  313. return result
  314. class VikiChannelIE(VikiBaseIE):
  315. IE_NAME = 'viki:channel'
  316. _VALID_URL = r'%s(?:tv|news|movies|artists)/(?P<id>[0-9]+c)' % VikiBaseIE._VALID_URL_BASE
  317. _TESTS = [{
  318. 'url': 'http://www.viki.com/tv/50c-boys-over-flowers',
  319. 'info_dict': {
  320. 'id': '50c',
  321. 'title': 'Boys Over Flowers',
  322. 'description': 'md5:ecd3cff47967fe193cff37c0bec52790',
  323. },
  324. 'playlist_mincount': 71,
  325. }, {
  326. 'url': 'http://www.viki.com/tv/1354c-poor-nastya-complete',
  327. 'info_dict': {
  328. 'id': '1354c',
  329. 'title': 'Poor Nastya [COMPLETE]',
  330. 'description': 'md5:05bf5471385aa8b21c18ad450e350525',
  331. },
  332. 'playlist_count': 127,
  333. }, {
  334. 'url': 'http://www.viki.com/news/24569c-showbiz-korea',
  335. 'only_matching': True,
  336. }, {
  337. 'url': 'http://www.viki.com/movies/22047c-pride-and-prejudice-2005',
  338. 'only_matching': True,
  339. }, {
  340. 'url': 'http://www.viki.com/artists/2141c-shinee',
  341. 'only_matching': True,
  342. }]
  343. _PER_PAGE = 25
  344. def _real_extract(self, url):
  345. channel_id = self._match_id(url)
  346. channel = self._call_api(
  347. 'containers/%s.json' % channel_id, channel_id,
  348. 'Downloading channel JSON')
  349. self._check_errors(channel)
  350. title = self.dict_selection(channel['titles'], 'en')
  351. description = self.dict_selection(channel['descriptions'], 'en')
  352. entries = []
  353. for video_type in ('episodes', 'clips', 'movies'):
  354. for page_num in itertools.count(1):
  355. page = self._call_api(
  356. 'containers/%s/%s.json?per_page=%d&sort=number&direction=asc&with_paging=true&page=%d'
  357. % (channel_id, video_type, self._PER_PAGE, page_num), channel_id,
  358. 'Downloading %s JSON page #%d' % (video_type, page_num))
  359. for video in page['response']:
  360. video_id = video['id']
  361. entries.append(self.url_result(
  362. 'https://www.viki.com/videos/%s' % video_id, 'Viki'))
  363. if not page['pagination']['next']:
  364. break
  365. return self.playlist_result(entries, channel_id, title, description)