vlive.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383
  1. # coding: utf-8
  2. from __future__ import unicode_literals
  3. import re
  4. import time
  5. import itertools
  6. from .common import InfoExtractor
  7. from ..compat import (
  8. compat_urllib_parse_urlencode,
  9. compat_str,
  10. )
  11. from ..utils import (
  12. dict_get,
  13. ExtractorError,
  14. float_or_none,
  15. int_or_none,
  16. remove_start,
  17. try_get,
  18. urlencode_postdata,
  19. )
class VLiveIE(InfoExtractor):
    """Extract a single V LIVE (vlive.tv) video.

    Handles both ongoing live broadcasts (HLS streams) and VOD replays
    (progressive MP4 with optional VTT subtitles). Logging in via the
    'vlive' netrc machine unlocks CH+ subscriber-only replays.
    """
    IE_NAME = 'vlive'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<id>[0-9]+)'
    _NETRC_MACHINE = 'vlive'
    _TESTS = [{
        'url': 'http://www.vlive.tv/video/1326',
        'md5': 'cc7314812855ce56de70a06a27314983',
        'info_dict': {
            'id': '1326',
            'ext': 'mp4',
            'title': "[V LIVE] Girl's Day's Broadcast",
            'creator': "Girl's Day",
            'view_count': int,
        },
    }, {
        'url': 'http://www.vlive.tv/video/16937',
        'info_dict': {
            'id': '16937',
            'ext': 'mp4',
            'title': '[V LIVE] 첸백시 걍방',
            'creator': 'EXO',
            'view_count': int,
            'subtitles': 'mincount:12',
        },
        'params': {
            'skip_download': True,
        },
    }, {
        'url': 'https://www.vlive.tv/video/129100',
        'md5': 'ca2569453b79d66e5b919e5d308bff6b',
        'info_dict': {
            'id': '129100',
            'ext': 'mp4',
            'title': "[V LIVE] [BTS+] Run BTS! 2019 - EP.71 :: Behind the scene",
            'creator': "BTS+",
            'view_count': int,
            'subtitles': 'mincount:10',
        },
        'skip': 'This video is only available for CH+ subscribers',
    }]

    @classmethod
    def suitable(cls, url):
        # Playlist URLs also match _VALID_URL's prefix; defer those to
        # VLivePlaylistIE so they are not swallowed here.
        return False if VLivePlaylistIE.suitable(url) else super(VLiveIE, cls).suitable(url)

    def _real_initialize(self):
        # Attempt login up front so CH+ content is accessible during extraction.
        self._login()

    def _login(self):
        """Log in with configured credentials; silently no-op when absent.

        Raises ExtractorError (expected) if the login POST does not result
        in an authenticated session.
        """
        email, password = self._get_login_info()
        if None in (email, password):
            return

        def is_logged_in():
            # loginInfo reflects the session state for the current cookie jar.
            login_info = self._download_json(
                'https://www.vlive.tv/auth/loginInfo', None,
                note='Downloading login info',
                headers={'Referer': 'https://www.vlive.tv/home'})
            return try_get(login_info,
                           lambda x: x['message']['login'], bool) or False

        if is_logged_in():
            return

        LOGIN_URL = 'https://www.vlive.tv/auth/email/login'
        # Initial GET acquires the session cookies the login POST requires.
        self._request_webpage(LOGIN_URL, None,
                              note='Downloading login cookies')

        self._download_webpage(
            LOGIN_URL, None, note='Logging in',
            data=urlencode_postdata({'email': email, 'pwd': password}),
            headers={
                'Referer': LOGIN_URL,
                'Content-Type': 'application/x-www-form-urlencoded'
            })

        if not is_logged_in():
            raise ExtractorError('Unable to log in', expected=True)

    def _real_extract(self, url):
        video_id = self._match_id(url)

        webpage = self._download_webpage(
            'https://www.vlive.tv/video/%s' % video_id, video_id)

        # The video page calls vlive.video.init(...) with positional
        # arguments; capture the raw argument list.
        VIDEO_PARAMS_RE = r'\bvlive\.video\.init\(([^)]+)'
        VIDEO_PARAMS_FIELD = 'video params'

        # First attempt: wrap the argument list in brackets and parse it
        # as a JSON array.
        params = self._parse_json(self._search_regex(
            VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD, default=''), video_id,
            transform_source=lambda s: '[' + s + ']', fatal=False)

        if not params or len(params) < 7:
            # Fallback: split the raw argument list on commas and strip
            # surrounding quotes manually.
            params = self._search_regex(
                VIDEO_PARAMS_RE, webpage, VIDEO_PARAMS_FIELD)
            params = [p.strip(r'"') for p in re.split(r'\s*,\s*', params)]

        # Positional arguments: 2 = broadcast status, 5 = internal (long)
        # video id, 6 = playback key for the VOD API.
        status, long_video_id, key = params[2], params[5], params[6]
        status = remove_start(status, 'PRODUCT_')

        if status in ('LIVE_ON_AIR', 'BIG_EVENT_ON_AIR'):
            return self._live(video_id, webpage)
        elif status in ('VOD_ON_AIR', 'BIG_EVENT_INTRO'):
            return self._replay(video_id, webpage, long_video_id, key)

        # Everything else is a non-playable state; surface a helpful error.
        if status == 'LIVE_END':
            raise ExtractorError('Uploading for replay. Please wait...',
                                 expected=True)
        elif status == 'COMING_SOON':
            raise ExtractorError('Coming soon!', expected=True)
        elif status == 'CANCELED':
            raise ExtractorError('We are sorry, '
                                 'but the live broadcast has been canceled.',
                                 expected=True)
        elif status == 'ONLY_APP':
            raise ExtractorError('Unsupported video type', expected=True)
        else:
            raise ExtractorError('Unknown status %s' % status)

    def _get_common_fields(self, webpage):
        """Scrape title, creator and thumbnail shared by live and replay paths."""
        title = self._og_search_title(webpage)
        creator = self._html_search_regex(
            r'<div[^>]+class="info_area"[^>]*>\s*(?:<em[^>]*>.*</em\s*>\s*)?<a\s+[^>]*>([^<]+)',
            webpage, 'creator', fatal=False)
        thumbnail = self._og_search_thumbnail(webpage)
        return {
            'title': title,
            'creator': creator,
            'thumbnail': thumbnail,
        }

    def _live(self, video_id, webpage):
        """Build the info dict for an ongoing live broadcast (HLS)."""
        init_page = self._download_init_page(video_id)

        live_params = self._search_regex(
            r'"liveStreamInfo"\s*:\s*(".*"),',
            init_page, 'live stream info')
        # liveStreamInfo is double-encoded: a JSON string containing JSON.
        live_params = self._parse_json(live_params, video_id)
        live_params = self._parse_json(live_params, video_id)

        formats = []
        for vid in live_params.get('resolutions', []):
            formats.extend(self._extract_m3u8_formats(
                vid['cdnUrl'], video_id, 'mp4',
                m3u8_id=vid.get('name'),
                fatal=False, live=True))
        self._sort_formats(formats)

        info = self._get_common_fields(webpage)
        info.update({
            'title': self._live_title(info['title']),
            'id': video_id,
            'formats': formats,
            'is_live': True,
        })
        return info

    def _replay(self, video_id, webpage, long_video_id, key):
        """Build the info dict for a VOD replay, fetching id/key if missing."""
        if '' in (long_video_id, key):
            # Page params were incomplete (e.g. CH+ content); query the
            # init endpoint for the real video id and playback key.
            init_page = self._download_init_page(video_id)
            video_info = self._parse_json(self._search_regex(
                r'(?s)oVideoStatus\s*=\s*({.*})', init_page, 'video info'),
                video_id)
            if video_info['status'] == 'NEED_CHANNEL_PLUS':
                self.raise_login_required(
                    'This video is only available for CH+ subscribers')
            long_video_id, key = video_info['vid'], video_info['inkey']

        playinfo = self._download_json(
            'http://global.apis.naver.com/rmcnmv/rmcnmv/vod_play_videoInfo.json?%s'
            % compat_urllib_parse_urlencode({
                'videoId': long_video_id,
                'key': key,
                'ptc': 'http',
                'doct': 'json',  # document type (xml or json)
                'cpt': 'vtt',  # captions type (vtt or ttml)
            }), video_id)

        formats = [{
            'url': vid['source'],
            'format_id': vid.get('encodingOption', {}).get('name'),
            'abr': float_or_none(vid.get('bitrate', {}).get('audio')),
            'vbr': float_or_none(vid.get('bitrate', {}).get('video')),
            'width': int_or_none(vid.get('encodingOption', {}).get('width')),
            'height': int_or_none(vid.get('encodingOption', {}).get('height')),
            'filesize': int_or_none(vid.get('size')),
        } for vid in playinfo.get('videos', {}).get('list', []) if vid.get('source')]
        self._sort_formats(formats)

        view_count = int_or_none(playinfo.get('meta', {}).get('count'))

        subtitles = {}
        for caption in playinfo.get('captions', {}).get('list', []):
            # The API is inconsistent about which field names the language.
            lang = dict_get(caption, ('locale', 'language', 'country', 'label'))
            if lang and caption.get('source'):
                subtitles[lang] = [{
                    'ext': 'vtt',
                    'url': caption['source']}]

        info = self._get_common_fields(webpage)
        info.update({
            'id': video_id,
            'formats': formats,
            'view_count': view_count,
            'subtitles': subtitles,
        })
        return info

    def _download_init_page(self, video_id):
        # POST to the init endpoint; the Referer header is required by the site.
        return self._download_webpage(
            'https://www.vlive.tv/video/init/view',
            video_id, note='Downloading live webpage',
            data=urlencode_postdata({'videoSeq': video_id}),
            headers={
                'Referer': 'https://www.vlive.tv/video/%s' % video_id,
                'Content-Type': 'application/x-www-form-urlencoded'
            })
class VLiveChannelIE(InfoExtractor):
    """Extract all videos of a V LIVE channel (channels.vlive.tv) as a playlist.

    Resolves the public channel code to an internal channel sequence via
    the vfan API, then pages through getChannelVideoList until exhausted.
    """
    IE_NAME = 'vlive:channel'
    _VALID_URL = r'https?://channels\.vlive\.tv/(?P<id>[0-9A-Z]+)'
    _TEST = {
        'url': 'http://channels.vlive.tv/FCD4B',
        'info_dict': {
            'id': 'FCD4B',
            'title': 'MAMAMOO',
        },
        'playlist_mincount': 110
    }
    # Fallback API key, used when the live app.js cannot be fetched/parsed.
    _APP_ID = '8c6cc7b45d2568fb668be6e05b6e5a3b'

    def _real_extract(self, url):
        channel_code = self._match_id(url)

        webpage = self._download_webpage(
            'http://channels.vlive.tv/%s/video' % channel_code, channel_code)

        app_id = None

        # Best effort: scrape the current app id out of the site's app.js
        # so the extractor keeps working if the hard-coded key rotates.
        app_js_url = self._search_regex(
            r'<script[^>]+src=(["\'])(?P<url>http.+?/app\.js.*?)\1',
            webpage, 'app js', default=None, group='url')

        if app_js_url:
            app_js = self._download_webpage(
                app_js_url, channel_code, 'Downloading app JS', fatal=False)
            if app_js:
                app_id = self._search_regex(
                    r'Global\.VFAN_APP_ID\s*=\s*[\'"]([^\'"]+)[\'"]',
                    app_js, 'app id', default=None)

        app_id = app_id or self._APP_ID

        channel_info = self._download_json(
            'http://api.vfan.vlive.tv/vproxy/channelplus/decodeChannelCode',
            channel_code, note='Downloading decode channel code',
            query={
                'app_id': app_id,
                'channelCode': channel_code,
                '_': int(time.time())  # cache buster
            })

        channel_seq = channel_info['result']['channelSeq']
        channel_name = None
        entries = []

        for page_num in itertools.count(1):
            video_list = self._download_json(
                'http://api.vfan.vlive.tv/vproxy/channelplus/getChannelVideoList',
                channel_code, note='Downloading channel list page #%d' % page_num,
                query={
                    'app_id': app_id,
                    'channelSeq': channel_seq,
                    # Large values of maxNumOfRows (~300 or above) may cause
                    # empty responses (see [1]), e.g. this happens for [2] that
                    # has more than 300 videos.
                    # 1. https://github.com/ytdl-org/youtube-dl/issues/13830
                    # 2. http://channels.vlive.tv/EDBF.
                    'maxNumOfRows': 100,
                    '_': int(time.time()),
                    'pageNo': page_num
                }
            )

            if not channel_name:
                # Channel name is repeated on every page; grab it once.
                channel_name = try_get(
                    video_list,
                    lambda x: x['result']['channelInfo']['channelName'],
                    compat_str)

            videos = try_get(
                video_list, lambda x: x['result']['videoList'], list)
            if not videos:
                break

            for video in videos:
                video_id = video.get('videoSeq')
                if not video_id:
                    continue
                video_id = compat_str(video_id)
                # Delegate each entry to VLiveIE for actual extraction.
                entries.append(
                    self.url_result(
                        'http://www.vlive.tv/video/%s' % video_id,
                        ie=VLiveIE.ie_key(), video_id=video_id))

        return self.playlist_result(
            entries, channel_code, channel_name)
class VLivePlaylistIE(InfoExtractor):
    """Extract a V LIVE multicam playlist (vlive.tv/video/<id>/playlist/<id>).

    Honors --no-playlist by downloading only the referenced video; each
    playlist entry is delegated to VLiveIE.
    """
    IE_NAME = 'vlive:playlist'
    _VALID_URL = r'https?://(?:(?:www|m)\.)?vlive\.tv/video/(?P<video_id>[0-9]+)/playlist/(?P<id>[0-9]+)'
    _TEST = {
        'url': 'http://www.vlive.tv/video/22867/playlist/22912',
        'info_dict': {
            'id': '22912',
            'title': 'Valentine Day Message from TWICE'
        },
        'playlist_mincount': 9
    }

    def _real_extract(self, url):
        mobj = re.match(self._VALID_URL, url)
        video_id, playlist_id = mobj.group('video_id', 'id')

        VIDEO_URL_TEMPLATE = 'http://www.vlive.tv/video/%s'
        if self._downloader.params.get('noplaylist'):
            # User asked for just the single video, not the whole playlist.
            self.to_screen(
                'Downloading just video %s because of --no-playlist' % video_id)
            return self.url_result(
                VIDEO_URL_TEMPLATE % video_id,
                ie=VLiveIE.ie_key(), video_id=video_id)

        self.to_screen(
            'Downloading playlist %s - add --no-playlist to just download video'
            % playlist_id)

        webpage = self._download_webpage(
            'http://www.vlive.tv/video/%s/playlist/%s'
            % (video_id, playlist_id), playlist_id)

        # The page embeds the member video ids as a JS array literal.
        item_ids = self._parse_json(
            self._search_regex(
                r'playlistVideoSeqs\s*=\s*(\[[^]]+\])', webpage,
                'playlist video seqs'),
            playlist_id)

        entries = [
            self.url_result(
                VIDEO_URL_TEMPLATE % item_id, ie=VLiveIE.ie_key(),
                video_id=compat_str(item_id))
            for item_id in item_ids]

        playlist_name = self._html_search_regex(
            r'<div[^>]+class="[^"]*multicam_playlist[^>]*>\s*<h3[^>]+>([^<]+)',
            webpage, 'playlist title', fatal=False)

        return self.playlist_result(entries, playlist_id, playlist_name)