|
@@ -1915,6 +1915,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
return int_or_none(self._search_regex(
|
|
|
r'\bclen[=/](\d+)', media_url, 'filesize', default=None))
|
|
|
|
|
|
+ streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list) or []
|
|
|
+ streaming_formats.extend(try_get(player_response, lambda x: x['streamingData']['adaptiveFormats'], list) or [])
|
|
|
+
|
|
|
if 'conn' in video_info and video_info['conn'][0].startswith('rtmp'):
|
|
|
self.report_rtmp_download()
|
|
|
formats = [{
|
|
@@ -1923,10 +1926,11 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
'url': video_info['conn'][0],
|
|
|
'player_url': player_url,
|
|
|
}]
|
|
|
- elif not is_live and (len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
|
|
|
+ elif not is_live and (streaming_formats or len(video_info.get('url_encoded_fmt_stream_map', [''])[0]) >= 1 or len(video_info.get('adaptive_fmts', [''])[0]) >= 1):
|
|
|
encoded_url_map = video_info.get('url_encoded_fmt_stream_map', [''])[0] + ',' + video_info.get('adaptive_fmts', [''])[0]
|
|
|
if 'rtmpe%3Dyes' in encoded_url_map:
|
|
|
raise ExtractorError('rtmpe downloads are not supported, see https://github.com/ytdl-org/youtube-dl/issues/343 for more information.', expected=True)
|
|
|
+ formats = []
|
|
|
formats_spec = {}
|
|
|
fmt_list = video_info.get('fmt_list', [''])[0]
|
|
|
if fmt_list:
|
|
@@ -1941,90 +1945,105 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
'height': int_or_none(width_height[1]),
|
|
|
}
|
|
|
q = qualities(['small', 'medium', 'hd720'])
|
|
|
- streaming_formats = try_get(player_response, lambda x: x['streamingData']['formats'], list)
|
|
|
- if streaming_formats:
|
|
|
- for fmt in streaming_formats:
|
|
|
- itag = str_or_none(fmt.get('itag'))
|
|
|
- if not itag:
|
|
|
- continue
|
|
|
- quality = fmt.get('quality')
|
|
|
- quality_label = fmt.get('qualityLabel') or quality
|
|
|
- formats_spec[itag] = {
|
|
|
- 'asr': int_or_none(fmt.get('audioSampleRate')),
|
|
|
- 'filesize': int_or_none(fmt.get('contentLength')),
|
|
|
- 'format_note': quality_label,
|
|
|
- 'fps': int_or_none(fmt.get('fps')),
|
|
|
- 'height': int_or_none(fmt.get('height')),
|
|
|
- 'quality': q(quality),
|
|
|
- # bitrate for itag 43 is always 2147483647
|
|
|
- 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
|
|
|
- 'width': int_or_none(fmt.get('width')),
|
|
|
- }
|
|
|
- formats = []
|
|
|
- for url_data_str in encoded_url_map.split(','):
|
|
|
- url_data = compat_parse_qs(url_data_str)
|
|
|
- if 'itag' not in url_data or 'url' not in url_data or url_data.get('drm_families'):
|
|
|
+ for fmt in streaming_formats:
|
|
|
+ itag = str_or_none(fmt.get('itag'))
|
|
|
+ if not itag:
|
|
|
continue
|
|
|
+ quality = fmt.get('quality')
|
|
|
+ quality_label = fmt.get('qualityLabel') or quality
|
|
|
+ formats_spec[itag] = {
|
|
|
+ 'asr': int_or_none(fmt.get('audioSampleRate')),
|
|
|
+ 'filesize': int_or_none(fmt.get('contentLength')),
|
|
|
+ 'format_note': quality_label,
|
|
|
+ 'fps': int_or_none(fmt.get('fps')),
|
|
|
+ 'height': int_or_none(fmt.get('height')),
|
|
|
+ 'quality': q(quality),
|
|
|
+ # bitrate for itag 43 is always 2147483647
|
|
|
+ 'tbr': float_or_none(fmt.get('averageBitrate') or fmt.get('bitrate'), 1000) if itag != '43' else None,
|
|
|
+ 'width': int_or_none(fmt.get('width')),
|
|
|
+ }
|
|
|
+
|
|
|
+ for fmt in streaming_formats:
|
|
|
+ if fmt.get('drm_families'):
|
|
|
+ continue
|
|
|
+ url = url_or_none(fmt.get('url'))
|
|
|
+
|
|
|
+ if not url:
|
|
|
+ cipher = fmt.get('cipher')
|
|
|
+ if not cipher:
|
|
|
+ continue
|
|
|
+ url_data = compat_parse_qs(cipher)
|
|
|
+ url = url_or_none(try_get(url_data, lambda x: x['url'][0], compat_str))
|
|
|
+ if not url:
|
|
|
+ continue
|
|
|
+ else:
|
|
|
+ cipher = None
|
|
|
+ url_data = compat_parse_qs(compat_urllib_parse_urlparse(url).query)
|
|
|
+
|
|
|
stream_type = int_or_none(try_get(url_data, lambda x: x['stream_type'][0]))
|
|
|
# Unsupported FORMAT_STREAM_TYPE_OTF
|
|
|
if stream_type == 3:
|
|
|
continue
|
|
|
- format_id = url_data['itag'][0]
|
|
|
- url = url_data['url'][0]
|
|
|
-
|
|
|
- if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
|
|
|
- ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
|
|
|
- jsplayer_url_json = self._search_regex(
|
|
|
- ASSETS_RE,
|
|
|
- embed_webpage if age_gate else video_webpage,
|
|
|
- 'JS player URL (1)', default=None)
|
|
|
- if not jsplayer_url_json and not age_gate:
|
|
|
- # We need the embed website after all
|
|
|
- if embed_webpage is None:
|
|
|
- embed_url = proto + '://www.youtube.com/embed/%s' % video_id
|
|
|
- embed_webpage = self._download_webpage(
|
|
|
- embed_url, video_id, 'Downloading embed webpage')
|
|
|
- jsplayer_url_json = self._search_regex(
|
|
|
- ASSETS_RE, embed_webpage, 'JS player URL')
|
|
|
|
|
|
- player_url = json.loads(jsplayer_url_json)
|
|
|
- if player_url is None:
|
|
|
- player_url_json = self._search_regex(
|
|
|
- r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
|
|
|
- video_webpage, 'age gate player URL')
|
|
|
- player_url = json.loads(player_url_json)
|
|
|
-
|
|
|
- if 'sig' in url_data:
|
|
|
- url += '&signature=' + url_data['sig'][0]
|
|
|
- elif 's' in url_data:
|
|
|
- encrypted_sig = url_data['s'][0]
|
|
|
+ format_id = fmt.get('itag') or url_data['itag'][0]
|
|
|
+ if not format_id:
|
|
|
+ continue
|
|
|
+ format_id = compat_str(format_id)
|
|
|
|
|
|
- if self._downloader.params.get('verbose'):
|
|
|
+ if cipher:
|
|
|
+ if 's' in url_data or self._downloader.params.get('youtube_include_dash_manifest', True):
|
|
|
+ ASSETS_RE = r'"assets":.+?"js":\s*("[^"]+")'
|
|
|
+ jsplayer_url_json = self._search_regex(
|
|
|
+ ASSETS_RE,
|
|
|
+ embed_webpage if age_gate else video_webpage,
|
|
|
+ 'JS player URL (1)', default=None)
|
|
|
+ if not jsplayer_url_json and not age_gate:
|
|
|
+ # We need the embed website after all
|
|
|
+ if embed_webpage is None:
|
|
|
+ embed_url = proto + '://www.youtube.com/embed/%s' % video_id
|
|
|
+ embed_webpage = self._download_webpage(
|
|
|
+ embed_url, video_id, 'Downloading embed webpage')
|
|
|
+ jsplayer_url_json = self._search_regex(
|
|
|
+ ASSETS_RE, embed_webpage, 'JS player URL')
|
|
|
+
|
|
|
+ player_url = json.loads(jsplayer_url_json)
|
|
|
if player_url is None:
|
|
|
- player_version = 'unknown'
|
|
|
- player_desc = 'unknown'
|
|
|
- else:
|
|
|
- if player_url.endswith('swf'):
|
|
|
- player_version = self._search_regex(
|
|
|
- r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
|
|
|
- 'flash player', fatal=False)
|
|
|
- player_desc = 'flash player %s' % player_version
|
|
|
+ player_url_json = self._search_regex(
|
|
|
+ r'ytplayer\.config.*?"url"\s*:\s*("[^"]+")',
|
|
|
+ video_webpage, 'age gate player URL')
|
|
|
+ player_url = json.loads(player_url_json)
|
|
|
+
|
|
|
+ if 'sig' in url_data:
|
|
|
+ url += '&signature=' + url_data['sig'][0]
|
|
|
+ elif 's' in url_data:
|
|
|
+ encrypted_sig = url_data['s'][0]
|
|
|
+
|
|
|
+ if self._downloader.params.get('verbose'):
|
|
|
+ if player_url is None:
|
|
|
+ player_version = 'unknown'
|
|
|
+ player_desc = 'unknown'
|
|
|
else:
|
|
|
- player_version = self._search_regex(
|
|
|
- [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
|
|
|
- r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
|
|
|
- player_url,
|
|
|
- 'html5 player', fatal=False)
|
|
|
- player_desc = 'html5 player %s' % player_version
|
|
|
-
|
|
|
- parts_sizes = self._signature_cache_id(encrypted_sig)
|
|
|
- self.to_screen('{%s} signature length %s, %s' %
|
|
|
- (format_id, parts_sizes, player_desc))
|
|
|
-
|
|
|
- signature = self._decrypt_signature(
|
|
|
- encrypted_sig, video_id, player_url, age_gate)
|
|
|
- sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
|
|
|
- url += '&%s=%s' % (sp, signature)
|
|
|
+ if player_url.endswith('swf'):
|
|
|
+ player_version = self._search_regex(
|
|
|
+ r'-(.+?)(?:/watch_as3)?\.swf$', player_url,
|
|
|
+ 'flash player', fatal=False)
|
|
|
+ player_desc = 'flash player %s' % player_version
|
|
|
+ else:
|
|
|
+ player_version = self._search_regex(
|
|
|
+ [r'html5player-([^/]+?)(?:/html5player(?:-new)?)?\.js',
|
|
|
+ r'(?:www|player(?:_ias)?)-([^/]+)(?:/[a-z]{2,3}_[A-Z]{2})?/base\.js'],
|
|
|
+ player_url,
|
|
|
+ 'html5 player', fatal=False)
|
|
|
+ player_desc = 'html5 player %s' % player_version
|
|
|
+
|
|
|
+ parts_sizes = self._signature_cache_id(encrypted_sig)
|
|
|
+ self.to_screen('{%s} signature length %s, %s' %
|
|
|
+ (format_id, parts_sizes, player_desc))
|
|
|
+
|
|
|
+ signature = self._decrypt_signature(
|
|
|
+ encrypted_sig, video_id, player_url, age_gate)
|
|
|
+ sp = try_get(url_data, lambda x: x['sp'][0], compat_str) or 'signature'
|
|
|
+ url += '&%s=%s' % (sp, signature)
|
|
|
if 'ratebypass' not in url:
|
|
|
url += '&ratebypass=yes'
|
|
|
|
|
@@ -2044,24 +2063,33 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
|
mobj = re.search(r'^(?P<width>\d+)[xX](?P<height>\d+)$', url_data.get('size', [''])[0])
|
|
|
width, height = (int(mobj.group('width')), int(mobj.group('height'))) if mobj else (None, None)
|
|
|
|
|
|
+ if width is None:
|
|
|
+ width = int_or_none(fmt.get('width'))
|
|
|
+ if height is None:
|
|
|
+ height = int_or_none(fmt.get('height'))
|
|
|
+
|
|
|
filesize = int_or_none(url_data.get(
|
|
|
'clen', [None])[0]) or _extract_filesize(url)
|
|
|
|
|
|
- quality = url_data.get('quality', [None])[0]
|
|
|
+ quality = url_data.get('quality', [None])[0] or fmt.get('quality')
|
|
|
+ quality_label = url_data.get('quality_label', [None])[0] or fmt.get('qualityLabel')
|
|
|
+
|
|
|
+ tbr = float_or_none(url_data.get('bitrate', [None])[0], 1000) or float_or_none(fmt.get('bitrate'), 1000)
|
|
|
+ fps = int_or_none(url_data.get('fps', [None])[0]) or int_or_none(fmt.get('fps'))
|
|
|
|
|
|
more_fields = {
|
|
|
'filesize': filesize,
|
|
|
- 'tbr': float_or_none(url_data.get('bitrate', [None])[0], 1000),
|
|
|
+ 'tbr': tbr,
|
|
|
'width': width,
|
|
|
'height': height,
|
|
|
- 'fps': int_or_none(url_data.get('fps', [None])[0]),
|
|
|
- 'format_note': url_data.get('quality_label', [None])[0] or quality,
|
|
|
+ 'fps': fps,
|
|
|
+ 'format_note': quality_label or quality,
|
|
|
'quality': q(quality),
|
|
|
}
|
|
|
for key, value in more_fields.items():
|
|
|
if value:
|
|
|
dct[key] = value
|
|
|
- type_ = url_data.get('type', [None])[0]
|
|
|
+ type_ = url_data.get('type', [None])[0] or fmt.get('mimeType')
|
|
|
if type_:
|
|
|
type_split = type_.split(';')
|
|
|
kind_ext = type_split[0].split('/')
|