|
@@ -31,6 +31,7 @@ from ..utils import (
|
|
extract_attributes,
|
|
extract_attributes,
|
|
get_element_by_attribute,
|
|
get_element_by_attribute,
|
|
int_or_none,
|
|
int_or_none,
|
|
|
|
+ join_nonempty,
|
|
js_to_json,
|
|
js_to_json,
|
|
LazyList,
|
|
LazyList,
|
|
merge_dicts,
|
|
merge_dicts,
|
|
@@ -45,6 +46,7 @@ from ..utils import (
|
|
str_to_int,
|
|
str_to_int,
|
|
traverse_obj,
|
|
traverse_obj,
|
|
try_get,
|
|
try_get,
|
|
|
|
+ txt_or_none,
|
|
unescapeHTML,
|
|
unescapeHTML,
|
|
unified_strdate,
|
|
unified_strdate,
|
|
unsmuggle_url,
|
|
unsmuggle_url,
|
|
@@ -2608,6 +2610,17 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|
'uploader_id': '@lexwill718',
|
|
'uploader_id': '@lexwill718',
|
|
},
|
|
},
|
|
'playlist_mincount': 75,
|
|
'playlist_mincount': 75,
|
|
|
|
+ }, {
|
|
|
|
+ # Releases tab
|
|
|
|
+ 'url': 'https://www.youtube.com/@daftpunk/releases',
|
|
|
|
+ 'info_dict': {
|
|
|
|
+ 'id': 'UC_kRDKYrUlrbtrSiyu5Tflg',
|
|
|
|
+ 'title': 'Daft Punk - Releases',
|
|
|
|
+ 'description': 'Daft Punk (1993 - 2021) - Official YouTube Channel',
|
|
|
|
+ 'uploader_id': '@daftpunk',
|
|
|
|
+ 'uploader': 'Daft Punk',
|
|
|
|
+ },
|
|
|
|
+ 'playlist_mincount': 36,
|
|
}, {
|
|
}, {
|
|
'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
|
|
'url': 'https://invidio.us/channel/UCmlqkdCBesrv2Lak1mF_MxA',
|
|
'only_matching': True,
|
|
'only_matching': True,
|
|
@@ -2822,6 +2835,12 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|
continue
|
|
continue
|
|
return renderer
|
|
return renderer
|
|
|
|
|
|
|
|
@staticmethod
def _get_text(r, k):
    """Fetch the text stored under key `k` of renderer-like object `r`.

    Two layouts are tried, in this order:
      * r[k]['runs'][0]['text']
      * r[k]['simpleText']

    Each candidate is filtered through `txt_or_none` (passed to
    `traverse_obj` as `expected_type`).
    """
    # presumably yields None when neither path matches -- this relies on
    # traverse_obj()'s default, which is not visible here; TODO confirm
    candidate_paths = (
        (k, 'runs', 0, 'text'),
        (k, 'simpleText'),
    )
    return traverse_obj(r, *candidate_paths, expected_type=txt_or_none)
|
|
|
|
+
|
|
def _grid_entries(self, grid_renderer):
|
|
def _grid_entries(self, grid_renderer):
|
|
for item in grid_renderer['items']:
|
|
for item in grid_renderer['items']:
|
|
if not isinstance(item, dict):
|
|
if not isinstance(item, dict):
|
|
@@ -2829,9 +2848,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|
renderer = self._extract_grid_item_renderer(item)
|
|
renderer = self._extract_grid_item_renderer(item)
|
|
if not isinstance(renderer, dict):
|
|
if not isinstance(renderer, dict):
|
|
continue
|
|
continue
|
|
- title = try_get(
|
|
|
|
- renderer, (lambda x: x['title']['runs'][0]['text'],
|
|
|
|
- lambda x: x['title']['simpleText']), compat_str)
|
|
|
|
|
|
+ title = self._get_text(renderer, 'title')
|
|
# playlist
|
|
# playlist
|
|
playlist_id = renderer.get('playlistId')
|
|
playlist_id = renderer.get('playlistId')
|
|
if playlist_id:
|
|
if playlist_id:
|
|
@@ -2848,8 +2865,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|
# channel
|
|
# channel
|
|
channel_id = renderer.get('channelId')
|
|
channel_id = renderer.get('channelId')
|
|
if channel_id:
|
|
if channel_id:
|
|
- title = try_get(
|
|
|
|
- renderer, lambda x: x['title']['simpleText'], compat_str)
|
|
|
|
|
|
+ title = self._get_text(renderer, 'title')
|
|
yield self.url_result(
|
|
yield self.url_result(
|
|
'https://www.youtube.com/channel/%s' % channel_id,
|
|
'https://www.youtube.com/channel/%s' % channel_id,
|
|
ie=YoutubeTabIE.ie_key(), video_title=title)
|
|
ie=YoutubeTabIE.ie_key(), video_title=title)
|
|
@@ -2958,15 +2974,26 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|
|
|
|
|
def _rich_grid_entries(self, contents):
    """Generate entries from the items of a rich grid.

    For every element of `contents`, the dict found at
    ['richItemRenderer']['content'] is inspected:
      * a 'videoRenderer' (or else 'reelItemRenderer') dict is handed to
        `self._video_entry()`, and a truthy result is yielded;
      * a 'playlistRenderer' dict carrying a 'playlistId' is yielded as a
        `url_result` pointing at the playlist URL.
    """
    for item in contents:
        # fall back to an empty dict so the look-ups below are always safe
        inner = traverse_obj(
            item, ('richItemRenderer', 'content'), expected_type=dict)
        if not inner:
            inner = {}

        video_renderer = traverse_obj(
            inner, 'videoRenderer', 'reelItemRenderer', expected_type=dict)
        if video_renderer:
            entry = self._video_entry(video_renderer)
            if entry:
                yield entry

        # playlist
        playlist_renderer = traverse_obj(
            inner, 'playlistRenderer', expected_type=dict) or {}
        title = self._get_text(playlist_renderer, 'title')
        playlist_id = playlist_renderer.get('playlistId')
        if not playlist_id:
            continue
        playlist_url = 'https://www.youtube.com/playlist?list=%s' % playlist_id
        yield self.url_result(
            playlist_url, ie=YoutubeTabIE.ie_key(),
            video_id=playlist_id, video_title=title)
|
|
|
|
|
|
@staticmethod
|
|
@staticmethod
|
|
def _build_continuation_query(continuation, ctp=None):
|
|
def _build_continuation_query(continuation, ctp=None):
|
|
@@ -3071,6 +3098,7 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|
return
|
|
return
|
|
for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []):
|
|
for entry in self._rich_grid_entries(rich_grid_renderer.get('contents') or []):
|
|
yield entry
|
|
yield entry
|
|
|
|
+
|
|
continuation = self._extract_continuation(rich_grid_renderer)
|
|
continuation = self._extract_continuation(rich_grid_renderer)
|
|
|
|
|
|
ytcfg = self._extract_ytcfg(item_id, webpage)
|
|
ytcfg = self._extract_ytcfg(item_id, webpage)
|
|
@@ -3213,50 +3241,41 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|
uploader['channel'] = uploader['uploader']
|
|
uploader['channel'] = uploader['uploader']
|
|
return uploader
|
|
return uploader
|
|
|
|
|
|
@classmethod
def _extract_alert(cls, data):
    """Collect the alert messages found in `data` into a single string.

    Every dict under data['alerts'] is examined; each text that can be
    extracted from it is collected, and the texts are joined with
    newlines.  With nothing collected the result is the empty string.
    """
    messages = []
    for alert in traverse_obj(data, ('alerts', Ellipsis), expected_type=dict):
        alert_text = traverse_obj(
            alert, (None, lambda x: x['alertRenderer']['text']), get_all=False)
        # NOTE(review): _get_text(r, 'text') reads r['text']['runs'][0]['text']
        # or r['text']['simpleText'], whereas the code this replaced read
        # alert_text['simpleText'] / alert_text['runs'][0]['text'] directly.
        # Confirm what the traversal above actually returns.
        text = cls._get_text(alert_text, 'text') if alert_text else None
        if text:
            messages.append(text)
    return '\n'.join(messages)
|
|
|
|
|
|
def _extract_from_tabs(self, item_id, webpage, data, tabs):
|
|
def _extract_from_tabs(self, item_id, webpage, data, tabs):
|
|
selected_tab = self._extract_selected_tab(tabs)
|
|
selected_tab = self._extract_selected_tab(tabs)
|
|
- renderer = try_get(
|
|
|
|
- data, lambda x: x['metadata']['channelMetadataRenderer'], dict)
|
|
|
|
|
|
+ renderer = traverse_obj(data, ('metadata', 'channelMetadataRenderer'),
|
|
|
|
+ expected_type=dict) or {}
|
|
playlist_id = item_id
|
|
playlist_id = item_id
|
|
title = description = None
|
|
title = description = None
|
|
if renderer:
|
|
if renderer:
|
|
- channel_title = renderer.get('title') or item_id
|
|
|
|
- tab_title = selected_tab.get('title')
|
|
|
|
- title = channel_title or item_id
|
|
|
|
- if tab_title:
|
|
|
|
- title += ' - %s' % tab_title
|
|
|
|
- if selected_tab.get('expandedText'):
|
|
|
|
- title += ' - %s' % selected_tab['expandedText']
|
|
|
|
- description = renderer.get('description')
|
|
|
|
- playlist_id = renderer.get('externalId')
|
|
|
|
|
|
+ channel_title = txt_or_none(renderer.get('title')) or item_id
|
|
|
|
+ tab_title = txt_or_none(selected_tab.get('title'))
|
|
|
|
+ title = join_nonempty(
|
|
|
|
+ channel_title or item_id, tab_title,
|
|
|
|
+ txt_or_none(selected_tab.get('expandedText')),
|
|
|
|
+ delim=' - ')
|
|
|
|
+ description = txt_or_none(renderer.get('description'))
|
|
|
|
+ playlist_id = txt_or_none(renderer.get('externalId')) or playlist_id
|
|
else:
|
|
else:
|
|
- renderer = try_get(
|
|
|
|
- data, lambda x: x['metadata']['playlistMetadataRenderer'], dict)
|
|
|
|
- if renderer:
|
|
|
|
- title = renderer.get('title')
|
|
|
|
- else:
|
|
|
|
- renderer = try_get(
|
|
|
|
- data, lambda x: x['header']['hashtagHeaderRenderer'], dict)
|
|
|
|
- if renderer:
|
|
|
|
- title = try_get(renderer, lambda x: x['hashtag']['simpleText'])
|
|
|
|
|
|
+ renderer = traverse_obj(data,
|
|
|
|
+ ('metadata', 'playlistMetadataRenderer'),
|
|
|
|
+ ('header', 'hashtagHeaderRenderer'),
|
|
|
|
+ expected_type=dict) or {}
|
|
|
|
+ title = traverse_obj(renderer, 'title', ('hashtag', 'simpleText'),
|
|
|
|
+ expected_type=txt_or_none)
|
|
playlist = self.playlist_result(
|
|
playlist = self.playlist_result(
|
|
self._entries(selected_tab, item_id, webpage),
|
|
self._entries(selected_tab, item_id, webpage),
|
|
playlist_id=playlist_id, playlist_title=title,
|
|
playlist_id=playlist_id, playlist_title=title,
|
|
@@ -3264,15 +3283,16 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|
return merge_dicts(playlist, self._extract_uploader(renderer, data))
|
|
return merge_dicts(playlist, self._extract_uploader(renderer, data))
|
|
|
|
|
|
def _extract_from_playlist(self, item_id, url, data, playlist):
|
|
def _extract_from_playlist(self, item_id, url, data, playlist):
|
|
- title = playlist.get('title') or try_get(
|
|
|
|
- data, lambda x: x['titleText']['simpleText'], compat_str)
|
|
|
|
- playlist_id = playlist.get('playlistId') or item_id
|
|
|
|
|
|
+ title = traverse_obj((playlist, data),
|
|
|
|
+ (0, 'title'), (1, 'titleText', 'simpleText'),
|
|
|
|
+ expected_type=txt_or_none)
|
|
|
|
+ playlist_id = txt_or_none(playlist.get('playlistId')) or item_id
|
|
# Inline playlist rendition continuation does not always work
|
|
# Inline playlist rendition continuation does not always work
|
|
# at Youtube side, so delegating regular tab-based playlist URL
|
|
# at Youtube side, so delegating regular tab-based playlist URL
|
|
# processing whenever possible.
|
|
# processing whenever possible.
|
|
- playlist_url = urljoin(url, try_get(
|
|
|
|
- playlist, lambda x: x['endpoint']['commandMetadata']['webCommandMetadata']['url'],
|
|
|
|
- compat_str))
|
|
|
|
|
|
+ playlist_url = urljoin(url, traverse_obj(
|
|
|
|
+ playlist, ('endpoint', 'commandMetadata', 'webCommandMetadata', 'url'),
|
|
|
|
+ expected_type=url_or_none))
|
|
if playlist_url and playlist_url != url:
|
|
if playlist_url and playlist_url != url:
|
|
return self.url_result(
|
|
return self.url_result(
|
|
playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
|
|
playlist_url, ie=YoutubeTabIE.ie_key(), video_id=playlist_id,
|