|
@@ -308,6 +308,36 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
|
|
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
|
|
r'ytcfg\.set\s*\(\s*({.+?})\s*\)\s*;', webpage, 'ytcfg',
|
|
default='{}'), video_id, fatal=False)
|
|
default='{}'), video_id, fatal=False)
|
|
|
|
|
|
|
|
+ def _extract_video(self, renderer):
|
|
|
|
+ video_id = renderer['videoId']
|
|
|
|
+ title = try_get(
|
|
|
|
+ renderer,
|
|
|
|
+ (lambda x: x['title']['runs'][0]['text'],
|
|
|
|
+ lambda x: x['title']['simpleText']), compat_str)
|
|
|
|
+ description = try_get(
|
|
|
|
+ renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
|
|
|
|
+ compat_str)
|
|
|
|
+ duration = parse_duration(try_get(
|
|
|
|
+ renderer, lambda x: x['lengthText']['simpleText'], compat_str))
|
|
|
|
+ view_count_text = try_get(
|
|
|
|
+ renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
|
|
|
|
+ view_count = str_to_int(self._search_regex(
|
|
|
|
+ r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
|
|
|
|
+ 'view count', default=None))
|
|
|
|
+ uploader = try_get(
|
|
|
|
+ renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
|
|
|
|
+ return {
|
|
|
|
+ '_type': 'url_transparent',
|
|
|
|
+ 'ie_key': YoutubeIE.ie_key(),
|
|
|
|
+ 'id': video_id,
|
|
|
|
+ 'url': video_id,
|
|
|
|
+ 'title': title,
|
|
|
|
+ 'description': description,
|
|
|
|
+ 'duration': duration,
|
|
|
|
+ 'view_count': view_count,
|
|
|
|
+ 'uploader': uploader,
|
|
|
|
+ }
|
|
|
|
+
|
|
|
|
|
|
class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
class YoutubeIE(YoutubeBaseInfoExtractor):
|
|
IE_DESC = 'YouTube.com'
|
|
IE_DESC = 'YouTube.com'
|
|
@@ -2765,36 +2795,6 @@ class YoutubeTabIE(YoutubeBaseInfoExtractor):
|
|
if renderer:
|
|
if renderer:
|
|
return renderer
|
|
return renderer
|
|
|
|
|
|
- def _extract_video(self, renderer):
|
|
|
|
- video_id = renderer.get('videoId')
|
|
|
|
- title = try_get(
|
|
|
|
- renderer,
|
|
|
|
- (lambda x: x['title']['runs'][0]['text'],
|
|
|
|
- lambda x: x['title']['simpleText']), compat_str)
|
|
|
|
- description = try_get(
|
|
|
|
- renderer, lambda x: x['descriptionSnippet']['runs'][0]['text'],
|
|
|
|
- compat_str)
|
|
|
|
- duration = parse_duration(try_get(
|
|
|
|
- renderer, lambda x: x['lengthText']['simpleText'], compat_str))
|
|
|
|
- view_count_text = try_get(
|
|
|
|
- renderer, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
|
|
|
|
- view_count = str_to_int(self._search_regex(
|
|
|
|
- r'^([\d,]+)', re.sub(r'\s', '', view_count_text),
|
|
|
|
- 'view count', default=None))
|
|
|
|
- uploader = try_get(
|
|
|
|
- renderer, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
|
|
|
|
- return {
|
|
|
|
- '_type': 'url_transparent',
|
|
|
|
- 'ie_key': YoutubeIE.ie_key(),
|
|
|
|
- 'id': video_id,
|
|
|
|
- 'url': video_id,
|
|
|
|
- 'title': title,
|
|
|
|
- 'description': description,
|
|
|
|
- 'duration': duration,
|
|
|
|
- 'view_count': view_count,
|
|
|
|
- 'uploader': uploader,
|
|
|
|
- }
|
|
|
|
-
|
|
|
|
def _grid_entries(self, grid_renderer):
|
|
def _grid_entries(self, grid_renderer):
|
|
for item in grid_renderer['items']:
|
|
for item in grid_renderer['items']:
|
|
if not isinstance(item, dict):
|
|
if not isinstance(item, dict):
|
|
@@ -3417,46 +3417,29 @@ class YoutubeSearchIE(SearchInfoExtractor, YoutubeBaseInfoExtractor):
|
|
list)
|
|
list)
|
|
if not slr_contents:
|
|
if not slr_contents:
|
|
break
|
|
break
|
|
- isr_contents = try_get(
|
|
|
|
- slr_contents,
|
|
|
|
- lambda x: x[0]['itemSectionRenderer']['contents'],
|
|
|
|
- list)
|
|
|
|
- if not isr_contents:
|
|
|
|
- break
|
|
|
|
- for content in isr_contents:
|
|
|
|
- if not isinstance(content, dict):
|
|
|
|
- continue
|
|
|
|
- video = content.get('videoRenderer')
|
|
|
|
- if not isinstance(video, dict):
|
|
|
|
- continue
|
|
|
|
- video_id = video.get('videoId')
|
|
|
|
- if not video_id:
|
|
|
|
|
|
+ for slr_content in slr_contents:
|
|
|
|
+ isr_contents = try_get(
|
|
|
|
+ slr_content,
|
|
|
|
+ lambda x: x['itemSectionRenderer']['contents'],
|
|
|
|
+ list)
|
|
|
|
+ if not isr_contents:
|
|
continue
|
|
continue
|
|
- title = try_get(video, lambda x: x['title']['runs'][0]['text'], compat_str)
|
|
|
|
- description = try_get(video, lambda x: x['descriptionSnippet']['runs'][0]['text'], compat_str)
|
|
|
|
- duration = parse_duration(try_get(video, lambda x: x['lengthText']['simpleText'], compat_str))
|
|
|
|
- view_count_text = try_get(video, lambda x: x['viewCountText']['simpleText'], compat_str) or ''
|
|
|
|
- view_count = int_or_none(self._search_regex(
|
|
|
|
- r'^(\d+)', re.sub(r'\s', '', view_count_text),
|
|
|
|
- 'view count', default=None))
|
|
|
|
- uploader = try_get(video, lambda x: x['ownerText']['runs'][0]['text'], compat_str)
|
|
|
|
- total += 1
|
|
|
|
- yield {
|
|
|
|
- '_type': 'url_transparent',
|
|
|
|
- 'ie_key': YoutubeIE.ie_key(),
|
|
|
|
- 'id': video_id,
|
|
|
|
- 'url': video_id,
|
|
|
|
- 'title': title,
|
|
|
|
- 'description': description,
|
|
|
|
- 'duration': duration,
|
|
|
|
- 'view_count': view_count,
|
|
|
|
- 'uploader': uploader,
|
|
|
|
- }
|
|
|
|
- if total == n:
|
|
|
|
- return
|
|
|
|
|
|
+ for content in isr_contents:
|
|
|
|
+ if not isinstance(content, dict):
|
|
|
|
+ continue
|
|
|
|
+ video = content.get('videoRenderer')
|
|
|
|
+ if not isinstance(video, dict):
|
|
|
|
+ continue
|
|
|
|
+ video_id = video.get('videoId')
|
|
|
|
+ if not video_id:
|
|
|
|
+ continue
|
|
|
|
+ yield self._extract_video(video)
|
|
|
|
+ total += 1
|
|
|
|
+ if total == n:
|
|
|
|
+ return
|
|
token = try_get(
|
|
token = try_get(
|
|
slr_contents,
|
|
slr_contents,
|
|
- lambda x: x[1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
|
|
|
|
|
|
+ lambda x: x[-1]['continuationItemRenderer']['continuationEndpoint']['continuationCommand']['token'],
|
|
compat_str)
|
|
compat_str)
|
|
if not token:
|
|
if not token:
|
|
break
|
|
break
|