Quellcode durchsuchen

[youtube] Fix controversy videos extraction (closes #14027, closes #14029)

Sergey M․ vor 8 Jahren
Ursprung
Commit
c7121fa7b8
1 geänderte Datei mit 31 neuen und 5 gelöschten Zeilen
  1. 31 5
      youtube_dl/extractor/youtube.py

+ 31 - 5
youtube_dl/extractor/youtube.py

@@ -1003,6 +1003,27 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'Skipping DASH manifest',
                 'Skipping DASH manifest',
             ],
             ],
         },
         },
+        {
+            # The following content has been identified by the YouTube community
+            # as inappropriate or offensive to some audiences.
+            'url': 'https://www.youtube.com/watch?v=6SJNVb0GnPI',
+            'info_dict': {
+                'id': '6SJNVb0GnPI',
+                'ext': 'mp4',
+                'title': 'Race Differences in Intelligence',
+                'description': 'md5:5d161533167390427a1f8ee89a1fc6f1',
+                'duration': 965,
+                'upload_date': '20140124',
+                'uploader': 'New Century Foundation',
+                'uploader_id': 'UCEJYpZGqgUob0zVVEaLhvVg',
+                'uploader_url': r're:https?://(?:www\.)?youtube\.com/channel/UCEJYpZGqgUob0zVVEaLhvVg',
+                'license': 'Standard YouTube License',
+                'view_count': int,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
         {
         {
             # itag 212
             # itag 212
             'url': '1t24XAntNCY',
             'url': '1t24XAntNCY',
@@ -1437,9 +1458,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             if dash_mpd and dash_mpd[0] not in dash_mpds:
             if dash_mpd and dash_mpd[0] not in dash_mpds:
                 dash_mpds.append(dash_mpd[0])
                 dash_mpds.append(dash_mpd[0])
 
 
+        is_live = None
+        view_count = None
+
+        def extract_view_count(v_info):  # reads v_info['view_count'][0] through try_get, then int_or_none
+            return int_or_none(try_get(v_info, lambda x: x['view_count'][0]))
+
         # Get video info
         # Get video info
         embed_webpage = None
         embed_webpage = None
-        is_live = None
         if re.search(r'player-age-gate-content">', video_webpage) is not None:
         if re.search(r'player-age-gate-content">', video_webpage) is not None:
             age_gate = True
             age_gate = True
             # We simulate the access to the video from www.youtube.com/v/{video_id}
             # We simulate the access to the video from www.youtube.com/v/{video_id}
@@ -1509,6 +1535,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                         continue
                         continue
                     get_video_info = compat_parse_qs(video_info_webpage)
                     get_video_info = compat_parse_qs(video_info_webpage)
                     add_dash_mpd(get_video_info)
                     add_dash_mpd(get_video_info)
+                    if view_count is None:
+                        view_count = extract_view_count(get_video_info)
                     if not video_info:
                     if not video_info:
                         video_info = get_video_info
                         video_info = get_video_info
                     if 'token' in get_video_info:
                     if 'token' in get_video_info:
@@ -1592,10 +1620,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 return self.playlist_result(entries, video_id, video_title, video_description)
                 return self.playlist_result(entries, video_id, video_title, video_description)
             self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
             self.to_screen('Downloading just video %s because of --no-playlist' % video_id)
 
 
-        if 'view_count' in video_info:
-            view_count = int(video_info['view_count'][0])
-        else:
-            view_count = None
+        if view_count is None:  # fall back to video_info, as the removed code did
+            view_count = extract_view_count(video_info)  # get_video_info is only bound inside the retrieval loop
 
 
         # Check for "rental" videos
         # Check for "rental" videos
         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info:
         if 'ypc_video_rental_bar_text' in video_info and 'author' not in video_info: