Przeglądaj źródła

[YouTube] Switch to TV API client
* thx yt-dlp/yt-dlp#12059

dirkf 7 miesięcy temu
rodzic
commit
21fff05121
1 zmienionych plików z 83 dodań i 19 usunięć
  1. 83 19
      youtube_dl/extractor/youtube.py

+ 83 - 19
youtube_dl/extractor/youtube.py

@@ -31,7 +31,9 @@ from ..utils import (
     dict_get,
     error_to_compat_str,
     ExtractorError,
+    filter_dict,
     float_or_none,
+    get_first,
     extract_attributes,
     get_element_by_attribute,
     int_or_none,
@@ -82,6 +84,34 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
 
     _PLAYLIST_ID_RE = r'(?:(?:PL|LL|EC|UU|FL|RD|UL|TL|PU|OLAK5uy_)[0-9A-Za-z-_]{10,}|RDMM)'
 
+    _INNERTUBE_CLIENTS = {
+        # mweb has 'ultralow' formats
+        # See: https://github.com/yt-dlp/yt-dlp/pull/557
+        'mweb': {
+            'INNERTUBE_CONTEXT': {
+                'client': {
+                    'clientName': 'MWEB',
+                    'clientVersion': '2.20241202.07.00',
+                    # mweb previously did not require PO Token with this UA
+                    'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
+                },
+            },
+            'INNERTUBE_CONTEXT_CLIENT_NAME': 2,
+            'REQUIRE_PO_TOKEN': True,
+            'SUPPORTS_COOKIES': True,
+        },
+        'tv': {
+            'INNERTUBE_CONTEXT': {
+                'client': {
+                    'clientName': 'TVHTML5',
+                    'clientVersion': '7.20241201.18.00',
+                },
+            },
+            'INNERTUBE_CONTEXT_CLIENT_NAME': 7,
+            'SUPPORTS_COOKIES': True,
+        },
+    }
+
     def _login(self):
         """
         Attempt to log in to YouTube.
@@ -321,19 +351,24 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
             '{0} {1} {2}'.format(time_now, self._SAPISID, origin).encode('utf-8')).hexdigest()
         return 'SAPISIDHASH {0}_{1}'.format(time_now, sapisidhash)
 
-    def _call_api(self, ep, query, video_id, fatal=True, headers=None):
+    def _call_api(self, ep, query, video_id, fatal=True, headers=None,
+                  note='Downloading API JSON'):
         data = self._DEFAULT_API_DATA.copy()
         data.update(query)
         real_headers = {'content-type': 'application/json'}
         if headers:
             real_headers.update(headers)
 
+        # was: 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'
+        api_key = self.get_param('youtube_innertube_key')
         return self._download_json(
             'https://www.youtube.com/youtubei/v1/%s' % ep, video_id=video_id,
-            note='Downloading API JSON', errnote='Unable to download API page',
+            note=note, errnote='Unable to download API page',
             data=json.dumps(data).encode('utf8'), fatal=fatal,
-            headers=real_headers,
-            query={'key': 'AIzaSyAO_FJ2SlqU8Q4STEHLGCilw_Y9_11qcW8'})
+            headers=real_headers, query=filter_dict({
+                'key': api_key,
+                'prettyPrint': 'false',
+            }))
 
     def _extract_yt_initial_data(self, video_id, webpage):
         return self._parse_json(
@@ -342,6 +377,22 @@ class YoutubeBaseInfoExtractor(InfoExtractor):
                  self._YT_INITIAL_DATA_RE), webpage, 'yt initial data'),
             video_id)
 
+    def _extract_visitor_data(self, *args):
+        """
+        Extract visitorData from an API response or ytcfg
+
+        Appears to be used to track session state
+        """
+        visitor_data = self.get_param('youtube_visitor_data')
+        if visitor_data:
+            return visitor_data
+
+        return get_first(
+            args, (('VISITOR_DATA',
+                    ('INNERTUBE_CONTEXT', 'client', 'visitorData'),
+                    ('responseContext', 'visitorData')),
+                   T(compat_str)))
+
     def _extract_ytcfg(self, video_id, webpage):
         return self._parse_json(
             self._search_regex(
@@ -1957,6 +2008,10 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             if sts:
                 pb_context['signatureTimestamp'] = sts
 
+            client = traverse_obj(self._INNERTUBE_CLIENTS, (
+                lambda _, v: not v.get('REQUIRE_PO_TOKEN')),
+                get_all=False)
+
             query = {
                 'playbackContext': {
                     'contentPlaybackContext': pb_context,
@@ -1964,30 +2019,39 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                     'racyCheckOk': True,
                 },
                 'context': {
-                    'client': {
-                        'clientName': 'MWEB',
-                        'clientVersion': '2.20241202.07.00',
-                        'hl': 'en',
-                        'userAgent': 'Mozilla/5.0 (iPad; CPU OS 16_7_10 like Mac OS X) AppleWebKit/605.1.15 (KHTML, like Gecko) Version/16.6 Mobile/15E148 Safari/604.1,gzip(gfe)',
-                        'timeZone': 'UTC',
-                        'utcOffsetMinutes': 0,
-                    },
+                    'client': merge_dicts(
+                        traverse_obj(client, ('INNERTUBE_CONTEXT', 'client')), {
+                            'hl': 'en',
+                            'timeZone': 'UTC',
+                            'utcOffsetMinutes': 0,
+                        }),
                 },
                 'videoId': video_id,
             }
-            headers = {
-                'X-YouTube-Client-Name': '2',
-                'X-YouTube-Client-Version': '2.20241202.07.00',
-                'Origin': origin,
+
+            headers = merge_dicts({
                 'Sec-Fetch-Mode': 'navigate',
-                'User-Agent': query['context']['client']['userAgent'],
-            }
+                'Origin': origin,
+                # 'X-Goog-Visitor-Id': self._extract_visitor_data(ytcfg) or '',
+            }, traverse_obj(client, {
+                'X-YouTube-Client-Name': 'INNERTUBE_CONTEXT_CLIENT_NAME',
+                'X-YouTube-Client-Version': (
+                    'INNERTUBE_CONTEXT', 'client', 'clientVersion'),
+                'User-Agent': (
+                    'INNERTUBE_CONTEXT', 'client', 'userAgent'),
+            }))
+
             auth = self._generate_sapisidhash_header(origin)
             if auth is not None:
                 headers['Authorization'] = auth
                 headers['X-Origin'] = origin
 
-            player_response = self._call_api('player', query, video_id, fatal=False, headers=headers)
+            player_response = self._call_api(
+                'player', query, video_id, fatal=False, headers=headers,
+                note=join_nonempty(
+                    'Downloading', traverse_obj(query, (
+                        'context', 'client', 'clientName')),
+                    'API JSON', delim=' '))
 
         def is_agegated(playability):
             if not isinstance(playability, dict):