Browse Source

[YouTube] Initially support tce-style player JS
* resolves #33079

dirkf 5 months ago
parent
commit
283dca56fe
2 changed files with 30 additions and 14 deletions
  1. +17 -4
      test/test_youtube_signature.py
  2. +13 -10
      youtube_dl/extractor/youtube.py

+ 17 - 4
test/test_youtube_signature.py

@@ -223,6 +223,18 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/9c6dfc4a/player_ias.vflset/en_US/base.js',
         'https://www.youtube.com/s/player/9c6dfc4a/player_ias.vflset/en_US/base.js',
         'jbu7ylIosQHyJyJV', 'uwI0ESiynAmhNg',
         'jbu7ylIosQHyJyJV', 'uwI0ESiynAmhNg',
     ),
     ),
+    (
+        'https://www.youtube.com/s/player/f6e09c70/player_ias.vflset/en_US/base.js',
+        'W9HJZKktxuYoDTqW', 'jHbbkcaxm54',
+    ),
+    (
+        'https://www.youtube.com/s/player/f6e09c70/player_ias_tce.vflset/en_US/base.js',
+        'W9HJZKktxuYoDTqW', 'jHbbkcaxm54',
+    ),
+    (
+        'https://www.youtube.com/s/player/91201489/player_ias_tce.vflset/en_US/base.js',
+        'W9HJZKktxuYoDTqW', 'U48vOZHaeYS6vO',
+    ),
 ]
 ]
 
 
 
 
@@ -284,7 +296,7 @@ def t_factory(name, sig_func, url_pattern):
 
 
 
 
 def signature(jscode, sig_input):
 def signature(jscode, sig_input):
-    func = YoutubeIE(FakeYDL())._parse_sig_js(jscode)
+    func = YoutubeIE(FakeYDL({'cachedir': False}))._parse_sig_js(jscode)
     src_sig = (
     src_sig = (
         compat_str(string.printable[:sig_input])
         compat_str(string.printable[:sig_input])
         if isinstance(sig_input, int) else sig_input)
         if isinstance(sig_input, int) else sig_input)
@@ -292,9 +304,10 @@ def signature(jscode, sig_input):
 
 
 
 
 def n_sig(jscode, sig_input):
 def n_sig(jscode, sig_input):
-    funcname = YoutubeIE(FakeYDL())._extract_n_function_name(jscode)
-    return JSInterpreter(jscode).call_function(
-        funcname, sig_input, _ytdl_do_not_return=sig_input)
+    ie = YoutubeIE(FakeYDL({'cachedir': False}))
+    jsi = JSInterpreter(jscode)
+    jsi, _, func_code = ie._extract_n_function_code_jsi(sig_input, jsi)
+    return ie._extract_n_function_from_code(jsi, func_code)(sig_input)
 
 
 
 
 make_sig_test = t_factory(
 make_sig_test = t_factory(

+ 13 - 10
youtube_dl/extractor/youtube.py

@@ -1607,16 +1607,9 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 webpage or '', 'player URL', fatal=False)
                 webpage or '', 'player URL', fatal=False)
             if player_url:
             if player_url:
                 ytcfgs = ytcfgs + ({'PLAYER_JS_URL': player_url},)
                 ytcfgs = ytcfgs + ({'PLAYER_JS_URL': player_url},)
-        player_url = traverse_obj(
+        return traverse_obj(
             ytcfgs, (Ellipsis, 'PLAYER_JS_URL'), (Ellipsis, 'WEB_PLAYER_CONTEXT_CONFIGS', Ellipsis, 'jsUrl'),
             ytcfgs, (Ellipsis, 'PLAYER_JS_URL'), (Ellipsis, 'WEB_PLAYER_CONTEXT_CONFIGS', Ellipsis, 'jsUrl'),
             get_all=False, expected_type=lambda u: urljoin('https://www.youtube.com', u))
             get_all=False, expected_type=lambda u: urljoin('https://www.youtube.com', u))
-        nplayer_url, is_tce = re.subn(r'(?<=/player_ias)_tce(?=\.vflset/)', '', player_url or '')
-        if is_tce:
-            # TODO: Add proper support for the 'tce' variant players
-            # See https://github.com/yt-dlp/yt-dlp/issues/12398
-            self.write_debug('Modifying tce player URL: {0}'.format(player_url))
-            return nplayer_url
-        return player_url
 
 
     def _download_player_url(self, video_id, fatal=False):
     def _download_player_url(self, video_id, fatal=False):
         res = self._download_webpage(
         res = self._download_webpage(
@@ -1858,12 +1851,22 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
 
         if func_code:
         if func_code:
             return jsi, player_id, func_code
             return jsi, player_id, func_code
+        return self._extract_n_function_code_jsi(video_id, jsi, player_id)
+
+    def _extract_n_function_code_jsi(self, video_id, jsi, player_id=None):
+
+        var_ay = self._search_regex(
+            r'(?:[;\s]|^)\s*(var\s*[\w$]+\s*=\s*"[^"]+"\s*\.\s*split\("\{"\))(?=\s*[,;])',
+            jsi.code, 'useful values', default='')
 
 
-        func_name = self._extract_n_function_name(jscode)
+        func_name = self._extract_n_function_name(jsi.code)
 
 
         func_code = jsi.extract_function_code(func_name)
         func_code = jsi.extract_function_code(func_name)
+        if var_ay:
+            func_code = (func_code[0], ';\n'.join((var_ay, func_code[1])))
 
 
-        self.cache.store('youtube-nsig', player_id, func_code)
+        if player_id:
+            self.cache.store('youtube-nsig', player_id, func_code)
         return jsi, player_id, func_code
         return jsi, player_id, func_code
 
 
     def _extract_n_function_from_code(self, jsi, func_code):
     def _extract_n_function_from_code(self, jsi, func_code):