Browse Source

[Youtube] Fix nsig extraction for player 20dfca59 (#32891)

* dirkf's patch for nsig extraction
* add generic search per  yt-dlp/yt-dlp/pull/10611 - thx bashonly

---------

Co-authored-by: dirkf <fieldhouse@gmx.net>
Aiur Adept 1 year ago
parent
commit
71223bff39
2 changed files with 30 additions and 6 deletions
  1. 4 0
      test/test_youtube_signature.py
  2. 26 6
      youtube_dl/extractor/youtube.py

+ 4 - 0
test/test_youtube_signature.py

@@ -174,6 +174,10 @@ _NSIG_TESTS = [
         'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js',
         'https://www.youtube.com/s/player/5604538d/player_ias.vflset/en_US/base.js',
         '7X-he4jjvMx7BCX', 'sViSydX8IHtdWA',
         '7X-he4jjvMx7BCX', 'sViSydX8IHtdWA',
     ),
     ),
+    (
+        'https://www.youtube.com/s/player/20dfca59/player_ias.vflset/en_US/base.js',
+        '-fLCxedkAk4LUTK2', 'O8kfRq1y1eyHGw',
+    ),
 ]
 ]
 
 
 
 

+ 26 - 6
youtube_dl/extractor/youtube.py

@@ -1659,18 +1659,38 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     def _extract_n_function_name(self, jscode):
     def _extract_n_function_name(self, jscode):
         func_name, idx = self._search_regex(
         func_name, idx = self._search_regex(
             # new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
             # new: (b=String.fromCharCode(110),c=a.get(b))&&c=nfunc[idx](c)
-            # or:  (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)s
+            # or:  (b="nn"[+a.D],c=a.get(b))&&(c=nfunc[idx](c)
+            # or:  (PL(a),b=a.j.n||null)&&(b=nfunc[idx](b)
             # old: .get("n"))&&(b=nfunc[idx](b)
             # old: .get("n"))&&(b=nfunc[idx](b)
             # older: .get("n"))&&(b=nfunc(b)
             # older: .get("n"))&&(b=nfunc(b)
             r'''(?x)
             r'''(?x)
-                (?:\(\s*(?P<b>[a-z])\s*=\s*(?:
+                (?:\((?:[\w$()\s]+,)*?\s*(?P<b>[a-z])\s*=\s*(?:
                     String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
                     String\s*\.\s*fromCharCode\s*\(\s*110\s*\)|
-                    "n+"\[\s*\+?s*[\w$.]+\s*]
-                )\s*,(?P<c>[a-z])\s*=\s*[a-z]\s*)?
-                \.\s*get\s*\(\s*(?(b)(?P=b)|"n{1,2}")(?:\s*\)){2}\s*&&\s*\(\s*(?(c)(?P=c)|b)\s*=\s*
+                    "n+"\[\s*\+?s*[\w$.]+\s*]|
+                    (?P<b1>(?:[\w$]+\s*\.\s*)+n\b(?:(?!&&).)+\))
+                )\s*
+                    (?(b1)
+                          &&\s*\(\s*(?P=b)|
+                          (?:
+                              ,(?P<c>[a-z])\s*=\s*[a-z]\s*)?
+                              \.\s*get\s*\(\s*(?(b)(?P=b)|"n{1,2}")(?:\s*\)){2}\s*
+                              &&\s*\(\s*(?(c)(?P=c)|(?P=b))
+                          )
+                    )\s*=\s*
                 (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
                 (?P<nfunc>[a-zA-Z_$][\w$]*)(?:\s*\[(?P<idx>\d+)\])?\s*\(\s*[\w$]+\s*\)
-            ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'))
+            ''', jscode, 'Initial JS player n function name', group=('nfunc', 'idx'),
+            default=(None, None))
+        # thx bashonly: yt-dlp/yt-dlp/pull/10611
+        if not func_name:
+            self.report_warning('Falling back to generic n function search')
+            return self._search_regex(
+                r'''(?xs)
+                    (?:(?<=[^\w$])|^)       # instead of \b, which ignores $
+                    (?P<name>(?!\d)[a-zA-Z\d_$]+)\s*=\s*function\((?!\d)[a-zA-Z\d_$]+\)
+                    \s*\{(?:(?!};).)+?["']enhanced_except_
+                ''', jscode, 'Initial JS player n function name', group='name')
         if not idx:
         if not idx:
+            self.report_warning('Falling back to generic n function search')
             return func_name
             return func_name
 
 
         return self._parse_json(self._search_regex(
         return self._parse_json(self._search_regex(