فهرست منبع

[youtube & jsinterp] Fix signature extraction (fixes #3255)

Some functions are defined now inside an object, the jsinterp will search its definition if the variable is not defined in the local namespace.
Jaime Marquínez Ferrándiz 11 سال پیش
والد
کامیت
ad25aee245
2فایلهای تغییر یافته به همراه43 افزوده شده و 3 حذف شده
  1. 6 0
      test/test_youtube_signature.py
  2. 37 3
      youtube_dl/jsinterp.py

+ 6 - 0
test/test_youtube_signature.py

@@ -33,6 +33,12 @@ _TESTS = [
         90,
         u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
     ),
+    (
+        u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
+        u'js',
+        84,
+        u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
+    ),
     (
         u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
         u'js',

+ 37 - 3
youtube_dl/jsinterp.py

@@ -11,6 +11,7 @@ class JSInterpreter(object):
     def __init__(self, code):
         self.code = code
         self._functions = {}
+        self._objects = {}
 
     def interpret_statement(self, stmt, local_vars, allow_recursion=20):
         if allow_recursion < 0:
@@ -55,7 +56,19 @@ class JSInterpreter(object):
         m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
         if m:
             member = m.group('member')
-            val = local_vars[m.group('in')]
+            variable = m.group('in')
+
+            if variable not in local_vars:
+                if variable not in self._objects:
+                    self._objects[variable] = self.extract_object(variable)
+                obj = self._objects[variable]
+                key, args = member.split('(', 1)
+                args = args.strip(')')
+                argvals = [int(v) if v.isdigit() else local_vars[v]
+                           for v in args.split(',')]
+                return obj[key](argvals)
+
+            val = local_vars[variable]
             if member == 'split("")':
                 return list(val)
             if member == 'join("")':
@@ -97,6 +110,25 @@ class JSInterpreter(object):
             return self._functions[fname](argvals)
         raise ExtractorError('Unsupported JS expression %r' % expr)
 
+    def extract_object(self, objname):
+        obj = {}
+        obj_m = re.search(
+            (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
+            r'\s*(?P<fields>([a-zA-Z$]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' +
+            r'\}\s*;',
+            self.code)
+        fields = obj_m.group('fields')
+        # Currently, it only supports function definitions
+        fields_m = re.finditer(
+            r'(?P<key>[a-zA-Z$]+)\s*:\s*function'
+            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
+            fields)
+        for f in fields_m:
+            argnames = f.group('args').split(',')
+            obj[f.group('key')] = self.build_function(argnames, f.group('code'))
+
+        return obj
+
     def extract_function(self, funcname):
         func_m = re.search(
             (r'(?:function %s|[{;]%s\s*=\s*function)' % (
@@ -107,10 +139,12 @@ class JSInterpreter(object):
             raise ExtractorError('Could not find JS function %r' % funcname)
         argnames = func_m.group('args').split(',')
 
+        return self.build_function(argnames, func_m.group('code'))
+
+    def build_function(self, argnames, code):
         def resf(args):
             local_vars = dict(zip(argnames, args))
-            for stmt in func_m.group('code').split(';'):
+            for stmt in code.split(';'):
                 res = self.interpret_statement(stmt, local_vars)
             return res
         return resf
-