Selaa lähdekoodia

[youtube & jsinterp] Fix signature extraction (fixes #3255)

Some functions are defined now inside an object, the jsinterp will search its definition if the variable is not defined in the local namespace.
Jaime Marquínez Ferrándiz 11 vuotta sitten
vanhempi
sitoutus
ad25aee245
2 muutettua tiedostoa jossa 43 lisäystä ja 3 poistoa
  1. 6 0
      test/test_youtube_signature.py
  2. 37 3
      youtube_dl/jsinterp.py

+ 6 - 0
test/test_youtube_signature.py

@@ -33,6 +33,12 @@ _TESTS = [
         90,
         u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
     ),
+    (
+        u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
+        u'js',
+        84,
+        u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
+    ),
     (
         u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
         u'js',

+ 37 - 3
youtube_dl/jsinterp.py

@@ -11,6 +11,7 @@ class JSInterpreter(object):
     def __init__(self, code):
         self.code = code
         self._functions = {}
+        self._objects = {}
 
     def interpret_statement(self, stmt, local_vars, allow_recursion=20):
         if allow_recursion < 0:
@@ -55,7 +56,19 @@ class JSInterpreter(object):
         m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
         if m:
             member = m.group('member')
-            val = local_vars[m.group('in')]
+            variable = m.group('in')
+
+            if variable not in local_vars:
+                if variable not in self._objects:
+                    self._objects[variable] = self.extract_object(variable)
+                obj = self._objects[variable]
+                key, args = member.split('(', 1)
+                args = args.strip(')')
+                argvals = [int(v) if v.isdigit() else local_vars[v]
+                           for v in args.split(',')]
+                return obj[key](argvals)
+
+            val = local_vars[variable]
             if member == 'split("")':
                 return list(val)
             if member == 'join("")':
@@ -97,6 +110,25 @@ class JSInterpreter(object):
             return self._functions[fname](argvals)
         raise ExtractorError('Unsupported JS expression %r' % expr)
 
+    def extract_object(self, objname):
+        obj = {}
+        obj_m = re.search(
+            (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
+            r'\s*(?P<fields>([a-zA-Z$]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' +
+            r'\}\s*;',
+            self.code)
+        fields = obj_m.group('fields')
+        # Currently, it only supports function definitions
+        fields_m = re.finditer(
+            r'(?P<key>[a-zA-Z$]+)\s*:\s*function'
+            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
+            fields)
+        for f in fields_m:
+            argnames = f.group('args').split(',')
+            obj[f.group('key')] = self.build_function(argnames, f.group('code'))
+
+        return obj
+
     def extract_function(self, funcname):
         func_m = re.search(
             (r'(?:function %s|[{;]%s\s*=\s*function)' % (
@@ -107,10 +139,12 @@ class JSInterpreter(object):
             raise ExtractorError('Could not find JS function %r' % funcname)
         argnames = func_m.group('args').split(',')
 
+        return self.build_function(argnames, func_m.group('code'))
+
+    def build_function(self, argnames, code):
         def resf(args):
             local_vars = dict(zip(argnames, args))
-            for stmt in func_m.group('code').split(';'):
+            for stmt in code.split(';'):
                 res = self.interpret_statement(stmt, local_vars)
             return res
         return resf
-