浏览代码

[youtube & jsinterp] Fix signature extraction (fixes #3255)

Some functions are now defined inside an object; jsinterp will search for the object's definition if the variable is not defined in the local namespace.
Jaime Marquínez Ferrándiz 11 年之前
父节点
当前提交
ad25aee245
共有 2 个文件被更改,包括 43 次插入和 3 次删除
  1. 6 0
      test/test_youtube_signature.py
  2. 37 3
      youtube_dl/jsinterp.py

+ 6 - 0
test/test_youtube_signature.py

@@ -33,6 +33,12 @@ _TESTS = [
         90,
         u']\\[@?>=<;:/.-,+*)(\'&%$#"hZYXWVUTSRQPONMLKJIHGFEDCBAzyxwvutsrqponmlkjiagfedcb39876',
     ),
+    (
+        u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vfl0Cbn9e.js',
+        u'js',
+        84,
+        u'O1I3456789abcde0ghijklmnopqrstuvwxyzABCDEFGHfJKLMN2PQRSTUVW@YZ!"#$%&\'()*+,-./:;<=',
+    ),
     (
         u'https://s.ytimg.com/yts/jsbin/html5player-en_US-vflXGBaUN.js',
         u'js',

+ 37 - 3
youtube_dl/jsinterp.py

@@ -11,6 +11,7 @@ class JSInterpreter(object):
     def __init__(self, code):
         self.code = code
         self._functions = {}
+        self._objects = {}
 
     def interpret_statement(self, stmt, local_vars, allow_recursion=20):
         if allow_recursion < 0:
@@ -55,7 +56,19 @@ class JSInterpreter(object):
         m = re.match(r'^(?P<in>[a-z]+)\.(?P<member>.*)$', expr)
         if m:
             member = m.group('member')
-            val = local_vars[m.group('in')]
+            variable = m.group('in')
+
+            if variable not in local_vars:
+                if variable not in self._objects:
+                    self._objects[variable] = self.extract_object(variable)
+                obj = self._objects[variable]
+                key, args = member.split('(', 1)
+                args = args.strip(')')
+                argvals = [int(v) if v.isdigit() else local_vars[v]
+                           for v in args.split(',')]
+                return obj[key](argvals)
+
+            val = local_vars[variable]
             if member == 'split("")':
                 return list(val)
             if member == 'join("")':
@@ -97,6 +110,25 @@ class JSInterpreter(object):
             return self._functions[fname](argvals)
         raise ExtractorError('Unsupported JS expression %r' % expr)
 
+    def extract_object(self, objname):
+        obj = {}
+        obj_m = re.search(
+            (r'(?:var\s+)?%s\s*=\s*\{' % re.escape(objname)) +
+            r'\s*(?P<fields>([a-zA-Z$]+\s*:\s*function\(.*?\)\s*\{.*?\})*)' +
+            r'\}\s*;',
+            self.code)
+        fields = obj_m.group('fields')
+        # Currently, it only supports function definitions
+        fields_m = re.finditer(
+            r'(?P<key>[a-zA-Z$]+)\s*:\s*function'
+            r'\((?P<args>[a-z,]+)\){(?P<code>[^}]+)}',
+            fields)
+        for f in fields_m:
+            argnames = f.group('args').split(',')
+            obj[f.group('key')] = self.build_function(argnames, f.group('code'))
+
+        return obj
+
     def extract_function(self, funcname):
         func_m = re.search(
             (r'(?:function %s|[{;]%s\s*=\s*function)' % (
@@ -107,10 +139,12 @@ class JSInterpreter(object):
             raise ExtractorError('Could not find JS function %r' % funcname)
         argnames = func_m.group('args').split(',')
 
+        return self.build_function(argnames, func_m.group('code'))
+
+    def build_function(self, argnames, code):
         def resf(args):
             local_vars = dict(zip(argnames, args))
-            for stmt in func_m.group('code').split(';'):
+            for stmt in code.split(';'):
                 res = self.interpret_statement(stmt, local_vars)
             return res
         return resf
-