浏览代码

[JSInterp] Improve Date processing
* add JS_Date class implementing JS Date
* support constructor args other than date string
* support static methods of Date
* Date objects are still automatically coerced to a timestamp before being used in JS.

dirkf 5 月之前
父节点
当前提交
94849bc997
共有 2 个文件被更改，包括 97 次插入和 1 次删除
  1. 22 0
      test/test_jsinterp.py
  2. 75 1
      youtube_dl/jsinterp.py

+ 22 - 0
test/test_jsinterp.py

@@ -11,6 +11,7 @@ sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
 
 import math
 import re
+import time
 
 from youtube_dl.compat import compat_str as str
 from youtube_dl.jsinterp import JS_Undefined, JSInterpreter
@@ -208,6 +209,27 @@ class TestJSInterpreter(unittest.TestCase):
         self._test(jsi, 86000, args=['12/31/1969 18:01:26 MDT'])
         # epoch 0
         self._test(jsi, 0, args=['1 January 1970 00:00:00 UTC'])
+        # undefined
+        self._test(jsi, NaN, args=[JS_Undefined])
+        # y,m,d, ... - may fail with older dates lacking DST data
+        jsi = JSInterpreter('function f() { return new Date(%s); }'
+                            % ('2024, 5, 29, 2, 52, 12, 42',))
+        self._test(jsi, 1719625932042)
+        # no arg
+        self.assertAlmostEqual(JSInterpreter(
+            'function f() { return new Date() - 0; }').call_function('f'),
+            time.time() * 1000, delta=100)
+        # Date.now()
+        self.assertAlmostEqual(JSInterpreter(
+            'function f() { return Date.now(); }').call_function('f'),
+            time.time() * 1000, delta=100)
+        # Date.parse()
+        jsi = JSInterpreter('function f(dt) { return Date.parse(dt); }')
+        self._test(jsi, 0, args=['1 January 1970 00:00:00 UTC'])
+        # Date.UTC()
+        jsi = JSInterpreter('function f() { return Date.UTC(%s); }'
+                            % ('1970, 0, 1, 0, 0, 0, 0',))
+        self._test(jsi, 0)
 
     def test_call(self):
         jsi = JSInterpreter('''

+ 75 - 1
youtube_dl/jsinterp.py

@@ -1,10 +1,12 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import calendar
 import itertools
 import json
 import operator
 import re
+import time
 
 from functools import update_wrapper, wraps
 
@@ -12,8 +14,10 @@ from .utils import (
     error_to_compat_str,
     ExtractorError,
     float_or_none,
+    int_or_none,
     js_to_json,
     remove_quotes,
+    str_or_none,
     unified_timestamp,
     variadic,
     write_string,
@@ -475,6 +479,73 @@ class JSInterpreter(object):
                 flags |= cls.RE_FLAGS[ch]
             return flags, expr[idx + 1:]
 
+    class JS_Date(object):
+        """Minimal emulation of the JS Date object
+
+        The time value is held in `_t` as milliseconds since the Unix epoch,
+        or None for an invalid date, which valueOf() reports as NaN.
+        """
+        _t = None
+
+        @staticmethod
+        def __ymd_etc(*args, **kw_is_utc):
+            """Convert date components to epoch milliseconds, or None if invalid
+
+            args: year, monthIndex, day, hours, minutes, seconds, milliseconds
+            is_utc (keyword): treat the components as UTC rather than local time
+            """
+            is_utc = kw_is_utc.get('is_utc', False)
+
+            if not args:
+                # no components at all: invalid, as with Date.UTC() in JS
+                return None
+            try:
+                # components must be integral for mktime()/timegm(); int()
+                # raises for non-numeric or non-finite values
+                args = [int(a) for a in args[:7]]
+            except (TypeError, ValueError, OverflowError):
+                return None
+            # omitted components: day of month defaults to 1, all others to 0
+            defaults = (0, 0, 1, 0, 0, 0, 0)
+            args += defaults[len(args):]
+            year, month, day, hours, mins, secs, ms = args
+            if 0 <= year <= 99:
+                # [MDN] Values from 0 to 99 map to the years 1900 to 1999
+                year += 1900
+            # TODO: [MDN] When a segment overflows or underflows its expected
+            # range, it usually "carries over to" or "borrows from" the higher segment.
+            # month 0..11 -> 1..12; weekday and day of year: don't know (-1);
+            # DST flag: don't know (-1) for local time, not in effect for UTC
+            tm = (year, month + 1, day, hours, mins, secs,
+                  -1, -1, 0 if is_utc else -1)
+            try:
+                mktime = calendar.timegm if is_utc else time.mktime
+                return int(mktime(time.struct_time(tm))) * 1000 + ms
+            except (OverflowError, ValueError):
+                return None
+
+        @classmethod
+        def UTC(cls, *args):
+            """Implement Date.UTC(): epoch milliseconds for UTC components, or NaN"""
+            t = cls.__ymd_etc(*args, is_utc=True)
+            return _NaN if t is None else t
+
+        @staticmethod
+        def parse(date_str, **kw_is_raw):
+            """Implement Date.parse(): epoch milliseconds for a date string
+
+            When unified_timestamp() gives None for the string, the result
+            is NaN or, with keyword is_raw=True, None.
+            """
+            is_raw = kw_is_raw.get('is_raw', False)
+
+            t = unified_timestamp(str_or_none(date_str), False)
+            if t is not None:
+                return int(t * 1000)
+            return None if is_raw else _NaN
+
+        @staticmethod
+        def now(**kw_is_raw):
+            """Implement Date.now(): the current time in epoch milliseconds
+
+            The is_raw keyword is accepted for symmetry with parse() but has
+            no effect, since time.time() always gives a value.
+            """
+            return int(time.time() * 1000)
+
+        def __init__(self, *args):
+            """Implement new Date(...)
+
+            args: nothing (current time), another JS_Date, a date string,
+            a number of epoch milliseconds, or year, monthIndex, ... as
+            for __ymd_etc(); any other single value leaves the date invalid
+            """
+            if not args:
+                args = [self.now(is_raw=True)]
+            if len(args) == 1:
+                if isinstance(args[0], JSInterpreter.JS_Date):
+                    self._t = int_or_none(args[0].valueOf(), default=None)
+                else:
+                    arg_type = _js_typeof(args[0])
+                    if arg_type == 'string':
+                        self._t = self.parse(args[0], is_raw=True)
+                    elif arg_type == 'number':
+                        try:
+                            self._t = int(args[0])
+                        except (ValueError, OverflowError):
+                            # NaN or +/-Infinity: invalid date
+                            self._t = None
+            else:
+                self._t = self.__ymd_etc(*args)
+
+        def toString(self):
+            """Return the local date and time as a string, or "Invalid Date\""""
+            if self._t is None:
+                return "Invalid Date"
+            try:
+                # strftime() needs a time tuple, not the ms timestamp itself;
+                # '%d' is used as '%0d' is not a portable directive
+                local_tm = time.localtime(self._t / 1000.0)
+                return time.strftime('%a %b %d %Y %H:%M:%S %Z%z', local_tm).rstrip()
+            except (TypeError, ValueError, OverflowError, OSError):
+                return "Invalid Date"
+
+        def valueOf(self):
+            """Return the time value in epoch milliseconds, or NaN if invalid"""
+            return _NaN if self._t is None else self._t
+
+
     @classmethod
     def __op_chars(cls):
         op_chars = set(';,[')
@@ -715,7 +786,7 @@ class JSInterpreter(object):
 
         new_kw, _, obj = expr.partition('new ')
         if not new_kw:
-            for klass, konstr in (('Date', lambda x: int(unified_timestamp(x, False) * 1000)),
+            for klass, konstr in (('Date', lambda *x: self.JS_Date(*x).valueOf()),
                                   ('RegExp', self.JS_RegExp),
                                   ('Error', self.Exception)):
                 if not obj.startswith(klass + '('):
@@ -1034,6 +1105,7 @@ class JSInterpreter(object):
                     'String': compat_str,
                     'Math': float,
                     'Array': list,
+                    'Date': self.JS_Date,
                 }
                 obj = local_vars.get(variable)
                 if obj in (JS_Undefined, None):
@@ -1086,6 +1158,8 @@ class JSInterpreter(object):
                         assertion(len(argvals) == 2, 'takes two arguments')
                         return argvals[0] ** argvals[1]
                     raise self.Exception('Unsupported Math method ' + member, expr=expr)
+                elif obj is self.JS_Date:
+                    return getattr(obj, member)(*argvals)
 
                 if member == 'split':
                     assertion(len(argvals) <= 2, 'takes at most two arguments')