浏览代码

[minhateca] Fix duration parsing

Philipp Hagemeister 10 年之前
父节点
当前提交
e8df5cee12
共有 3 个文件被更改,包括 21 次插入6 次删除
  1. 3 0
      test/test_utils.py
  2. 3 2
      youtube_dl/extractor/minhateca.py
  3. 15 4
      youtube_dl/utils.py

+ 3 - 0
test/test_utils.py

@@ -220,6 +220,9 @@ class TestUtil(unittest.TestCase):
         self.assertEqual(parse_duration('0s'), 0)
         self.assertEqual(parse_duration('01:02:03.05'), 3723.05)
         self.assertEqual(parse_duration('T30M38S'), 1838)
+        self.assertEqual(parse_duration('5 s'), 5)
+        self.assertEqual(parse_duration('3 min'), 180)
+        self.assertEqual(parse_duration('2.5 hours'), 9000)
 
     def test_fix_xml_ampersands(self):
         self.assertEqual(

+ 3 - 2
youtube_dl/extractor/minhateca.py

@@ -8,6 +8,7 @@ from ..compat import (
 )
 from ..utils import (
     int_or_none,
+    parse_duration,
     parse_filesize,
 )
 
@@ -52,8 +53,8 @@ class MinhatecaIE(InfoExtractor):
         filesize_approx = parse_filesize(self._html_search_regex(
             r'<p class="fileSize">(.*?)</p>',
             webpage, 'file size approximation', fatal=False))
-        duration = int_or_none(self._html_search_regex(
-            r'(?s)<p class="fileLeng[ht][th]">.*?([0-9]+)\s*s',
+        duration = parse_duration(self._html_search_regex(
+            r'(?s)<p class="fileLeng[ht][th]">.*?class="bold">(.*?)<',
             webpage, 'duration', fatal=False))
         view_count = int_or_none(self._html_search_regex(
             r'<p class="downloadsCounter">([0-9]+)</p>',

+ 15 - 4
youtube_dl/utils.py

@@ -1206,18 +1206,29 @@ def parse_duration(s):
 
     m = re.match(
         r'''(?ix)T?
+        (?:
+            (?P<only_mins>[0-9.]+)\s*(?:mins?|minutes?)\s*|
+            (?P<only_hours>[0-9.]+)\s*(?:hours?)|
+
             (?:
                 (?:(?P<hours>[0-9]+)\s*(?:[:h]|hours?)\s*)?
                 (?P<mins>[0-9]+)\s*(?:[:m]|mins?|minutes?)\s*
             )?
-            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?$''', s)
+            (?P<secs>[0-9]+)(?P<ms>\.[0-9]+)?\s*(?:s|secs?|seconds?)?
+        )$''', s)
     if not m:
         return None
-    res = int(m.group('secs'))
+    res = 0
+    if m.group('only_mins'):
+        return float_or_none(m.group('only_mins'), invscale=60)
+    if m.group('only_hours'):
+        return float_or_none(m.group('only_hours'), invscale=60 * 60)
+    if m.group('secs'):
+        res += int(m.group('secs'))
     if m.group('mins'):
         res += int(m.group('mins')) * 60
-        if m.group('hours'):
-            res += int(m.group('hours')) * 60 * 60
+    if m.group('hours'):
+        res += int(m.group('hours')) * 60 * 60
     if m.group('ms'):
         res += float(m.group('ms'))
     return res