Browse Source

[youtube] Extract start_time

The start time is parsed from the 't=*' parameter in the URL's query string or fragment.
youtube-dl itself does not currently use the value, but it was requested for the mpv plugin.
Jaime Marquínez Ferrándiz 10 years ago
parent
commit
7c80519cbf
2 changed files with 15 additions and 1 deletion
  1. 2 0
      youtube_dl/extractor/common.py
  2. 13 1
      youtube_dl/extractor/youtube.py

+ 2 - 0
youtube_dl/extractor/common.py

@@ -183,6 +183,8 @@ class InfoExtractor(object):
                     ["Sports", "Berlin"]
                     ["Sports", "Berlin"]
     is_live:        True, False, or None (=unknown). Whether this video is a
     is_live:        True, False, or None (=unknown). Whether this video is a
                     live stream that goes on instead of a fixed-length video.
                     live stream that goes on instead of a fixed-length video.
+    start_time:     Time in seconds at which playback should start, as
+                    specified in the URL.
 
 
     Unless mentioned otherwise, the fields should be Unicode strings.
     Unless mentioned otherwise, the fields should be Unicode strings.
 
 

+ 13 - 1
youtube_dl/extractor/youtube.py

@@ -19,6 +19,7 @@ from ..compat import (
     compat_urllib_parse,
     compat_urllib_parse,
     compat_urllib_parse_unquote,
     compat_urllib_parse_unquote,
     compat_urllib_parse_unquote_plus,
     compat_urllib_parse_unquote_plus,
+    compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urllib_request,
     compat_urlparse,
     compat_urlparse,
     compat_str,
     compat_str,
@@ -31,6 +32,7 @@ from ..utils import (
     get_element_by_id,
     get_element_by_id,
     int_or_none,
     int_or_none,
     orderedSet,
     orderedSet,
+    parse_duration,
     str_to_int,
     str_to_int,
     unescapeHTML,
     unescapeHTML,
     unified_strdate,
     unified_strdate,
@@ -317,7 +319,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
     IE_NAME = 'youtube'
     IE_NAME = 'youtube'
     _TESTS = [
     _TESTS = [
         {
         {
-            'url': 'http://www.youtube.com/watch?v=BaW_jenozKc',
+            'url': 'http://www.youtube.com/watch?v=BaW_jenozKcj&t=1s',
             'info_dict': {
             'info_dict': {
                 'id': 'BaW_jenozKc',
                 'id': 'BaW_jenozKc',
                 'ext': 'mp4',
                 'ext': 'mp4',
@@ -329,6 +331,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
                 'categories': ['Science & Technology'],
                 'categories': ['Science & Technology'],
                 'like_count': int,
                 'like_count': int,
                 'dislike_count': int,
                 'dislike_count': int,
+                'start_time': 1,
             }
             }
         },
         },
         {
         {
@@ -889,6 +892,14 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'http' if self._downloader.params.get('prefer_insecure', False)
             'http' if self._downloader.params.get('prefer_insecure', False)
             else 'https')
             else 'https')
 
 
+        start_time = None
+        parsed_url = compat_urllib_parse_urlparse(url)
+        for component in [parsed_url.fragment, parsed_url.query]:
+            query = compat_parse_qs(component)
+            if 't' in query:
+                start_time = parse_duration(query['t'][0])
+                break
+
         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
         # Extract original video URL from URL with redirection, like age verification, using next_url parameter
         mobj = re.search(self._NEXT_URL_RE, url)
         mobj = re.search(self._NEXT_URL_RE, url)
         if mobj:
         if mobj:
@@ -1255,6 +1266,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
             'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
             'average_rating': float_or_none(video_info.get('avg_rating', [None])[0]),
             'formats': formats,
             'formats': formats,
             'is_live': is_live,
             'is_live': is_live,
+            'start_time': start_time,
         }
         }