2
0
Эх сурвалжийг харах

Add the 'webpage_url' field to info_dict

The url for the video page, it must allow to reproduce the result.
It's automatically set by YoutubeDL if it's missing.
Jaime Marquínez Ferrándiz 12 жил өмнө
parent
commit
9103bbc5cd

+ 3 - 0
test/test_download.py

@@ -148,6 +148,9 @@ def generator(test_case):
                 # Check for the presence of mandatory fields
                 for key in ('id', 'url', 'title', 'ext'):
                     self.assertTrue(key in info_dict.keys() and info_dict[key])
+                # Check for mandatory fields that are automatically set by YoutubeDL
+                for key in ['webpage_url', 'extractor']:
+                    self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
         finally:
             try_rm_tcs_files()
 

+ 10 - 3
youtube_dl/YoutubeDL.py

@@ -354,8 +354,11 @@ class YoutubeDL(object):
                         '_type': 'compat_list',
                         'entries': ie_result,
                     }
-                if 'extractor' not in ie_result:
-                    ie_result['extractor'] = ie.IE_NAME
+                self.add_extra_info(ie_result,
+                    {
+                        'extractor': ie.IE_NAME,
+                        'webpage_url': url
+                    })
                 return self.process_ie_result(ie_result, download, extra_info)
             except ExtractorError as de: # An error we somewhat expected
                 self.report_error(compat_str(de), de.format_traceback())
@@ -417,6 +420,7 @@ class YoutubeDL(object):
                     'playlist': playlist,
                     'playlist_index': i + playliststart,
                     'extractor': ie_result['extractor'],
+                    'webpage_url': ie_result['webpage_url'],
                 }
                 entry_result = self.process_ie_result(entry,
                                                       download=download,
@@ -427,7 +431,10 @@ class YoutubeDL(object):
         elif result_type == 'compat_list':
             def _fixup(r):
                 self.add_extra_info(r,
-                    {'extractor': ie_result['extractor']})
+                    {
+                        'extractor': ie_result['extractor'],
+                        'webpage_url': ie_result['webpage_url'],
+                    })
                 return r
             ie_result['entries'] = [
                 self.process_ie_result(_fixup(r), download, extra_info)

+ 3 - 0
youtube_dl/extractor/common.py

@@ -71,6 +71,9 @@ class InfoExtractor(object):
                                 ("3D" or "DASH video")
                     * width     Width of the video, if known
                     * height    Height of the video, if known
+    webpage_url:    The url to the video webpage, if given to youtube-dl it
+                    should allow to get the same result again. (It will be set
+                    by YoutubeDL if it's missing)
 
     Unless mentioned otherwise, the fields should be Unicode strings.
 

+ 6 - 7
youtube_dl/extractor/vimeo.py

@@ -20,7 +20,7 @@ class VimeoIE(InfoExtractor):
     """Information extractor for vimeo.com."""
 
     # _VALID_URL matches Vimeo URLs
-    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
+    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
     _NETRC_MACHINE = 'vimeo'
     IE_NAME = u'vimeo'
     _TESTS = [
@@ -128,11 +128,9 @@ class VimeoIE(InfoExtractor):
             raise ExtractorError(u'Invalid URL: %s' % url)
 
         video_id = mobj.group('id')
-        if not mobj.group('proto'):
-            url = 'https://' + url
-        elif mobj.group('pro'):
+        if mobj.group('pro') or mobj.group('player'):
             url = 'http://player.vimeo.com/video/' + video_id
-        elif mobj.group('direct_link'):
+        else:
             url = 'https://vimeo.com/' + video_id
 
         # Retrieve video webpage to extract further information
@@ -234,7 +232,7 @@ class VimeoIE(InfoExtractor):
         if len(formats) == 0:
             raise ExtractorError(u'No known codec found')
 
-        return [{
+        return {
             'id':       video_id,
             'uploader': video_uploader,
             'uploader_id': video_uploader_id,
@@ -243,7 +241,8 @@ class VimeoIE(InfoExtractor):
             'thumbnail':    video_thumbnail,
             'description':  video_description,
             'formats': formats,
-        }]
+            'webpage_url': url,
+        }
 
 
 class VimeoChannelIE(InfoExtractor):

+ 2 - 1
youtube_dl/extractor/youtube.py

@@ -1485,7 +1485,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'subtitles':    video_subtitles,
                 'duration':     video_duration,
                 'age_limit':    18 if age_gate else 0,
-                'annotations':  video_annotations
+                'annotations':  video_annotations,
+                'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
             })
         return results