Browse Source

Add the 'webpage_url' field to info_dict

The URL of the video page; passing it to youtube-dl again must reproduce the same result.
YoutubeDL sets it automatically if it's missing.
Jaime Marquínez Ferrándiz 12 years ago
parent
commit
9103bbc5cd

+ 3 - 0
test/test_download.py

@@ -148,6 +148,9 @@ def generator(test_case):
                 # Check for the presence of mandatory fields
                 # Check for the presence of mandatory fields
                 for key in ('id', 'url', 'title', 'ext'):
                 for key in ('id', 'url', 'title', 'ext'):
                     self.assertTrue(key in info_dict.keys() and info_dict[key])
                     self.assertTrue(key in info_dict.keys() and info_dict[key])
+                # Check for mandatory fields that are automatically set by YoutubeDL
+                for key in ['webpage_url', 'extractor']:
+                    self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
         finally:
         finally:
             try_rm_tcs_files()
             try_rm_tcs_files()
 
 

+ 10 - 3
youtube_dl/YoutubeDL.py

@@ -354,8 +354,11 @@ class YoutubeDL(object):
                         '_type': 'compat_list',
                         '_type': 'compat_list',
                         'entries': ie_result,
                         'entries': ie_result,
                     }
                     }
-                if 'extractor' not in ie_result:
-                    ie_result['extractor'] = ie.IE_NAME
+                self.add_extra_info(ie_result,
+                    {
+                        'extractor': ie.IE_NAME,
+                        'webpage_url': url
+                    })
                 return self.process_ie_result(ie_result, download, extra_info)
                 return self.process_ie_result(ie_result, download, extra_info)
             except ExtractorError as de: # An error we somewhat expected
             except ExtractorError as de: # An error we somewhat expected
                 self.report_error(compat_str(de), de.format_traceback())
                 self.report_error(compat_str(de), de.format_traceback())
@@ -417,6 +420,7 @@ class YoutubeDL(object):
                     'playlist': playlist,
                     'playlist': playlist,
                     'playlist_index': i + playliststart,
                     'playlist_index': i + playliststart,
                     'extractor': ie_result['extractor'],
                     'extractor': ie_result['extractor'],
+                    'webpage_url': ie_result['webpage_url'],
                 }
                 }
                 entry_result = self.process_ie_result(entry,
                 entry_result = self.process_ie_result(entry,
                                                       download=download,
                                                       download=download,
@@ -427,7 +431,10 @@ class YoutubeDL(object):
         elif result_type == 'compat_list':
         elif result_type == 'compat_list':
             def _fixup(r):
             def _fixup(r):
                 self.add_extra_info(r,
                 self.add_extra_info(r,
-                    {'extractor': ie_result['extractor']})
+                    {
+                        'extractor': ie_result['extractor'],
+                        'webpage_url': ie_result['webpage_url'],
+                    })
                 return r
                 return r
             ie_result['entries'] = [
             ie_result['entries'] = [
                 self.process_ie_result(_fixup(r), download, extra_info)
                 self.process_ie_result(_fixup(r), download, extra_info)

+ 3 - 0
youtube_dl/extractor/common.py

@@ -71,6 +71,9 @@ class InfoExtractor(object):
                                 ("3D" or "DASH video")
                                 ("3D" or "DASH video")
                     * width     Width of the video, if known
                     * width     Width of the video, if known
                     * height    Height of the video, if known
                     * height    Height of the video, if known
+    webpage_url:    The url to the video webpage, if given to youtube-dl it
+                    should allow to get the same result again. (It will be set
+                    by YoutubeDL if it's missing)
 
 
     Unless mentioned otherwise, the fields should be Unicode strings.
     Unless mentioned otherwise, the fields should be Unicode strings.
 
 

+ 6 - 7
youtube_dl/extractor/vimeo.py

@@ -20,7 +20,7 @@ class VimeoIE(InfoExtractor):
     """Information extractor for vimeo.com."""
     """Information extractor for vimeo.com."""
 
 
     # _VALID_URL matches Vimeo URLs
     # _VALID_URL matches Vimeo URLs
-    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
+    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
     _NETRC_MACHINE = 'vimeo'
     _NETRC_MACHINE = 'vimeo'
     IE_NAME = u'vimeo'
     IE_NAME = u'vimeo'
     _TESTS = [
     _TESTS = [
@@ -128,11 +128,9 @@ class VimeoIE(InfoExtractor):
             raise ExtractorError(u'Invalid URL: %s' % url)
             raise ExtractorError(u'Invalid URL: %s' % url)
 
 
         video_id = mobj.group('id')
         video_id = mobj.group('id')
-        if not mobj.group('proto'):
-            url = 'https://' + url
-        elif mobj.group('pro'):
+        if mobj.group('pro') or mobj.group('player'):
             url = 'http://player.vimeo.com/video/' + video_id
             url = 'http://player.vimeo.com/video/' + video_id
-        elif mobj.group('direct_link'):
+        else:
             url = 'https://vimeo.com/' + video_id
             url = 'https://vimeo.com/' + video_id
 
 
         # Retrieve video webpage to extract further information
         # Retrieve video webpage to extract further information
@@ -234,7 +232,7 @@ class VimeoIE(InfoExtractor):
         if len(formats) == 0:
         if len(formats) == 0:
             raise ExtractorError(u'No known codec found')
             raise ExtractorError(u'No known codec found')
 
 
-        return [{
+        return {
             'id':       video_id,
             'id':       video_id,
             'uploader': video_uploader,
             'uploader': video_uploader,
             'uploader_id': video_uploader_id,
             'uploader_id': video_uploader_id,
@@ -243,7 +241,8 @@ class VimeoIE(InfoExtractor):
             'thumbnail':    video_thumbnail,
             'thumbnail':    video_thumbnail,
             'description':  video_description,
             'description':  video_description,
             'formats': formats,
             'formats': formats,
-        }]
+            'webpage_url': url,
+        }
 
 
 
 
 class VimeoChannelIE(InfoExtractor):
 class VimeoChannelIE(InfoExtractor):

+ 2 - 1
youtube_dl/extractor/youtube.py

@@ -1485,7 +1485,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
                 'subtitles':    video_subtitles,
                 'subtitles':    video_subtitles,
                 'duration':     video_duration,
                 'duration':     video_duration,
                 'age_limit':    18 if age_gate else 0,
                 'age_limit':    18 if age_gate else 0,
-                'annotations':  video_annotations
+                'annotations':  video_annotations,
+                'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
             })
             })
         return results
         return results