12 years ago · 9103bbc5cd
--- a/test/test_download.py
+++ b/test/test_download.py
@@ -148,6 +148,9 @@ def generator(test_case):
 
				                 # Check for the presence of mandatory fields
			
 
				                 for key in ('id', 'url', 'title', 'ext'):
			
 
				                     self.assertTrue(key in info_dict.keys() and info_dict[key])
			
 
				+                # Check for mandatory fields that are automatically set by YoutubeDL
			
 
				+                for key in ['webpage_url', 'extractor']:
			
 
				+                    self.assertTrue(info_dict.get(key), u'Missing field: %s' % key)
			
 
				         finally:
			
 
				             try_rm_tcs_files()
			
 
				 
			
--- a/youtube_dl/YoutubeDL.py
+++ b/youtube_dl/YoutubeDL.py
@@ -354,8 +354,11 @@ class YoutubeDL(object):
 
				                         '_type': 'compat_list',
			
 
				                         'entries': ie_result,
			
 
				                     }
			
 
				-                if 'extractor' not in ie_result:
			
 
				-                    ie_result['extractor'] = ie.IE_NAME
			
 
				+                self.add_extra_info(ie_result,
			
 
				+                    {
			
 
				+                        'extractor': ie.IE_NAME,
			
 
				+                        'webpage_url': url
			
 
				+                    })
			
 
				                 return self.process_ie_result(ie_result, download, extra_info)
			
 
				             except ExtractorError as de: # An error we somewhat expected
			
 
				                 self.report_error(compat_str(de), de.format_traceback())
			
@@ -417,6 +420,7 @@ class YoutubeDL(object):
 
				                     'playlist': playlist,
			
 
				                     'playlist_index': i + playliststart,
			
 
				                     'extractor': ie_result['extractor'],
			
 
				+                    'webpage_url': ie_result['webpage_url'],
			
 
				                 }
			
 
				                 entry_result = self.process_ie_result(entry,
			
 
				                                                       download=download,
			
@@ -427,7 +431,10 @@ class YoutubeDL(object):
 
				         elif result_type == 'compat_list':
			
 
				             def _fixup(r):
			
 
				                 self.add_extra_info(r,
			
 
				-                    {'extractor': ie_result['extractor']})
			
 
				+                    {
			
 
				+                        'extractor': ie_result['extractor'],
			
 
				+                        'webpage_url': ie_result['webpage_url'],
			
 
				+                    })
			
 
				                 return r
			
 
				             ie_result['entries'] = [
			
 
				                 self.process_ie_result(_fixup(r), download, extra_info)
			
--- a/youtube_dl/extractor/common.py
+++ b/youtube_dl/extractor/common.py
@@ -71,6 +71,9 @@ class InfoExtractor(object):
 
				                                 ("3D" or "DASH video")
			
 
				                     * width     Width of the video, if known
			
 
				                     * height    Height of the video, if known
			
 
				+    webpage_url:    The URL of the video webpage; if given to youtube-dl, it
			
 
				+                    should allow getting the same result again. (It will be set
			
 
				+                    by YoutubeDL if it's missing)
			
 
				 
			
 
				     Unless mentioned otherwise, the fields should be Unicode strings.
			
 
				 
			
--- a/youtube_dl/extractor/vimeo.py
+++ b/youtube_dl/extractor/vimeo.py
@@ -20,7 +20,7 @@ class VimeoIE(InfoExtractor):
 
				     """Information extractor for vimeo.com."""
			
 
				 
			
 
				     # _VALID_URL matches Vimeo URLs
			
 
				-    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|player)\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
			
 
				+    _VALID_URL = r'(?P<proto>https?://)?(?:(?:www|(?P<player>player))\.)?vimeo(?P<pro>pro)?\.com/(?:(?:(?:groups|album)/[^/]+)|(?:.*?)/)?(?P<direct_link>play_redirect_hls\?clip_id=)?(?:videos?/)?(?P<id>[0-9]+)/?(?:[?].*)?(?:#.*)?$'
			
 
				     _NETRC_MACHINE = 'vimeo'
			
 
				     IE_NAME = u'vimeo'
			
 
				     _TESTS = [
			
@@ -128,11 +128,9 @@ class VimeoIE(InfoExtractor):
 
				             raise ExtractorError(u'Invalid URL: %s' % url)
			
 
				 
			
 
				         video_id = mobj.group('id')
			
 
				-        if not mobj.group('proto'):
			
 
				-            url = 'https://' + url
			
 
				-        elif mobj.group('pro'):
			
 
				+        if mobj.group('pro') or mobj.group('player'):
			
 
				             url = 'http://player.vimeo.com/video/' + video_id
			
 
				-        elif mobj.group('direct_link'):
			
 
				+        else:
			
 
				             url = 'https://vimeo.com/' + video_id
			
 
				 
			
 
				         # Retrieve video webpage to extract further information
			
@@ -234,7 +232,7 @@ class VimeoIE(InfoExtractor):
 
				         if len(formats) == 0:
			
 
				             raise ExtractorError(u'No known codec found')
			
 
				 
			
 
				-        return [{
			
 
				+        return {
			
 
				             'id':       video_id,
			
 
				             'uploader': video_uploader,
			
 
				             'uploader_id': video_uploader_id,
			
@@ -243,7 +241,8 @@ class VimeoIE(InfoExtractor):
 
				             'thumbnail':    video_thumbnail,
			
 
				             'description':  video_description,
			
 
				             'formats': formats,
			
 
				-        }]
			
 
				+            'webpage_url': url,
			
 
				+        }
			
 
				 
			
 
				 
			
 
				 class VimeoChannelIE(InfoExtractor):
			
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -1485,7 +1485,8 @@ class YoutubeIE(YoutubeBaseInfoExtractor, SubtitlesInfoExtractor):
 
				                 'subtitles':    video_subtitles,
			
 
				                 'duration':     video_duration,
			
 
				                 'age_limit':    18 if age_gate else 0,
			
 
				-                'annotations':  video_annotations
			
 
				+                'annotations':  video_annotations,
			
 
				+                'webpage_url': 'https://www.youtube.com/watch?v=%s' % video_id,
			
 
				             })
			
 
				         return results