Browse Source

[extractor/generic] Detect schema.org/VideoObject embeds

Sergey M․ 9 years ago
parent
commit
0de168f7ed
1 changed file with 30 additions and 0 deletions
  1. 30 0
      youtube_dl/extractor/generic.py

+ 30 - 0
youtube_dl/extractor/generic.py

@@ -1313,6 +1313,23 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': ['Kaltura'],
         },
+        {
+            # TODO: find another test
+            # http://schema.org/VideoObject
+            # 'url': 'https://flipagram.com/f/nyvTSJMKId',
+            # 'md5': '888dcf08b7ea671381f00fab74692755',
+            # 'info_dict': {
+            #     'id': 'nyvTSJMKId',
+            #     'ext': 'mp4',
+            #     'title': 'Flipagram by sjuria101 featuring Midnight Memories by One Direction',
+            #     'description': '#love for cats.',
+            #     'timestamp': 1461244995,
+            #     'upload_date': '20160421',
+            # },
+            # 'params': {
+            #     'force_generic_extractor': True,
+            # },
+        }
     ]
 
     def report_following_redirect(self, new_url):
@@ -2157,6 +2174,19 @@ class GenericIE(InfoExtractor):
         if embed_url:
             return self.url_result(embed_url)
 
+        # Looking for http://schema.org/VideoObject
+        json_ld = self._search_json_ld(
+            webpage, video_id, default=None, expected_type='VideoObject')
+        if json_ld and json_ld.get('url'):
+            info_dict.update({
+                'title': video_title or info_dict['title'],
+                'description': video_description,
+                'thumbnail': video_thumbnail,
+                'age_limit': age_limit
+            })
+            info_dict.update(json_ld)
+            return info_dict
+
         def check_video(vurl):
             if YoutubeIE.suitable(vurl):
                 return True