浏览代码

[generic] Add support for rutube embeds

Sergey M․ 8 年之前
父节点
当前提交
eb3079b6ce
共有 2 个文件被更改,包括 30 次插入和 0 次删除
  1. 24 0
      youtube_dl/extractor/generic.py
  2. 6 0
      youtube_dl/extractor/rutube.py

+ 24 - 0
youtube_dl/extractor/generic.py

@@ -84,6 +84,7 @@ from .twentymin import TwentyMinutenIE
 from .ustream import UstreamIE
 from .openload import OpenloadIE
 from .videopress import VideoPressIE
+from .rutube import RutubeIE
 
 
 class GenericIE(InfoExtractor):
@@ -1502,6 +1503,23 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': [VideoPressIE.ie_key()],
         },
+        {
+            # Rutube embed
+            'url': 'http://magazzino.friday.ru/videos/vipuski/kazan-2',
+            'info_dict': {
+                'id': '9b3d5bee0a8740bf70dfd29d3ea43541',
+                'ext': 'flv',
+                'title': 'Магаззино: Казань 2',
+                'description': 'md5:99bccdfac2269f0e8fdbc4bbc9db184a',
+                'uploader': 'Магаззино',
+                'upload_date': '20170228',
+                'uploader_id': '996642',
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'add_ie': [RutubeIE.ie_key()],
+        },
         {
             # ThePlatform embedded with whitespaces in URLs
             'url': 'http://www.golfchannel.com/topics/shows/golftalkcentral.htm',
@@ -2480,6 +2498,12 @@ class GenericIE(InfoExtractor):
             return _playlist_from_matches(
                 videopress_urls, ie=VideoPressIE.ie_key())
 
+        # Look for Rutube embeds
+        rutube_urls = RutubeIE._extract_urls(webpage)
+        if rutube_urls:
+            return _playlist_from_matches(
+                rutube_urls, ie=RutubeIE.ie_key())
+
         # Looking for http://schema.org/VideoObject
         json_ld = self._search_json_ld(
             webpage, video_id, default={}, expected_type='VideoObject')

+ 6 - 0
youtube_dl/extractor/rutube.py

@@ -44,6 +44,12 @@ class RutubeIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    @staticmethod
+    def _extract_urls(webpage):
+        return [mobj.group('url') for mobj in re.finditer(
+            r'<iframe[^>]+?src=(["\'])(?P<url>(?:https?:)?//rutube\.ru/embed/[\da-z]{32}.*?)\1',
+            webpage)]
+
     def _real_extract(self, url):
         video_id = self._match_id(url)
         video = self._download_json(