Browse Source

[videa] Add support for videa embeds

Sergey M․ 8 years ago
parent
commit
e186a9ec03
2 changed files with 23 additions and 0 deletions
  1. 15 additions, 0 deletions
      youtube_dl/extractor/generic.py
  2. 8 additions, 0 deletions
      youtube_dl/extractor/videa.py

+ 15 - 0
youtube_dl/extractor/generic.py

@@ -76,6 +76,7 @@ from .soundcloud import SoundcloudIE
 from .vbox7 import Vbox7IE
 from .dbtv import DBTVIE
 from .piksel import PikselIE
+from .videa import VideaIE
 
 
 class GenericIE(InfoExtractor):
@@ -1422,6 +1423,15 @@ class GenericIE(InfoExtractor):
             },
             'playlist_mincount': 3,
         },
+        {
+            # Videa embeds
+            'url': 'http://forum.dvdtalk.com/movie-talk/623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style.html',
+            'info_dict': {
+                'id': '623756-deleted-magic-star-wars-ot-deleted-alt-scenes-docu-style',
+                'title': 'Deleted Magic - Star Wars: OT Deleted / Alt. Scenes Docu. Style - DVD Talk Forum',
+            },
+            'playlist_mincount': 2,
+        },
         # {
         #     # TODO: find another test
         #     # http://schema.org/VideoObject
@@ -2358,6 +2368,11 @@ class GenericIE(InfoExtractor):
         if dbtv_urls:
             return _playlist_from_matches(dbtv_urls, ie=DBTVIE.ie_key())
 
+        # Look for Videa embeds
+        videa_urls = VideaIE._extract_urls(webpage)
+        if videa_urls:
+            return _playlist_from_matches(videa_urls, ie=VideaIE.ie_key())
+
         # Looking for http://schema.org/VideoObject
         json_ld = self._search_json_ld(
             webpage, video_id, default={}, expected_type='VideoObject')

+ 8 - 0
youtube_dl/extractor/videa.py

@@ -1,6 +1,8 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
+import re
+
 from .common import InfoExtractor
 from ..utils import (
     int_or_none,
@@ -43,6 +45,12 @@ class VideaIE(InfoExtractor):
         'only_matching': True,
     }]
 
+    @staticmethod
+    def _extract_urls(webpage):  # Return a list of videa.hu player URLs taken from <iframe src=...> attributes in webpage
+        return [url for _, url in re.findall(  # two groups -> findall yields (quote, url) tuples; the quote is discarded
+            r'<iframe[^>]+src=(["\'])(?P<url>(?:https?:)?//videa\.hu/player\?.*?\bv=.+?)\1',
+            webpage)]
+
     def _real_extract(self, url):
         video_id = self._match_id(url)