Browse Source

[kaltura] Add support for iframe embeds

John Hawkinson 8 năm trước
mục cha
commit
a01825a541
2 tập tin đã thay đổi với 27 bổ sung và 1 xóa
  1. 15 0
      youtube_dl/extractor/generic.py
  2. 12 1
      youtube_dl/extractor/kaltura.py

+ 15 - 0
youtube_dl/extractor/generic.py

@@ -1080,6 +1080,21 @@ class GenericIE(InfoExtractor):
             },
             'add_ie': ['Kaltura'],
         },
+        {
+            # Kaltura iframe embed
+            'url': 'http://www.gsd.harvard.edu/event/i-m-pei-a-centennial-celebration/',
+            'md5': 'ae5ace8eb09dc1a35d03b579a9c2cc44',
+            'info_dict': {
+                'id': '0_f2cfbpwy',
+                'ext': 'mp4',
+                'title': 'I. M. Pei: A Centennial Celebration',
+                'description': 'md5:1db8f40c69edc46ca180ba30c567f37c',
+                'upload_date': '20170403',
+                'uploader_id': 'batchUser',
+                'timestamp': 1491232186,
+            },
+            'add_ie': ['Kaltura'],
+        },
         # Eagle.Platform embed (generic URL)
         {
             'url': 'http://lenta.ru/news/2015/03/06/navalny/',

+ 12 - 1
youtube_dl/extractor/kaltura.py

@@ -91,6 +91,7 @@ class KalturaIE(InfoExtractor):
                     }],
                 },
             },
+            'skip': 'Gone. Maybe https://www.safaribooksonline.com/library/tutorials/introduction-to-python-anon/3469/',
             'params': {
                 'skip_download': True,
             },
@@ -108,6 +109,7 @@ class KalturaIE(InfoExtractor):
     @staticmethod
     def _extract_url(webpage):
         mobj = (
+            # Embed codes: https://knowledge.kaltura.com/embedding-kaltura-media-players-your-site
             re.search(
                 r"""(?xs)
                     kWidget\.(?:thumb)?[Ee]mbed\(
@@ -127,7 +129,16 @@ class KalturaIE(InfoExtractor):
                         (?P<q2>["\'])entry_?[Ii]d(?P=q2)
                     )\s*:\s*
                     (?P<q3>["\'])(?P<id>(?:(?!(?P=q3)).)+)(?P=q3)
-                ''', webpage))
+                ''', webpage) or
+            re.search(
+                r'''(?xs)
+                    <iframe[^>]+src=(?P<q1>["\'])
+                      (?:https?:)?//(?:www\.)?kaltura\.com/p/(?P<partner_id>\d+)/
+                      (?:(?!(?P=q1)).)*
+                      [\?&]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
+                    (?P=q1)
+                ''', webpage)
+        )
         if mobj:
             embed_info = mobj.groupdict()
             url = 'kaltura:%(partner_id)s:%(id)s' % embed_info