Browse Source

[kaltura] Improve iframe extraction (#28969)

Co-authored-by: Sergey M. <dstftw@gmail.com>
Ben Rog-Wilhelm 4 years ago
parent
commit
fe05191b8c
2 changed files with 16 additions and 1 deletion
  1. 15 0
      youtube_dl/extractor/gdcvault.py
  2. 1 1
      youtube_dl/extractor/kaltura.py

+ 15 - 0
youtube_dl/extractor/gdcvault.py

@@ -102,6 +102,21 @@ class GDCVaultIE(InfoExtractor):
                 'format': 'mp4-408',
             },
         },
+        {
+            # Kaltura embed, whitespace between quote and embedded URL in iframe's src
+            'url': 'https://www.gdcvault.com/play/1025699',
+            'info_dict': {
+                'id': '0_zagynv0a',
+                'ext': 'mp4',
+                'title': 'Tech Toolbox',
+                'upload_date': '20190408',
+                'uploader_id': 'joe@blazestreaming.com',
+                'timestamp': 1554764629,
+            },
+            'params': {
+                'skip_download': True,
+            },
+        },
     ]
 
     def _login(self, webpage_url, display_id):

+ 1 - 1
youtube_dl/extractor/kaltura.py

@@ -145,7 +145,7 @@ class KalturaIE(InfoExtractor):
                 ''', webpage))
             or list(re.finditer(
                 r'''(?xs)
-                    <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
+                    <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])\s*
                       (?:https?:)?//(?:(?:www|cdnapi(?:sec)?)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
                       (?:(?!(?P=q1)).)*
                       [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)