Browse Source

[kaltura] Improve embeds detection (closes #16201)

Sergey M․ 7 years ago
parent
commit
e30991f920
2 changed files with 20 additions and 4 deletions
  1. +17 -1
      youtube_dl/extractor/generic.py
  2. +3 -3
      youtube_dl/extractor/kaltura.py

+ 17 - 1
youtube_dl/extractor/generic.py

@@ -1220,7 +1220,7 @@ class GenericIE(InfoExtractor):
                 'title': '35871',
                 'title': '35871',
                 'timestamp': 1355743100,
                 'timestamp': 1355743100,
                 'upload_date': '20121217',
                 'upload_date': '20121217',
-                'uploader_id': 'batchUser',
+                'uploader_id': 'cplapp@learn360.com',
             },
             },
             'add_ie': ['Kaltura'],
             'add_ie': ['Kaltura'],
         },
         },
@@ -1271,6 +1271,22 @@ class GenericIE(InfoExtractor):
             },
             },
             'add_ie': ['Kaltura'],
             'add_ie': ['Kaltura'],
         },
         },
+        {
+            # meta twitter:player
+            'url': 'http://thechive.com/2017/12/08/all-i-want-for-christmas-is-more-twerk/',
+            'info_dict': {
+                'id': '0_01b42zps',
+                'ext': 'mp4',
+                'title': 'Main Twerk (Video)',
+                'upload_date': '20171208',
+                'uploader_id': 'sebastian.salinas@thechive.com',
+                'timestamp': 1512713057,
+            },
+            'params': {
+                'skip_download': True,
+            },
+            'add_ie': ['Kaltura'],
+        },
         # referrer protected EaglePlatform embed
         # referrer protected EaglePlatform embed
         {
         {
             'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',
             'url': 'https://tvrain.ru/lite/teleshow/kak_vse_nachinalos/namin-418921/',

+ 3 - 3
youtube_dl/extractor/kaltura.py

@@ -135,10 +135,10 @@ class KalturaIE(InfoExtractor):
                 ''', webpage) or
                 ''', webpage) or
             re.search(
             re.search(
                 r'''(?xs)
                 r'''(?xs)
-                    <iframe[^>]+src=(?P<q1>["'])
-                      (?:https?:)?//(?:www\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
+                    <(?:iframe[^>]+src|meta[^>]+\bcontent)=(?P<q1>["'])
+                      (?:https?:)?//(?:(?:www|cdnapi)\.)?kaltura\.com/(?:(?!(?P=q1)).)*\b(?:p|partner_id)/(?P<partner_id>\d+)
                       (?:(?!(?P=q1)).)*
                       (?:(?!(?P=q1)).)*
-                      [?&]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
+                      [?&;]entry_id=(?P<id>(?:(?!(?P=q1))[^&])+)
                     (?P=q1)
                     (?P=q1)
                 ''', webpage)
                 ''', webpage)
         )
         )