Bläddra i källkod

[cbsnews] Extract subtitles

Added a test case for CBS News subtitles
John Assael 9 år sedan
förälder
incheckning
4118cc02c1
1 ändrade filer med 26 tillägg och 0 borttagningar
  1. 26 0
      youtube_dl/extractor/cbsnews.py

+ 26 - 0
youtube_dl/extractor/cbsnews.py

@@ -41,6 +41,21 @@ class CBSNewsIE(InfoExtractor):
                 'skip_download': True,
             },
         },
+        {
+            'url': 'http://www.cbsnews.com/videos/mountain-lions-of-l-a/',
+            'info_dict': {
+                'id': 'Mountain Lions of L.A.',
+                'ext': 'flv',
+                'title': 'Fort Hood shooting: Army downplays mental illness as cause of attack',
+                'thumbnail': 're:^http?://.*\.jpg$',
+                'subtitles': 're:^http?://.*\.xml$',
+                'duration': 787,
+            },
+            'params': {
+                # rtmp download
+                'skip_download': True,
+            },
+        },
     ]
 
     def _real_extract(self, url):
@@ -85,10 +100,21 @@ class CBSNewsIE(InfoExtractor):
                 fmt['ext'] = 'mp4'
             formats.append(fmt)
 
+        if 'mpxRefId' in video_info:
+            cap_url = 'http://www.cbsnews.com/videos/captions/%s.adb_xml' % video_info['mpxRefId']
+            subtitles = {
+                'en': [{
+                    'url': cap_url,
+                    'ext': 'xml'
+                }], }
+        else:
+            subtitles = {}
+
         return {
             'id': video_id,
             'title': title,
             'thumbnail': thumbnail,
             'duration': duration,
             'formats': formats,
+            'subtitles': subtitles,
         }