Browse Source

[yahoo] Add support for closed captions (closes #5714)

Yen Chi Hsuan 10 years ago
parent
commit
ecee572411
2 changed files with 19 additions and 0 deletions
  1. youtube_dl/extractor/yahoo.py (+18 -0)
  2. youtube_dl/utils.py (+1 -0)

+ 18 - 0
youtube_dl/extractor/yahoo.py

@@ -15,6 +15,7 @@ from ..utils import (
     unescapeHTML,
     unescapeHTML,
     ExtractorError,
     ExtractorError,
     int_or_none,
     int_or_none,
+    mimetype2ext,
 )
 )
 
 
 from .nbc import NBCSportsVPlayerIE
 from .nbc import NBCSportsVPlayerIE
@@ -236,6 +237,22 @@ class YahooIE(InfoExtractor):
 
 
         self._sort_formats(formats)
         self._sort_formats(formats)
 
 
+        closed_captions = self._html_search_regex(
+            r'"closedcaptions":(\[[^\]]+\])', webpage, 'closed captions',
+            default='[]')
+
+        cc_json = self._parse_json(closed_captions, video_id, fatal=False)
+        subtitles = {}
+        if cc_json:
+            for closed_caption in cc_json:
+                lang = closed_caption['lang']
+                if lang not in subtitles:
+                    subtitles[lang] = []
+                subtitles[lang].append({
+                    'url': closed_caption['url'],
+                    'ext': mimetype2ext(closed_caption['content_type']),
+                })
+
         return {
         return {
             'id': video_id,
             'id': video_id,
             'display_id': display_id,
             'display_id': display_id,
@@ -244,6 +261,7 @@ class YahooIE(InfoExtractor):
             'description': clean_html(meta['description']),
             'description': clean_html(meta['description']),
             'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage),
             'thumbnail': meta['thumbnail'] if meta.get('thumbnail') else self._og_search_thumbnail(webpage),
             'duration': int_or_none(meta.get('duration')),
             'duration': int_or_none(meta.get('duration')),
+            'subtitles': subtitles,
         }
         }
 
 
 
 

+ 1 - 0
youtube_dl/utils.py

@@ -1665,6 +1665,7 @@ def mimetype2ext(mt):
     return {
     return {
         'x-ms-wmv': 'wmv',
         'x-ms-wmv': 'wmv',
         'x-mp4-fragmented': 'mp4',
         'x-mp4-fragmented': 'mp4',
+        'ttml+xml': 'ttml',
     }.get(res, res)
     }.get(res, res)