10 years ago · f13b1e7d7f
--- a/youtube_dl/extractor/bbccouk.py
+++ b/youtube_dl/extractor/bbccouk.py
@@ -2,12 +2,12 @@ from __future__ import unicode_literals
 
															 import xml.etree.ElementTree
														
 
															-from .subtitles import SubtitlesInfoExtractor
														
 
															+from .common import InfoExtractor
														
 
															 from ..utils import ExtractorError
														
 
															 from ..compat import compat_HTTPError
														
 
															-class BBCCoUkIE(SubtitlesInfoExtractor):
														
 
															+class BBCCoUkIE(InfoExtractor):
														
 
															     IE_NAME = 'bbc.co.uk'
														
 
															     IE_DESC = 'BBC iPlayer'
														
 
															     _VALID_URL = r'https?://(?:www\.)?bbc\.co\.uk/(?:(?:(?:programmes|iplayer(?:/[^/]+)?/(?:episode|playlist))/)|music/clips[/#])(?P<id>[\da-z]{8})'
														
@@ -215,17 +215,32 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
 
															             formats.extend(conn_formats)
														
 
															         return formats
														
 
															-    def _extract_captions(self, media, programme_id):
														
 
															+    def _get_subtitles(self, media, programme_id):
														
 
															         subtitles = {}
														
 
															         for connection in self._extract_connections(media):
														
 
															             captions = self._download_xml(connection.get('href'), programme_id, 'Downloading captions')
														
 
															             lang = captions.get('{http://www.w3.org/XML/1998/namespace}lang', 'en')
														
 
															             ps = captions.findall('./{0}body/{0}div/{0}p'.format('{http://www.w3.org/2006/10/ttaf1}'))
														
 
															             srt = ''
														
 
															+
														
 
															+            def _extract_text(p):
														
 
															+                if p.text is not None:
														
 
															+                    stripped_text = p.text.strip()
														
 
															+                    if stripped_text:
														
 
															+                        return stripped_text
														
 
															+                return ' '.join(span.text.strip() for span in p.findall('{http://www.w3.org/2006/10/ttaf1}span'))
														
 
															             for pos, p in enumerate(ps):
														
 
															-                srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'),
														
 
															-                                                          p.text.strip() if p.text is not None else '')
														
 
															-            subtitles[lang] = srt
														
 
															+                srt += '%s\r\n%s --> %s\r\n%s\r\n\r\n' % (str(pos), p.get('begin'), p.get('end'), _extract_text(p))
														
 
															+            subtitles[lang] = [
														
 
															+                {
														
 
															+                    'url': connection.get('href'),
														
 
															+                    'ext': 'ttml',
														
 
															+                },
														
 
															+                {
														
 
															+                    'data': srt,
														
 
															+                    'ext': 'srt',
														
 
															+                },
														
 
															+            ]
														
 
															         return subtitles
														
 
															     def _download_media_selector(self, programme_id):
														
@@ -249,7 +264,7 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
 
															             elif kind == 'video':
														
 
															                 formats.extend(self._extract_video(media, programme_id))
														
 
															             elif kind == 'captions':
														
 
															-                subtitles = self._extract_captions(media, programme_id)
														
 
															+                subtitles = self.extract_subtitles(media, programme_id)
														
 
															         return formats, subtitles
														
@@ -324,10 +339,6 @@ class BBCCoUkIE(SubtitlesInfoExtractor):
 
															         else:
														
 
															             programme_id, title, description, duration, formats, subtitles = self._download_playlist(group_id)
														
 
															-        if self._downloader.params.get('listsubtitles', False):
														
 
															-            self._list_available_subtitles(programme_id, subtitles)
														
 
															-            return
														
 
															-
														
 
															         self._sort_formats(formats)
														
 
															         return {