7 年之前 · 30374f4d40
--- a/youtube_dl/extractor/itv.py
+++ b/youtube_dl/extractor/itv.py
@@ -18,6 +18,7 @@ from ..utils import (
 
				     xpath_element,
			
 
				     xpath_text,
			
 
				     int_or_none,
			
 
				+    merge_dicts,
			
 
				     parse_duration,
			
 
				     smuggle_url,
			
 
				     ExtractorError,
			
@@ -129,64 +130,65 @@ class ITVIE(InfoExtractor):
 
				 
			
 
				         resp_env = self._download_xml(
			
 
				             params['data-playlist-url'], video_id,
			
 
				-            headers=headers, data=etree.tostring(req_env))
			
 
				-        playlist = xpath_element(resp_env, './/Playlist')
			
 
				-        if playlist is None:
			
 
				-            fault_code = xpath_text(resp_env, './/faultcode')
			
 
				-            fault_string = xpath_text(resp_env, './/faultstring')
			
 
				-            if fault_code == 'InvalidGeoRegion':
			
 
				-                self.raise_geo_restricted(
			
 
				-                    msg=fault_string, countries=self._GEO_COUNTRIES)
			
 
				-            elif fault_code not in (
			
 
				-                    'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'):
			
 
				-                raise ExtractorError(
			
 
				-                    '%s said: %s' % (self.IE_NAME, fault_string), expected=True)
			
 
				-            info.update({
			
 
				-                'title': self._og_search_title(webpage),
			
 
				-                'episode_title': params.get('data-video-episode'),
			
 
				-                'series': params.get('data-video-title'),
			
 
				-            })
			
 
				-        else:
			
 
				-            title = xpath_text(playlist, 'EpisodeTitle', default=None)
			
 
				-            info.update({
			
 
				-                'title': title,
			
 
				-                'episode_title': title,
			
 
				-                'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
			
 
				-                'series': xpath_text(playlist, 'ProgrammeTitle'),
			
 
				-                'duration': parse_duration(xpath_text(playlist, 'Duration')),
			
 
				-            })
			
 
				-            video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
			
 
				-            media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
			
 
				-            rtmp_url = media_files.attrib['base']
			
 
				+            headers=headers, data=etree.tostring(req_env), fatal=False)
			
 
				+        if resp_env:
			
 
				+            playlist = xpath_element(resp_env, './/Playlist')
			
 
				+            if playlist is None:
			
 
				+                fault_code = xpath_text(resp_env, './/faultcode')
			
 
				+                fault_string = xpath_text(resp_env, './/faultstring')
			
 
				+                if fault_code == 'InvalidGeoRegion':
			
 
				+                    self.raise_geo_restricted(
			
 
				+                        msg=fault_string, countries=self._GEO_COUNTRIES)
			
 
				+                elif fault_code not in (
			
 
				+                        'InvalidEntity', 'InvalidVodcrid', 'ContentUnavailable'):
			
 
				+                    raise ExtractorError(
			
 
				+                        '%s said: %s' % (self.IE_NAME, fault_string), expected=True)
			
 
				+                info.update({
			
 
				+                    'title': self._og_search_title(webpage),
			
 
				+                    'episode_title': params.get('data-video-episode'),
			
 
				+                    'series': params.get('data-video-title'),
			
 
				+                })
			
 
				+            else:
			
 
				+                title = xpath_text(playlist, 'EpisodeTitle', default=None)
			
 
				+                info.update({
			
 
				+                    'title': title,
			
 
				+                    'episode_title': title,
			
 
				+                    'episode_number': int_or_none(xpath_text(playlist, 'EpisodeNumber')),
			
 
				+                    'series': xpath_text(playlist, 'ProgrammeTitle'),
			
 
				+                    'duration': parse_duration(xpath_text(playlist, 'Duration')),
			
 
				+                })
			
 
				+                video_element = xpath_element(playlist, 'VideoEntries/Video', fatal=True)
			
 
				+                media_files = xpath_element(video_element, 'MediaFiles', fatal=True)
			
 
				+                rtmp_url = media_files.attrib['base']
			
 
				 
			
 
				-            for media_file in media_files.findall('MediaFile'):
			
 
				-                play_path = xpath_text(media_file, 'URL')
			
 
				-                if not play_path:
			
 
				-                    continue
			
 
				-                tbr = int_or_none(media_file.get('bitrate'), 1000)
			
 
				-                f = {
			
 
				-                    'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
			
 
				-                    'play_path': play_path,
			
 
				-                    # Providing this swfVfy allows to avoid truncated downloads
			
 
				-                    'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
			
 
				-                    'page_url': url,
			
 
				-                    'tbr': tbr,
			
 
				-                    'ext': 'flv',
			
 
				-                }
			
 
				-                app = self._search_regex(
			
 
				-                    'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
			
 
				-                if app:
			
 
				-                    f.update({
			
 
				-                        'url': rtmp_url.split('?', 1)[0],
			
 
				-                        'app': app,
			
 
				-                    })
			
 
				-                else:
			
 
				-                    f['url'] = rtmp_url
			
 
				-                formats.append(f)
			
 
				+                for media_file in media_files.findall('MediaFile'):
			
 
				+                    play_path = xpath_text(media_file, 'URL')
			
 
				+                    if not play_path:
			
 
				+                        continue
			
 
				+                    tbr = int_or_none(media_file.get('bitrate'), 1000)
			
 
				+                    f = {
			
 
				+                        'format_id': 'rtmp' + ('-%d' % tbr if tbr else ''),
			
 
				+                        'play_path': play_path,
			
 
				+                        # Providing this swfVfy allows to avoid truncated downloads
			
 
				+                        'player_url': 'http://www.itv.com/mercury/Mercury_VideoPlayer.swf',
			
 
				+                        'page_url': url,
			
 
				+                        'tbr': tbr,
			
 
				+                        'ext': 'flv',
			
 
				+                    }
			
 
				+                    app = self._search_regex(
			
 
				+                        'rtmpe?://[^/]+/(.+)$', rtmp_url, 'app', default=None)
			
 
				+                    if app:
			
 
				+                        f.update({
			
 
				+                            'url': rtmp_url.split('?', 1)[0],
			
 
				+                            'app': app,
			
 
				+                        })
			
 
				+                    else:
			
 
				+                        f['url'] = rtmp_url
			
 
				+                    formats.append(f)
			
 
				 
			
 
				-            for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
			
 
				-                if caption_url.text:
			
 
				-                    extract_subtitle(caption_url.text)
			
 
				+                for caption_url in video_element.findall('ClosedCaptioningURIs/URL'):
			
 
				+                    if caption_url.text:
			
 
				+                        extract_subtitle(caption_url.text)
			
 
				 
			
 
				         ios_playlist_url = params.get('data-video-playlist') or params.get('data-video-id')
			
 
				         hmac = params.get('data-video-hmac')
			
@@ -261,7 +263,17 @@ class ITVIE(InfoExtractor):
 
				             'formats': formats,
			
 
				             'subtitles': subtitles,
			
 
				         })
			
 
				-        return info
			
 
				+
			
 
				+        webpage_info = self._search_json_ld(webpage, video_id, default={})
			
 
				+        if not webpage_info.get('title'):
			
 
				+            webpage_info['title'] = self._html_search_regex(
			
 
				+                r'(?s)<h\d+[^>]+\bclass=["\'][^>]*episode-title["\'][^>]*>([^<]+)<',
			
 
				+                webpage, 'title', default=None) or self._og_search_title(
			
 
				+                webpage, default=None) or self._html_search_meta(
			
 
				+                'twitter:title', webpage, 'title',
			
 
				+                default=None) or webpage_info['episode']
			
 
				+
			
 
				+        return merge_dicts(info, webpage_info)
			
 
				 
			
 
				 
			
 
				 class ITVBTCCIE(InfoExtractor):