|  | @@ -18,6 +18,7 @@ from ..compat import (
 | 
	
		
			
				|  |  |      compat_HTTPError,
 | 
	
		
			
				|  |  |      compat_http_client,
 | 
	
		
			
				|  |  |      compat_urllib_error,
 | 
	
		
			
				|  |  | +    compat_urllib_parse,
 | 
	
		
			
				|  |  |      compat_urllib_parse_urlparse,
 | 
	
		
			
				|  |  |      compat_urllib_request,
 | 
	
		
			
				|  |  |      compat_urlparse,
 | 
	
	
		
			
				|  | @@ -37,6 +38,7 @@ from ..utils import (
 | 
	
		
			
				|  |  |      RegexNotFoundError,
 | 
	
		
			
				|  |  |      sanitize_filename,
 | 
	
		
			
				|  |  |      unescapeHTML,
 | 
	
		
			
				|  |  | +    url_basename,
 | 
	
		
			
				|  |  |  )
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |  
 | 
	
	
		
			
				|  | @@ -978,69 +980,167 @@ class InfoExtractor(object):
 | 
	
		
			
				|  |  |          self._sort_formats(formats)
 | 
	
		
			
				|  |  |          return formats
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    # TODO: improve extraction
 | 
	
		
			
				|  |  | -    def _extract_smil_formats(self, smil_url, video_id, fatal=True):
 | 
	
		
			
				|  |  | -        smil = self._download_xml(
 | 
	
		
			
				|  |  | -            smil_url, video_id, 'Downloading SMIL file',
 | 
	
		
			
				|  |  | -            'Unable to download SMIL file', fatal=fatal)
 | 
	
		
			
				|  |  | +    @staticmethod
 | 
	
		
			
				|  |  | +    def _xpath_ns(path, namespace=None):
 | 
	
		
			
				|  |  | +        if not namespace:
 | 
	
		
			
				|  |  | +            return path
 | 
	
		
			
				|  |  | +        out = []
 | 
	
		
			
				|  |  | +        for c in path.split('/'):
 | 
	
		
			
				|  |  | +            if not c or c == '.':
 | 
	
		
			
				|  |  | +                out.append(c)
 | 
	
		
			
				|  |  | +            else:
 | 
	
		
			
				|  |  | +                out.append('{%s}%s' % (namespace, c))
 | 
	
		
			
				|  |  | +        return '/'.join(out)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
    def _extract_smil_formats(self, smil_url, video_id, fatal=True, f4m_params=None):
        """ Download the SMIL document at smil_url and return its formats.

        An empty list is returned when _download_smil gives back False.
        """
        smil_doc = self._download_smil(smil_url, video_id, fatal=fatal)
        if smil_doc is not False:
            ns = self._parse_smil_namespace(smil_doc)
            return self._parse_smil_formats(
                smil_doc, smil_url, video_id, namespace=ns, f4m_params=f4m_params)

        # No document was obtained; this path is only expected for
        # non-fatal downloads
        assert not fatal
        return []
 | 
	
		
			
				|  |  | +
 | 
	
		
			
    def _extract_smil_info(self, smil_url, video_id, fatal=True, f4m_params=None):
        """ Download the SMIL document at smil_url and return its info dict.

        An empty dict is returned when _download_smil gives back False.
        """
        smil_doc = self._download_smil(smil_url, video_id, fatal=fatal)
        if smil_doc is not False:
            return self._parse_smil(
                smil_doc, smil_url, video_id, f4m_params=f4m_params)
        return {}
 | 
	
		
			
				|  |  | +
 | 
	
		
			
    def _download_smil(self, smil_url, video_id, fatal=True):
        """ Fetch smil_url through _download_xml with SMIL-specific messages """
        progress_note = 'Downloading SMIL file'
        failure_note = 'Unable to download SMIL file'
        return self._download_xml(
            smil_url, video_id, progress_note, failure_note, fatal=fatal)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
    def _parse_smil(self, smil, smil_url, video_id, f4m_params=None):
        """ Build an info dict (id, title, description, formats, subtitles)
        from an already parsed SMIL document.

        The returned id is the basename of smil_url without its extension;
        the video_id argument is only used while extracting the formats.
        """
        ns = self._parse_smil_namespace(smil)

        formats = self._parse_smil_formats(
            smil, smil_url, video_id, namespace=ns, f4m_params=f4m_params)
        subtitles = self._parse_smil_subtitles(smil, namespace=ns)

        video_id = os.path.splitext(url_basename(smil_url))[0]

        # The first non-empty <meta> of each kind wins
        title = description = None
        for meta in smil.findall(self._xpath_ns('./head/meta', ns)):
            attrs = meta.attrib
            name, content = attrs.get('name'), attrs.get('content')
            if not (name and content):
                continue
            if name == 'title':
                title = title or content
            elif name in ('description', 'abstract'):
                description = description or content

        info = {
            'id': video_id,
            'title': title or video_id,
            'description': description,
        }
        info['formats'] = formats
        info['subtitles'] = subtitles
        return info
 | 
	
		
			
				|  |  | +
 | 
	
		
			
    def _parse_smil_namespace(self, smil):
        """ Return the namespace found in the '{namespace}smil' tag of the
        root element, or None when the tag does not have that form """
        root_tag = smil.tag
        return self._search_regex(
            r'(?i)^{([^}]+)?}smil$', root_tag, 'namespace', default=None)
 | 
	
		
			
				|  |  | +
 | 
	
		
			
    def _parse_smil_formats(self, smil, smil_url, video_id, namespace=None, f4m_params=None):
        """ Turn the <video> elements of a parsed SMIL document into a
        sorted list of format dicts.

        Depending on its attributes each <video> becomes an RTMP format,
        a set of HLS (m3u8) formats, a set of HDS (f4m) formats or a plain
        HTTP format. Elements without a src attribute are ignored.
        f4m_params are the query parameters appended to f4m manifest URLs;
        a built-in default is used when none are given.
        """
        # Base URL: the first <meta> carrying base/httpBase, else the
        # URL of the SMIL document itself
        meta_bases = (
            meta.get('base') or meta.get('httpBase')
            for meta in smil.findall(self._xpath_ns('./head/meta', namespace)))
        base = next((candidate for candidate in meta_bases if candidate), smil_url)

        formats = []
        rtmp_count = 0
        http_count = 0

        for video in smil.findall(self._xpath_ns('.//video', namespace)):
            src = video.get('src')
            if not src:
                continue

            bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
            filesize = int_or_none(video.get('size') or video.get('fileSize'))
            width = int_or_none(video.get('width'))
            height = int_or_none(video.get('height'))
            proto = video.get('proto')
            ext = video.get('ext')
            src_ext = determine_ext(src)
            streamer = video.get('streamer') or base

            # Fields shared by the RTMP and the plain HTTP format dicts
            shared = {
                'tbr': bitrate,
                'filesize': filesize,
                'width': width,
                'height': height,
            }

            if proto == 'rtmp' or streamer.startswith('rtmp'):
                rtmp_count += 1
                rtmp_format = {
                    'url': streamer,
                    'play_path': src,
                    'ext': 'flv',
                    'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
                }
                rtmp_format.update(shared)
                formats.append(rtmp_format)
                continue

            if src.startswith('http'):
                src_url = src
            else:
                src_url = compat_urlparse.urljoin(base, src)

            if proto == 'm3u8' or src_ext == 'm3u8':
                hls_formats = self._extract_m3u8_formats(
                    src_url, video_id, ext or 'mp4', m3u8_id='hls')
                formats.extend(hls_formats)
            elif src_ext == 'f4m':
                f4m_params = f4m_params or {
                    'hdcore': '3.2.0',
                    'plugin': 'flowplayer-3.2.0.1',
                }
                separator = '&' if '?' in src_url else '?'
                f4m_url = src_url + separator + compat_urllib_parse.urlencode(f4m_params)
                formats.extend(self._extract_f4m_formats(f4m_url, video_id, f4m_id='hds'))
            elif src_url.startswith('http'):
                http_count += 1
                http_format = {
                    'url': src_url,
                    'ext': ext or src_ext or 'flv',
                    'format_id': 'http-%d' % (bitrate or http_count),
                }
                http_format.update(shared)
                formats.append(http_format)

        self._sort_formats(formats)

        return formats
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  | -    def _parse_smil_video(self, video, video_id, base, rtmp_count):
 | 
	
		
			
				|  |  | -        src = video.get('src')
 | 
	
		
			
				|  |  | -        if not src:
 | 
	
		
			
				|  |  | -            return [], rtmp_count
 | 
	
		
			
				|  |  | -        bitrate = int_or_none(video.get('system-bitrate') or video.get('systemBitrate'), 1000)
 | 
	
		
			
				|  |  | -        width = int_or_none(video.get('width'))
 | 
	
		
			
				|  |  | -        height = int_or_none(video.get('height'))
 | 
	
		
			
				|  |  | -        proto = video.get('proto')
 | 
	
		
			
				|  |  | -        if not proto:
 | 
	
		
			
				|  |  | -            if base:
 | 
	
		
			
				|  |  | -                if base.startswith('rtmp'):
 | 
	
		
			
				|  |  | -                    proto = 'rtmp'
 | 
	
		
			
				|  |  | -                elif base.startswith('http'):
 | 
	
		
			
				|  |  | -                    proto = 'http'
 | 
	
		
			
				|  |  | -        ext = video.get('ext')
 | 
	
		
			
				|  |  | -        if proto == 'm3u8':
 | 
	
		
			
				|  |  | -            return self._extract_m3u8_formats(src, video_id, ext), rtmp_count
 | 
	
		
			
				|  |  | -        elif proto == 'rtmp':
 | 
	
		
			
				|  |  | -            rtmp_count += 1
 | 
	
		
			
				|  |  | -            streamer = video.get('streamer') or base
 | 
	
		
			
				|  |  | -            return ([{
 | 
	
		
			
				|  |  | -                'url': streamer,
 | 
	
		
			
				|  |  | -                'play_path': src,
 | 
	
		
			
				|  |  | -                'ext': 'flv',
 | 
	
		
			
				|  |  | -                'format_id': 'rtmp-%d' % (rtmp_count if bitrate is None else bitrate),
 | 
	
		
			
				|  |  | -                'tbr': bitrate,
 | 
	
		
			
				|  |  | -                'width': width,
 | 
	
		
			
				|  |  | -                'height': height,
 | 
	
		
			
				|  |  | -            }], rtmp_count)
 | 
	
		
			
				|  |  | -        elif proto.startswith('http'):
 | 
	
		
			
				|  |  | -            return ([{
 | 
	
		
			
				|  |  | -                'url': base + src,
 | 
	
		
			
				|  |  | -                'ext': ext or 'flv',
 | 
	
		
			
				|  |  | -                'tbr': bitrate,
 | 
	
		
			
				|  |  | -                'width': width,
 | 
	
		
			
				|  |  | -                'height': height,
 | 
	
		
			
				|  |  | -            }], rtmp_count)
 | 
	
		
			
				|  |  | +    def _parse_smil_subtitles(self, smil, namespace=None):
 | 
	
		
			
				|  |  | +        subtitles = {}
 | 
	
		
			
				|  |  | +        for num, textstream in enumerate(smil.findall(self._xpath_ns('.//textstream', namespace))):
 | 
	
		
			
				|  |  | +            src = textstream.get('src')
 | 
	
		
			
				|  |  | +            if not src:
 | 
	
		
			
				|  |  | +                continue
 | 
	
		
			
				|  |  | +            ext = textstream.get('ext') or determine_ext(src)
 | 
	
		
			
				|  |  | +            if not ext:
 | 
	
		
			
				|  |  | +                type_ = textstream.get('type')
 | 
	
		
			
				|  |  | +                if type_ == 'text/srt':
 | 
	
		
			
				|  |  | +                    ext = 'srt'
 | 
	
		
			
				|  |  | +            lang = textstream.get('systemLanguage') or textstream.get('systemLanguageName')
 | 
	
		
			
				|  |  | +            subtitles.setdefault(lang, []).append({
 | 
	
		
			
				|  |  | +                'url': src,
 | 
	
		
			
				|  |  | +                'ext': ext,
 | 
	
		
			
				|  |  | +            })
 | 
	
		
			
				|  |  | +        return subtitles
 | 
	
		
			
				|  |  |  
 | 
	
		
			
				|  |  |      def _live_title(self, name):
 | 
	
		
			
				|  |  |          """ Generate the title for a live video """
 |