|
@@ -55,6 +55,8 @@ from ..utils import (
|
|
|
update_Request,
|
|
|
update_url_query,
|
|
|
parse_m3u8_attributes,
|
|
|
+ extract_attributes,
|
|
|
+ parse_codecs,
|
|
|
)
|
|
|
|
|
|
|
|
@@ -1635,6 +1637,62 @@ class InfoExtractor(object):
|
|
|
self.report_warning('Unknown MIME type %s in DASH manifest' % mime_type)
|
|
|
return formats
|
|
|
|
|
|
def _parse_html5_media_entries(self, base_url, webpage):
    """
    Extract media entries from HTML5 <video> and <audio> elements.

    base_url is used to resolve relative URLs found in the markup,
    webpage is the HTML source to scan.

    Returns a list of dicts, one per media element that yielded at least
    one format. Each dict has the keys:
        formats    list of format dicts, each with at least 'url' and
                   'vcodec' ('none' for formats found in <audio>)
        subtitles  dict mapping a language to a list of {'url': ...} dicts
        thumbnail  absolute URL of the poster image or None
    """
    def absolute_url(media_url):
        return compat_urlparse.urljoin(base_url, media_url)

    def parse_content_type(content_type):
        # Turn a <source type="..."> value such as
        # 'video/mp4; codecs="avc1.42E01E, mp4a.40.2"' into format fields
        if not content_type:
            return {}
        ctr = re.search(r'(?P<mimetype>[^/]+/[^;]+)(?:;\s*codecs="?(?P<codecs>[^"]+))?', content_type)
        if not ctr:
            return {}
        f = parse_codecs(ctr.group('codecs'))
        # The mimetype group also captures whitespace around the MIME
        # type (e.g. 'video/mp4 ; codecs=...'), so strip it before lookup
        f['ext'] = mimetype2ext(ctr.group('mimetype').strip())
        return f

    entries = []
    for media_tag, media_type, media_content in re.findall(r'(?s)(<(?P<tag>video|audio)[^>]*>)(.*?)</(?P=tag)>', webpage):
        is_audio = media_type == 'audio'
        media_info = {
            'formats': [],
            'subtitles': {},
        }
        media_attributes = extract_attributes(media_tag)

        # Media URL given directly on the <video>/<audio> tag
        src = media_attributes.get('src')
        if src:
            media_info['formats'].append({
                'url': absolute_url(src),
                'vcodec': 'none' if is_audio else None,
            })

        # The poster may be relative just like any other URL in the page
        poster = media_attributes.get('poster')
        media_info['thumbnail'] = absolute_url(poster) if poster else None

        # Alternative sources listed as <source> children
        for source_tag in re.findall(r'<source[^>]+>', media_content):
            source_attributes = extract_attributes(source_tag)
            src = source_attributes.get('src')
            if not src:
                continue
            f = parse_content_type(source_attributes.get('type'))
            f['url'] = absolute_url(src)
            if is_audio:
                f['vcodec'] = 'none'
            else:
                # Do not clobber a video codec parsed from the type
                # attribute; only provide the key when it is missing
                f.setdefault('vcodec', None)
            media_info['formats'].append(f)

        # Timed text tracks listed as <track> children
        for track_tag in re.findall(r'<track[^>]+>', media_content):
            track_attributes = extract_attributes(track_tag)
            kind = track_attributes.get('kind')
            # A missing kind defaults to subtitles; captions are timed
            # text as well. Other kinds (chapters, metadata, ...) are not
            # subtitles and are skipped.
            if kind and kind not in ('subtitles', 'captions'):
                continue
            src = track_attributes.get('src')
            if not src:
                continue
            # NOTE: lang is None when the track carries none of these
            # attributes; such tracks are grouped under the None key
            lang = track_attributes.get('srclang') or track_attributes.get('lang') or track_attributes.get('label')
            media_info['subtitles'].setdefault(lang, []).append({
                'url': absolute_url(src),
            })

        # Media elements without any usable URL are not reported
        if media_info['formats']:
            entries.append(media_info)
    return entries
|
|
|
+
|
|
|
def _live_title(self, name):
|
|
|
""" Generate the title for a live video """
|
|
|
now = datetime.datetime.now()
|