|
@@ -6,6 +6,7 @@ import re
|
|
|
from .common import InfoExtractor
|
|
|
from ..utils import (
|
|
|
clean_html,
|
|
|
+ determine_ext,
|
|
|
int_or_none,
|
|
|
js_to_json,
|
|
|
qualities,
|
|
@@ -33,42 +34,76 @@ class NovaEmbedIE(InfoExtractor):
|
|
|
|
|
|
webpage = self._download_webpage(url, video_id)
|
|
|
|
|
|
- bitrates = self._parse_json(
|
|
|
+ duration = None
|
|
|
+ formats = []
|
|
|
+
|
|
|
+ player = self._parse_json(
|
|
|
self._search_regex(
|
|
|
- r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
|
|
- video_id, transform_source=js_to_json)
|
|
|
+ r'Player\.init\s*\([^,]+,\s*({.+?})\s*,\s*{.+?}\s*\)\s*;',
|
|
|
+ webpage, 'player', default='{}'), video_id, fatal=False)
|
|
|
+ if player:
|
|
|
+ for format_id, format_list in player['tracks'].items():
|
|
|
+ if not isinstance(format_list, list):
|
|
|
+ format_list = [format_list]
|
|
|
+ for format_dict in format_list:
|
|
|
+ if not isinstance(format_dict, dict):
|
|
|
+ continue
|
|
|
+ format_url = url_or_none(format_dict.get('src'))
|
|
|
+ format_type = format_dict.get('type')
|
|
|
+ ext = determine_ext(format_url)
|
|
|
+ if (format_type == 'application/x-mpegURL'
|
|
|
+ or format_id == 'HLS' or ext == 'm3u8'):
|
|
|
+ formats.extend(self._extract_m3u8_formats(
|
|
|
+ format_url, video_id, 'mp4',
|
|
|
+ entry_protocol='m3u8_native', m3u8_id='hls',
|
|
|
+ fatal=False))
|
|
|
+ elif (format_type == 'application/dash+xml'
|
|
|
+ or format_id == 'DASH' or ext == 'mpd'):
|
|
|
+ formats.extend(self._extract_mpd_formats(
|
|
|
+ format_url, video_id, mpd_id='dash', fatal=False))
|
|
|
+ else:
|
|
|
+ formats.append({
|
|
|
+ 'url': format_url,
|
|
|
+ })
|
|
|
+ duration = int_or_none(player.get('duration'))
|
|
|
+ else:
|
|
|
+ # Old path, no longer in use as of 08.04.2020
|
|
|
+ bitrates = self._parse_json(
|
|
|
+ self._search_regex(
|
|
|
+ r'(?s)(?:src|bitrates)\s*=\s*({.+?})\s*;', webpage, 'formats'),
|
|
|
+ video_id, transform_source=js_to_json)
|
|
|
|
|
|
- QUALITIES = ('lq', 'mq', 'hq', 'hd')
|
|
|
- quality_key = qualities(QUALITIES)
|
|
|
+ QUALITIES = ('lq', 'mq', 'hq', 'hd')
|
|
|
+ quality_key = qualities(QUALITIES)
|
|
|
+
|
|
|
+ for format_id, format_list in bitrates.items():
|
|
|
+ if not isinstance(format_list, list):
|
|
|
+ format_list = [format_list]
|
|
|
+ for format_url in format_list:
|
|
|
+ format_url = url_or_none(format_url)
|
|
|
+ if not format_url:
|
|
|
+ continue
|
|
|
+ if format_id == 'hls':
|
|
|
+ formats.extend(self._extract_m3u8_formats(
|
|
|
+ format_url, video_id, ext='mp4',
|
|
|
+ entry_protocol='m3u8_native', m3u8_id='hls',
|
|
|
+ fatal=False))
|
|
|
+ continue
|
|
|
+ f = {
|
|
|
+ 'url': format_url,
|
|
|
+ }
|
|
|
+ f_id = format_id
|
|
|
+ for quality in QUALITIES:
|
|
|
+ if '%s.mp4' % quality in format_url:
|
|
|
+ f_id += '-%s' % quality
|
|
|
+ f.update({
|
|
|
+ 'quality': quality_key(quality),
|
|
|
+ 'format_note': quality.upper(),
|
|
|
+ })
|
|
|
+ break
|
|
|
+ f['format_id'] = f_id
|
|
|
+ formats.append(f)
|
|
|
|
|
|
- formats = []
|
|
|
- for format_id, format_list in bitrates.items():
|
|
|
- if not isinstance(format_list, list):
|
|
|
- format_list = [format_list]
|
|
|
- for format_url in format_list:
|
|
|
- format_url = url_or_none(format_url)
|
|
|
- if not format_url:
|
|
|
- continue
|
|
|
- if format_id == 'hls':
|
|
|
- formats.extend(self._extract_m3u8_formats(
|
|
|
- format_url, video_id, ext='mp4',
|
|
|
- entry_protocol='m3u8_native', m3u8_id='hls',
|
|
|
- fatal=False))
|
|
|
- continue
|
|
|
- f = {
|
|
|
- 'url': format_url,
|
|
|
- }
|
|
|
- f_id = format_id
|
|
|
- for quality in QUALITIES:
|
|
|
- if '%s.mp4' % quality in format_url:
|
|
|
- f_id += '-%s' % quality
|
|
|
- f.update({
|
|
|
- 'quality': quality_key(quality),
|
|
|
- 'format_note': quality.upper(),
|
|
|
- })
|
|
|
- break
|
|
|
- f['format_id'] = f_id
|
|
|
- formats.append(f)
|
|
|
self._sort_formats(formats)
|
|
|
|
|
|
title = self._og_search_title(
|
|
@@ -81,7 +116,8 @@ class NovaEmbedIE(InfoExtractor):
|
|
|
r'poster\s*:\s*(["\'])(?P<value>(?:(?!\1).)+)\1', webpage,
|
|
|
'thumbnail', fatal=False, group='value')
|
|
|
duration = int_or_none(self._search_regex(
|
|
|
- r'videoDuration\s*:\s*(\d+)', webpage, 'duration', fatal=False))
|
|
|
+ r'videoDuration\s*:\s*(\d+)', webpage, 'duration',
|
|
|
+ default=duration))
|
|
|
|
|
|
return {
|
|
|
'id': video_id,
|