7 years ago · 54a5be4dba
--- a/youtube_dl/extractor/crunchyroll.py
+++ b/youtube_dl/extractor/crunchyroll.py
@@ -7,7 +7,7 @@ import zlib
 
				 
			
 
				 from hashlib import sha1
			
 
				 from math import pow, sqrt, floor
			
 
				-from .common import InfoExtractor
			
 
				+from .vrv import VRVIE
			
 
				 from ..compat import (
			
 
				     compat_b64decode,
			
 
				     compat_etree_fromstring,
			
@@ -18,6 +18,8 @@ from ..compat import (
 
				 from ..utils import (
			
 
				     ExtractorError,
			
 
				     bytes_to_intlist,
			
 
				+    extract_attributes,
			
 
				+    float_or_none,
			
 
				     intlist_to_bytes,
			
 
				     int_or_none,
			
 
				     lowercase_escape,
			
@@ -26,14 +28,13 @@ from ..utils import (
 
				     unified_strdate,
			
 
				     urlencode_postdata,
			
 
				     xpath_text,
			
 
				-    extract_attributes,
			
 
				 )
			
 
				 from ..aes import (
			
 
				     aes_cbc_decrypt,
			
 
				 )
			
 
				 
			
 
				 
			
 
				-class CrunchyrollBaseIE(InfoExtractor):
			
 
				+class CrunchyrollBaseIE(VRVIE):
			
 
				     _LOGIN_URL = 'https://www.crunchyroll.com/login'
			
 
				     _LOGIN_FORM = 'login_form'
			
 
				     _NETRC_MACHINE = 'crunchyroll'
			
@@ -148,7 +149,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
 
				             'ext': 'mp4',
			
 
				             'title': 'Wanna be the Strongest in the World Episode 1 – An Idol-Wrestler is Born!',
			
 
				             'description': 'md5:2d17137920c64f2f49981a7797d275ef',
			
 
				-            'thumbnail': 'http://img1.ak.crunchyroll.com/i/spire1-tmb/20c6b5e10f1a47b10516877d3c039cae1380951166_full.jpg',
			
 
				+            'thumbnail': r're:^https?://.*\.jpg$',
			
 
				             'uploader': 'Yomiuri Telecasting Corporation (YTV)',
			
 
				             'upload_date': '20131013',
			
 
				             'url': 're:(?!.*&amp)',
			
@@ -221,7 +222,7 @@ class CrunchyrollIE(CrunchyrollBaseIE):
 
				         'info_dict': {
			
 
				             'id': '535080',
			
 
				             'ext': 'mp4',
			
 
				-            'title': '11eyes Episode 1 – Piros éjszaka - Red Night',
			
 
				+            'title': '11eyes Episode 1 – Red Night ~ Piros éjszaka',
			
 
				             'description': 'Kakeru and Yuka are thrown into an alternate nightmarish world they call "Red Night".',
			
 
				             'uploader': 'Marvelous AQL Inc.',
			
 
				             'upload_date': '20091021',
			
@@ -437,13 +438,18 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 
				         if 'To view this, please log in to verify you are 18 or older.' in webpage:
			
 
				             self.raise_login_required()
			
 
				 
			
 
				+        media = self._parse_json(self._search_regex(
			
 
				+            r'vilos\.config\.media\s*=\s*({.+?});',
			
 
				+            webpage, 'vilos media', default='{}'), video_id)
			
 
				+        media_metadata = media.get('metadata') or {}
			
 
				+
			
 
				         video_title = self._html_search_regex(
			
 
				             r'(?s)<h1[^>]*>((?:(?!<h1).)*?<span[^>]+itemprop=["\']title["\'][^>]*>(?:(?!<h1).)+?)</h1>',
			
 
				             webpage, 'video_title')
			
 
				         video_title = re.sub(r' {2,}', ' ', video_title)
			
 
				-        video_description = self._parse_json(self._html_search_regex(
			
 
				+        video_description = (self._parse_json(self._html_search_regex(
			
 
				             r'<script[^>]*>\s*.+?\[media_id=%s\].+?({.+?"description"\s*:.+?})\);' % video_id,
			
 
				-            webpage, 'description', default='{}'), video_id).get('description')
			
 
				+            webpage, 'description', default='{}'), video_id) or media_metadata).get('description')
			
 
				         if video_description:
			
 
				             video_description = lowercase_escape(video_description.replace(r'\r\n', '\n'))
			
 
				         video_upload_date = self._html_search_regex(
			
@@ -456,91 +462,99 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 
				             [r'<a[^>]+href="/publisher/[^"]+"[^>]*>([^<]+)</a>', r'<div>\s*Publisher:\s*<span>\s*(.+?)\s*</span>\s*</div>'],
			
 
				             webpage, 'video_uploader', fatal=False)
			
 
				 
			
 
				-        available_fmts = []
			
 
				-        for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
			
 
				-            attrs = extract_attributes(a)
			
 
				-            href = attrs.get('href')
			
 
				-            if href and '/freetrial' in href:
			
 
				-                continue
			
 
				-            available_fmts.append(fmt)
			
 
				-        if not available_fmts:
			
 
				-            for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
			
 
				-                available_fmts = re.findall(p, webpage)
			
 
				-                if available_fmts:
			
 
				-                    break
			
 
				-        video_encode_ids = []
			
 
				         formats = []
			
 
				-        for fmt in available_fmts:
			
 
				-            stream_quality, stream_format = self._FORMAT_IDS[fmt]
			
 
				-            video_format = fmt + 'p'
			
 
				-            stream_infos = []
			
 
				-            streamdata = self._call_rpc_api(
			
 
				-                'VideoPlayer_GetStandardConfig', video_id,
			
 
				-                'Downloading media info for %s' % video_format, data={
			
 
				-                    'media_id': video_id,
			
 
				-                    'video_format': stream_format,
			
 
				-                    'video_quality': stream_quality,
			
 
				-                    'current_page': url,
			
 
				-                })
			
 
				-            if streamdata is not None:
			
 
				-                stream_info = streamdata.find('./{default}preload/stream_info')
			
 
				+        for stream in media.get('streams', []):
			
 
				+            formats.extend(self._extract_vrv_formats(
			
 
				+                stream.get('url'), video_id, stream.get('format'),
			
 
				+                stream.get('audio_lang'), stream.get('hardsub_lang')))
			
 
				+        if not formats:
			
 
				+            available_fmts = []
			
 
				+            for a, fmt in re.findall(r'(<a[^>]+token=["\']showmedia\.([0-9]{3,4})p["\'][^>]+>)', webpage):
			
 
				+                attrs = extract_attributes(a)
			
 
				+                href = attrs.get('href')
			
 
				+                if href and '/freetrial' in href:
			
 
				+                    continue
			
 
				+                available_fmts.append(fmt)
			
 
				+            if not available_fmts:
			
 
				+                for p in (r'token=["\']showmedia\.([0-9]{3,4})p"', r'showmedia\.([0-9]{3,4})p'):
			
 
				+                    available_fmts = re.findall(p, webpage)
			
 
				+                    if available_fmts:
			
 
				+                        break
			
 
				+            if not available_fmts:
			
 
				+                available_fmts = self._FORMAT_IDS.keys()
			
 
				+            video_encode_ids = []
			
 
				+
			
 
				+            for fmt in available_fmts:
			
 
				+                stream_quality, stream_format = self._FORMAT_IDS[fmt]
			
 
				+                video_format = fmt + 'p'
			
 
				+                stream_infos = []
			
 
				+                streamdata = self._call_rpc_api(
			
 
				+                    'VideoPlayer_GetStandardConfig', video_id,
			
 
				+                    'Downloading media info for %s' % video_format, data={
			
 
				+                        'media_id': video_id,
			
 
				+                        'video_format': stream_format,
			
 
				+                        'video_quality': stream_quality,
			
 
				+                        'current_page': url,
			
 
				+                    })
			
 
				+                if streamdata is not None:
			
 
				+                    stream_info = streamdata.find('./{default}preload/stream_info')
			
 
				+                    if stream_info is not None:
			
 
				+                        stream_infos.append(stream_info)
			
 
				+                stream_info = self._call_rpc_api(
			
 
				+                    'VideoEncode_GetStreamInfo', video_id,
			
 
				+                    'Downloading stream info for %s' % video_format, data={
			
 
				+                        'media_id': video_id,
			
 
				+                        'video_format': stream_format,
			
 
				+                        'video_encode_quality': stream_quality,
			
 
				+                    })
			
 
				                 if stream_info is not None:
			
 
				                     stream_infos.append(stream_info)
			
 
				-            stream_info = self._call_rpc_api(
			
 
				-                'VideoEncode_GetStreamInfo', video_id,
			
 
				-                'Downloading stream info for %s' % video_format, data={
			
 
				-                    'media_id': video_id,
			
 
				-                    'video_format': stream_format,
			
 
				-                    'video_encode_quality': stream_quality,
			
 
				-                })
			
 
				-            if stream_info is not None:
			
 
				-                stream_infos.append(stream_info)
			
 
				-            for stream_info in stream_infos:
			
 
				-                video_encode_id = xpath_text(stream_info, './video_encode_id')
			
 
				-                if video_encode_id in video_encode_ids:
			
 
				-                    continue
			
 
				-                video_encode_ids.append(video_encode_id)
			
 
				-
			
 
				-                video_file = xpath_text(stream_info, './file')
			
 
				-                if not video_file:
			
 
				-                    continue
			
 
				-                if video_file.startswith('http'):
			
 
				-                    formats.extend(self._extract_m3u8_formats(
			
 
				-                        video_file, video_id, 'mp4', entry_protocol='m3u8_native',
			
 
				-                        m3u8_id='hls', fatal=False))
			
 
				-                    continue
			
 
				+                for stream_info in stream_infos:
			
 
				+                    video_encode_id = xpath_text(stream_info, './video_encode_id')
			
 
				+                    if video_encode_id in video_encode_ids:
			
 
				+                        continue
			
 
				+                    video_encode_ids.append(video_encode_id)
			
 
				 
			
 
				-                video_url = xpath_text(stream_info, './host')
			
 
				-                if not video_url:
			
 
				-                    continue
			
 
				-                metadata = stream_info.find('./metadata')
			
 
				-                format_info = {
			
 
				-                    'format': video_format,
			
 
				-                    'height': int_or_none(xpath_text(metadata, './height')),
			
 
				-                    'width': int_or_none(xpath_text(metadata, './width')),
			
 
				-                }
			
 
				-
			
 
				-                if '.fplive.net/' in video_url:
			
 
				-                    video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
			
 
				-                    parsed_video_url = compat_urlparse.urlparse(video_url)
			
 
				-                    direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
			
 
				-                        netloc='v.lvlt.crcdn.net',
			
 
				-                        path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
			
 
				-                    if self._is_valid_url(direct_video_url, video_id, video_format):
			
 
				-                        format_info.update({
			
 
				-                            'format_id': 'http-' + video_format,
			
 
				-                            'url': direct_video_url,
			
 
				-                        })
			
 
				-                        formats.append(format_info)
			
 
				+                    video_file = xpath_text(stream_info, './file')
			
 
				+                    if not video_file:
			
 
				+                        continue
			
 
				+                    if video_file.startswith('http'):
			
 
				+                        formats.extend(self._extract_m3u8_formats(
			
 
				+                            video_file, video_id, 'mp4', entry_protocol='m3u8_native',
			
 
				+                            m3u8_id='hls', fatal=False))
			
 
				                         continue
			
 
				 
			
 
				-                format_info.update({
			
 
				-                    'format_id': 'rtmp-' + video_format,
			
 
				-                    'url': video_url,
			
 
				-                    'play_path': video_file,
			
 
				-                    'ext': 'flv',
			
 
				-                })
			
 
				-                formats.append(format_info)
			
 
				+                    video_url = xpath_text(stream_info, './host')
			
 
				+                    if not video_url:
			
 
				+                        continue
			
 
				+                    metadata = stream_info.find('./metadata')
			
 
				+                    format_info = {
			
 
				+                        'format': video_format,
			
 
				+                        'height': int_or_none(xpath_text(metadata, './height')),
			
 
				+                        'width': int_or_none(xpath_text(metadata, './width')),
			
 
				+                    }
			
 
				+
			
 
				+                    if '.fplive.net/' in video_url:
			
 
				+                        video_url = re.sub(r'^rtmpe?://', 'http://', video_url.strip())
			
 
				+                        parsed_video_url = compat_urlparse.urlparse(video_url)
			
 
				+                        direct_video_url = compat_urlparse.urlunparse(parsed_video_url._replace(
			
 
				+                            netloc='v.lvlt.crcdn.net',
			
 
				+                            path='%s/%s' % (remove_end(parsed_video_url.path, '/'), video_file.split(':')[-1])))
			
 
				+                        if self._is_valid_url(direct_video_url, video_id, video_format):
			
 
				+                            format_info.update({
			
 
				+                                'format_id': 'http-' + video_format,
			
 
				+                                'url': direct_video_url,
			
 
				+                            })
			
 
				+                            formats.append(format_info)
			
 
				+                            continue
			
 
				+
			
 
				+                    format_info.update({
			
 
				+                        'format_id': 'rtmp-' + video_format,
			
 
				+                        'url': video_url,
			
 
				+                        'play_path': video_file,
			
 
				+                        'ext': 'flv',
			
 
				+                    })
			
 
				+                    formats.append(format_info)
			
 
				         self._sort_formats(formats, ('height', 'width', 'tbr', 'fps'))
			
 
				 
			
 
				         metadata = self._call_rpc_api(
			
@@ -549,7 +563,17 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 
				                 'media_id': video_id,
			
 
				             })
			
 
				 
			
 
				-        subtitles = self.extract_subtitles(video_id, webpage)
			
 
				+        subtitles = {}
			
 
				+        for subtitle in media.get('subtitles', []):
			
 
				+            subtitle_url = subtitle.get('url')
			
 
				+            if not subtitle_url:
			
 
				+                continue
			
 
				+            subtitles.setdefault(subtitle.get('language', 'enUS'), []).append({
			
 
				+                'url': subtitle_url,
			
 
				+                'ext': subtitle.get('format', 'ass'),
			
 
				+            })
			
 
				+        if not subtitles:
			
 
				+            subtitles = self.extract_subtitles(video_id, webpage)
			
 
				 
			
 
				         # webpage provide more accurate data than series_title from XML
			
 
				         series = self._html_search_regex(
			
@@ -557,8 +581,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 
				             webpage, 'series', fatal=False)
			
 
				         season = xpath_text(metadata, 'series_title')
			
 
				 
			
 
				-        episode = xpath_text(metadata, 'episode_title')
			
 
				-        episode_number = int_or_none(xpath_text(metadata, 'episode_number'))
			
 
				+        episode = xpath_text(metadata, 'episode_title') or media_metadata.get('title')
			
 
				+        episode_number = int_or_none(xpath_text(metadata, 'episode_number') or media_metadata.get('episode_number'))
			
 
				 
			
 
				         season_number = int_or_none(self._search_regex(
			
 
				             r'(?s)<h\d[^>]+id=["\']showmedia_about_episode_num[^>]+>.+?</h\d>\s*<h4>\s*Season (\d+)',
			
@@ -568,7 +592,8 @@ Format: Layer, Start, End, Style, Name, MarginL, MarginR, MarginV, Effect, Text
 
				             'id': video_id,
			
 
				             'title': video_title,
			
 
				             'description': video_description,
			
 
				-            'thumbnail': xpath_text(metadata, 'episode_image_url'),
			
 
				+            'duration': float_or_none(media_metadata.get('duration'), 1000),
			
 
				+            'thumbnail': xpath_text(metadata, 'episode_image_url') or media_metadata.get('thumbnail', {}).get('url'),
			
 
				             'uploader': video_uploader,
			
 
				             'upload_date': video_upload_date,
			
 
				             'series': series,
			
--- a/youtube_dl/extractor/vrv.py
+++ b/youtube_dl/extractor/vrv.py
@@ -72,7 +72,7 @@ class VRVBaseIE(InfoExtractor):
 
				 class VRVIE(VRVBaseIE):
			
 
				     IE_NAME = 'vrv'
			
 
				     _VALID_URL = r'https?://(?:www\.)?vrv\.co/watch/(?P<id>[A-Z0-9]+)'
			
 
				-    _TEST = {
			
 
				+    _TESTS = [{
			
 
				         'url': 'https://vrv.co/watch/GR9PNZ396/Hidden-America-with-Jonah-Ray:BOSTON-WHERE-THE-PAST-IS-THE-PRESENT',
			
 
				         'info_dict': {
			
 
				             'id': 'GR9PNZ396',
			
@@ -85,7 +85,28 @@ class VRVIE(VRVBaseIE):
 
				             # m3u8 download
			
 
				             'skip_download': True,
			
 
				         },
			
 
				-    }
			
 
				+    }]
			
 
				+
			
 
				+    def _extract_vrv_formats(self, url, video_id, stream_format, audio_lang, hardsub_lang):
			
 
				+        if not url or stream_format not in ('hls', 'dash'):
			
 
				+            return []
			
 
				+        stream_id = hardsub_lang or audio_lang
			
 
				+        format_id = '%s-%s' % (stream_format, stream_id)
			
 
				+        if stream_format == 'hls':
			
 
				+            adaptive_formats = self._extract_m3u8_formats(
			
 
				+                url, video_id, 'mp4', m3u8_id=format_id,
			
 
				+                note='Downloading %s m3u8 information' % stream_id,
			
 
				+                fatal=False)
			
 
				+        elif stream_format == 'dash':
			
 
				+            adaptive_formats = self._extract_mpd_formats(
			
 
				+                url, video_id, mpd_id=format_id,
			
 
				+                note='Downloading %s MPD information' % stream_id,
			
 
				+                fatal=False)
			
 
				+        if audio_lang:
			
 
				+            for f in adaptive_formats:
			
 
				+                if f.get('acodec') != 'none':
			
 
				+                    f['language'] = audio_lang
			
 
				+        return adaptive_formats
			
 
				 
			
 
				     def _real_extract(self, url):
			
 
				         video_id = self._match_id(url)
			
@@ -115,26 +136,9 @@ class VRVIE(VRVBaseIE):
 
				         for stream_type, streams in streams_json.get('streams', {}).items():
			
 
				             if stream_type in ('adaptive_hls', 'adaptive_dash'):
			
 
				                 for stream in streams.values():
			
 
				-                    stream_url = stream.get('url')
			
 
				-                    if not stream_url:
			
 
				-                        continue
			
 
				-                    stream_id = stream.get('hardsub_locale') or audio_locale
			
 
				-                    format_id = '%s-%s' % (stream_type.split('_')[1], stream_id)
			
 
				-                    if stream_type == 'adaptive_hls':
			
 
				-                        adaptive_formats = self._extract_m3u8_formats(
			
 
				-                            stream_url, video_id, 'mp4', m3u8_id=format_id,
			
 
				-                            note='Downloading %s m3u8 information' % stream_id,
			
 
				-                            fatal=False)
			
 
				-                    else:
			
 
				-                        adaptive_formats = self._extract_mpd_formats(
			
 
				-                            stream_url, video_id, mpd_id=format_id,
			
 
				-                            note='Downloading %s MPD information' % stream_id,
			
 
				-                            fatal=False)
			
 
				-                    if audio_locale:
			
 
				-                        for f in adaptive_formats:
			
 
				-                            if f.get('acodec') != 'none':
			
 
				-                                f['language'] = audio_locale
			
 
				-                    formats.extend(adaptive_formats)
			
 
				+                    formats.extend(self._extract_vrv_formats(
			
 
				+                        stream.get('url'), video_id, stream_type.split('_')[1],
			
 
				+                        audio_locale, stream.get('hardsub_locale')))
			
 
				         self._sort_formats(formats)
			
 
				 
			
 
				         subtitles = {}