|
@@ -2,22 +2,19 @@
|
|
|
from __future__ import unicode_literals
|
|
|
|
|
|
from .common import InfoExtractor
|
|
|
-from ..utils import (
|
|
|
- js_to_json,
|
|
|
- unescapeHTML,
|
|
|
- int_or_none,
|
|
|
-)
|
|
|
+from ..utils import int_or_none
|
|
|
|
|
|
|
|
|
class R7IE(InfoExtractor):
|
|
|
- _VALID_URL = r'''(?x)https?://
|
|
|
+ _VALID_URL = r'''(?x)
|
|
|
+ https?://
|
|
|
(?:
|
|
|
(?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/|
|
|
|
noticias\.r7\.com(?:/[^/]+)+/[^/]+-|
|
|
|
player\.r7\.com/video/i/
|
|
|
)
|
|
|
(?P<id>[\da-f]{24})
|
|
|
- '''
|
|
|
+ '''
|
|
|
_TESTS = [{
|
|
|
'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html',
|
|
|
'md5': '403c4e393617e8e8ddc748978ee8efde',
|
|
@@ -25,6 +22,7 @@ class R7IE(InfoExtractor):
|
|
|
'id': '54e7050b0cf2ff57e0279389',
|
|
|
'ext': 'mp4',
|
|
|
'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"',
|
|
|
+ 'description': 'md5:01812008664be76a6479aa58ec865b72',
|
|
|
'thumbnail': 're:^https?://.*\.jpg$',
|
|
|
'duration': 98,
|
|
|
'like_count': int,
|
|
@@ -44,45 +42,72 @@ class R7IE(InfoExtractor):
|
|
|
def _real_extract(self, url):
|
|
|
video_id = self._match_id(url)
|
|
|
|
|
|
- webpage = self._download_webpage(
|
|
|
- 'http://player.r7.com/video/i/%s' % video_id, video_id)
|
|
|
+ video = self._download_json(
|
|
|
+ 'http://player-api.r7.com/video/i/%s' % video_id, video_id)
|
|
|
|
|
|
- item = self._parse_json(js_to_json(self._search_regex(
|
|
|
- r'(?s)var\s+item\s*=\s*({.+?});', webpage, 'player')), video_id)
|
|
|
-
|
|
|
- title = unescapeHTML(item['title'])
|
|
|
- thumbnail = item.get('init', {}).get('thumbUri')
|
|
|
- duration = None
|
|
|
-
|
|
|
- statistics = item.get('statistics', {})
|
|
|
- like_count = int_or_none(statistics.get('likes'))
|
|
|
- view_count = int_or_none(statistics.get('views'))
|
|
|
+ title = video['title']
|
|
|
|
|
|
formats = []
|
|
|
- for format_key, format_dict in item['playlist'][0].items():
|
|
|
- src = format_dict.get('src')
|
|
|
- if not src:
|
|
|
- continue
|
|
|
- format_id = format_dict.get('format') or format_key
|
|
|
- if duration is None:
|
|
|
- duration = format_dict.get('duration')
|
|
|
- if '.f4m' in src:
|
|
|
- formats.extend(self._extract_f4m_formats(src, video_id, preference=-1))
|
|
|
- elif src.endswith('.m3u8'):
|
|
|
- formats.extend(self._extract_m3u8_formats(src, video_id, 'mp4', preference=-2))
|
|
|
- else:
|
|
|
- formats.append({
|
|
|
- 'url': src,
|
|
|
- 'format_id': format_id,
|
|
|
- })
|
|
|
+ media_url_hls = video.get('media_url_hls')
|
|
|
+ if media_url_hls:
|
|
|
+ formats.extend(self._extract_m3u8_formats(
|
|
|
+ media_url_hls, video_id, 'mp4', entry_protocol='m3u8_native',
|
|
|
+ m3u8_id='hls', fatal=False))
|
|
|
+ media_url = video.get('media_url')
|
|
|
+ if media_url:
|
|
|
+ f = {
|
|
|
+ 'url': media_url,
|
|
|
+ 'format_id': 'http',
|
|
|
+ }
|
|
|
+ # m3u8 format always matches the http format, let's copy metadata from
|
|
|
+ # one to another
|
|
|
+ m3u8_formats = list(filter(
|
|
|
+ lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
|
|
|
+ formats))
|
|
|
+ if len(m3u8_formats) == 1:
|
|
|
+ f_copy = m3u8_formats[0].copy()
|
|
|
+ f_copy.update(f)
|
|
|
+ f_copy['protocol'] = 'http'
|
|
|
+ f = f_copy
|
|
|
+ formats.append(f)
|
|
|
self._sort_formats(formats)
|
|
|
|
|
|
+ description = video.get('description')
|
|
|
+ thumbnail = video.get('thumb')
|
|
|
+ duration = int_or_none(video.get('media_duration'))
|
|
|
+ like_count = int_or_none(video.get('likes'))
|
|
|
+ view_count = int_or_none(video.get('views'))
|
|
|
+
|
|
|
return {
|
|
|
'id': video_id,
|
|
|
'title': title,
|
|
|
+ 'description': description,
|
|
|
'thumbnail': thumbnail,
|
|
|
'duration': duration,
|
|
|
'like_count': like_count,
|
|
|
'view_count': view_count,
|
|
|
'formats': formats,
|
|
|
}
|
|
|
+
|
|
|
+
|
|
|
class R7ArticleIE(InfoExtractor):
    """Extractor for r7.com article pages whose URL ends in ``-<digits>``.

    The page itself is only used to locate the id of the embedded player
    video; the actual extraction is delegated to ``R7IE``.
    """

    _VALID_URL = r'https?://(?:[a-zA-Z]+)\.r7\.com/(?:[^/]+/)+[^/?#&]+-(?P<id>\d+)'
    _TEST = {
        'url': 'http://tv.r7.com/record-play/balanco-geral/videos/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-16102015',
        'only_matching': True,
    }

    @classmethod
    def suitable(cls, url):
        """Return False for any URL ``R7IE`` accepts, else the inherited check."""
        # R7IE takes precedence whenever both patterns could apply.
        if R7IE.suitable(url):
            return False
        return super(R7ArticleIE, cls).suitable(url)

    def _real_extract(self, url):
        """Find the embedded video id in the article page and hand off to R7IE."""
        display_id = self._match_id(url)

        webpage = self._download_webpage(url, display_id)

        # The 24-hex-digit id is taken from a <div> whose id attribute is
        # either "player-<id>" or, for class="embed" divs, the bare id.
        video_id = self._search_regex(
            r'<div[^>]+(?:id=["\']player-|class=["\']embed["\'][^>]+id=["\'])([\da-f]{24})',
            webpage, 'video id')

        return self.url_result('http://player.r7.com/video/i/%s' % video_id, R7IE.ie_key())
|