9 years ago · 7577d849a6
--- a/youtube_dl/extractor/extractors.py
+++ b/youtube_dl/extractor/extractors.py
@@ -631,7 +631,10 @@ from .qqmusic import (
 
				     QQMusicToplistIE,
			
 
				     QQMusicPlaylistIE,
			
 
				 )
			
 
				-from .r7 import R7IE
			
 
				+from .r7 import (
			
 
				+    R7IE,
			
 
				+    R7ArticleIE,
			
 
				+)
			
 
				 from .radiocanada import (
			
 
				     RadioCanadaIE,
			
 
				     RadioCanadaAudioVideoIE,
			
--- a/youtube_dl/extractor/r7.py
+++ b/youtube_dl/extractor/r7.py
@@ -2,22 +2,19 @@
 
				 from __future__ import unicode_literals
			
 
				 
			
 
				 from .common import InfoExtractor
			
 
				-from ..utils import (
			
 
				-    js_to_json,
			
 
				-    unescapeHTML,
			
 
				-    int_or_none,
			
 
				-)
			
 
				+from ..utils import int_or_none
			
 
				 
			
 
				 
			
 
				 class R7IE(InfoExtractor):
			
 
				-    _VALID_URL = r'''(?x)https?://
			
 
				+    _VALID_URL = r'''(?x)
			
 
				+                        https?://
			
 
				                         (?:
			
 
				                             (?:[a-zA-Z]+)\.r7\.com(?:/[^/]+)+/idmedia/|
			
 
				                             noticias\.r7\.com(?:/[^/]+)+/[^/]+-|
			
 
				                             player\.r7\.com/video/i/
			
 
				                         )
			
 
				                         (?P<id>[\da-f]{24})
			
 
				-                        '''
			
 
				+                    '''
			
 
				     _TESTS = [{
			
 
				         'url': 'http://videos.r7.com/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-/idmedia/54e7050b0cf2ff57e0279389.html',
			
 
				         'md5': '403c4e393617e8e8ddc748978ee8efde',
			
@@ -25,6 +22,7 @@ class R7IE(InfoExtractor):
 
				             'id': '54e7050b0cf2ff57e0279389',
			
 
				             'ext': 'mp4',
			
 
				             'title': 'Policiais humilham suspeito à beira da morte: "Morre com dignidade"',
			
 
				+            'description': 'md5:01812008664be76a6479aa58ec865b72',
			
 
				             'thumbnail': 're:^https?://.*\.jpg$',
			
 
				             'duration': 98,
			
 
				             'like_count': int,
			
@@ -44,45 +42,72 @@ class R7IE(InfoExtractor):
 
				     def _real_extract(self, url):
			
 
				         video_id = self._match_id(url)
			
 
				 
			
 
				-        webpage = self._download_webpage(
			
 
				-            'http://player.r7.com/video/i/%s' % video_id, video_id)
			
 
				+        video = self._download_json(
			
 
				+            'http://player-api.r7.com/video/i/%s' % video_id, video_id)
			
 
				 
			
 
				-        item = self._parse_json(js_to_json(self._search_regex(
			
 
				-            r'(?s)var\s+item\s*=\s*({.+?});', webpage, 'player')), video_id)
			
 
				-
			
 
				-        title = unescapeHTML(item['title'])
			
 
				-        thumbnail = item.get('init', {}).get('thumbUri')
			
 
				-        duration = None
			
 
				-
			
 
				-        statistics = item.get('statistics', {})
			
 
				-        like_count = int_or_none(statistics.get('likes'))
			
 
				-        view_count = int_or_none(statistics.get('views'))
			
 
				+        title = video['title']
			
 
				 
			
 
				         formats = []
			
 
				-        for format_key, format_dict in item['playlist'][0].items():
			
 
				-            src = format_dict.get('src')
			
 
				-            if not src:
			
 
				-                continue
			
 
				-            format_id = format_dict.get('format') or format_key
			
 
				-            if duration is None:
			
 
				-                duration = format_dict.get('duration')
			
 
				-            if '.f4m' in src:
			
 
				-                formats.extend(self._extract_f4m_formats(src, video_id, preference=-1))
			
 
				-            elif src.endswith('.m3u8'):
			
 
				-                formats.extend(self._extract_m3u8_formats(src, video_id, 'mp4', preference=-2))
			
 
				-            else:
			
 
				-                formats.append({
			
 
				-                    'url': src,
			
 
				-                    'format_id': format_id,
			
 
				-                })
			
 
				+        media_url_hls = video.get('media_url_hls')
			
 
				+        if media_url_hls:
			
 
				+            formats.extend(self._extract_m3u8_formats(
			
 
				+                media_url_hls, video_id, 'mp4', entry_protocol='m3u8_native',
			
 
				+                m3u8_id='hls', fatal=False))
			
 
				+        media_url = video.get('media_url')
			
 
				+        if media_url:
			
 
				+            f = {
			
 
				+                'url': media_url,
			
 
				+                'format_id': 'http',
			
 
				+            }
			
 
				+            # m3u8 format always matches the http format, let's copy metadata from
			
 
				+            # one to another
			
 
				+            m3u8_formats = list(filter(
			
 
				+                lambda f: f.get('vcodec') != 'none' and f.get('resolution') != 'multiple',
			
 
				+                formats))
			
 
				+            if len(m3u8_formats) == 1:
			
 
				+                f_copy = m3u8_formats[0].copy()
			
 
				+                f_copy.update(f)
			
 
				+                f_copy['protocol'] = 'http'
			
 
				+                f = f_copy
			
 
				+            formats.append(f)
			
 
				         self._sort_formats(formats)
			
 
				 
			
 
				+        description = video.get('description')
			
 
				+        thumbnail = video.get('thumb')
			
 
				+        duration = int_or_none(video.get('media_duration'))
			
 
				+        like_count = int_or_none(video.get('likes'))
			
 
				+        view_count = int_or_none(video.get('views'))
			
 
				+
			
 
				         return {
			
 
				             'id': video_id,
			
 
				             'title': title,
			
 
				+            'description': description,
			
 
				             'thumbnail': thumbnail,
			
 
				             'duration': duration,
			
 
				             'like_count': like_count,
			
 
				             'view_count': view_count,
			
 
				             'formats': formats,
			
 
				         }
			
 
				+
			
 
				+
			
 
				+class R7ArticleIE(InfoExtractor):
			
 
				+    _VALID_URL = r'https?://(?:[a-zA-Z]+)\.r7\.com/(?:[^/]+/)+[^/?#&]+-(?P<id>\d+)'
			
 
				+    _TEST = {
			
 
				+        'url': 'http://tv.r7.com/record-play/balanco-geral/videos/policiais-humilham-suspeito-a-beira-da-morte-morre-com-dignidade-16102015',
			
 
				+        'only_matching': True,
			
 
				+    }
			
 
				+
			
 
				+    @classmethod
			
 
				+    def suitable(cls, url):
			
 
				+        return False if R7IE.suitable(url) else super(R7ArticleIE, cls).suitable(url)
			
 
				+
			
 
				+    def _real_extract(self, url):
			
 
				+        display_id = self._match_id(url)
			
 
				+
			
 
				+        webpage = self._download_webpage(url, display_id)
			
 
				+
			
 
				+        video_id = self._search_regex(
			
 
				+            r'<div[^>]+(?:id=["\']player-|class=["\']embed["\'][^>]+id=["\'])([\da-f]{24})',
			
 
				+            webpage, 'video id')
			
 
				+
			
 
				+        return self.url_result('http://player.r7.com/video/i/%s' % video_id, R7IE.ie_key())