فهرست منبع

[rds] extract 9c9media formats

Remita Amine 9 سال پیش
والد
کامیت
20361b4f25
1 فایل تغییر یافته به همراه 12 افزوده شده و 15 حذف شده
  1. 12 15
      youtube_dl/extractor/rds.py

+ 12 - 15
youtube_dl/extractor/rds.py

@@ -1,23 +1,23 @@
 # coding: utf-8
 from __future__ import unicode_literals
 
-import re
-
 from .common import InfoExtractor
 from ..utils import (
     parse_duration,
     parse_iso8601,
+    js_to_json,
 )
+from ..compat import compat_str
 
 
 class RDSIE(InfoExtractor):
     IE_DESC = 'RDS.ca'
-    _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<display_id>[^/]+)-(?P<id>\d+\.\d+)'
+    _VALID_URL = r'https?://(?:www\.)?rds\.ca/vid(?:[eé]|%C3%A9)os/(?:[^/]+/)*(?P<id>[^/]+)-\d+\.\d+'
 
     _TESTS = [{
         'url': 'http://www.rds.ca/videos/football/nfl/fowler-jr-prend-la-direction-de-jacksonville-3.1132799',
         'info_dict': {
-            'id': '3.1132799',
+            'id': '604333',
             'display_id': 'fowler-jr-prend-la-direction-de-jacksonville',
             'ext': 'mp4',
             'title': 'Fowler Jr. prend la direction de Jacksonville',
@@ -33,22 +33,17 @@ class RDSIE(InfoExtractor):
     }]
 
     def _real_extract(self, url):
-        mobj = re.match(self._VALID_URL, url)
-        video_id = mobj.group('id')
-        display_id = mobj.group('display_id')
+        display_id = self._match_id(url)
 
         webpage = self._download_webpage(url, display_id)
 
-        # TODO: extract f4m from 9c9media.com
-        video_url = self._search_regex(
-            r'<span[^>]+itemprop="contentURL"[^>]+content="([^"]+)"',
-            webpage, 'video url')
-
-        title = self._og_search_title(webpage) or self._html_search_meta(
+        item = self._parse_json(self._search_regex(r'(?s)itemToPush\s*=\s*({.+?});', webpage, 'item'), display_id, js_to_json)
+        video_id = compat_str(item['id'])
+        title = item.get('title') or self._og_search_title(webpage) or self._html_search_meta(
             'title', webpage, 'title', fatal=True)
         description = self._og_search_description(webpage) or self._html_search_meta(
             'description', webpage, 'description')
-        thumbnail = self._og_search_thumbnail(webpage) or self._search_regex(
+        thumbnail = item.get('urlImageBig') or self._og_search_thumbnail(webpage) or self._search_regex(
             [r'<link[^>]+itemprop="thumbnailUrl"[^>]+href="([^"]+)"',
              r'<span[^>]+itemprop="thumbnailUrl"[^>]+content="([^"]+)"'],
             webpage, 'thumbnail', fatal=False)
@@ -61,13 +56,15 @@ class RDSIE(InfoExtractor):
         age_limit = self._family_friendly_search(webpage)
 
         return {
+            '_type': 'url_transparent',
             'id': video_id,
             'display_id': display_id,
-            'url': video_url,
+            'url': '9c9media:rds_web:%s' % video_id,
             'title': title,
             'description': description,
             'thumbnail': thumbnail,
             'timestamp': timestamp,
             'duration': duration,
             'age_limit': age_limit,
+            'ie_key': 'NineCNineMedia',
         }