Jelajahi Sumber

[rtp] Add new extractor (Closes #4382)

Naglis Jonaitis 11 tahun lalu
induk
komit
c3f3b29b92
2 file diubah dengan 58 penambahan dan 0 penghapusan
  1. 1 0
      youtube_dl/extractor/__init__.py
  2. 57 0
      youtube_dl/extractor/rtp.py

+ 1 - 0
youtube_dl/extractor/__init__.py

@@ -323,6 +323,7 @@ from .roxwel import RoxwelIE
 from .rtbf import RTBFIE
 from .rtlnl import RtlXlIE
 from .rtlnow import RTLnowIE
+from .rtp import RTPIE
 from .rts import RTSIE
 from .rtve import RTVEALaCartaIE, RTVELiveIE
 from .ruhd import RUHDIE

+ 57 - 0
youtube_dl/extractor/rtp.py

@@ -0,0 +1,57 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import json
+
+from .common import InfoExtractor
+from ..utils import js_to_json
+
+
+class RTPIE(InfoExtractor):
+    """Extractor for rtp.pt/play episode pages; yields a single RTMP format."""
+    _VALID_URL = r'https?://(?:www\.)?rtp\.pt/play/p(?P<program_id>[0-9]+)/e(?P<id>[0-9]+)/?'
+    _TEST = {
+        'url': 'http://www.rtp.pt/play/p405/e174042/paixoes-cruzadas',
+        'info_dict': {
+            'id': '174042',
+            'ext': 'mp3',
+            'title': 'Paixões Cruzadas',
+            'description': 'As paixões musicais de António Cartaxo e António Macedo',
+            'thumbnail': r're:^https?://.*\.jpg',
+        },
+        'params': {
+            'skip_download': True,  # RTMP download
+        },
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict (id, title, formats, description, thumbnail) for the page at url."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id)
+        title = self._html_search_meta(
+            'twitter:title', webpage, display_name='title', fatal=True)
+        description = self._html_search_meta('description', webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+        # The player is set up by an inline RTPPLAY.player.newPlayer({...}) call; its JS object is parsed as JSON.
+        player_config = self._search_regex(
+            r'(?s)RTPPLAY\.player\.newPlayer\(\s*(\{.*?\})\s*\)', webpage, 'player config')
+        config = json.loads(js_to_json(player_config))
+        # 'file' is mandatory (KeyError if absent); its extension doubles as the play path prefix and the 'ext'.
+        path, ext = config['file'].rsplit('.', 1)
+        formats = [{
+            'app': config.get('application'),
+            'play_path': '{ext:s}:{path:s}'.format(ext=ext, path=path),
+            'page_url': url,
+            'url': 'rtmp://{streamer:s}/{application:s}'.format(**config),
+            'rtmp_live': config.get('live', False),
+            'ext': ext,
+            'vcodec': 'none' if config.get('type') == 'audio' else None,  # audio-only: no video codec
+            'player_url': 'http://programas.rtp.pt/play/player.swf?v3',
+        }]
+        return {
+            'id': video_id,
+            'title': title,
+            'formats': formats,
+            'description': description,
+            'thumbnail': thumbnail,
+        }