Browse Source

[extractor/rte.py] Add support for RTE radio player

While here, stop RteIE changing filename extensions to .mp4. The files
saved are .flv containers with h264 video.
bpfoley 10 years ago
parent
commit
896c7a23cd
2 changed files with 70 additions and 9 deletions
  1. 1 1
      youtube_dl/extractor/__init__.py
  2. 69 8
      youtube_dl/extractor/rte.py

+ 1 - 1
youtube_dl/extractor/__init__.py

@@ -556,7 +556,7 @@ from .ro220 import Ro220IE
 from .rottentomatoes import RottenTomatoesIE
 from .rottentomatoes import RottenTomatoesIE
 from .roxwel import RoxwelIE
 from .roxwel import RoxwelIE
 from .rtbf import RTBFIE
 from .rtbf import RTBFIE
-from .rte import RteIE
+from .rte import RteIE, RteRadioIE
 from .rtlnl import RtlNlIE
 from .rtlnl import RtlNlIE
 from .rtl2 import RTL2IE
 from .rtl2 import RTL2IE
 from .rtp import RTPIE
 from .rtp import RTPIE

+ 69 - 8
youtube_dl/extractor/rte.py

@@ -5,16 +5,19 @@ from .common import InfoExtractor
 
 
 from ..utils import (
 from ..utils import (
     float_or_none,
     float_or_none,
+    unescapeHTML,
 )
 )
 
 
 
 
 class RteIE(InfoExtractor):
 class RteIE(InfoExtractor):
+    IE_NAME = 'rte'
+    IE_DESC = 'Raidió Teilifís Éireann TV'
     _VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)'
     _VALID_URL = r'https?://(?:www\.)?rte\.ie/player/[^/]{2,3}/show/[^/]+/(?P<id>[0-9]+)'
     _TEST = {
     _TEST = {
         'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/',
         'url': 'http://www.rte.ie/player/ie/show/iwitness-862/10478715/',
         'info_dict': {
         'info_dict': {
             'id': '10478715',
             'id': '10478715',
-            'ext': 'mp4',
+            'ext': 'flv',
             'title': 'Watch iWitness  online',
             'title': 'Watch iWitness  online',
             'thumbnail': 're:^https?://.*\.jpg$',
             'thumbnail': 're:^https?://.*\.jpg$',
             'description': 'iWitness : The spirit of Ireland, one voice and one minute at a time.',
             'description': 'iWitness : The spirit of Ireland, one voice and one minute at a time.',
@@ -44,13 +47,6 @@ class RteIE(InfoExtractor):
         # f4m_url = server + relative_url
         # f4m_url = server + relative_url
         f4m_url = json_string['shows'][0]['media:group'][0]['rte:server'] + json_string['shows'][0]['media:group'][0]['url']
         f4m_url = json_string['shows'][0]['media:group'][0]['rte:server'] + json_string['shows'][0]['media:group'][0]['url']
         f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
         f4m_formats = self._extract_f4m_formats(f4m_url, video_id)
-        f4m_formats = [{
-            'format_id': f['format_id'],
-            'url': f['url'],
-            'ext': 'mp4',
-            'width': f['width'],
-            'height': f['height'],
-        } for f in f4m_formats]
 
 
         return {
         return {
             'id': video_id,
             'id': video_id,
@@ -60,3 +56,68 @@ class RteIE(InfoExtractor):
             'thumbnail': thumbnail,
             'thumbnail': thumbnail,
             'duration': duration,
             'duration': duration,
         }
         }
+
+
+
+class RteRadioIE(InfoExtractor):
+    """Extractor for single recordings opened in the RTE radio player.
+
+    The recording ID is read from the URL fragment and looked up in the
+    ``rteavgen/getplaylist`` JSON feed; the first entry of its ``shows`` list
+    supplies both the metadata and the candidate media URLs.
+    """
+
+    IE_NAME = 'rte:radio'
+    IE_DESC = 'Raidió Teilifís Éireann radio'
+    # Radioplayer URLs have the specifier #!rii=<channel_id>:<id>:<playable_item_id>:<date>:
+    # where the IDs are int/empty, the date is DD-MM-YYYY, and the specifier may be truncated.
+    # An <id> uniquely defines an individual recording, and is the only part we require.
+    # The separator after <channel_id> is accepted both literally (':') and
+    # percent-encoded ('%3A').
+    _VALID_URL = r'https?://(?:www\.)?rte\.ie/radio/utils/radioplayer/rteradioweb\.html#!rii=(?:[0-9]*)(?:%3A|:)(?P<id>[0-9]+)'
+
+    _TEST = {
+        'url': 'http://www.rte.ie/radio/utils/radioplayer/rteradioweb.html#!rii=16:10507902:2414:27-12-2015:',
+        'info_dict': {
+            'id': '10507902',
+            'ext': 'flv',
+            'title': 'Gloria',
+            'thumbnail': 're:^https?://.*\.jpg$',
+            'description': 'Tim Thurston guides you through a millennium of sacred music featuring Gregorian chant, pure solo voices and choral masterpieces, framed around the glorious music of J.S. Bach.',
+            'duration': 7230.0,
+        },
+        'params': {
+            'skip_download': 'f4m fails with --test atm'
+        }
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for the recording referenced by *url*.
+
+        Returns a dict with ``id``, ``title``, ``formats``, ``description``,
+        ``thumbnail`` and ``duration``. ``title`` and the first ``shows`` /
+        ``media:group`` entries are required (a missing one raises
+        ``KeyError``/``IndexError``); the other metadata fields are optional.
+        """
+        item_id = self._match_id(url)
+        feeds_url = 'http://www.rte.ie/rteavgen/getplaylist/?type=web&format=json&id=' + item_id
+        json_string = self._download_json(feeds_url, item_id)
+
+        # NB the string values in the JSON are stored using XML escaping(!)
+        show = json_string['shows'][0]
+        title = unescapeHTML(show['title'])
+        description = unescapeHTML(show.get('description'))
+        thumbnail = show.get('thumbnail')
+        # The feed value is divided by 1000 (presumably milliseconds -> seconds;
+        # the test above expects 7230.0).
+        duration = float_or_none(show.get('duration'), 1000)
+
+        mg = show['media:group'][0]
+
+        formats = []
+
+        # Offer the direct media URL unless it uses the rtmpe: scheme.
+        media_url = mg.get('url')
+        if media_url and not media_url.startswith('rtmpe:'):
+            formats.append({'url': media_url})
+
+        # Both manifest helpers are called with fatal=False so that one broken
+        # manifest does not abort extraction; `or []` guards against a helper
+        # reporting failure with a falsy non-list value.
+        if mg.get('hls_server') and mg.get('hls_url'):
+            hls_url = mg['hls_server'] + mg['hls_url']
+            hls_formats = self._extract_m3u8_formats(
+                hls_url, item_id, 'mp4', m3u8_id='hls', fatal=False)
+            formats.extend(hls_formats or [])
+
+        if mg.get('hds_server') and mg.get('hds_url'):
+            f4m_url = mg['hds_server'] + mg['hds_url']
+            f4m_formats = self._extract_f4m_formats(
+                f4m_url, item_id, f4m_id='hds', fatal=False)
+            formats.extend(f4m_formats or [])
+
+        # Extractors are expected to hand back an ordered format list; the
+        # base-class helper sorts it (and, per upstream behaviour, reports an
+        # error when no format was found at all).
+        self._sort_formats(formats)
+
+        return {
+            'id': item_id,
+            'title': title,
+            'formats': formats,
+            'description': description,
+            'thumbnail': thumbnail,
+            'duration': duration,
+        }