Browse Source

Merge pull request #6537 from ngld/rtvnhnl

[rtvnhnl] Added new extractor
Sergey M. 10 years ago
parent
commit
fa37c26c4d
2 changed files with 37 additions and 0 deletions
  1. 1 0
      youtube_dl/extractor/__init__.py
  2. 36 0
      youtube_dl/extractor/rtvnh.py

+ 1 - 0
youtube_dl/extractor/__init__.py

@@ -491,6 +491,7 @@ from .rtl2 import RTL2IE
 from .rtp import RTPIE
 from .rtp import RTPIE
 from .rts import RTSIE
 from .rts import RTSIE
 from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
 from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
+from .rtvnh import RTVNHIE
 from .ruhd import RUHDIE
 from .ruhd import RUHDIE
 from .rutube import (
 from .rutube import (
     RutubeIE,
     RutubeIE,

+ 36 - 0
youtube_dl/extractor/rtvnh.py

@@ -0,0 +1,36 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+from .common import InfoExtractor
+
+
+class RTVNHIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?rtvnh\.nl/video/(?P<id>[0-9]+)'
+    _TEST = {
+        'url': 'http://www.rtvnh.nl/video/131946',
+        'md5': '6e1d0ab079e2a00b6161442d3ceacfc1',
+        'info_dict': {
+            'id': '131946',
+            'ext': 'mp4',
+            'title': 'Grote zoektocht in zee bij Zandvoort naar vermiste vrouw',
+            'thumbnail': 're:^https?:.*\.jpg$'
+        }
+    }
+
+    def _real_extract(self, url):
+        video_id = self._match_id(url)
+        meta = self._parse_json(self._download_webpage('http://www.rtvnh.nl/video/json?m=' + video_id, video_id), video_id)
+        formats = self._extract_smil_formats('http://www.rtvnh.nl/video/smil?m=' + video_id, video_id)
+
+        for item in meta['source']['fb']:
+            if item.get('type') == 'hls':
+                formats.extend(self._extract_m3u8_formats(item['file'], video_id, ext='mp4', entry_protocol='m3u8_native'))
+            elif item.get('type') == '':
+                formats.append({'url': item['file']})
+        
+        return {
+            'id': video_id,
+            'title': meta['title'].strip(),
+            'thumbnail': meta['image'],
+            'formats': formats
+        }