Browse Source

[rudo] Add new extractor

Used in biobiochile.tv
Yen Chi Hsuan 9 years ago
parent
commit
8e7020daef
2 changed files with 54 additions and 0 deletions
  1. +1 -0
      youtube_dl/extractor/extractors.py
  2. +53 -0
      youtube_dl/extractor/rudo.py

+ 1 - 0
youtube_dl/extractor/extractors.py

@@ -691,6 +691,7 @@ from .rtp import RTPIE
 from .rts import RTSIE
 from .rtve import RTVEALaCartaIE, RTVELiveIE, RTVEInfantilIE
 from .rtvnh import RTVNHIE
+from .rudo import RudoIE
 from .ruhd import RUHDIE
 from .ruleporn import RulePornIE
 from .rutube import (

+ 53 - 0
youtube_dl/extractor/rudo.py

@@ -0,0 +1,53 @@
+# coding: utf-8
+from __future__ import unicode_literals
+
+import re
+
+from .jwplatform import JWPlatformBaseIE
+from ..utils import (
+    js_to_json,
+    get_element_by_class,
+    unified_strdate,
+)
+
+
+class RudoIE(JWPlatformBaseIE):
+    """Extractor for rudo.video VOD pages; reads the page's playerInstance.setup() config."""
+    _VALID_URL = r'https?://rudo\.video/vod/(?P<id>[0-9a-zA-Z]+)'
+
+    _TEST = {
+        'url': 'http://rudo.video/vod/oTzw0MGnyG',
+        'md5': '2a03a5b32dd90a04c83b6d391cf7b415',
+        'info_dict': {
+            'id': 'oTzw0MGnyG',
+            'ext': 'mp4',
+            'title': 'Comentario Tomás Mosciatti',
+            'upload_date': '20160617',
+        },
+    }
+
+    @classmethod
+    def _extract_url(cls, webpage):
+        """Return the src of the first rudo.video/vod iframe found in webpage, or None."""
+        mobj = re.search(
+            r'<iframe[^>]+src=(?P<q1>[\'"])(?P<url>(?:https?:)?//rudo\.video/vod/[0-9a-zA-Z]+)(?P=q1)',
+            webpage)
+        if mobj:
+            return mobj.group('url')
+
+    def _real_extract(self, url):
+        """Download the VOD page and return the info dict built from its player setup data."""
+        video_id = self._match_id(url)
+        webpage = self._download_webpage(url, video_id, encoding='iso-8859-1')
+        # encodeURI(...) calls are swapped for "" before js_to_json; presumably they break JSON parsing.
+        jwplayer_data = self._parse_json(self._search_regex(
+            r'(?s)playerInstance\.setup\(({.+?})\)', webpage, 'jwplayer data'), video_id,
+            transform_source=lambda s: js_to_json(re.sub(r'encodeURI\([^)]+\)', '""', s)))
+
+        info_dict = self._parse_jwplayer_data(
+            jwplayer_data, video_id, require_title=False, m3u8_id='hls')
+        info_dict.update({
+            'title': self._og_search_title(webpage),
+            'upload_date': unified_strdate(get_element_by_class('date', webpage)),
+        })
+        return info_dict