11 years ago · ca7a9c1bf7
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -147,6 +147,7 @@ from .ivi import (
 
															 from .izlesene import IzleseneIE
														
 
															 from .jadorecettepub import JadoreCettePubIE
														
 
															 from .jeuxvideo import JeuxVideoIE
														
 
															+from .jove import JoveIE
														
 
															 from .jukebox import JukeboxIE
														
 
															 from .justintv import JustinTVIE
														
 
															 from .jpopsukitv import JpopsukiIE
														
@@ -178,6 +179,7 @@ from .mdr import MDRIE
 
															 from .metacafe import MetacafeIE
														
 
															 from .metacritic import MetacriticIE
														
 
															 from .mit import TechTVMITIE, MITIE, OCWMITIE
														
 
															+from .mitele import MiTeleIE
														
 
															 from .mixcloud import MixcloudIE
														
 
															 from .mlb import MLBIE
														
 
															 from .mpora import MporaIE
														
@@ -252,6 +254,7 @@ from .ro220 import Ro220IE
 
															 from .rottentomatoes import RottenTomatoesIE
														
 
															 from .roxwel import RoxwelIE
														
 
															 from .rtbf import RTBFIE
														
 
															+from .rtlnl import RtlXlIE
														
 
															 from .rtlnow import RTLnowIE
														
 
															 from .rts import RTSIE
														
 
															 from .rtve import RTVEALaCartaIE
														
--- a/youtube_dl/extractor/dfb.py
+++ b/youtube_dl/extractor/dfb.py
@@ -30,7 +30,7 @@ class DFBIE(InfoExtractor):
 
															             video_id)
														
 
															         video_info = player_info.find('video')
														
 
															-        f4m_info = self._download_xml(video_info.find('url').text, video_id)
														
 
															+        f4m_info = self._download_xml(self._proto_relative_url(video_info.find('url').text.strip()), video_id)
														
 
															         token_el = f4m_info.find('token')
														
 
															         manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
														
--- a/youtube_dl/extractor/jove.py
+++ b/youtube_dl/extractor/jove.py
@@ -0,0 +1,80 @@
 
															+from __future__ import unicode_literals
														
 
															+
														
 
															+import re
														
 
															+
														
 
															+from .common import InfoExtractor
														
 
															+from ..utils import (
														
 
															+    ExtractorError,
														
 
															+    unified_strdate
														
 
															+)
														
 
															+
														
 
															+
														
 
															+class JoveIE(InfoExtractor):
														
 
															+    _VALID_URL = r'https?://(?:www\.)?jove\.com/video/(?P<id>[0-9]+)'
														
 
															+    _CHAPTERS_URL = 'http://www.jove.com/video-chapters?videoid={video_id:}'
														
 
															+    _TESTS = [
														
 
															+        {
														
 
															+            'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current',
														
 
															+            'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b',
														
 
															+            'info_dict': {
														
 
															+                'id': '2744',
														
 
															+                'ext': 'mp4',
														
 
															+                'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation',
														
 
															+                'description': 'md5:015dd4509649c0908bc27f049e0262c6',
														
 
															+                'thumbnail': 're:^https?://.*\.png$',
														
 
															+                'upload_date': '20110523',
														
 
															+            }
														
 
															+        },
														
 
															+        {
														
 
															+            'url': 'http://www.jove.com/video/51796/culturing-caenorhabditis-elegans-axenic-liquid-media-creation',
														
 
															+            'md5': '914aeb356f416811d911996434811beb',
														
 
															+            'info_dict': {
														
 
															+                'id': '51796',
														
 
															+                'ext': 'mp4',
														
 
															+                'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment',
														
 
															+                'description': 'md5:35ff029261900583970c4023b70f1dc9',
														
 
															+                'thumbnail': 're:^https?://.*\.png$',
														
 
															+                'upload_date': '20140802',
														
 
															+            }
														
 
															+        },
														
 
															+
														
 
															+    ]
														
 
															+
														
 
															+    def _real_extract(self, url):
														
 
															+        mobj = re.match(self._VALID_URL, url)
														
 
															+        video_id = mobj.group('id')
														
 
															+
														
 
															+        webpage = self._download_webpage(url, video_id)
														
 
															+
														
 
															+        chapters_id = self._html_search_regex(
														
 
															+            r'/video-chapters\?videoid=([0-9]+)', webpage, 'chapters id')
														
 
															+
														
 
															+        chapters_xml = self._download_xml(
														
 
															+            self._CHAPTERS_URL.format(video_id=chapters_id),
														
 
															+            video_id, note='Downloading chapters XML',
														
 
															+            errnote='Failed to download chapters XML')
														
 
															+
														
 
															+        video_url = chapters_xml.attrib.get('video')
														
 
															+        if not video_url:
														
 
															+            raise ExtractorError('Failed to get the video URL')
														
 
															+
														
 
															+        title = self._html_search_meta('citation_title', webpage, 'title')
														
 
															+        thumbnail = self._og_search_thumbnail(webpage)
														
 
															+        description = self._html_search_regex(
														
 
															+            r'<div id="section_body_summary"><p class="jove_content">(.+?)</p>',
														
 
															+            webpage, 'description', fatal=False)
														
 
															+        publish_date = unified_strdate(self._html_search_meta(
														
 
															+            'citation_publication_date', webpage, 'publish date', fatal=False))
														
 
															+        comment_count = self._html_search_regex(
														
 
															+            r'<meta name="num_comments" content="(\d+) Comments?"',
														
 
															+            webpage, 'comment count', fatal=False)
														
 
															+
														
 
															+        return {
														
 
															+            'id': video_id,
														
 
															+            'title': title,
														
 
															+            'url': video_url,
														
 
															+            'thumbnail': thumbnail,
														
 
															+            'description': description,
														
 
															+            'upload_date': publish_date,
														
 
															+            'comment_count': comment_count,
														
 
															+        }
														
--- a/youtube_dl/extractor/mitele.py
+++ b/youtube_dl/extractor/mitele.py
@@ -0,0 +1,60 @@
 
															+from __future__ import unicode_literals
														
 
															+
														
 
															+import re
														
 
															+import json
														
 
															+
														
 
															+from .common import InfoExtractor
														
 
															+from ..utils import (
														
 
															+    compat_urllib_parse,
														
 
															+    get_element_by_attribute,
														
 
															+    parse_duration,
														
 
															+    strip_jsonp,
														
 
															+)
														
 
															+
														
 
															+
														
 
															+class MiTeleIE(InfoExtractor):
														
 
															+    IE_NAME = 'mitele.es'
														
 
															+    _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<episode>[^/]+)/'
														
 
															+
														
 
															+    _TEST = {
														
 
															+        'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
														
 
															+        'md5': '6a75fe9d0d3275bead0cb683c616fddb',
														
 
															+        'info_dict': {
														
 
															+            'id': '0fce117d',
														
 
															+            'ext': 'mp4',
														
 
															+            'title': 'Programa 144 - Tor, la web invisible',
														
 
															+            'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
														
 
															+            'display_id': 'programa-144',
														
 
															+            'duration': 2913,
														
 
															+        },
														
 
															+    }
														
 
															+
														
 
															+    def _real_extract(self, url):
														
 
															+        mobj = re.match(self._VALID_URL, url)
														
 
															+        episode = mobj.group('episode')
														
 
															+        webpage = self._download_webpage(url, episode)
														
 
															+        embed_data_json = self._search_regex(
														
 
															+            r'MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
														
 
															+            flags=re.DOTALL
														
 
															+        ).replace('\'', '"')
														
 
															+        embed_data = json.loads(embed_data_json)
														
 
															+
														
 
															+        info_url = embed_data['flashvars']['host']
														
 
															+        info_el = self._download_xml(info_url, episode).find('./video/info')
														
 
															+
														
 
															+        video_link = info_el.find('videoUrl/link').text
														
 
															+        token_query = compat_urllib_parse.urlencode({'id': video_link})
														
 
															+        token_info = self._download_json(
														
 
															+            'http://token.mitele.es/?' + token_query, episode,
														
 
															+            transform_source=strip_jsonp
														
 
															+        )
														
 
															+
														
 
															+        return {
														
 
															+            'id': embed_data['videoId'],
														
 
															+            'display_id': episode,
														
 
															+            'title': info_el.find('title').text,
														
 
															+            'url': token_info['tokenizedUrl'],
														
 
															+            'description': get_element_by_attribute('class', 'text', webpage),
														
 
															+            'thumbnail': info_el.find('thumb').text,
														
 
															+            'duration': parse_duration(info_el.find('duration').text),
														
 
															+        }
														
--- a/youtube_dl/extractor/pbs.py
+++ b/youtube_dl/extractor/pbs.py
@@ -20,17 +20,41 @@ class PBSIE(InfoExtractor):
 
															         )
														
 
															     '''
														
 
															-    _TEST = {
														
 
															-        'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
														
 
															-        'md5': 'ce1888486f0908d555a8093cac9a7362',
														
 
															-        'info_dict': {
														
 
															-            'id': '2365006249',
														
 
															-            'ext': 'mp4',
														
 
															-            'title': 'A More Perfect Union',
														
 
															-            'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
														
 
															-            'duration': 3190,
														
 
															+    _TESTS = [
														
 
															+        {
														
 
															+            'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
														
 
															+            'md5': 'ce1888486f0908d555a8093cac9a7362',
														
 
															+            'info_dict': {
														
 
															+                'id': '2365006249',
														
 
															+                'ext': 'mp4',
														
 
															+                'title': 'A More Perfect Union',
														
 
															+                'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
														
 
															+                'duration': 3190,
														
 
															+            },
														
 
															+        },
														
 
															+        {
														
 
															+            'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
														
 
															+            'md5': '143c98aa54a346738a3d78f54c925321',
														
 
															+            'info_dict': {
														
 
															+                'id': '2365297690',
														
 
															+                'ext': 'mp4',
														
 
															+                'title': 'Losing Iraq',
														
 
															+                'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
														
 
															+                'duration': 5050,
														
 
															+            },
														
 
															         },
														
 
															-    }
														
 
															+        {
														
 
															+            'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/',
														
 
															+            'md5': 'b19856d7f5351b17a5ab1dc6a64be633',
														
 
															+            'info_dict': {
														
 
															+                'id': '2201174722',
														
 
															+                'ext': 'mp4',
														
 
															+                'title': 'Cyber Schools Gain Popularity, but Quality Questions Persist',
														
 
															+                'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28',
														
 
															+                'duration': 801,
														
 
															+            },
														
 
															+        },
														
 
															+    ]
														
 
															     def _extract_ids(self, url):
														
 
															         mobj = re.match(self._VALID_URL, url)
														
@@ -40,10 +64,13 @@ class PBSIE(InfoExtractor):
 
															         if presumptive_id:
														
 
															             webpage = self._download_webpage(url, display_id)
														
 
															-            # frontline video embed
														
 
															+            MEDIA_ID_REGEXES = [
														
 
															+                r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",  # frontline video embed
														
 
															+                r'class="coveplayerid">([^<]+)<',                       # coveplayer
														
 
															+            ]
														
 
															+
														
 
															             media_id = self._search_regex(
														
 
															-                r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",
														
 
															-                webpage, 'frontline video ID', fatal=False, default=None)
														
 
															+                MEDIA_ID_REGEXES, webpage, 'media ID', fatal=False, default=None)
														
 
															             if media_id:
														
 
															                 return media_id, presumptive_id
														
--- a/youtube_dl/extractor/rtlnl.py
+++ b/youtube_dl/extractor/rtlnl.py
@@ -0,0 +1,52 @@
 
															+from __future__ import unicode_literals
														
 
															+
														
 
															+import re
														
 
															+
														
 
															+from .common import InfoExtractor
														
 
															+
														
 
															+
														
 
															+class RtlXlIE(InfoExtractor):
														
 
															+    IE_NAME = 'rtlxl.nl'
														
 
															+    _VALID_URL = r'https?://www\.rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
														
 
															+
														
 
															+    _TEST = {
														
 
															+        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
														
 
															+        'info_dict': {
														
 
															+            'id': '6e4203a6-0a5e-3596-8424-c599a59e0677',
														
 
															+            'ext': 'flv',
														
 
															+            'title': 'RTL Nieuws - Laat',
														
 
															+            'description': 'Dagelijks het laatste nieuws uit binnen- en '
														
 
															+                'buitenland. Voor nog meer nieuws kunt u ook gebruikmaken van '
														
 
															+                'onze mobiele apps.',
														
 
															+            'timestamp': 1408051800,
														
 
															+            'upload_date': '20140814',
														
 
															+        },
														
 
															+        'params': {
														
 
															+            # We download the first bytes of the first fragment, it can't be
														
 
															+            # processed by the f4m downloader beacuse it isn't complete
														
 
															+            'skip_download': True,
														
 
															+        },
														
 
															+    }
														
 
															+
														
 
															+    def _real_extract(self, url):
														
 
															+        mobj = re.match(self._VALID_URL, url)
														
 
															+        uuid = mobj.group('uuid')
														
 
															+
														
 
															+        info = self._download_json(
														
 
															+            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
														
 
															+            uuid)
														
 
															+        meta = info['meta']
														
 
															+        material = info['material'][0]
														
 
															+        episode_info = info['episodes'][0]
														
 
															+
														
 
															+        f4m_url = 'http://manifest.us.rtl.nl' + material['videopath']
														
 
															+        progname = info['abstracts'][0]['name']
														
 
															+        subtitle = material['title'] or info['episodes'][0]['name']
														
 
															+
														
 
															+        return {
														
 
															+            'id': uuid,
														
 
															+            'title': '%s - %s' % (progname, subtitle), 
														
 
															+            'formats': self._extract_f4m_formats(f4m_url, uuid),
														
 
															+            'timestamp': material['original_date'],
														
 
															+            'description': episode_info['synopsis'],
														
 
															+        }
														
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -827,6 +827,7 @@ def unified_strdate(date_str):
 
															         '%b %dnd %Y %I:%M%p',
														
 
															         '%b %dth %Y %I:%M%p',
														
 
															         '%Y-%m-%d',
														
 
															+        '%Y/%m/%d',
														
 
															         '%d.%m.%Y',
														
 
															         '%d/%m/%Y',
														
 
															         '%Y/%m/%d %H:%M:%S',