瀏覽代碼

Merge remote-tracking branch 'upstream/master'

rupertbaxter2 11 年之前
父節點
當前提交
ca7a9c1bf7

+ 3 - 0
youtube_dl/extractor/__init__.py

@@ -147,6 +147,7 @@ from .ivi import (
 from .izlesene import IzleseneIE
 from .izlesene import IzleseneIE
 from .jadorecettepub import JadoreCettePubIE
 from .jadorecettepub import JadoreCettePubIE
 from .jeuxvideo import JeuxVideoIE
 from .jeuxvideo import JeuxVideoIE
+from .jove import JoveIE
 from .jukebox import JukeboxIE
 from .jukebox import JukeboxIE
 from .justintv import JustinTVIE
 from .justintv import JustinTVIE
 from .jpopsukitv import JpopsukiIE
 from .jpopsukitv import JpopsukiIE
@@ -178,6 +179,7 @@ from .mdr import MDRIE
 from .metacafe import MetacafeIE
 from .metacafe import MetacafeIE
 from .metacritic import MetacriticIE
 from .metacritic import MetacriticIE
 from .mit import TechTVMITIE, MITIE, OCWMITIE
 from .mit import TechTVMITIE, MITIE, OCWMITIE
+from .mitele import MiTeleIE
 from .mixcloud import MixcloudIE
 from .mixcloud import MixcloudIE
 from .mlb import MLBIE
 from .mlb import MLBIE
 from .mpora import MporaIE
 from .mpora import MporaIE
@@ -252,6 +254,7 @@ from .ro220 import Ro220IE
 from .rottentomatoes import RottenTomatoesIE
 from .rottentomatoes import RottenTomatoesIE
 from .roxwel import RoxwelIE
 from .roxwel import RoxwelIE
 from .rtbf import RTBFIE
 from .rtbf import RTBFIE
+from .rtlnl import RtlXlIE
 from .rtlnow import RTLnowIE
 from .rtlnow import RTLnowIE
 from .rts import RTSIE
 from .rts import RTSIE
 from .rtve import RTVEALaCartaIE
 from .rtve import RTVEALaCartaIE

+ 1 - 1
youtube_dl/extractor/dfb.py

@@ -30,7 +30,7 @@ class DFBIE(InfoExtractor):
             video_id)
             video_id)
         video_info = player_info.find('video')
         video_info = player_info.find('video')
 
 
-        f4m_info = self._download_xml(video_info.find('url').text, video_id)
+        f4m_info = self._download_xml(self._proto_relative_url(video_info.find('url').text.strip()), video_id)
         token_el = f4m_info.find('token')
         token_el = f4m_info.find('token')
         manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
         manifest_url = token_el.attrib['url'] + '?' + 'hdnea=' + token_el.attrib['auth'] + '&hdcore=3.2.0'
 
 

+ 80 - 0
youtube_dl/extractor/jove.py

@@ -0,0 +1,80 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+    unified_strdate
+)
+
+
+class JoveIE(InfoExtractor):
+    _VALID_URL = r'https?://(?:www\.)?jove\.com/video/(?P<id>[0-9]+)'
+    _CHAPTERS_URL = 'http://www.jove.com/video-chapters?videoid={video_id:}'
+    _TESTS = [
+        {
+            'url': 'http://www.jove.com/video/2744/electrode-positioning-montage-transcranial-direct-current',
+            'md5': '93723888d82dbd6ba8b3d7d0cd65dd2b',
+            'info_dict': {
+                'id': '2744',
+                'ext': 'mp4',
+                'title': 'Electrode Positioning and Montage in Transcranial Direct Current Stimulation',
+                'description': 'md5:015dd4509649c0908bc27f049e0262c6',
+                'thumbnail': 're:^https?://.*\.png$',
+                'upload_date': '20110523',
+            }
+        },
+        {
+            'url': 'http://www.jove.com/video/51796/culturing-caenorhabditis-elegans-axenic-liquid-media-creation',
+            'md5': '914aeb356f416811d911996434811beb',
+            'info_dict': {
+                'id': '51796',
+                'ext': 'mp4',
+                'title': 'Culturing Caenorhabditis elegans in Axenic Liquid Media and Creation of Transgenic Worms by Microparticle Bombardment',
+                'description': 'md5:35ff029261900583970c4023b70f1dc9',
+                'thumbnail': 're:^https?://.*\.png$',
+                'upload_date': '20140802',
+            }
+        },
+
+    ]
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        chapters_id = self._html_search_regex(
+            r'/video-chapters\?videoid=([0-9]+)', webpage, 'chapters id')
+
+        chapters_xml = self._download_xml(
+            self._CHAPTERS_URL.format(video_id=chapters_id),
+            video_id, note='Downloading chapters XML',
+            errnote='Failed to download chapters XML')
+
+        video_url = chapters_xml.attrib.get('video')
+        if not video_url:
+            raise ExtractorError('Failed to get the video URL')
+
+        title = self._html_search_meta('citation_title', webpage, 'title')
+        thumbnail = self._og_search_thumbnail(webpage)
+        description = self._html_search_regex(
+            r'<div id="section_body_summary"><p class="jove_content">(.+?)</p>',
+            webpage, 'description', fatal=False)
+        publish_date = unified_strdate(self._html_search_meta(
+            'citation_publication_date', webpage, 'publish date', fatal=False))
+        comment_count = self._html_search_regex(
+            r'<meta name="num_comments" content="(\d+) Comments?"',
+            webpage, 'comment count', fatal=False)
+
+        return {
+            'id': video_id,
+            'title': title,
+            'url': video_url,
+            'thumbnail': thumbnail,
+            'description': description,
+            'upload_date': publish_date,
+            'comment_count': comment_count,
+        }

+ 60 - 0
youtube_dl/extractor/mitele.py

@@ -0,0 +1,60 @@
+from __future__ import unicode_literals
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import (
+    compat_urllib_parse,
+    get_element_by_attribute,
+    parse_duration,
+    strip_jsonp,
+)
+
+
+class MiTeleIE(InfoExtractor):
+    IE_NAME = 'mitele.es'
+    _VALID_URL = r'http://www\.mitele\.es/[^/]+/[^/]+/[^/]+/(?P<episode>[^/]+)/'
+
+    _TEST = {
+        'url': 'http://www.mitele.es/programas-tv/diario-de/la-redaccion/programa-144/',
+        'md5': '6a75fe9d0d3275bead0cb683c616fddb',
+        'info_dict': {
+            'id': '0fce117d',
+            'ext': 'mp4',
+            'title': 'Programa 144 - Tor, la web invisible',
+            'description': 'md5:3b6fce7eaa41b2d97358726378d9369f',
+            'display_id': 'programa-144',
+            'duration': 2913,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        episode = mobj.group('episode')
+        webpage = self._download_webpage(url, episode)
+        embed_data_json = self._search_regex(
+            r'MSV\.embedData\[.*?\]\s*=\s*({.*?});', webpage, 'embed data',
+            flags=re.DOTALL
+        ).replace('\'', '"')
+        embed_data = json.loads(embed_data_json)
+
+        info_url = embed_data['flashvars']['host']
+        info_el = self._download_xml(info_url, episode).find('./video/info')
+
+        video_link = info_el.find('videoUrl/link').text
+        token_query = compat_urllib_parse.urlencode({'id': video_link})
+        token_info = self._download_json(
+            'http://token.mitele.es/?' + token_query, episode,
+            transform_source=strip_jsonp
+        )
+
+        return {
+            'id': embed_data['videoId'],
+            'display_id': episode,
+            'title': info_el.find('title').text,
+            'url': token_info['tokenizedUrl'],
+            'description': get_element_by_attribute('class', 'text', webpage),
+            'thumbnail': info_el.find('thumb').text,
+            'duration': parse_duration(info_el.find('duration').text),
+        }

+ 40 - 13
youtube_dl/extractor/pbs.py

@@ -20,17 +20,41 @@ class PBSIE(InfoExtractor):
         )
         )
     '''
     '''
 
 
-    _TEST = {
-        'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
-        'md5': 'ce1888486f0908d555a8093cac9a7362',
-        'info_dict': {
-            'id': '2365006249',
-            'ext': 'mp4',
-            'title': 'A More Perfect Union',
-            'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
-            'duration': 3190,
+    _TESTS = [
+        {
+            'url': 'http://www.pbs.org/tpt/constitution-usa-peter-sagal/watch/a-more-perfect-union/',
+            'md5': 'ce1888486f0908d555a8093cac9a7362',
+            'info_dict': {
+                'id': '2365006249',
+                'ext': 'mp4',
+                'title': 'A More Perfect Union',
+                'description': 'md5:ba0c207295339c8d6eced00b7c363c6a',
+                'duration': 3190,
+            },
+        },
+        {
+            'url': 'http://www.pbs.org/wgbh/pages/frontline/losing-iraq/',
+            'md5': '143c98aa54a346738a3d78f54c925321',
+            'info_dict': {
+                'id': '2365297690',
+                'ext': 'mp4',
+                'title': 'Losing Iraq',
+                'description': 'md5:f5bfbefadf421e8bb8647602011caf8e',
+                'duration': 5050,
+            },
         },
         },
-    }
+        {
+            'url': 'http://www.pbs.org/newshour/bb/education-jan-june12-cyberschools_02-23/',
+            'md5': 'b19856d7f5351b17a5ab1dc6a64be633',
+            'info_dict': {
+                'id': '2201174722',
+                'ext': 'mp4',
+                'title': 'Cyber Schools Gain Popularity, but Quality Questions Persist',
+                'description': 'md5:5871c15cba347c1b3d28ac47a73c7c28',
+                'duration': 801,
+            },
+        },
+    ]
 
 
     def _extract_ids(self, url):
     def _extract_ids(self, url):
         mobj = re.match(self._VALID_URL, url)
         mobj = re.match(self._VALID_URL, url)
@@ -40,10 +64,13 @@ class PBSIE(InfoExtractor):
         if presumptive_id:
         if presumptive_id:
             webpage = self._download_webpage(url, display_id)
             webpage = self._download_webpage(url, display_id)
 
 
-            # frontline video embed
+            MEDIA_ID_REGEXES = [
+                r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",  # frontline video embed
+                r'class="coveplayerid">([^<]+)<',                       # coveplayer
+            ]
+
             media_id = self._search_regex(
             media_id = self._search_regex(
-                r"div\s*:\s*'videoembed'\s*,\s*mediaid\s*:\s*'(\d+)'",
-                webpage, 'frontline video ID', fatal=False, default=None)
+                MEDIA_ID_REGEXES, webpage, 'media ID', fatal=False, default=None)
             if media_id:
             if media_id:
                 return media_id, presumptive_id
                 return media_id, presumptive_id
 
 

+ 52 - 0
youtube_dl/extractor/rtlnl.py

@@ -0,0 +1,52 @@
+from __future__ import unicode_literals
+
+import re
+
+from .common import InfoExtractor
+
+
+class RtlXlIE(InfoExtractor):
+    IE_NAME = 'rtlxl.nl'
+    _VALID_URL = r'https?://www\.rtlxl\.nl/#!/[^/]+/(?P<uuid>[^/?]+)'
+
+    _TEST = {
+        'url': 'http://www.rtlxl.nl/#!/rtl-nieuws-132237/6e4203a6-0a5e-3596-8424-c599a59e0677',
+        'info_dict': {
+            'id': '6e4203a6-0a5e-3596-8424-c599a59e0677',
+            'ext': 'flv',
+            'title': 'RTL Nieuws - Laat',
+            'description': 'Dagelijks het laatste nieuws uit binnen- en '
+                'buitenland. Voor nog meer nieuws kunt u ook gebruikmaken van '
+                'onze mobiele apps.',
+            'timestamp': 1408051800,
+            'upload_date': '20140814',
+        },
+        'params': {
+            # We download the first bytes of the first fragment, it can't be
+            # processed by the f4m downloader beacuse it isn't complete
+            'skip_download': True,
+        },
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        uuid = mobj.group('uuid')
+
+        info = self._download_json(
+            'http://www.rtl.nl/system/s4m/vfd/version=2/uuid=%s/fmt=flash/' % uuid,
+            uuid)
+        meta = info['meta']
+        material = info['material'][0]
+        episode_info = info['episodes'][0]
+
+        f4m_url = 'http://manifest.us.rtl.nl' + material['videopath']
+        progname = info['abstracts'][0]['name']
+        subtitle = material['title'] or info['episodes'][0]['name']
+
+        return {
+            'id': uuid,
+            'title': '%s - %s' % (progname, subtitle), 
+            'formats': self._extract_f4m_formats(f4m_url, uuid),
+            'timestamp': material['original_date'],
+            'description': episode_info['synopsis'],
+        }

+ 1 - 0
youtube_dl/utils.py

@@ -827,6 +827,7 @@ def unified_strdate(date_str):
         '%b %dnd %Y %I:%M%p',
         '%b %dnd %Y %I:%M%p',
         '%b %dth %Y %I:%M%p',
         '%b %dth %Y %I:%M%p',
         '%Y-%m-%d',
         '%Y-%m-%d',
+        '%Y/%m/%d',
         '%d.%m.%Y',
         '%d.%m.%Y',
         '%d/%m/%Y',
         '%d/%m/%Y',
         '%Y/%m/%d %H:%M:%S',
         '%Y/%m/%d %H:%M:%S',