Browse Source

Merge remote-tracking branch 'upstream/master'

Pierre Rudloff 12 years ago
parent
commit
c5b921b597

+ 5 - 2
youtube_dl/extractor/__init__.py

@@ -6,7 +6,9 @@ from .bandcamp import BandcampIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .bliptv import BlipTVIE, BlipTVUserIE
 from .breakcom import BreakIE
 from .breakcom import BreakIE
 from .brightcove import BrightcoveIE
 from .brightcove import BrightcoveIE
+from .c56 import C56IE
 from .canalplus import CanalplusIE
 from .canalplus import CanalplusIE
+from .canalc2 import Canalc2IE
 from .collegehumor import CollegeHumorIE
 from .collegehumor import CollegeHumorIE
 from .comedycentral import ComedyCentralIE
 from .comedycentral import ComedyCentralIE
 from .condenast import CondeNastIE
 from .condenast import CondeNastIE
@@ -29,6 +31,7 @@ from .gametrailers import GametrailersIE
 from .generic import GenericIE
 from .generic import GenericIE
 from .googleplus import GooglePlusIE
 from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
 from .googlesearch import GoogleSearchIE
+from .hark import HarkIE
 from .hotnewhiphop import HotNewHipHopIE
 from .hotnewhiphop import HotNewHipHopIE
 from .howcast import HowcastIE
 from .howcast import HowcastIE
 from .hypem import HypemIE
 from .hypem import HypemIE
@@ -72,18 +75,18 @@ from .ted import TEDIE
 from .tf1 import TF1IE
 from .tf1 import TF1IE
 from .thisav import ThisAVIE
 from .thisav import ThisAVIE
 from .traileraddict import TrailerAddictIE
 from .traileraddict import TrailerAddictIE
+from .trilulilu import TriluliluIE
 from .tudou import TudouIE
 from .tudou import TudouIE
 from .tumblr import TumblrIE
 from .tumblr import TumblrIE
 from .tutv import TutvIE
 from .tutv import TutvIE
-from .ustream import UstreamIE
 from .unistra import UnistraIE
 from .unistra import UnistraIE
+from .ustream import UstreamIE
 from .vbox7 import Vbox7IE
 from .vbox7 import Vbox7IE
 from .veoh import VeohIE
 from .veoh import VeohIE
 from .vevo import VevoIE
 from .vevo import VevoIE
 from .videofyme import VideofyMeIE
 from .videofyme import VideofyMeIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vimeo import VimeoIE, VimeoChannelIE
 from .vine import VineIE
 from .vine import VineIE
-from .c56 import C56IE
 from .wat import WatIE
 from .wat import WatIE
 from .weibo import WeiboIE
 from .weibo import WeiboIE
 from .wimp import WimpIE
 from .wimp import WimpIE

+ 2 - 2
youtube_dl/extractor/c56.py

@@ -12,8 +12,8 @@ class C56IE(InfoExtractor):
 
 
     _TEST ={
     _TEST ={
         u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
         u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
-        u'file': u'93440716.mp4',
-        u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
+        u'file': u'93440716.flv',
+        u'md5': u'e59995ac63d0457783ea05f93f12a866',
         u'info_dict': {
         u'info_dict': {
             u'title': u'网事知多少 第32期:车怒',
             u'title': u'网事知多少 第32期:车怒',
         },
         },

+ 47 - 0
youtube_dl/extractor/canalc2.py

@@ -0,0 +1,47 @@
+# coding: utf-8
+import re
+
+from .common import InfoExtractor
+
+
+class Canalc2IE(InfoExtractor):
+    """Information extractor for canalc2.tv video pages."""
+
+    # Display name of this extractor. The attribute is spelled IE_NAME (no
+    # leading underscore), matching the other extractors in this package.
+    IE_NAME = 'canalc2.tv'
+    _VALID_URL = r'http://.*?\.canalc2\.tv/video\.asp\?idVideo=(\d+)&voir=oui'
+
+    _TEST = {
+        u'url': u'http://www.canalc2.tv/video.asp?idVideo=12163&voir=oui',
+        u'file': u'12163.mp4',
+        u'md5': u'060158428b650f896c542dfbb3d6487f',
+        u'info_dict': {
+            u'title': u'Terrasses du Numérique'
+        }
+    }
+
+    def _real_extract(self, url):
+        """Build the info dict for the canalc2.tv page at ``url``.
+
+        The first group of ``_VALID_URL`` (the ``idVideo`` value) is used as
+        the video id.
+        """
+        video_id = re.match(self._VALID_URL, url).group(1)
+        webpage = self._download_webpage(url, video_id)
+        # The media path is taken from the page's so.addVariable('file', ...)
+        # call and appended to the fixed streaming host.
+        file_name = self._search_regex(
+            r"so\.addVariable\('file','(.*?)'\);",
+            webpage, 'file name')
+        video_url = 'http://vod-flash.u-strasbg.fr:8080/' + file_name
+
+        title = self._html_search_regex(
+            r'class="evenement8">(.*?)</a>', webpage, u'title')
+
+        return {
+            'id': video_id,
+            'ext': 'mp4',
+            'url': video_url,
+            'title': title,
+        }

+ 1 - 1
youtube_dl/extractor/canalplus.py

@@ -5,7 +5,7 @@ from .common import InfoExtractor
 from ..utils import unified_strdate
 from ..utils import unified_strdate
 
 
 class CanalplusIE(InfoExtractor):
 class CanalplusIE(InfoExtractor):
-    _VALID_URL = r'https?://www\.canalplus\.fr/.*?\?vid=(?P<id>\d+)'
+    _VALID_URL = r'https?://(www\.canalplus\.fr/.*?\?vid=|player\.canalplus\.fr/#/)(?P<id>\d+)'
     _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
     _VIDEO_INFO_TEMPLATE = 'http://service.canal-plus.com/video/rest/getVideosLiees/cplus/%s'
     IE_NAME = u'canalplus.fr'
     IE_NAME = u'canalplus.fr'
 
 

+ 1 - 1
youtube_dl/extractor/dailymotion.py

@@ -21,7 +21,7 @@ class DailymotionIE(InfoExtractor):
         u'file': u'x33vw9.mp4',
         u'file': u'x33vw9.mp4',
         u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
         u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
         u'info_dict': {
         u'info_dict': {
-            u"uploader": u"Alex and Van .", 
+            u"uploader": u"Amphora Alex and Van .", 
             u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
             u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
         }
         }
     }
     }

+ 1 - 1
youtube_dl/extractor/generic.py

@@ -126,7 +126,7 @@ class GenericIE(InfoExtractor):
             raise ExtractorError(u'Invalid URL: %s' % url)
             raise ExtractorError(u'Invalid URL: %s' % url)
 
 
         self.report_extraction(video_id)
         self.report_extraction(video_id)
-        # Look for BrigthCove:
+        # Look for BrightCove:
         m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
         m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
         if m_brightcove is not None:
         if m_brightcove is not None:
             self.to_screen(u'Brightcove video detected.')
             self.to_screen(u'Brightcove video detected.')

+ 44 - 0
youtube_dl/extractor/hark.py

@@ -0,0 +1,44 @@
+# -*- coding: utf-8 -*-
+
+import re
+import json
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+
+class HarkIE(InfoExtractor):
+    """Extractor for hark.com clips, driven by each clip's JSON document."""
+
+    _VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+'
+    _TEST = {
+        u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
+        u'file': u'mmbzyhkgny.mp3',
+        u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
+        u'info_dict': {
+            u'title': u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' on May 23, 2013",
+            u'description': u'President Barack Obama addressed the nation live on May 23, 2013 in a speech aimed at addressing counter-terrorism policies including the use of drone strikes, detainees at Guantanamo Bay prison facility, and American citizens who are terrorists.',
+            u'duration': 11,
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the hark.com clip at ``url``.
+
+        The clip id is the first group of ``_VALID_URL``; the remaining
+        fields are read from the JSON document at ``/clips/<id>.json``.
+        """
+        clip_id = re.match(self._VALID_URL, url).group(1)
+        raw_json = self._download_webpage(
+            "http://www.hark.com/clips/%s.json" % clip_id, clip_id)
+        clip = json.loads(raw_json)
+        media_url = clip['url']
+
+        result = {'id': clip_id, 'url': media_url}
+        result['title'] = clip['name']
+        # The file extension is derived from the media URL itself.
+        result['ext'] = determine_ext(media_url)
+        result['description'] = clip['description']
+        result['thumbnail'] = clip['image_original']
+        result['duration'] = clip['duration']
+        return result

+ 94 - 0
youtube_dl/extractor/trilulilu.py

@@ -0,0 +1,94 @@
+import json
+import re
+import xml.etree.ElementTree
+
+from .common import InfoExtractor
+from ..utils import (
+    ExtractorError,
+)
+
+
+class TriluliluIE(InfoExtractor):
+    """Extractor for video pages on trilulilu.ro."""
+
+    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?trilulilu\.ro/video-(?P<category>[^/]+)/(?P<video_id>[^/]+)'
+    _TEST = {
+        u"url": u"http://www.trilulilu.ro/video-animatie/big-buck-bunny-1",
+        u'file': u"big-buck-bunny-1.mp4",
+        u'info_dict': {
+            u"title": u"Big Buck Bunny",
+            u"description": u":) pentru copilul din noi",
+        },
+        # Server ignores Range headers (--test)
+        u"params": {
+            u"skip_download": True
+        }
+    }
+
+    def _real_extract(self, url):
+        """Return the info dict for the trilulilu.ro video page at ``url``.
+
+        Title, thumbnail and description are taken from the page through
+        the ``_og_search_*`` helpers; the available formats are listed in a
+        separately downloaded XML document.
+
+        Raises:
+            ExtractorError: if that XML document lists no formats.
+        """
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('video_id')
+
+        webpage = self._download_webpage(url, video_id)
+
+        title = self._og_search_title(webpage)
+        thumbnail = self._og_search_thumbnail(webpage)
+        description = self._og_search_description(webpage)
+
+        # block_flash_vars is a JSON object embedded in the page; its
+        # server, hash and userid entries are substituted into the URLs below.
+        log_str = self._search_regex(
+            r'block_flash_vars[ ]=[ ]({[^}]+})', webpage, u'log info')
+        log = json.loads(log_str)
+
+        format_url = (u'http://fs%(server)s.trilulilu.ro/%(hash)s/'
+                      u'video-formats2' % log)
+        format_str = self._download_webpage(
+            format_url, video_id,
+            note=u'Downloading formats',
+            errnote=u'Error while downloading formats')
+
+        format_doc = xml.etree.ElementTree.fromstring(format_str)
+
+        # '%%s' survives this first substitution as '%s' and is filled in
+        # with each format name when the formats list is built.
+        video_url_template = (
+            u'http://fs%(server)s.trilulilu.ro/stream.php?type=video'
+            u'&source=site&hash=%(hash)s&username=%(userid)s&'
+            u'key=ministhebest&format=%%s&sig=&exp=' %
+            log)
+        formats = [
+            {
+                'format': fnode.text,
+                'url': video_url_template % fnode.text,
+            }
+            for fnode in format_doc.findall('./formats/format')
+        ]
+        if not formats:
+            # Without this guard formats[-1] below fails with a bare
+            # IndexError instead of an extractor-level error.
+            raise ExtractorError(u'No video formats found')
+
+        info = {
+            '_type': 'video',
+            'id': video_id,
+            'formats': formats,
+            'title': title,
+            'description': description,
+            'thumbnail': thumbnail,
+        }
+
+        # TODO: Remove when #980 has been merged
+        info['url'] = formats[-1]['url']
+        info['ext'] = formats[-1]['format'].partition('-')[0]
+
+        return info

+ 1 - 1
youtube_dl/version.py

@@ -1,2 +1,2 @@
 
 
-__version__ = '2013.08.23'
+__version__ = '2013.08.27'