Browse Source

Merge remote-tracking branch 'upstream/master'

Allan Zhou 12 years ago
parent
commit
99859d436c

+ 2 - 2
devscripts/youtube_genalgo.py

@@ -26,9 +26,9 @@ tests = [
     # 85
     # 85
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
      ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
      ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
-    # 84
+    # 84 - vflh9ybst 2013/08/23 (sporadic)
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
-     "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"),
+     "yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"),
     # 83
     # 83
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
      ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
      ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),

+ 4 - 0
youtube_dl/extractor/__init__.py

@@ -29,6 +29,7 @@ from .gametrailers import GametrailersIE
 from .generic import GenericIE
 from .generic import GenericIE
 from .googleplus import GooglePlusIE
 from .googleplus import GooglePlusIE
 from .googlesearch import GoogleSearchIE
 from .googlesearch import GoogleSearchIE
+from .hark import HarkIE
 from .hotnewhiphop import HotNewHipHopIE
 from .hotnewhiphop import HotNewHipHopIE
 from .howcast import HowcastIE
 from .howcast import HowcastIE
 from .hypem import HypemIE
 from .hypem import HypemIE
@@ -57,6 +58,7 @@ from .pornotube import PornotubeIE
 from .rbmaradio import RBMARadioIE
 from .rbmaradio import RBMARadioIE
 from .redtube import RedTubeIE
 from .redtube import RedTubeIE
 from .ringtv import RingTVIE
 from .ringtv import RingTVIE
+from .ro220 import Ro220IE
 from .roxwel import RoxwelIE
 from .roxwel import RoxwelIE
 from .rtlnow import RTLnowIE
 from .rtlnow import RTLnowIE
 from .sina import SinaIE
 from .sina import SinaIE
@@ -116,12 +118,14 @@ _ALL_CLASSES = [
 ]
 ]
 _ALL_CLASSES.append(GenericIE)
 _ALL_CLASSES.append(GenericIE)
 
 
+
 def gen_extractors():
 def gen_extractors():
     """ Return a list of an instance of every supported extractor.
     """ Return a list of an instance of every supported extractor.
     The order does matter; the first extractor matched is the one handling the URL.
     The order does matter; the first extractor matched is the one handling the URL.
     """
     """
     return [klass() for klass in _ALL_CLASSES]
     return [klass() for klass in _ALL_CLASSES]
 
 
+
 def get_info_extractor(ie_name):
 def get_info_extractor(ie_name):
     """Returns the info extractor class with the given ie_name"""
     """Returns the info extractor class with the given ie_name"""
     return globals()[ie_name+'IE']
     return globals()[ie_name+'IE']

+ 2 - 2
youtube_dl/extractor/c56.py

@@ -12,8 +12,8 @@ class C56IE(InfoExtractor):
 
 
     _TEST ={
     _TEST ={
         u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
         u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
-        u'file': u'93440716.mp4',
-        u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
+        u'file': u'93440716.flv',
+        u'md5': u'e59995ac63d0457783ea05f93f12a866',
         u'info_dict': {
         u'info_dict': {
             u'title': u'网事知多少 第32期:车怒',
             u'title': u'网事知多少 第32期:车怒',
         },
         },

+ 1 - 1
youtube_dl/extractor/dailymotion.py

@@ -21,7 +21,7 @@ class DailymotionIE(InfoExtractor):
         u'file': u'x33vw9.mp4',
         u'file': u'x33vw9.mp4',
         u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
         u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
         u'info_dict': {
         u'info_dict': {
-            u"uploader": u"Alex and Van .", 
+            u"uploader": u"Amphora Alex and Van .", 
             u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
             u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
         }
         }
     }
     }

+ 8 - 2
youtube_dl/extractor/generic.py

@@ -7,12 +7,14 @@ from .common import InfoExtractor
 from ..utils import (
 from ..utils import (
     compat_urllib_error,
     compat_urllib_error,
     compat_urllib_parse,
     compat_urllib_parse,
+    compat_urllib_parse_urlparse,
     compat_urllib_request,
     compat_urllib_request,
 
 
     ExtractorError,
     ExtractorError,
 )
 )
 from .brightcove import BrightcoveIE
 from .brightcove import BrightcoveIE
 
 
+
 class GenericIE(InfoExtractor):
 class GenericIE(InfoExtractor):
     IE_DESC = u'Generic downloader that works on some sites'
     IE_DESC = u'Generic downloader that works on some sites'
     _VALID_URL = r'.*'
     _VALID_URL = r'.*'
@@ -23,7 +25,7 @@ class GenericIE(InfoExtractor):
             u'file': u'13601338388002.mp4',
             u'file': u'13601338388002.mp4',
             u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
             u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
             u'info_dict': {
             u'info_dict': {
-                u"uploader": u"www.hodiho.fr", 
+                u"uploader": u"www.hodiho.fr",
                 u"title": u"R\u00e9gis plante sa Jeep"
                 u"title": u"R\u00e9gis plante sa Jeep"
             }
             }
         },
         },
@@ -124,7 +126,7 @@ class GenericIE(InfoExtractor):
             raise ExtractorError(u'Invalid URL: %s' % url)
             raise ExtractorError(u'Invalid URL: %s' % url)
 
 
         self.report_extraction(video_id)
         self.report_extraction(video_id)
-        # Look for BrigthCove:
+        # Look for BrightCove:
         m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
         m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
         if m_brightcove is not None:
         if m_brightcove is not None:
             self.to_screen(u'Brightcove video detected.')
             self.to_screen(u'Brightcove video detected.')
@@ -161,6 +163,10 @@ class GenericIE(InfoExtractor):
             raise ExtractorError(u'Invalid URL: %s' % url)
             raise ExtractorError(u'Invalid URL: %s' % url)
 
 
         video_url = compat_urllib_parse.unquote(mobj.group(1))
         video_url = compat_urllib_parse.unquote(mobj.group(1))
+        if video_url.startswith('//'):
+            video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url
+        if '://' not in video_url:
+            video_url = url + ('' if url.endswith('/') else '/') + video_url
         video_id = os.path.basename(video_url)
         video_id = os.path.basename(video_url)
 
 
         # here's a fun little line of code for you:
         # here's a fun little line of code for you:

+ 35 - 0
youtube_dl/extractor/hark.py

@@ -0,0 +1,35 @@
+# -*- coding: utf-8 -*-
+
+import re
+
+from .common import InfoExtractor
+from ..utils import determine_ext
+
+class HarkIE(InfoExtractor):
+    _VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+'
+    _TEST = {
+        u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
+        u'file': u'mmbzyhkgny.mp3',
+        u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
+        u'info_dict': {
+            u"title": u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' On May 23, 2013 ",
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group(1)
+        embed_url = "http://www.hark.com/clips/%s/homepage_embed" %(video_id)
+        webpage = self._download_webpage(embed_url, video_id)
+
+        final_url = self._search_regex(r'src="(.+?).mp3"',
+                                webpage, 'video url')+'.mp3'
+        title = self._html_search_regex(r'<title>(.+?)</title>',
+                                webpage, 'video title').replace(' Sound Clip and Quote - Hark','').replace(
+                                'Sound Clip , Quote, MP3, and Ringtone - Hark','')
+
+        return {'id': video_id,
+                'url' : final_url,
+                'title': title,
+                'ext': determine_ext(final_url),
+                }

+ 42 - 0
youtube_dl/extractor/ro220.py

@@ -0,0 +1,42 @@
+import re
+
+from .common import InfoExtractor
+from ..utils import (
+    clean_html,
+    compat_parse_qs,
+)
+
+
+class Ro220IE(InfoExtractor):
+    IE_NAME = '220.ro'
+    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
+    _TEST = {
+        u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
+        u'file': u'LYV6doKo7f.mp4',
+        u'md5': u'03af18b73a07b4088753930db7a34add',
+        u'info_dict': {
+            u"title": u"Luati-le Banii sez 4 ep 1",
+            u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
+        }
+    }
+
+    def _real_extract(self, url):
+        mobj = re.match(self._VALID_URL, url)
+        video_id = mobj.group('video_id')
+
+        webpage = self._download_webpage(url, video_id)
+        flashVars_str = self._search_regex(
+            r'<param name="flashVars" value="([^"]+)"',
+            webpage, u'flashVars')
+        flashVars = compat_parse_qs(flashVars_str)
+
+        info = {
+            '_type': 'video',
+            'id': video_id,
+            'ext': 'mp4',
+            'url': flashVars['videoURL'][0],
+            'title': flashVars['title'][0],
+            'description': clean_html(flashVars['desc'][0]),
+            'thumbnail': flashVars['preview'][0],
+        }
+        return info

+ 15 - 2
youtube_dl/extractor/rtlnow.py

@@ -8,8 +8,8 @@ from ..utils import (
 )
 )
 
 
 class RTLnowIE(InfoExtractor):
 class RTLnowIE(InfoExtractor):
-    """Information Extractor for RTLnow, RTL2now and VOXnow"""
-    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl(?:(?P<is_rtl2>2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
+    """Information Extractor for RTL NOW, RTL2 NOW, SUPER RTL NOW and VOX NOW"""
+    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
     _TESTS = [{
     _TESTS = [{
         u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
         u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
         u'file': u'90419.flv',
         u'file': u'90419.flv',
@@ -48,6 +48,19 @@ class RTLnowIE(InfoExtractor):
         u'params': {
         u'params': {
             u'skip_download': True,
             u'skip_download': True,
         },
         },
+    },
+    {
+        u'url': u'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
+        u'file': u'99205.flv',
+        u'info_dict': {
+            u'upload_date': u'20080928', 
+            u'title': u'Medicopter 117 - Angst!',
+            u'description': u'Angst!',
+            u'thumbnail': u'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg'
+        },
+        u'params': {
+            u'skip_download': True,
+        },
     }]
     }]
 
 
     def _real_extract(self,url):
     def _real_extract(self,url):

+ 1 - 1
youtube_dl/extractor/youtube.py

@@ -427,7 +427,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
         elif len(s) == 85:
         elif len(s) == 85:
             return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
             return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
         elif len(s) == 84:
         elif len(s) == 84:
-            return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
+            return s[5:40] + s[3] + s[41:48] + s[0] + s[49:84]
         elif len(s) == 83:
         elif len(s) == 83:
             return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
             return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
         elif len(s) == 82:
         elif len(s) == 82:

+ 17 - 8
youtube_dl/utils.py

@@ -476,7 +476,7 @@ def formatSeconds(secs):
 def make_HTTPS_handler(opts):
 def make_HTTPS_handler(opts):
     if sys.version_info < (3,2):
     if sys.version_info < (3,2):
         # Python's 2.x handler is very simplistic
         # Python's 2.x handler is very simplistic
-        return compat_urllib_request.HTTPSHandler()
+        return YoutubeDLHandlerHTTPS()
     else:
     else:
         import ssl
         import ssl
         context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
         context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
@@ -485,7 +485,7 @@ def make_HTTPS_handler(opts):
         context.verify_mode = (ssl.CERT_NONE
         context.verify_mode = (ssl.CERT_NONE
                                if opts.no_check_certificate
                                if opts.no_check_certificate
                                else ssl.CERT_REQUIRED)
                                else ssl.CERT_REQUIRED)
-        return compat_urllib_request.HTTPSHandler(context=context)
+        return YoutubeDLHandlerHTTPS(context=context)
 
 
 class ExtractorError(Exception):
 class ExtractorError(Exception):
     """Error during info extraction."""
     """Error during info extraction."""
@@ -569,7 +569,8 @@ class ContentTooShortError(Exception):
         self.downloaded = downloaded
         self.downloaded = downloaded
         self.expected = expected
         self.expected = expected
 
 
-class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
+
+class YoutubeDLHandler_Template:  # Old-style class, like HTTPHandler
     """Handler for HTTP requests and responses.
     """Handler for HTTP requests and responses.
 
 
     This class, when installed with an OpenerDirector, automatically adds
     This class, when installed with an OpenerDirector, automatically adds
@@ -602,8 +603,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
         ret.code = code
         ret.code = code
         return ret
         return ret
 
 
-    def http_request(self, req):
-        for h,v in std_headers.items():
+    def _http_request(self, req):
+        for h, v in std_headers.items():
             if h in req.headers:
             if h in req.headers:
                 del req.headers[h]
                 del req.headers[h]
             req.add_header(h, v)
             req.add_header(h, v)
@@ -618,7 +619,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
             del req.headers['Youtubedl-user-agent']
             del req.headers['Youtubedl-user-agent']
         return req
         return req
 
 
-    def http_response(self, req, resp):
+    def _http_response(self, req, resp):
         old_resp = resp
         old_resp = resp
         # gzip
         # gzip
         if resp.headers.get('Content-encoding', '') == 'gzip':
         if resp.headers.get('Content-encoding', '') == 'gzip':
@@ -632,8 +633,16 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
             resp.msg = old_resp.msg
             resp.msg = old_resp.msg
         return resp
         return resp
 
 
-    https_request = http_request
-    https_response = http_response
+
+class YoutubeDLHandler(YoutubeDLHandler_Template, compat_urllib_request.HTTPHandler):
+    http_request = YoutubeDLHandler_Template._http_request
+    http_response = YoutubeDLHandler_Template._http_response
+
+
+class YoutubeDLHandlerHTTPS(YoutubeDLHandler_Template, compat_urllib_request.HTTPSHandler):
+    https_request = YoutubeDLHandler_Template._http_request
+    https_response = YoutubeDLHandler_Template._http_response
+
 
 
 def unified_strdate(date_str):
 def unified_strdate(date_str):
     """Return a string with the date in the format YYYYMMDD"""
     """Return a string with the date in the format YYYYMMDD"""