12 years ago · 99859d436c
--- a/devscripts/youtube_genalgo.py
+++ b/devscripts/youtube_genalgo.py
@@ -26,9 +26,9 @@ tests = [
 
															     # 85
														
 
															     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?/>.<",
														
 
															      ".>/?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ0q876543r1mnbvcx9asdfghjklpoiuyt2"),
														
 
															-    # 84
														
 
															+    # 84 - vflh9ybst 2013/08/23 (sporadic)
														
 
															     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<",
														
 
															-     "<.>?;}[{=+-_)(*&^%$#@!MNBVCXZASDFGHJKLPOIUYTREWQ09876543q1mnbvcxzasdfghjklpoiuew2"),
														
 
															+     "yuioplkjhgfdsazxcvbnm1234567890QWERrYUIOPLKqHGFDSAZXCVBNM!@#$%^&*()_-+={[};?>.<"),
														
 
															     # 83
														
 
															     ("qwertyuioplkjhgfdsazxcvbnm1234567890QWERTYUIOPLKJHGFDSAZXCVBNM!#$%^&*()_+={[};?/>.<",
														
 
															      ".>/?;}[{=+_)(*&^%<#!MNBVCXZASPFGHJKLwOIUYTREWQ0987654321mnbvcxzasdfghjklpoiuytreq"),
														
--- a/youtube_dl/extractor/__init__.py
+++ b/youtube_dl/extractor/__init__.py
@@ -29,6 +29,7 @@ from .gametrailers import GametrailersIE
 
															 from .generic import GenericIE
														
 
															 from .googleplus import GooglePlusIE
														
 
															 from .googlesearch import GoogleSearchIE
														
 
															+from .hark import HarkIE
														
 
															 from .hotnewhiphop import HotNewHipHopIE
														
 
															 from .howcast import HowcastIE
														
 
															 from .hypem import HypemIE
														
@@ -57,6 +58,7 @@ from .pornotube import PornotubeIE
 
															 from .rbmaradio import RBMARadioIE
														
 
															 from .redtube import RedTubeIE
														
 
															 from .ringtv import RingTVIE
														
 
															+from .ro220 import Ro220IE
														
 
															 from .roxwel import RoxwelIE
														
 
															 from .rtlnow import RTLnowIE
														
 
															 from .sina import SinaIE
														
@@ -116,12 +118,14 @@ _ALL_CLASSES = [
 
															 ]
														
 
															 _ALL_CLASSES.append(GenericIE)
														
 
															+
														
 
															 def gen_extractors():
														
 
															     """ Return a list of an instance of every supported extractor.
														
 
															     The order does matter; the first extractor matched is the one handling the URL.
														
 
															     """
														
 
															     return [klass() for klass in _ALL_CLASSES]
														
 
															+
														
 
															 def get_info_extractor(ie_name):
														
 
															     """Returns the info extractor class with the given ie_name"""
														
 
															     return globals()[ie_name+'IE']
														
--- a/youtube_dl/extractor/c56.py
+++ b/youtube_dl/extractor/c56.py
@@ -12,8 +12,8 @@ class C56IE(InfoExtractor):
 
															     _TEST ={
														
 
															         u'url': u'http://www.56.com/u39/v_OTM0NDA3MTY.html',
														
 
															-        u'file': u'93440716.mp4',
														
 
															-        u'md5': u'9dc07b5c8e978112a6441f9e75d2b59e',
														
 
															+        u'file': u'93440716.flv',
														
 
															+        u'md5': u'e59995ac63d0457783ea05f93f12a866',
														
 
															         u'info_dict': {
														
 
															             u'title': u'网事知多少 第32期：车怒',
														
 
															         },
														
--- a/youtube_dl/extractor/dailymotion.py
+++ b/youtube_dl/extractor/dailymotion.py
@@ -21,7 +21,7 @@ class DailymotionIE(InfoExtractor):
 
															         u'file': u'x33vw9.mp4',
														
 
															         u'md5': u'392c4b85a60a90dc4792da41ce3144eb',
														
 
															         u'info_dict': {
														
 
															-            u"uploader": u"Alex and Van .", 
														
 
															+            u"uploader": u"Amphora Alex and Van .", 
														
 
															             u"title": u"Tutoriel de Youtubeur\"DL DES VIDEO DE YOUTUBE\""
														
 
															         }
														
 
															     }
														
--- a/youtube_dl/extractor/generic.py
+++ b/youtube_dl/extractor/generic.py
@@ -7,12 +7,14 @@ from .common import InfoExtractor
 
															 from ..utils import (
														
 
															     compat_urllib_error,
														
 
															     compat_urllib_parse,
														
 
															+    compat_urllib_parse_urlparse,
														
 
															     compat_urllib_request,
														
 
															     ExtractorError,
														
 
															 )
														
 
															 from .brightcove import BrightcoveIE
														
 
															+
														
 
															 class GenericIE(InfoExtractor):
														
 
															     IE_DESC = u'Generic downloader that works on some sites'
														
 
															     _VALID_URL = r'.*'
														
@@ -23,7 +25,7 @@ class GenericIE(InfoExtractor):
 
															             u'file': u'13601338388002.mp4',
														
 
															             u'md5': u'85b90ccc9d73b4acd9138d3af4c27f89',
														
 
															             u'info_dict': {
														
 
															-                u"uploader": u"www.hodiho.fr", 
														
 
															+                u"uploader": u"www.hodiho.fr",
														
 
															                 u"title": u"R\u00e9gis plante sa Jeep"
														
 
															             }
														
 
															         },
														
@@ -124,7 +126,7 @@ class GenericIE(InfoExtractor):
 
															             raise ExtractorError(u'Invalid URL: %s' % url)
														
 
															         self.report_extraction(video_id)
														
 
															-        # Look for BrigthCove:
														
 
															+        # Look for BrightCove:
														
 
															         m_brightcove = re.search(r'<object.+?class=([\'"]).*?BrightcoveExperience.*?\1.+?</object>', webpage, re.DOTALL)
														
 
															         if m_brightcove is not None:
														
 
															             self.to_screen(u'Brightcove video detected.')
														
@@ -161,6 +163,10 @@ class GenericIE(InfoExtractor):
 
															             raise ExtractorError(u'Invalid URL: %s' % url)
														
 
															         video_url = compat_urllib_parse.unquote(mobj.group(1))
														
 
															+        if video_url.startswith('//'):
														
 
															+            video_url = compat_urllib_parse_urlparse(url).scheme + ':' + video_url
														
 
															+        if '://' not in video_url:
														
 
															+            video_url = url + ('' if url.endswith('/') else '/') + video_url
														
 
															         video_id = os.path.basename(video_url)
														
 
															         # here's a fun little line of code for you:
														
--- a/youtube_dl/extractor/hark.py
+++ b/youtube_dl/extractor/hark.py
@@ -0,0 +1,35 @@
 
															+# -*- coding: utf-8 -*-
														
 
															+
														
 
															+import re
														
 
															+
														
 
															+from .common import InfoExtractor
														
 
															+from ..utils import determine_ext
														
 
															+
														
 
															+class HarkIE(InfoExtractor):
														
 
															+    _VALID_URL = r'https?://www\.hark\.com/clips/(.+?)-.+'
														
 
															+    _TEST = {
														
 
															+        u'url': u'http://www.hark.com/clips/mmbzyhkgny-obama-beyond-the-afghan-theater-we-only-target-al-qaeda-on-may-23-2013',
														
 
															+        u'file': u'mmbzyhkgny.mp3',
														
 
															+        u'md5': u'6783a58491b47b92c7c1af5a77d4cbee',
														
 
															+        u'info_dict': {
														
 
															+            u"title": u"Obama: 'Beyond The Afghan Theater, We Only Target Al Qaeda' On May 23, 2013 ",
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    def _real_extract(self, url):
														
 
															+        mobj = re.match(self._VALID_URL, url)
														
 
															+        video_id = mobj.group(1)
														
 
															+        embed_url = "http://www.hark.com/clips/%s/homepage_embed" %(video_id)
														
 
															+        webpage = self._download_webpage(embed_url, video_id)
														
 
															+
														
 
															+        final_url = self._search_regex(r'src="(.+?).mp3"',
														
 
															+                                webpage, 'video url')+'.mp3'
														
 
															+        title = self._html_search_regex(r'<title>(.+?)</title>',
														
 
															+                                webpage, 'video title').replace(' Sound Clip and Quote - Hark','').replace(
														
 
															+                                'Sound Clip , Quote, MP3, and Ringtone - Hark','')
														
 
															+
														
 
															+        return {'id': video_id,
														
 
															+                'url' : final_url,
														
 
															+                'title': title,
														
 
															+                'ext': determine_ext(final_url),
														
 
															+                }
														
--- a/youtube_dl/extractor/ro220.py
+++ b/youtube_dl/extractor/ro220.py
@@ -0,0 +1,42 @@
 
															+import re
														
 
															+
														
 
															+from .common import InfoExtractor
														
 
															+from ..utils import (
														
 
															+    clean_html,
														
 
															+    compat_parse_qs,
														
 
															+)
														
 
															+
														
 
															+
														
 
															+class Ro220IE(InfoExtractor):
														
 
															+    IE_NAME = '220.ro'
														
 
															+    _VALID_URL = r'(?x)(?:https?://)?(?:www\.)?220\.ro/(?P<category>[^/]+)/(?P<shorttitle>[^/]+)/(?P<video_id>[^/]+)'
														
 
															+    _TEST = {
														
 
															+        u"url": u"http://www.220.ro/sport/Luati-Le-Banii-Sez-4-Ep-1/LYV6doKo7f/",
														
 
															+        u'file': u'LYV6doKo7f.mp4',
														
 
															+        u'md5': u'03af18b73a07b4088753930db7a34add',
														
 
															+        u'info_dict': {
														
 
															+            u"title": u"Luati-le Banii sez 4 ep 1",
														
 
															+            u"description": u"Iata-ne reveniti dupa o binemeritata vacanta. Va astept si pe Facebook cu pareri si comentarii.",
														
 
															+        }
														
 
															+    }
														
 
															+
														
 
															+    def _real_extract(self, url):
														
 
															+        mobj = re.match(self._VALID_URL, url)
														
 
															+        video_id = mobj.group('video_id')
														
 
															+
														
 
															+        webpage = self._download_webpage(url, video_id)
														
 
															+        flashVars_str = self._search_regex(
														
 
															+            r'<param name="flashVars" value="([^"]+)"',
														
 
															+            webpage, u'flashVars')
														
 
															+        flashVars = compat_parse_qs(flashVars_str)
														
 
															+
														
 
															+        info = {
														
 
															+            '_type': 'video',
														
 
															+            'id': video_id,
														
 
															+            'ext': 'mp4',
														
 
															+            'url': flashVars['videoURL'][0],
														
 
															+            'title': flashVars['title'][0],
														
 
															+            'description': clean_html(flashVars['desc'][0]),
														
 
															+            'thumbnail': flashVars['preview'][0],
														
 
															+        }
														
 
															+        return info
														
--- a/youtube_dl/extractor/rtlnow.py
+++ b/youtube_dl/extractor/rtlnow.py
@@ -8,8 +8,8 @@ from ..utils import (
 
															 )
														
 
															 class RTLnowIE(InfoExtractor):
														
 
															-    """Information Extractor for RTLnow, RTL2now and VOXnow"""
														
 
															-    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl(?:(?P<is_rtl2>2)|-)now\.rtl(?(is_rtl2)2|)\.de/|(?:www\.)?voxnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
														
 
															+    """Information Extractor for RTL NOW, RTL2 NOW, SUPER RTL NOW and VOX NOW"""
														
 
															+    _VALID_URL = r'(?:http://)?(?P<url>(?P<base_url>rtl-now\.rtl\.de/|rtl2now\.rtl2\.de/|(?:www\.)?voxnow\.de/|(?:www\.)?superrtlnow\.de/)[a-zA-Z0-9-]+/[a-zA-Z0-9-]+\.php\?(?:container_id|film_id)=(?P<video_id>[0-9]+)&player=1(?:&season=[0-9]+)?(?:&.*)?)'
														
 
															     _TESTS = [{
														
 
															         u'url': u'http://rtl-now.rtl.de/ahornallee/folge-1.php?film_id=90419&player=1&season=1',
														
 
															         u'file': u'90419.flv',
														
@@ -48,6 +48,19 @@ class RTLnowIE(InfoExtractor):
 
															         u'params': {
														
 
															             u'skip_download': True,
														
 
															         },
														
 
															+    },
														
 
															+    {
														
 
															+        u'url': u'http://superrtlnow.de/medicopter-117/angst.php?film_id=99205&player=1',
														
 
															+        u'file': u'99205.flv',
														
 
															+        u'info_dict': {
														
 
															+            u'upload_date': u'20080928', 
														
 
															+            u'title': u'Medicopter 117 - Angst!',
														
 
															+            u'description': u'Angst!',
														
 
															+            u'thumbnail': u'http://autoimg.static-fra.de/superrtlnow/287529/1500x1500/image2.jpg'
														
 
															+        },
														
 
															+        u'params': {
														
 
															+            u'skip_download': True,
														
 
															+        },
														
 
															     }]
														
 
															     def _real_extract(self,url):
														
--- a/youtube_dl/extractor/youtube.py
+++ b/youtube_dl/extractor/youtube.py
@@ -427,7 +427,7 @@ class YoutubeIE(YoutubeBaseInfoExtractor):
 
															         elif len(s) == 85:
														
 
															             return s[83:34:-1] + s[0] + s[33:27:-1] + s[3] + s[26:19:-1] + s[34] + s[18:3:-1] + s[27]
														
 
															         elif len(s) == 84:
														
 
															-            return s[83:27:-1] + s[0] + s[26:5:-1] + s[2:0:-1] + s[27]
														
 
															+            return s[5:40] + s[3] + s[41:48] + s[0] + s[49:84]
														
 
															         elif len(s) == 83:
														
 
															             return s[81:64:-1] + s[82] + s[63:52:-1] + s[45] + s[51:45:-1] + s[1] + s[44:1:-1] + s[0]
														
 
															         elif len(s) == 82:
														
--- a/youtube_dl/utils.py
+++ b/youtube_dl/utils.py
@@ -476,7 +476,7 @@ def formatSeconds(secs):
 
															 def make_HTTPS_handler(opts):
														
 
															     if sys.version_info < (3,2):
														
 
															         # Python's 2.x handler is very simplistic
														
 
															-        return compat_urllib_request.HTTPSHandler()
														
 
															+        return YoutubeDLHandlerHTTPS()
														
 
															     else:
														
 
															         import ssl
														
 
															         context = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
														
@@ -485,7 +485,7 @@ def make_HTTPS_handler(opts):
 
															         context.verify_mode = (ssl.CERT_NONE
														
 
															                                if opts.no_check_certificate
														
 
															                                else ssl.CERT_REQUIRED)
														
 
															-        return compat_urllib_request.HTTPSHandler(context=context)
														
 
															+        return YoutubeDLHandlerHTTPS(context=context)
														
 
															 class ExtractorError(Exception):
														
 
															     """Error during info extraction."""
														
@@ -569,7 +569,8 @@ class ContentTooShortError(Exception):
 
															         self.downloaded = downloaded
														
 
															         self.expected = expected
														
 
															-class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
														
 
															+
														
 
															+class YoutubeDLHandler_Template:  # Old-style class, like HTTPHandler
														
 
															     """Handler for HTTP requests and responses.
														
 
															     This class, when installed with an OpenerDirector, automatically adds
														
@@ -602,8 +603,8 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 
															         ret.code = code
														
 
															         return ret
														
 
															-    def http_request(self, req):
														
 
															-        for h,v in std_headers.items():
														
 
															+    def _http_request(self, req):
														
 
															+        for h, v in std_headers.items():
														
 
															             if h in req.headers:
														
 
															                 del req.headers[h]
														
 
															             req.add_header(h, v)
														
@@ -618,7 +619,7 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 
															             del req.headers['Youtubedl-user-agent']
														
 
															         return req
														
 
															-    def http_response(self, req, resp):
														
 
															+    def _http_response(self, req, resp):
														
 
															         old_resp = resp
														
 
															         # gzip
														
 
															         if resp.headers.get('Content-encoding', '') == 'gzip':
														
@@ -632,8 +633,16 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 
															             resp.msg = old_resp.msg
														
 
															         return resp
														
 
															-    https_request = http_request
														
 
															-    https_response = http_response
														
 
															+
														
 
															+class YoutubeDLHandler(YoutubeDLHandler_Template, compat_urllib_request.HTTPHandler):
														
 
															+    http_request = YoutubeDLHandler_Template._http_request
														
 
															+    http_response = YoutubeDLHandler_Template._http_response
														
 
															+
														
 
															+
														
 
															+class YoutubeDLHandlerHTTPS(YoutubeDLHandler_Template, compat_urllib_request.HTTPSHandler):
														
 
															+    https_request = YoutubeDLHandler_Template._http_request
														
 
															+    https_response = YoutubeDLHandler_Template._http_response
														
 
															+
														
 
															 def unified_strdate(date_str):
														
 
															     """Return a string with the date in the format YYYYMMDD"""