@@ -2678,17 +2678,52 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
     def compress(data):
         return data and ncompress.decompress(data)
 
+    @staticmethod
+    def _fix_path(url):
+        # an embedded /../ or /./ sequence is not automatically handled by urllib2
+        # see https://github.com/yt-dlp/yt-dlp/issues/3355
+        parsed_url = compat_urllib_parse.urlsplit(url)
+        path = parsed_url.path
+        if not path.endswith('/'):
+            path += '/'
+        parts = path.partition('/./')
+        if not parts[1]:
+            parts = path.partition('/../')
+        if parts[1]:
+            path = compat_urllib_parse.urljoin(
+                parts[0] + parts[1][:1],
+                parts[1][1:] + (parts[2] if parsed_url.path.endswith('/') else parts[2][:-1]))
+            url = parsed_url._replace(path=path).geturl()
+        if '/.' in url:
+            # worse, URL path may have initial /../ against RFCs: work-around
+            # by stripping such prefixes, like eg Firefox
+            path = parsed_url.path + '/'
+            while path.startswith('/.'):
+                if path.startswith('/../'):
+                    path = path[3:]
+                elif path.startswith('/./'):
+                    path = path[2:]
+                else:
+                    break
+            path = path[:-1]
+            if not path.startswith('/') and parsed_url.path.startswith('/'):
+                path = '/' + path
+            url = parsed_url._replace(path=path).geturl()
+        return url
+
     def http_request(self, req):
-        # According to RFC 3986, URLs can not contain non-ASCII characters, however this is not
-        # always respected by websites, some tend to give out URLs with non percent-encoded
+        url = req.get_full_url()
+        # resolve embedded . and ..
+        url_fixed = self._fix_path(url)
+        # According to RFC 3986, URLs can not contain non-ASCII characters; however this is not
+        # always respected by websites: some tend to give out URLs with non percent-encoded
         # non-ASCII characters (see telemb.py, ard.py [#3412])
         # urllib chokes on URLs with non-ASCII characters (see http://bugs.python.org/issue3991)
         # To work around aforementioned issue we will replace request's original URL with
         # percent-encoded one
         # Since redirects are also affected (e.g. http://www.southpark.de/alle-episoden/s18e09)
         # the code of this workaround has been moved here from YoutubeDL.urlopen()
-        url = req.get_full_url()
-        url_escaped = escape_url(url)
+        url_escaped = escape_url(url_fixed)
 
         # Substitute URL if any change after escaping
         if url != url_escaped:
@@ -2702,10 +2737,13 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
 
         req.headers = handle_youtubedl_headers(req.headers)
 
-        if sys.version_info < (2, 7) and '#' in req.get_full_url():
-            # Python 2.6 is brain-dead when it comes to fragments
-            req._Request__original = req._Request__original.partition('#')[0]
-            req._Request__r_type = req._Request__r_type.partition('#')[0]
+        if sys.version_info < (2, 7):
+            # avoid possible race where __r_type may be unset
+            req.get_type()
+            if '#' in req.get_full_url():
+                # Python 2.6 is brain-dead when it comes to fragments
+                req._Request__original = req._Request__original.partition('#')[0]
+                req._Request__r_type = req._Request__r_type.partition('#')[0]
 
         # Use the totally undocumented AbstractHTTPHandler per
         # https://github.com/yt-dlp/yt-dlp/pull/4158
@@ -2775,10 +2813,13 @@ class YoutubeDLHandler(compat_urllib_request.HTTPHandler):
                 if sys.version_info >= (3, 0):
                     location = location.encode('iso-8859-1')
                 location = location.decode('utf-8')
-                location_escaped = escape_url(location)
+                # resolve embedded . and ..
+                location_fixed = self._fix_path(location)
+                location_escaped = escape_url(location_fixed)
                 if location != location_escaped:
                     del resp.headers['Location']
-                    if sys.version_info < (3, 0):
+                    # if sys.version_info < (3, 0):
+                    if not isinstance(location_escaped, str):
                         location_escaped = location_escaped.encode('utf-8')
                     resp.headers['Location'] = location_escaped
         return resp
@@ -4248,13 +4289,8 @@ def update_Request(req, url=None, data=None, headers={}, query={}):
     req_headers.update(headers)
     req_data = data if data is not None else req.data
     req_url = update_url_query(url or req.get_full_url(), query)
-    req_get_method = req.get_method()
-    if req_get_method == 'HEAD':
-        req_type = HEADRequest
-    elif req_get_method == 'PUT':
-        req_type = PUTRequest
-    else:
-        req_type = compat_urllib_request.Request
+    req_type = {'HEAD': HEADRequest, 'PUT': PUTRequest}.get(
+        req.get_method(), compat_urllib_request.Request)
    new_req = req_type(
         req_url, data=req_data, headers=req_headers,
         origin_req_host=req.origin_req_host, unverifiable=req.unverifiable)
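
Not part of the patch: a minimal sketch of why the _fix_path() work-around is needed and of the urljoin() trick it builds on. The URL below is hypothetical; only the standard library is used.

    try:
        from urllib.parse import urljoin, urlsplit  # Python 3
    except ImportError:
        from urlparse import urljoin, urlsplit  # Python 2

    # urlsplit() keeps dot segments verbatim, so urllib would send this path as-is
    url = 'https://example.com/a/b/../c/./video.mp4'  # hypothetical URL
    print(urlsplit(url).path)
    # -> '/a/b/../c/./video.mp4'

    # urljoin() resolves dot segments per RFC 3986 when joining a relative
    # reference onto a base ending in '/', which is what _fix_path() exploits
    print(urljoin('https://example.com/a/b/', '../c/./video.mp4'))
    # -> 'https://example.com/a/c/video.mp4'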