| 123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604 | #!/usr/bin/env python# coding: utf-8from __future__ import unicode_literals# Allow direct executionimport osimport sysimport unittestsys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))import contextlibimport gzipimport ioimport sslimport tempfileimport threadingimport zlib# avoid deprecated alias assertRaisesRegexpif hasattr(unittest.TestCase, 'assertRaisesRegex'):    unittest.TestCase.assertRaisesRegexp = unittest.TestCase.assertRaisesRegextry:    import brotliexcept ImportError:    brotli = Nonetry:    from urllib.request import pathname2urlexcept ImportError:    from urllib import pathname2urlfrom youtube_dl.compat import (    compat_http_cookiejar_Cookie,    compat_http_server,    compat_str as str,    compat_urllib_error,    compat_urllib_HTTPError,    compat_urllib_parse,    compat_urllib_request,)from youtube_dl.utils import (    sanitized_Request,    update_Request,    urlencode_postdata,)from test.helper import (    expectedFailureIf,    FakeYDL,    FakeLogger,    http_server_port,)from youtube_dl import YoutubeDLTEST_DIR = os.path.dirname(os.path.abspath(__file__))class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):    protocol_version = 'HTTP/1.1'    # work-around old/new -style class inheritance    def super(self, meth_name, *args, **kwargs):        from types import MethodType        try:            super()            fn = lambda s, m, *a, **k: getattr(super(), m)(*a, **k)        except TypeError:            fn = lambda s, m, *a, **k: getattr(compat_http_server.BaseHTTPRequestHandler, m)(s, *a, **k)        self.super = MethodType(fn, self)        return self.super(meth_name, *args, **kwargs)    def log_message(self, format, *args):        pass    def _headers(self):        payload = str(self.headers).encode('utf-8')        self.send_response(200)        self.send_header('Content-Type', 'application/json')        self.send_header('Content-Length', str(len(payload)))        self.end_headers()        self.wfile.write(payload)    def _redirect(self):        self.send_response(int(self.path[len('/redirect_'):]))        self.send_header('Location', '/method')        self.send_header('Content-Length', '0')        self.end_headers()    def _method(self, method, payload=None):        self.send_response(200)        self.send_header('Content-Length', str(len(payload or '')))        self.send_header('Method', method)        self.end_headers()        if payload:            self.wfile.write(payload)    def _status(self, status):        payload = '<html>{0} NOT FOUND</html>'.format(status).encode('utf-8')        self.send_response(int(status))        self.send_header('Content-Type', 'text/html; charset=utf-8')        self.send_header('Content-Length', str(len(payload)))        self.end_headers()        self.wfile.write(payload)    def _read_data(self):        if 'Content-Length' in self.headers:            return self.rfile.read(int(self.headers['Content-Length']))    def _test_url(self, path, host='127.0.0.1', scheme='http', port=None):        return '{0}://{1}:{2}/{3}'.format(            scheme, host,            port if port is not None            else http_server_port(self.server), path)    def do_POST(self):        data = self._read_data()        if self.path.startswith('/redirect_'):            self._redirect()        elif self.path.startswith('/method'):            self._method('POST', data)        elif self.path.startswith('/headers'):            self._headers()        else:            self._status(404)    def do_HEAD(self):        if self.path.startswith('/redirect_'):            self._redirect()        elif self.path.startswith('/method'):            self._method('HEAD')        else:            self._status(404)    def do_PUT(self):        data = self._read_data()        if self.path.startswith('/redirect_'):            self._redirect()        elif self.path.startswith('/method'):            self._method('PUT', data)        else:            self._status(404)    def do_GET(self):        def respond(payload=b'<html><video src="/vid.mp4" /></html>',                    payload_type='text/html; charset=utf-8',                    payload_encoding=None,                    resp_code=200):            self.send_response(resp_code)            self.send_header('Content-Type', payload_type)            if payload_encoding:                self.send_header('Content-Encoding', payload_encoding)            self.send_header('Content-Length', str(len(payload)))  # required for persistent connections            self.end_headers()            self.wfile.write(payload)        def gzip_compress(p):            buf = io.BytesIO()            with contextlib.closing(gzip.GzipFile(fileobj=buf, mode='wb')) as f:                f.write(p)            return buf.getvalue()        if self.path == '/video.html':            respond()        elif self.path == '/vid.mp4':            respond(b'\x00\x00\x00\x00\x20\x66\x74[video]', 'video/mp4')        elif self.path == '/302':            if sys.version_info[0] == 3:                # XXX: Python 3 http server does not allow non-ASCII header values                self.send_response(404)                self.end_headers()                return            new_url = self._test_url('中文.html')            self.send_response(302)            self.send_header(b'Location', new_url.encode('utf-8'))            self.end_headers()        elif self.path == '/%E4%B8%AD%E6%96%87.html':            respond()        elif self.path == '/%c7%9f':            respond()        elif self.path == '/redirect_dotsegments':            self.send_response(301)            # redirect to /headers but with dot segments before            self.send_header('Location', '/a/b/./../../headers')            self.send_header('Content-Length', '0')            self.end_headers()        elif self.path.startswith('/redirect_'):            self._redirect()        elif self.path.startswith('/method'):            self._method('GET')        elif self.path.startswith('/headers'):            self._headers()        elif self.path.startswith('/308-to-headers'):            self.send_response(308)            self.send_header('Location', '/headers')            self.send_header('Content-Length', '0')            self.end_headers()        elif self.path == '/trailing_garbage':            payload = b'<html><video src="/vid.mp4" /></html>'            compressed = gzip_compress(payload) + b'trailing garbage'            respond(compressed, payload_encoding='gzip')        elif self.path == '/302-non-ascii-redirect':            new_url = self._test_url('中文.html')            # actually respond with permanent redirect            self.send_response(301)            self.send_header('Location', new_url)            self.send_header('Content-Length', '0')            self.end_headers()        elif self.path == '/content-encoding':            encodings = self.headers.get('ytdl-encoding', '')            payload = b'<html><video src="/vid.mp4" /></html>'            for encoding in filter(None, (e.strip() for e in encodings.split(','))):                if encoding == 'br' and brotli:                    payload = brotli.compress(payload)                elif encoding == 'gzip':                    payload = gzip_compress(payload)                elif encoding == 'deflate':                    payload = zlib.compress(payload)                elif encoding == 'unsupported':                    payload = b'raw'                    break                else:                    self._status(415)                    return            respond(payload, payload_encoding=encodings)        else:            self._status(404)    def send_header(self, keyword, value):        """        Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.        This is against what is defined in RFC 3986: but we need to test that we support this        since some sites incorrectly do this.        """        if keyword.lower() == 'connection':            return self.super('send_header', keyword, value)        if not hasattr(self, '_headers_buffer'):            self._headers_buffer = []        self._headers_buffer.append('{0}: {1}\r\n'.format(keyword, value).encode('utf-8'))    def end_headers(self):        if hasattr(self, '_headers_buffer'):            self.wfile.write(b''.join(self._headers_buffer))            self._headers_buffer = []        self.super('end_headers')class TestHTTP(unittest.TestCase):    # when does it make sense to check the SSL certificate?    _check_cert = (        sys.version_info >= (3, 2)        or (sys.version_info[0] == 2 and sys.version_info[1:] >= (7, 19)))    def setUp(self):        # HTTP server        self.http_httpd = compat_http_server.HTTPServer(            ('127.0.0.1', 0), HTTPTestRequestHandler)        self.http_port = http_server_port(self.http_httpd)        self.http_server_thread = threading.Thread(target=self.http_httpd.serve_forever)        self.http_server_thread.daemon = True        self.http_server_thread.start()        try:            from http.server import ThreadingHTTPServer        except ImportError:            try:                from socketserver import ThreadingMixIn            except ImportError:                from SocketServer import ThreadingMixIn            class ThreadingHTTPServer(ThreadingMixIn, compat_http_server.HTTPServer):                pass        # HTTPS server        certfn = os.path.join(TEST_DIR, 'testcert.pem')        self.https_httpd = ThreadingHTTPServer(            ('127.0.0.1', 0), HTTPTestRequestHandler)        try:            sslctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)            sslctx.verify_mode = ssl.CERT_NONE            sslctx.check_hostname = False            sslctx.load_cert_chain(certfn, None)            self.https_httpd.socket = sslctx.wrap_socket(                self.https_httpd.socket, server_side=True)        except AttributeError:            self.https_httpd.socket = ssl.wrap_socket(                self.https_httpd.socket, certfile=certfn, server_side=True)        self.https_port = http_server_port(self.https_httpd)        self.https_server_thread = threading.Thread(target=self.https_httpd.serve_forever)        self.https_server_thread.daemon = True        self.https_server_thread.start()    def tearDown(self):        def closer(svr):            def _closer():                svr.shutdown()                svr.server_close()            return _closer        shutdown_thread = threading.Thread(target=closer(self.http_httpd))        shutdown_thread.start()        self.http_server_thread.join(2.0)        shutdown_thread = threading.Thread(target=closer(self.https_httpd))        shutdown_thread.start()        self.https_server_thread.join(2.0)    def _test_url(self, path, host='127.0.0.1', scheme='http', port=None):        return '{0}://{1}:{2}/{3}'.format(            scheme, host,            port if port is not None            else self.https_port if scheme == 'https'            else self.http_port, path)    @unittest.skipUnless(_check_cert, 'No support for certificate check in SSL')    def test_nocheckcertificate(self):        with FakeYDL({'logger': FakeLogger()}) as ydl:            with self.assertRaises(compat_urllib_error.URLError):                ydl.urlopen(sanitized_Request(self._test_url('headers', scheme='https')))        with FakeYDL({'logger': FakeLogger(), 'nocheckcertificate': True}) as ydl:            r = ydl.urlopen(sanitized_Request(self._test_url('headers', scheme='https')))            self.assertEqual(r.getcode(), 200)            r.close()    def test_percent_encode(self):        with FakeYDL() as ydl:            # Unicode characters should be encoded with uppercase percent-encoding            res = ydl.urlopen(sanitized_Request(self._test_url('中文.html')))            self.assertEqual(res.getcode(), 200)            res.close()            # don't normalize existing percent encodings            res = ydl.urlopen(sanitized_Request(self._test_url('%c7%9f')))            self.assertEqual(res.getcode(), 200)            res.close()    def test_unicode_path_redirection(self):        with FakeYDL() as ydl:            r = ydl.urlopen(sanitized_Request(self._test_url('302-non-ascii-redirect')))            self.assertEqual(r.url, self._test_url('%E4%B8%AD%E6%96%87.html'))            r.close()    def test_redirect(self):        with FakeYDL() as ydl:            def do_req(redirect_status, method, check_no_content=False):                data = b'testdata' if method in ('POST', 'PUT') else None                res = ydl.urlopen(sanitized_Request(                    self._test_url('redirect_{0}'.format(redirect_status)),                    method=method, data=data))                if check_no_content:                    self.assertNotIn('Content-Type', res.headers)                return res.read().decode('utf-8'), res.headers.get('method', '')            # A 303 must either use GET or HEAD for subsequent request            self.assertEqual(do_req(303, 'POST'), ('', 'GET'))            self.assertEqual(do_req(303, 'HEAD'), ('', 'HEAD'))            self.assertEqual(do_req(303, 'PUT'), ('', 'GET'))            # 301 and 302 turn POST only into a GET, with no Content-Type            self.assertEqual(do_req(301, 'POST', True), ('', 'GET'))            self.assertEqual(do_req(301, 'HEAD'), ('', 'HEAD'))            self.assertEqual(do_req(302, 'POST', True), ('', 'GET'))            self.assertEqual(do_req(302, 'HEAD'), ('', 'HEAD'))            self.assertEqual(do_req(301, 'PUT'), ('testdata', 'PUT'))            self.assertEqual(do_req(302, 'PUT'), ('testdata', 'PUT'))            # 307 and 308 should not change method            for m in ('POST', 'PUT'):                self.assertEqual(do_req(307, m), ('testdata', m))                self.assertEqual(do_req(308, m), ('testdata', m))            self.assertEqual(do_req(307, 'HEAD'), ('', 'HEAD'))            self.assertEqual(do_req(308, 'HEAD'), ('', 'HEAD'))            # These should not redirect and instead raise an HTTPError            for code in (300, 304, 305, 306):                with self.assertRaises(compat_urllib_HTTPError):                    do_req(code, 'GET')    # Jython 2.7.1 times out for some reason    @expectedFailureIf(sys.platform.startswith('java') and sys.version_info < (2, 7, 2))    def test_content_type(self):        # https://github.com/yt-dlp/yt-dlp/commit/379a4f161d4ad3e40932dcf5aca6e6fb9715ab28        with FakeYDL({'nocheckcertificate': True}) as ydl:            # method should be auto-detected as POST            r = sanitized_Request(self._test_url('headers', scheme='https'), data=urlencode_postdata({'test': 'test'}))            headers = ydl.urlopen(r).read().decode('utf-8')            self.assertIn('Content-Type: application/x-www-form-urlencoded', headers)            # test http            r = sanitized_Request(self._test_url('headers'), data=urlencode_postdata({'test': 'test'}))            headers = ydl.urlopen(r).read().decode('utf-8')            self.assertIn('Content-Type: application/x-www-form-urlencoded', headers)    def test_update_req(self):        req = sanitized_Request('http://example.com')        assert req.data is None        assert req.get_method() == 'GET'        assert not req.has_header('Content-Type')        # Test that zero-byte payloads will be sent        req = update_Request(req, data=b'')        assert req.data == b''        assert req.get_method() == 'POST'        # yt-dl expects data to be encoded and Content-Type to be added by sender        # assert req.get_header('Content-Type') == 'application/x-www-form-urlencoded'    def test_cookiejar(self):        with FakeYDL() as ydl:            ydl.cookiejar.set_cookie(compat_http_cookiejar_Cookie(                0, 'test', 'ytdl', None, False, '127.0.0.1', True,                False, '/headers', True, False, None, False, None, None, {}))            data = ydl.urlopen(sanitized_Request(                self._test_url('headers'))).read().decode('utf-8')            self.assertIn('Cookie: test=ytdl', data)    def test_passed_cookie_header(self):        # We should accept a Cookie header being passed as in normal headers and handle it appropriately.        with FakeYDL() as ydl:            # Specified Cookie header should be used            res = ydl.urlopen(sanitized_Request(                self._test_url('headers'), headers={'Cookie': 'test=test'})).read().decode('utf-8')            self.assertIn('Cookie: test=test', res)            # Specified Cookie header should be removed on any redirect            res = ydl.urlopen(sanitized_Request(                self._test_url('308-to-headers'), headers={'Cookie': 'test=test'})).read().decode('utf-8')            self.assertNotIn('Cookie: test=test', res)            # Specified Cookie header should override global cookiejar for that request            ydl.cookiejar.set_cookie(compat_http_cookiejar_Cookie(                0, 'test', 'ytdlp', None, False, '127.0.0.1', True,                False, '/headers', True, False, None, False, None, None, {}))            data = ydl.urlopen(sanitized_Request(                self._test_url('headers'), headers={'Cookie': 'test=test'})).read().decode('utf-8')            self.assertNotIn('Cookie: test=ytdlp', data)            self.assertIn('Cookie: test=test', data)    def test_no_compression_compat_header(self):        with FakeYDL() as ydl:            data = ydl.urlopen(                sanitized_Request(                    self._test_url('headers'),                    headers={'Youtubedl-no-compression': True})).read()            self.assertIn(b'Accept-Encoding: identity', data)            self.assertNotIn(b'youtubedl-no-compression', data.lower())    def test_gzip_trailing_garbage(self):        # https://github.com/ytdl-org/youtube-dl/commit/aa3e950764337ef9800c936f4de89b31c00dfcf5        # https://github.com/ytdl-org/youtube-dl/commit/6f2ec15cee79d35dba065677cad9da7491ec6e6f        with FakeYDL() as ydl:            data = ydl.urlopen(sanitized_Request(self._test_url('trailing_garbage'))).read().decode('utf-8')            self.assertEqual(data, '<html><video src="/vid.mp4" /></html>')    def __test_compression(self, encoding):        with FakeYDL() as ydl:            res = ydl.urlopen(                sanitized_Request(                    self._test_url('content-encoding'),                    headers={'ytdl-encoding': encoding}))            # decoded encodings are removed: only check for valid decompressed data            self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')    @unittest.skipUnless(brotli, 'brotli support is not installed')    def test_brotli(self):        self.__test_compression('br')    def test_deflate(self):        self.__test_compression('deflate')    def test_gzip(self):        self.__test_compression('gzip')    def test_multiple_encodings(self):        # https://www.rfc-editor.org/rfc/rfc9110.html#section-8.4        for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):            self.__test_compression(pair)    def test_unsupported_encoding(self):        # it should return the raw content        with FakeYDL() as ydl:            res = ydl.urlopen(                sanitized_Request(                    self._test_url('content-encoding'),                    headers={'ytdl-encoding': 'unsupported'}))            self.assertEqual(res.headers.get('Content-Encoding'), 'unsupported')            self.assertEqual(res.read(), b'raw')    def test_remove_dot_segments(self):        with FakeYDL() as ydl:            res = ydl.urlopen(sanitized_Request(self._test_url('a/b/./../../headers')))            self.assertEqual(compat_urllib_parse.urlparse(res.geturl()).path, '/headers')            res = ydl.urlopen(sanitized_Request(self._test_url('redirect_dotsegments')))            self.assertEqual(compat_urllib_parse.urlparse(res.geturl()).path, '/headers')def _build_proxy_handler(name):    class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):        proxy_name = name        def log_message(self, format, *args):            pass        def do_GET(self):            self.send_response(200)            self.send_header('Content-Type', 'text/plain; charset=utf-8')            self.end_headers()            self.wfile.write('{0}: {1}'.format(self.proxy_name, self.path).encode('utf-8'))    return HTTPTestRequestHandlerclass TestProxy(unittest.TestCase):    def setUp(self):        self.proxy = compat_http_server.HTTPServer(            ('127.0.0.1', 0), _build_proxy_handler('normal'))        self.port = http_server_port(self.proxy)        self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)        self.proxy_thread.daemon = True        self.proxy_thread.start()        self.geo_proxy = compat_http_server.HTTPServer(            ('127.0.0.1', 0), _build_proxy_handler('geo'))        self.geo_port = http_server_port(self.geo_proxy)        self.geo_proxy_thread = threading.Thread(target=self.geo_proxy.serve_forever)        self.geo_proxy_thread.daemon = True        self.geo_proxy_thread.start()    def tearDown(self):        def closer(svr):            def _closer():                svr.shutdown()                svr.server_close()            return _closer        shutdown_thread = threading.Thread(target=closer(self.proxy))        shutdown_thread.start()        self.proxy_thread.join(2.0)        shutdown_thread = threading.Thread(target=closer(self.geo_proxy))        shutdown_thread.start()        self.geo_proxy_thread.join(2.0)    def _test_proxy(self, host='127.0.0.1', port=None):        return '{0}:{1}'.format(            host, port if port is not None else self.port)    def test_proxy(self):        geo_proxy = self._test_proxy(port=self.geo_port)        ydl = YoutubeDL({            'proxy': self._test_proxy(),            'geo_verification_proxy': geo_proxy,        })        url = 'http://foo.com/bar'        response = ydl.urlopen(url).read().decode('utf-8')        self.assertEqual(response, 'normal: {0}'.format(url))        req = compat_urllib_request.Request(url)        req.add_header('Ytdl-request-proxy', geo_proxy)        response = ydl.urlopen(req).read().decode('utf-8')        self.assertEqual(response, 'geo: {0}'.format(url))    def test_proxy_with_idn(self):        ydl = YoutubeDL({            'proxy': self._test_proxy(),        })        url = 'http://中文.tw/'        response = ydl.urlopen(url).read().decode('utf-8')        # b'xn--fiq228c' is '中文'.encode('idna')        self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')class TestFileURL(unittest.TestCase):    # See https://github.com/ytdl-org/youtube-dl/issues/8227    def test_file_urls(self):        tf = tempfile.NamedTemporaryFile(delete=False)        tf.write(b'foobar')        tf.close()        url = compat_urllib_parse.urljoin('file://', pathname2url(tf.name))        with FakeYDL() as ydl:            self.assertRaisesRegexp(                compat_urllib_error.URLError, 'file:// scheme is explicitly disabled in youtube-dl for security reasons', ydl.urlopen, url)        # not yet implemented        """        with FakeYDL({'enable_file_urls': True}) as ydl:            res = ydl.urlopen(url)            self.assertEqual(res.read(), b'foobar')            res.close()        """        os.unlink(tf.name)if __name__ == '__main__':    unittest.main()
 |