test_http.py 21 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549
  1. #!/usr/bin/env python
  2. # coding: utf-8
  3. from __future__ import unicode_literals
  4. # Allow direct execution
  5. import os
  6. import sys
  7. import unittest
  8. sys.path.insert(0, os.path.dirname(os.path.dirname(os.path.abspath(__file__))))
  9. import gzip
  10. import io
  11. import ssl
  12. import tempfile
  13. import threading
  14. import zlib
  15. # avoid deprecated alias assertRaisesRegexp
  16. if hasattr(unittest.TestCase, 'assertRaisesRegex'):
  17. unittest.TestCase.assertRaisesRegexp = unittest.TestCase.assertRaisesRegex
  18. try:
  19. import brotli
  20. except ImportError:
  21. brotli = None
  22. try:
  23. from urllib.request import pathname2url
  24. except ImportError:
  25. from urllib import pathname2url
  26. from youtube_dl.compat import (
  27. compat_http_cookiejar_Cookie,
  28. compat_http_server,
  29. compat_str as str,
  30. compat_urllib_error,
  31. compat_urllib_HTTPError,
  32. compat_urllib_parse,
  33. compat_urllib_request,
  34. )
  35. from youtube_dl.utils import (
  36. sanitized_Request,
  37. urlencode_postdata,
  38. )
  39. from test.helper import (
  40. FakeYDL,
  41. FakeLogger,
  42. http_server_port,
  43. )
  44. from youtube_dl import YoutubeDL
  45. TEST_DIR = os.path.dirname(os.path.abspath(__file__))
  46. class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
  47. protocol_version = 'HTTP/1.1'
  48. # work-around old/new -style class inheritance
  49. def super(self, meth_name, *args, **kwargs):
  50. from types import MethodType
  51. try:
  52. super()
  53. fn = lambda s, m, *a, **k: getattr(super(), m)(*a, **k)
  54. except TypeError:
  55. fn = lambda s, m, *a, **k: getattr(compat_http_server.BaseHTTPRequestHandler, m)(s, *a, **k)
  56. self.super = MethodType(fn, self)
  57. return self.super(meth_name, *args, **kwargs)
  58. def log_message(self, format, *args):
  59. pass
  60. def _headers(self):
  61. payload = str(self.headers).encode('utf-8')
  62. self.send_response(200)
  63. self.send_header('Content-Type', 'application/json')
  64. self.send_header('Content-Length', str(len(payload)))
  65. self.end_headers()
  66. self.wfile.write(payload)
  67. def _redirect(self):
  68. self.send_response(int(self.path[len('/redirect_'):]))
  69. self.send_header('Location', '/method')
  70. self.send_header('Content-Length', '0')
  71. self.end_headers()
  72. def _method(self, method, payload=None):
  73. self.send_response(200)
  74. self.send_header('Content-Length', str(len(payload or '')))
  75. self.send_header('Method', method)
  76. self.end_headers()
  77. if payload:
  78. self.wfile.write(payload)
  79. def _status(self, status):
  80. payload = '<html>{0} NOT FOUND</html>'.format(status).encode('utf-8')
  81. self.send_response(int(status))
  82. self.send_header('Content-Type', 'text/html; charset=utf-8')
  83. self.send_header('Content-Length', str(len(payload)))
  84. self.end_headers()
  85. self.wfile.write(payload)
  86. def _read_data(self):
  87. if 'Content-Length' in self.headers:
  88. return self.rfile.read(int(self.headers['Content-Length']))
  89. def _test_url(self, path, host='127.0.0.1', scheme='http', port=None):
  90. return '{0}://{1}:{2}/{3}'.format(
  91. scheme, host,
  92. port if port is not None
  93. else http_server_port(self.server), path)
  94. def do_POST(self):
  95. data = self._read_data()
  96. if self.path.startswith('/redirect_'):
  97. self._redirect()
  98. elif self.path.startswith('/method'):
  99. self._method('POST', data)
  100. elif self.path.startswith('/headers'):
  101. self._headers()
  102. else:
  103. self._status(404)
  104. def do_HEAD(self):
  105. if self.path.startswith('/redirect_'):
  106. self._redirect()
  107. elif self.path.startswith('/method'):
  108. self._method('HEAD')
  109. else:
  110. self._status(404)
  111. def do_PUT(self):
  112. data = self._read_data()
  113. if self.path.startswith('/redirect_'):
  114. self._redirect()
  115. elif self.path.startswith('/method'):
  116. self._method('PUT', data)
  117. else:
  118. self._status(404)
  119. def do_GET(self):
  120. def respond(payload=b'<html><video src="/vid.mp4" /></html>',
  121. payload_type='text/html; charset=utf-8',
  122. payload_encoding=None,
  123. resp_code=200):
  124. self.send_response(resp_code)
  125. self.send_header('Content-Type', payload_type)
  126. if payload_encoding:
  127. self.send_header('Content-Encoding', payload_encoding)
  128. self.send_header('Content-Length', str(len(payload))) # required for persistent connections
  129. self.end_headers()
  130. self.wfile.write(payload)
  131. def gzip_compress(p):
  132. buf = io.BytesIO()
  133. with gzip.GzipFile(fileobj=buf, mode='wb') as f:
  134. f.write(p)
  135. return buf.getvalue()
  136. if self.path == '/video.html':
  137. respond()
  138. elif self.path == '/vid.mp4':
  139. respond(b'\x00\x00\x00\x00\x20\x66\x74[video]', 'video/mp4')
  140. elif self.path == '/302':
  141. if sys.version_info[0] == 3:
  142. # XXX: Python 3 http server does not allow non-ASCII header values
  143. self.send_response(404)
  144. self.end_headers()
  145. return
  146. new_url = self._test_url('中文.html')
  147. self.send_response(302)
  148. self.send_header(b'Location', new_url.encode('utf-8'))
  149. self.end_headers()
  150. elif self.path == '/%E4%B8%AD%E6%96%87.html':
  151. respond()
  152. elif self.path == '/%c7%9f':
  153. respond()
  154. elif self.path.startswith('/redirect_'):
  155. self._redirect()
  156. elif self.path.startswith('/method'):
  157. self._method('GET')
  158. elif self.path.startswith('/headers'):
  159. self._headers()
  160. elif self.path == '/trailing_garbage':
  161. payload = b'<html><video src="/vid.mp4" /></html>'
  162. compressed = gzip_compress(payload) + b'trailing garbage'
  163. respond(compressed, payload_encoding='gzip')
  164. elif self.path == '/302-non-ascii-redirect':
  165. new_url = self._test_url('中文.html')
  166. # actually respond with permanent redirect
  167. self.send_response(301)
  168. self.send_header('Location', new_url)
  169. self.send_header('Content-Length', '0')
  170. self.end_headers()
  171. elif self.path == '/content-encoding':
  172. encodings = self.headers.get('ytdl-encoding', '')
  173. payload = b'<html><video src="/vid.mp4" /></html>'
  174. for encoding in filter(None, (e.strip() for e in encodings.split(','))):
  175. if encoding == 'br' and brotli:
  176. payload = brotli.compress(payload)
  177. elif encoding == 'gzip':
  178. payload = gzip_compress(payload)
  179. elif encoding == 'deflate':
  180. payload = zlib.compress(payload)
  181. elif encoding == 'unsupported':
  182. payload = b'raw'
  183. break
  184. else:
  185. self._status(415)
  186. return
  187. respond(payload, payload_encoding=encodings)
  188. else:
  189. self._status(404)
  190. def send_header(self, keyword, value):
  191. """
  192. Forcibly allow HTTP server to send non percent-encoded non-ASCII characters in headers.
  193. This is against what is defined in RFC 3986: but we need to test that we support this
  194. since some sites incorrectly do this.
  195. """
  196. if keyword.lower() == 'connection':
  197. return self.super('send_header', keyword, value)
  198. if not hasattr(self, '_headers_buffer'):
  199. self._headers_buffer = []
  200. self._headers_buffer.append('{0}: {1}\r\n'.format(keyword, value).encode('utf-8'))
  201. def end_headers(self):
  202. if hasattr(self, '_headers_buffer'):
  203. self.wfile.write(b''.join(self._headers_buffer))
  204. self._headers_buffer = []
  205. self.super('end_headers')
  206. class TestHTTP(unittest.TestCase):
  207. def setUp(self):
  208. # HTTP server
  209. self.http_httpd = compat_http_server.HTTPServer(
  210. ('127.0.0.1', 0), HTTPTestRequestHandler)
  211. self.http_port = http_server_port(self.http_httpd)
  212. self.http_server_thread = threading.Thread(target=self.http_httpd.serve_forever)
  213. self.http_server_thread.daemon = True
  214. self.http_server_thread.start()
  215. try:
  216. from http.server import ThreadingHTTPServer
  217. except ImportError:
  218. try:
  219. from socketserver import ThreadingMixIn
  220. except ImportError:
  221. from SocketServer import ThreadingMixIn
  222. class ThreadingHTTPServer(ThreadingMixIn, compat_http_server.HTTPServer):
  223. pass
  224. # HTTPS server
  225. certfn = os.path.join(TEST_DIR, 'testcert.pem')
  226. self.https_httpd = ThreadingHTTPServer(
  227. ('127.0.0.1', 0), HTTPTestRequestHandler)
  228. try:
  229. sslctx = ssl.SSLContext(ssl.PROTOCOL_SSLv23)
  230. sslctx.verify_mode = ssl.CERT_NONE
  231. sslctx.check_hostname = False
  232. sslctx.load_cert_chain(certfn, None)
  233. self.https_httpd.socket = sslctx.wrap_socket(
  234. self.https_httpd.socket, server_side=True)
  235. except AttributeError:
  236. self.https_httpd.socket = ssl.wrap_socket(
  237. self.https_httpd.socket, certfile=certfn, server_side=True)
  238. self.https_port = http_server_port(self.https_httpd)
  239. self.https_server_thread = threading.Thread(target=self.https_httpd.serve_forever)
  240. self.https_server_thread.daemon = True
  241. self.https_server_thread.start()
  242. def tearDown(self):
  243. def closer(svr):
  244. def _closer():
  245. svr.shutdown()
  246. svr.server_close()
  247. return _closer
  248. shutdown_thread = threading.Thread(target=closer(self.http_httpd))
  249. shutdown_thread.start()
  250. self.http_server_thread.join(2.0)
  251. shutdown_thread = threading.Thread(target=closer(self.https_httpd))
  252. shutdown_thread.start()
  253. self.https_server_thread.join(2.0)
  254. def _test_url(self, path, host='127.0.0.1', scheme='http', port=None):
  255. return '{0}://{1}:{2}/{3}'.format(
  256. scheme, host,
  257. port if port is not None
  258. else self.https_port if scheme == 'https'
  259. else self.http_port, path)
  260. def test_nocheckcertificate(self):
  261. with FakeYDL({'logger': FakeLogger()}) as ydl:
  262. with self.assertRaises(compat_urllib_error.URLError):
  263. ydl.urlopen(sanitized_Request(self._test_url('headers', scheme='https')))
  264. with FakeYDL({'logger': FakeLogger(), 'nocheckcertificate': True}) as ydl:
  265. r = ydl.urlopen(sanitized_Request(self._test_url('headers', scheme='https')))
  266. self.assertEqual(r.getcode(), 200)
  267. r.close()
  268. def test_percent_encode(self):
  269. with FakeYDL() as ydl:
  270. # Unicode characters should be encoded with uppercase percent-encoding
  271. res = ydl.urlopen(sanitized_Request(self._test_url('中文.html')))
  272. self.assertEqual(res.getcode(), 200)
  273. res.close()
  274. # don't normalize existing percent encodings
  275. res = ydl.urlopen(sanitized_Request(self._test_url('%c7%9f')))
  276. self.assertEqual(res.getcode(), 200)
  277. res.close()
  278. def test_unicode_path_redirection(self):
  279. with FakeYDL() as ydl:
  280. r = ydl.urlopen(sanitized_Request(self._test_url('302-non-ascii-redirect')))
  281. self.assertEqual(r.url, self._test_url('%E4%B8%AD%E6%96%87.html'))
  282. r.close()
  283. def test_redirect(self):
  284. with FakeYDL() as ydl:
  285. def do_req(redirect_status, method, check_no_content=False):
  286. data = b'testdata' if method in ('POST', 'PUT') else None
  287. res = ydl.urlopen(sanitized_Request(
  288. self._test_url('redirect_{0}'.format(redirect_status)),
  289. method=method, data=data))
  290. if check_no_content:
  291. self.assertNotIn('Content-Type', res.headers)
  292. return res.read().decode('utf-8'), res.headers.get('method', '')
  293. # A 303 must either use GET or HEAD for subsequent request
  294. self.assertEqual(do_req(303, 'POST'), ('', 'GET'))
  295. self.assertEqual(do_req(303, 'HEAD'), ('', 'HEAD'))
  296. self.assertEqual(do_req(303, 'PUT'), ('', 'GET'))
  297. # 301 and 302 turn POST only into a GET, with no Content-Type
  298. self.assertEqual(do_req(301, 'POST', True), ('', 'GET'))
  299. self.assertEqual(do_req(301, 'HEAD'), ('', 'HEAD'))
  300. self.assertEqual(do_req(302, 'POST', True), ('', 'GET'))
  301. self.assertEqual(do_req(302, 'HEAD'), ('', 'HEAD'))
  302. self.assertEqual(do_req(301, 'PUT'), ('testdata', 'PUT'))
  303. self.assertEqual(do_req(302, 'PUT'), ('testdata', 'PUT'))
  304. # 307 and 308 should not change method
  305. for m in ('POST', 'PUT'):
  306. self.assertEqual(do_req(307, m), ('testdata', m))
  307. self.assertEqual(do_req(308, m), ('testdata', m))
  308. self.assertEqual(do_req(307, 'HEAD'), ('', 'HEAD'))
  309. self.assertEqual(do_req(308, 'HEAD'), ('', 'HEAD'))
  310. # These should not redirect and instead raise an HTTPError
  311. for code in (300, 304, 305, 306):
  312. with self.assertRaises(compat_urllib_HTTPError):
  313. do_req(code, 'GET')
  314. def test_content_type(self):
  315. # https://github.com/yt-dlp/yt-dlp/commit/379a4f161d4ad3e40932dcf5aca6e6fb9715ab28
  316. with FakeYDL({'nocheckcertificate': True}) as ydl:
  317. # method should be auto-detected as POST
  318. r = sanitized_Request(self._test_url('headers', scheme='https'), data=urlencode_postdata({'test': 'test'}))
  319. headers = ydl.urlopen(r).read().decode('utf-8')
  320. self.assertIn('Content-Type: application/x-www-form-urlencoded', headers)
  321. # test http
  322. r = sanitized_Request(self._test_url('headers'), data=urlencode_postdata({'test': 'test'}))
  323. headers = ydl.urlopen(r).read().decode('utf-8')
  324. self.assertIn('Content-Type: application/x-www-form-urlencoded', headers)
  325. def test_cookiejar(self):
  326. with FakeYDL() as ydl:
  327. ydl.cookiejar.set_cookie(compat_http_cookiejar_Cookie(
  328. 0, 'test', 'ytdl', None, False, '127.0.0.1', True,
  329. False, '/headers', True, False, None, False, None, None, {}))
  330. data = ydl.urlopen(sanitized_Request(self._test_url('headers'))).read()
  331. self.assertIn(b'Cookie: test=ytdl', data)
  332. def test_no_compression_compat_header(self):
  333. with FakeYDL() as ydl:
  334. data = ydl.urlopen(
  335. sanitized_Request(
  336. self._test_url('headers'),
  337. headers={'Youtubedl-no-compression': True})).read()
  338. self.assertIn(b'Accept-Encoding: identity', data)
  339. self.assertNotIn(b'youtubedl-no-compression', data.lower())
  340. def test_gzip_trailing_garbage(self):
  341. # https://github.com/ytdl-org/youtube-dl/commit/aa3e950764337ef9800c936f4de89b31c00dfcf5
  342. # https://github.com/ytdl-org/youtube-dl/commit/6f2ec15cee79d35dba065677cad9da7491ec6e6f
  343. with FakeYDL() as ydl:
  344. data = ydl.urlopen(sanitized_Request(self._test_url('trailing_garbage'))).read().decode('utf-8')
  345. self.assertEqual(data, '<html><video src="/vid.mp4" /></html>')
  346. def __test_compression(self, encoding):
  347. with FakeYDL() as ydl:
  348. res = ydl.urlopen(
  349. sanitized_Request(
  350. self._test_url('content-encoding'),
  351. headers={'ytdl-encoding': encoding}))
  352. self.assertEqual(res.headers.get('Content-Encoding'), encoding)
  353. self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')
  354. @unittest.skipUnless(brotli, 'brotli support is not installed')
  355. @unittest.expectedFailure
  356. def test_brotli(self):
  357. self.__test_compression('br')
  358. @unittest.expectedFailure
  359. def test_deflate(self):
  360. self.__test_compression('deflate')
  361. @unittest.expectedFailure
  362. def test_gzip(self):
  363. self.__test_compression('gzip')
  364. @unittest.expectedFailure # not yet implemented
  365. def test_multiple_encodings(self):
  366. # https://www.rfc-editor.org/rfc/rfc9110.html#section-8.4
  367. with FakeYDL() as ydl:
  368. for pair in ('gzip,deflate', 'deflate, gzip', 'gzip, gzip', 'deflate, deflate'):
  369. res = ydl.urlopen(
  370. sanitized_Request(
  371. self._test_url('content-encoding'),
  372. headers={'ytdl-encoding': pair}))
  373. self.assertEqual(res.headers.get('Content-Encoding'), pair)
  374. self.assertEqual(res.read(), b'<html><video src="/vid.mp4" /></html>')
  375. def test_unsupported_encoding(self):
  376. # it should return the raw content
  377. with FakeYDL() as ydl:
  378. res = ydl.urlopen(
  379. sanitized_Request(
  380. self._test_url('content-encoding'),
  381. headers={'ytdl-encoding': 'unsupported'}))
  382. self.assertEqual(res.headers.get('Content-Encoding'), 'unsupported')
  383. self.assertEqual(res.read(), b'raw')
  384. def _build_proxy_handler(name):
  385. class HTTPTestRequestHandler(compat_http_server.BaseHTTPRequestHandler):
  386. proxy_name = name
  387. def log_message(self, format, *args):
  388. pass
  389. def do_GET(self):
  390. self.send_response(200)
  391. self.send_header('Content-Type', 'text/plain; charset=utf-8')
  392. self.end_headers()
  393. self.wfile.write('{0}: {1}'.format(self.proxy_name, self.path).encode('utf-8'))
  394. return HTTPTestRequestHandler
  395. class TestProxy(unittest.TestCase):
  396. def setUp(self):
  397. self.proxy = compat_http_server.HTTPServer(
  398. ('127.0.0.1', 0), _build_proxy_handler('normal'))
  399. self.port = http_server_port(self.proxy)
  400. self.proxy_thread = threading.Thread(target=self.proxy.serve_forever)
  401. self.proxy_thread.daemon = True
  402. self.proxy_thread.start()
  403. self.geo_proxy = compat_http_server.HTTPServer(
  404. ('127.0.0.1', 0), _build_proxy_handler('geo'))
  405. self.geo_port = http_server_port(self.geo_proxy)
  406. self.geo_proxy_thread = threading.Thread(target=self.geo_proxy.serve_forever)
  407. self.geo_proxy_thread.daemon = True
  408. self.geo_proxy_thread.start()
  409. def tearDown(self):
  410. def closer(svr):
  411. def _closer():
  412. svr.shutdown()
  413. svr.server_close()
  414. return _closer
  415. shutdown_thread = threading.Thread(target=closer(self.proxy))
  416. shutdown_thread.start()
  417. self.proxy_thread.join(2.0)
  418. shutdown_thread = threading.Thread(target=closer(self.geo_proxy))
  419. shutdown_thread.start()
  420. self.geo_proxy_thread.join(2.0)
  421. def _test_proxy(self, host='127.0.0.1', port=None):
  422. return '{0}:{1}'.format(
  423. host, port if port is not None else self.port)
  424. def test_proxy(self):
  425. geo_proxy = self._test_proxy(port=self.geo_port)
  426. ydl = YoutubeDL({
  427. 'proxy': self._test_proxy(),
  428. 'geo_verification_proxy': geo_proxy,
  429. })
  430. url = 'http://foo.com/bar'
  431. response = ydl.urlopen(url).read().decode('utf-8')
  432. self.assertEqual(response, 'normal: {0}'.format(url))
  433. req = compat_urllib_request.Request(url)
  434. req.add_header('Ytdl-request-proxy', geo_proxy)
  435. response = ydl.urlopen(req).read().decode('utf-8')
  436. self.assertEqual(response, 'geo: {0}'.format(url))
  437. def test_proxy_with_idn(self):
  438. ydl = YoutubeDL({
  439. 'proxy': self._test_proxy(),
  440. })
  441. url = 'http://中文.tw/'
  442. response = ydl.urlopen(url).read().decode('utf-8')
  443. # b'xn--fiq228c' is '中文'.encode('idna')
  444. self.assertEqual(response, 'normal: http://xn--fiq228c.tw/')
  445. class TestFileURL(unittest.TestCase):
  446. # See https://github.com/ytdl-org/youtube-dl/issues/8227
  447. def test_file_urls(self):
  448. tf = tempfile.NamedTemporaryFile(delete=False)
  449. tf.write(b'foobar')
  450. tf.close()
  451. url = compat_urllib_parse.urljoin('file://', pathname2url(tf.name))
  452. with FakeYDL() as ydl:
  453. self.assertRaisesRegexp(
  454. compat_urllib_error.URLError, 'file:// scheme is explicitly disabled in youtube-dl for security reasons', ydl.urlopen, url)
  455. # not yet implemented
  456. """
  457. with FakeYDL({'enable_file_urls': True}) as ydl:
  458. res = ydl.urlopen(url)
  459. self.assertEqual(res.read(), b'foobar')
  460. res.close()
  461. """
  462. os.unlink(tf.name)
# Run the tests in this module when the file is executed directly.
if __name__ == '__main__':
    unittest.main()