compat.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508
  1. from __future__ import unicode_literals
  2. import collections
  3. import getpass
  4. import optparse
  5. import os
  6. import re
  7. import shutil
  8. import socket
  9. import subprocess
  10. import sys
  11. import itertools
  12. try:
  13. import urllib.request as compat_urllib_request
  14. except ImportError: # Python 2
  15. import urllib2 as compat_urllib_request
  16. try:
  17. import urllib.error as compat_urllib_error
  18. except ImportError: # Python 2
  19. import urllib2 as compat_urllib_error
  20. try:
  21. import urllib.parse as compat_urllib_parse
  22. except ImportError: # Python 2
  23. import urllib as compat_urllib_parse
  24. try:
  25. from urllib.parse import urlparse as compat_urllib_parse_urlparse
  26. except ImportError: # Python 2
  27. from urlparse import urlparse as compat_urllib_parse_urlparse
  28. try:
  29. import urllib.parse as compat_urlparse
  30. except ImportError: # Python 2
  31. import urlparse as compat_urlparse
  32. try:
  33. import http.cookiejar as compat_cookiejar
  34. except ImportError: # Python 2
  35. import cookielib as compat_cookiejar
  36. try:
  37. import html.entities as compat_html_entities
  38. except ImportError: # Python 2
  39. import htmlentitydefs as compat_html_entities
  40. try:
  41. import http.client as compat_http_client
  42. except ImportError: # Python 2
  43. import httplib as compat_http_client
  44. try:
  45. from urllib.error import HTTPError as compat_HTTPError
  46. except ImportError: # Python 2
  47. from urllib2 import HTTPError as compat_HTTPError
  48. try:
  49. from urllib.request import urlretrieve as compat_urlretrieve
  50. except ImportError: # Python 2
  51. from urllib import urlretrieve as compat_urlretrieve
  52. try:
  53. from subprocess import DEVNULL
  54. compat_subprocess_get_DEVNULL = lambda: DEVNULL
  55. except ImportError:
  56. compat_subprocess_get_DEVNULL = lambda: open(os.path.devnull, 'w')
  57. try:
  58. import http.server as compat_http_server
  59. except ImportError:
  60. import BaseHTTPServer as compat_http_server
  61. from pprint import (pprint, pformat)
  62. def dprint(fmt):
  63. sys.stderr.write(pformat(fmt) + "\n")
  64. try:
  65. from urllib.parse import unquote as compat_urllib_parse_unquote
  66. except ImportError:
  67. def compat_urllib_parse_unquote_to_bytes(string):
  68. """unquote_to_bytes('abc%20def') -> b'abc def'."""
  69. # Note: strings are encoded as UTF-8. This is only an issue if it contains
  70. # unescaped non-ASCII characters, which URIs should not.
  71. if not string:
  72. # Is it a string-like object?
  73. string.split
  74. return b''
  75. if isinstance(string, str):
  76. string = string.encode('utf-8')
  77. # string = encode('utf-8')
  78. # python3 -> 2: must implicitly convert to bits
  79. bits = bytes(string).split(b'%')
  80. if len(bits) == 1:
  81. return string
  82. res = [bits[0]]
  83. append = res.append
  84. for item in bits[1:]:
  85. try:
  86. append(item[:2].decode('hex'))
  87. append(item[2:])
  88. except:
  89. append(b'%')
  90. append(item)
  91. return b''.join(res)
  92. compat_urllib_parse_asciire = re.compile('([\x00-\x7f]+)')
  93. def new_compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
  94. """Replace %xx escapes by their single-character equivalent. The optional
  95. encoding and errors parameters specify how to decode percent-encoded
  96. sequences into Unicode characters, as accepted by the bytes.decode()
  97. method.
  98. By default, percent-encoded sequences are decoded with UTF-8, and invalid
  99. sequences are replaced by a placeholder character.
  100. unquote('abc%20def') -> 'abc def'.
  101. """
  102. if '%' not in string:
  103. string.split
  104. return string
  105. if encoding is None:
  106. encoding = 'utf-8'
  107. if errors is None:
  108. errors = 'replace'
  109. bits = compat_urllib_parse_asciire.split(string)
  110. res = [bits[0]]
  111. append = res.append
  112. for i in range(1, len(bits), 2):
  113. foo = compat_urllib_parse_unquote_to_bytes(bits[i])
  114. foo = foo.decode(encoding, errors)
  115. append(foo)
  116. if bits[i + 1]:
  117. bar = bits[i + 1]
  118. if not isinstance(bar, unicode):
  119. bar = bar.decode('utf-8')
  120. append(bar)
  121. return ''.join(res)
  122. def old_compat_urllib_parse_unquote(string, encoding='utf-8', errors='replace'):
  123. if string == '':
  124. return string
  125. res = string.split('%')
  126. if len(res) == 1:
  127. return string
  128. if encoding is None:
  129. encoding = 'utf-8'
  130. if errors is None:
  131. errors = 'replace'
  132. # pct_sequence: contiguous sequence of percent-encoded bytes, decoded
  133. pct_sequence = b''
  134. string = res[0]
  135. for item in res[1:]:
  136. try:
  137. if not item:
  138. raise ValueError
  139. if not re.match('[0-9a-fA-F][0-9a-fA-F]',item[:2]):
  140. raise ValueError
  141. pct_sequence += item[:2].decode('hex')
  142. rest = item[2:]
  143. if not rest:
  144. # This segment was just a single percent-encoded character.
  145. # May be part of a sequence of code units, so delay decoding.
  146. # (Stored in pct_sequence).
  147. continue
  148. except ValueError:
  149. rest = '%' + item
  150. # Encountered non-percent-encoded characters. Flush the current
  151. # pct_sequence.
  152. string += pct_sequence.decode(encoding, errors) + rest
  153. pct_sequence = b''
  154. if pct_sequence:
  155. # Flush the final pct_sequence
  156. string += pct_sequence.decode(encoding, errors)
  157. return string
# Export the "new" fallback implementation under the public name; the "old"
# one stays defined but is not referenced elsewhere in the code shown here.
compat_urllib_parse_unquote = new_compat_urllib_parse_unquote
  159. try:
  160. compat_str = unicode # Python 2
  161. except NameError:
  162. compat_str = str
  163. try:
  164. compat_basestring = basestring # Python 2
  165. except NameError:
  166. compat_basestring = str
  167. try:
  168. compat_chr = unichr # Python 2
  169. except NameError:
  170. compat_chr = chr
  171. try:
  172. from xml.etree.ElementTree import ParseError as compat_xml_parse_error
  173. except ImportError: # Python 2.6
  174. from xml.parsers.expat import ExpatError as compat_xml_parse_error
  175. try:
  176. from urllib.parse import parse_qs as compat_parse_qs
  177. except ImportError: # Python 2
  178. # HACK: The following is the correct parse_qs implementation from cpython 3's stdlib.
  179. # Python 2's version is apparently totally broken
  180. def _parse_qsl(qs, keep_blank_values=False, strict_parsing=False,
  181. encoding='utf-8', errors='replace'):
  182. qs, _coerce_result = qs, compat_str
  183. pairs = [s2 for s1 in qs.split('&') for s2 in s1.split(';')]
  184. r = []
  185. for name_value in pairs:
  186. if not name_value and not strict_parsing:
  187. continue
  188. nv = name_value.split('=', 1)
  189. if len(nv) != 2:
  190. if strict_parsing:
  191. raise ValueError("bad query field: %r" % (name_value,))
  192. # Handle case of a control-name with no equal sign
  193. if keep_blank_values:
  194. nv.append('')
  195. else:
  196. continue
  197. if len(nv[1]) or keep_blank_values:
  198. name = nv[0].replace('+', ' ')
  199. name = compat_urllib_parse_unquote(
  200. name, encoding=encoding, errors=errors)
  201. name = _coerce_result(name)
  202. value = nv[1].replace('+', ' ')
  203. value = compat_urllib_parse_unquote(
  204. value, encoding=encoding, errors=errors)
  205. value = _coerce_result(value)
  206. r.append((name, value))
  207. return r
  208. def compat_parse_qs(qs, keep_blank_values=False, strict_parsing=False,
  209. encoding='utf-8', errors='replace'):
  210. parsed_result = {}
  211. pairs = _parse_qsl(qs, keep_blank_values, strict_parsing,
  212. encoding=encoding, errors=errors)
  213. for name, value in pairs:
  214. if name in parsed_result:
  215. parsed_result[name].append(value)
  216. else:
  217. parsed_result[name] = [value]
  218. return parsed_result
  219. try:
  220. from shlex import quote as shlex_quote
  221. except ImportError: # Python < 3.3
  222. def shlex_quote(s):
  223. if re.match(r'^[-_\w./]+$', s):
  224. return s
  225. else:
  226. return "'" + s.replace("'", "'\"'\"'") + "'"
  227. def compat_ord(c):
  228. if type(c) is int:
  229. return c
  230. else:
  231. return ord(c)
  232. if sys.version_info >= (3, 0):
  233. compat_getenv = os.getenv
  234. compat_expanduser = os.path.expanduser
  235. else:
  236. # Environment variables should be decoded with filesystem encoding.
  237. # Otherwise it will fail if any non-ASCII characters present (see #3854 #3217 #2918)
  238. def compat_getenv(key, default=None):
  239. from .utils import get_filesystem_encoding
  240. env = os.getenv(key, default)
  241. if env:
  242. env = env.decode(get_filesystem_encoding())
  243. return env
  244. # HACK: The default implementations of os.path.expanduser from cpython do not decode
  245. # environment variables with filesystem encoding. We will work around this by
  246. # providing adjusted implementations.
  247. # The following are os.path.expanduser implementations from cpython 2.7.8 stdlib
  248. # for different platforms with correct environment variables decoding.
  249. if os.name == 'posix':
  250. def compat_expanduser(path):
  251. """Expand ~ and ~user constructions. If user or $HOME is unknown,
  252. do nothing."""
  253. if not path.startswith('~'):
  254. return path
  255. i = path.find('/', 1)
  256. if i < 0:
  257. i = len(path)
  258. if i == 1:
  259. if 'HOME' not in os.environ:
  260. import pwd
  261. userhome = pwd.getpwuid(os.getuid()).pw_dir
  262. else:
  263. userhome = compat_getenv('HOME')
  264. else:
  265. import pwd
  266. try:
  267. pwent = pwd.getpwnam(path[1:i])
  268. except KeyError:
  269. return path
  270. userhome = pwent.pw_dir
  271. userhome = userhome.rstrip('/')
  272. return (userhome + path[i:]) or '/'
  273. elif os.name == 'nt' or os.name == 'ce':
  274. def compat_expanduser(path):
  275. """Expand ~ and ~user constructs.
  276. If user or $HOME is unknown, do nothing."""
  277. if path[:1] != '~':
  278. return path
  279. i, n = 1, len(path)
  280. while i < n and path[i] not in '/\\':
  281. i = i + 1
  282. if 'HOME' in os.environ:
  283. userhome = compat_getenv('HOME')
  284. elif 'USERPROFILE' in os.environ:
  285. userhome = compat_getenv('USERPROFILE')
  286. elif 'HOMEPATH' not in os.environ:
  287. return path
  288. else:
  289. try:
  290. drive = compat_getenv('HOMEDRIVE')
  291. except KeyError:
  292. drive = ''
  293. userhome = os.path.join(drive, compat_getenv('HOMEPATH'))
  294. if i != 1: # ~user
  295. userhome = os.path.join(os.path.dirname(userhome), path[1:i])
  296. return userhome + path[i:]
  297. else:
  298. compat_expanduser = os.path.expanduser
  299. if sys.version_info < (3, 0):
  300. def compat_print(s):
  301. from .utils import preferredencoding
  302. print(s.encode(preferredencoding(), 'xmlcharrefreplace'))
  303. else:
  304. def compat_print(s):
  305. assert isinstance(s, compat_str)
  306. print(s)
  307. try:
  308. subprocess_check_output = subprocess.check_output
  309. except AttributeError:
  310. def subprocess_check_output(*args, **kwargs):
  311. assert 'input' not in kwargs
  312. p = subprocess.Popen(*args, stdout=subprocess.PIPE, **kwargs)
  313. output, _ = p.communicate()
  314. ret = p.poll()
  315. if ret:
  316. raise subprocess.CalledProcessError(ret, p.args, output=output)
  317. return output
  318. if sys.version_info < (3, 0) and sys.platform == 'win32':
  319. def compat_getpass(prompt, *args, **kwargs):
  320. if isinstance(prompt, compat_str):
  321. from .utils import preferredencoding
  322. prompt = prompt.encode(preferredencoding())
  323. return getpass.getpass(prompt, *args, **kwargs)
  324. else:
  325. compat_getpass = getpass.getpass
  326. # Old 2.6 and 2.7 releases require kwargs to be bytes
  327. try:
  328. def _testfunc(x):
  329. pass
  330. _testfunc(**{'x': 0})
  331. except TypeError:
  332. def compat_kwargs(kwargs):
  333. return dict((bytes(k), v) for k, v in kwargs.items())
  334. else:
  335. compat_kwargs = lambda kwargs: kwargs
  336. if sys.version_info < (2, 7):
  337. def compat_socket_create_connection(address, timeout, source_address=None):
  338. host, port = address
  339. err = None
  340. for res in socket.getaddrinfo(host, port, 0, socket.SOCK_STREAM):
  341. af, socktype, proto, canonname, sa = res
  342. sock = None
  343. try:
  344. sock = socket.socket(af, socktype, proto)
  345. sock.settimeout(timeout)
  346. if source_address:
  347. sock.bind(source_address)
  348. sock.connect(sa)
  349. return sock
  350. except socket.error as _:
  351. err = _
  352. if sock is not None:
  353. sock.close()
  354. if err is not None:
  355. raise err
  356. else:
  357. raise socket.error("getaddrinfo returns an empty list")
  358. else:
  359. compat_socket_create_connection = socket.create_connection
  360. # Fix https://github.com/rg3/youtube-dl/issues/4223
  361. # See http://bugs.python.org/issue9161 for what is broken
  362. def workaround_optparse_bug9161():
  363. op = optparse.OptionParser()
  364. og = optparse.OptionGroup(op, 'foo')
  365. try:
  366. og.add_option('-t')
  367. except TypeError:
  368. real_add_option = optparse.OptionGroup.add_option
  369. def _compat_add_option(self, *args, **kwargs):
  370. enc = lambda v: (
  371. v.encode('ascii', 'replace') if isinstance(v, compat_str)
  372. else v)
  373. bargs = [enc(a) for a in args]
  374. bkwargs = dict(
  375. (k, enc(v)) for k, v in kwargs.items())
  376. return real_add_option(self, *bargs, **bkwargs)
  377. optparse.OptionGroup.add_option = _compat_add_option
  378. if hasattr(shutil, 'get_terminal_size'): # Python >= 3.3
  379. compat_get_terminal_size = shutil.get_terminal_size
  380. else:
  381. _terminal_size = collections.namedtuple('terminal_size', ['columns', 'lines'])
  382. def compat_get_terminal_size():
  383. columns = compat_getenv('COLUMNS', None)
  384. if columns:
  385. columns = int(columns)
  386. else:
  387. columns = None
  388. lines = compat_getenv('LINES', None)
  389. if lines:
  390. lines = int(lines)
  391. else:
  392. lines = None
  393. try:
  394. sp = subprocess.Popen(
  395. ['stty', 'size'],
  396. stdout=subprocess.PIPE, stderr=subprocess.PIPE)
  397. out, err = sp.communicate()
  398. lines, columns = map(int, out.split())
  399. except Exception:
  400. pass
  401. return _terminal_size(columns, lines)
  402. try:
  403. itertools.count(start=0, step=1)
  404. compat_itertools_count = itertools.count
  405. except TypeError: # Python 2.6
  406. def compat_itertools_count(start=0, step=1):
  407. n = start
  408. while True:
  409. yield n
  410. n += step
# Names made available by a star-import of this module.
__all__ = [
    'compat_HTTPError',
    'compat_basestring',
    'compat_chr',
    'compat_cookiejar',
    'compat_expanduser',
    'compat_get_terminal_size',
    'compat_getenv',
    'compat_getpass',
    'compat_html_entities',
    'compat_http_client',
    'compat_http_server',
    'compat_itertools_count',
    'compat_kwargs',
    'compat_ord',
    'compat_parse_qs',
    'compat_print',
    'compat_socket_create_connection',
    'compat_str',
    'compat_subprocess_get_DEVNULL',
    'compat_urllib_error',
    'compat_urllib_parse',
    'compat_urllib_parse_unquote',
    'compat_urllib_parse_urlparse',
    'compat_urllib_request',
    'compat_urlparse',
    'compat_urlretrieve',
    'compat_xml_parse_error',
    'shlex_quote',
    'subprocess_check_output',
    'workaround_optparse_bug9161',
]