external.py 16 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433
  1. from __future__ import unicode_literals
  2. import os.path
  3. import re
  4. import subprocess
  5. import sys
  6. import time
  7. from .common import FileDownloader
  8. from ..compat import (
  9. compat_setenv,
  10. compat_str,
  11. )
  12. from ..postprocessor.ffmpeg import FFmpegPostProcessor, EXT_TO_OUT_FORMATS
  13. from ..utils import (
  14. cli_option,
  15. cli_valueless_option,
  16. cli_bool_option,
  17. cli_configuration_args,
  18. encodeFilename,
  19. encodeArgument,
  20. handle_youtubedl_headers,
  21. check_executable,
  22. is_outdated_version,
  23. process_communicate_or_kill,
  24. )
  25. class ExternalFD(FileDownloader):
  26. def real_download(self, filename, info_dict):
  27. self.report_destination(filename)
  28. tmpfilename = self.temp_name(filename)
  29. try:
  30. started = time.time()
  31. retval = self._call_downloader(tmpfilename, info_dict)
  32. except KeyboardInterrupt:
  33. if not info_dict.get('is_live'):
  34. raise
  35. # Live stream downloading cancellation should be considered as
  36. # correct and expected termination thus all postprocessing
  37. # should take place
  38. retval = 0
  39. self.to_screen('[%s] Interrupted by user' % self.get_basename())
  40. if retval == 0:
  41. status = {
  42. 'filename': filename,
  43. 'status': 'finished',
  44. 'elapsed': time.time() - started,
  45. }
  46. if filename != '-':
  47. fsize = os.path.getsize(encodeFilename(tmpfilename))
  48. self.to_screen('\r[%s] Downloaded %s bytes' % (self.get_basename(), fsize))
  49. self.try_rename(tmpfilename, filename)
  50. status.update({
  51. 'downloaded_bytes': fsize,
  52. 'total_bytes': fsize,
  53. })
  54. self._hook_progress(status)
  55. return True
  56. else:
  57. self.to_stderr('\n')
  58. self.report_error('%s exited with code %d' % (
  59. self.get_basename(), retval))
  60. return False
  61. @classmethod
  62. def get_basename(cls):
  63. return cls.__name__[:-2].lower()
  64. @property
  65. def exe(self):
  66. return self.params.get('external_downloader')
  67. @classmethod
  68. def available(cls):
  69. return check_executable(cls.get_basename(), [cls.AVAILABLE_OPT])
  70. @classmethod
  71. def supports(cls, info_dict):
  72. return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps')
  73. @classmethod
  74. def can_download(cls, info_dict):
  75. return cls.available() and cls.supports(info_dict)
  76. def _option(self, command_option, param):
  77. return cli_option(self.params, command_option, param)
  78. def _bool_option(self, command_option, param, true_value='true', false_value='false', separator=None):
  79. return cli_bool_option(self.params, command_option, param, true_value, false_value, separator)
  80. def _valueless_option(self, command_option, param, expected_value=True):
  81. return cli_valueless_option(self.params, command_option, param, expected_value)
  82. def _configuration_args(self, default=[]):
  83. return cli_configuration_args(self.params, 'external_downloader_args', default)
  84. def _call_downloader(self, tmpfilename, info_dict):
  85. """ Either overwrite this or implement _make_cmd """
  86. cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
  87. self._debug_cmd(cmd)
  88. p = subprocess.Popen(
  89. cmd, stderr=subprocess.PIPE)
  90. _, stderr = process_communicate_or_kill(p)
  91. if p.returncode != 0:
  92. self.to_stderr(stderr.decode('utf-8', 'replace'))
  93. return p.returncode
  94. class CurlFD(ExternalFD):
  95. AVAILABLE_OPT = '-V'
  96. def _make_cmd(self, tmpfilename, info_dict):
  97. cmd = [self.exe, '--location', '-o', tmpfilename]
  98. for key, val in info_dict['http_headers'].items():
  99. cmd += ['--header', '%s: %s' % (key, val)]
  100. cmd += self._bool_option('--continue-at', 'continuedl', '-', '0')
  101. cmd += self._valueless_option('--silent', 'noprogress')
  102. cmd += self._valueless_option('--verbose', 'verbose')
  103. cmd += self._option('--limit-rate', 'ratelimit')
  104. retry = self._option('--retry', 'retries')
  105. if len(retry) == 2:
  106. if retry[1] in ('inf', 'infinite'):
  107. retry[1] = '2147483647'
  108. cmd += retry
  109. cmd += self._option('--max-filesize', 'max_filesize')
  110. cmd += self._option('--interface', 'source_address')
  111. cmd += self._option('--proxy', 'proxy')
  112. cmd += self._valueless_option('--insecure', 'nocheckcertificate')
  113. cmd += self._configuration_args()
  114. cmd += ['--', info_dict['url']]
  115. return cmd
  116. def _call_downloader(self, tmpfilename, info_dict):
  117. cmd = [encodeArgument(a) for a in self._make_cmd(tmpfilename, info_dict)]
  118. self._debug_cmd(cmd)
  119. # curl writes the progress to stderr so don't capture it.
  120. p = subprocess.Popen(cmd)
  121. process_communicate_or_kill(p)
  122. return p.returncode
  123. class AxelFD(ExternalFD):
  124. AVAILABLE_OPT = '-V'
  125. def _make_cmd(self, tmpfilename, info_dict):
  126. cmd = [self.exe, '-o', tmpfilename]
  127. for key, val in info_dict['http_headers'].items():
  128. cmd += ['-H', '%s: %s' % (key, val)]
  129. cmd += self._configuration_args()
  130. cmd += ['--', info_dict['url']]
  131. return cmd
  132. class WgetFD(ExternalFD):
  133. AVAILABLE_OPT = '--version'
  134. def _make_cmd(self, tmpfilename, info_dict):
  135. cmd = [self.exe, '-O', tmpfilename, '-nv', '--no-cookies']
  136. for key, val in info_dict['http_headers'].items():
  137. cmd += ['--header', '%s: %s' % (key, val)]
  138. cmd += self._option('--limit-rate', 'ratelimit')
  139. retry = self._option('--tries', 'retries')
  140. if len(retry) == 2:
  141. if retry[1] in ('inf', 'infinite'):
  142. retry[1] = '0'
  143. cmd += retry
  144. cmd += self._option('--bind-address', 'source_address')
  145. cmd += self._option('--proxy', 'proxy')
  146. cmd += self._valueless_option('--no-check-certificate', 'nocheckcertificate')
  147. cmd += self._configuration_args()
  148. cmd += ['--', info_dict['url']]
  149. return cmd
  150. class Aria2cFD(ExternalFD):
  151. AVAILABLE_OPT = '-v'
  152. def _make_cmd(self, tmpfilename, info_dict):
  153. cmd = [self.exe, '-c']
  154. cmd += self._configuration_args([
  155. '--min-split-size', '1M', '--max-connection-per-server', '4'])
  156. dn = os.path.dirname(tmpfilename)
  157. if dn:
  158. cmd += ['--dir', dn]
  159. cmd += ['--out', os.path.basename(tmpfilename)]
  160. for key, val in info_dict['http_headers'].items():
  161. cmd += ['--header', '%s: %s' % (key, val)]
  162. cmd += self._option('--interface', 'source_address')
  163. cmd += self._option('--all-proxy', 'proxy')
  164. cmd += self._bool_option('--check-certificate', 'nocheckcertificate', 'false', 'true', '=')
  165. cmd += self._bool_option('--remote-time', 'updatetime', 'true', 'false', '=')
  166. cmd += ['--', info_dict['url']]
  167. return cmd
  168. class Aria2pFD(ExternalFD):
  169. ''' Aria2pFD class
  170. This class support to use aria2p as downloader.
  171. (Aria2p, a command-line tool and Python library to interact with an aria2c daemon process
  172. through JSON-RPC.)
  173. It can help you to get download progress more easily.
  174. To use aria2p as downloader, you need to install aria2c and aria2p, aria2p can download with pip.
  175. Then run aria2c in the background and enable with the --enable-rpc option.
  176. '''
  177. try:
  178. import aria2p
  179. __avail = True
  180. except ImportError:
  181. __avail = False
  182. @classmethod
  183. def available(cls):
  184. return cls.__avail
  185. def _call_downloader(self, tmpfilename, info_dict):
  186. aria2 = self.aria2p.API(
  187. self.aria2p.Client(
  188. host='http://localhost',
  189. port=6800,
  190. secret=''
  191. )
  192. )
  193. options = {
  194. 'min-split-size': '1M',
  195. 'max-connection-per-server': 4,
  196. 'auto-file-renaming': 'false',
  197. }
  198. options['dir'] = os.path.dirname(tmpfilename) or os.path.abspath('.')
  199. options['out'] = os.path.basename(tmpfilename)
  200. options['header'] = []
  201. for key, val in info_dict['http_headers'].items():
  202. options['header'].append('{0}: {1}'.format(key, val))
  203. download = aria2.add_uris([info_dict['url']], options)
  204. status = {
  205. 'status': 'downloading',
  206. 'tmpfilename': tmpfilename,
  207. }
  208. started = time.time()
  209. while download.status in ['active', 'waiting']:
  210. download = aria2.get_download(download.gid)
  211. status.update({
  212. 'downloaded_bytes': download.completed_length,
  213. 'total_bytes': download.total_length,
  214. 'elapsed': time.time() - started,
  215. 'eta': download.eta.total_seconds(),
  216. 'speed': download.download_speed,
  217. })
  218. self._hook_progress(status)
  219. time.sleep(.5)
  220. return download.status != 'complete'
  221. class HttpieFD(ExternalFD):
  222. @classmethod
  223. def available(cls):
  224. return check_executable('http', ['--version'])
  225. def _make_cmd(self, tmpfilename, info_dict):
  226. cmd = ['http', '--download', '--output', tmpfilename, info_dict['url']]
  227. for key, val in info_dict['http_headers'].items():
  228. cmd += ['%s:%s' % (key, val)]
  229. return cmd
  230. class FFmpegFD(ExternalFD):
  231. @classmethod
  232. def supports(cls, info_dict):
  233. return info_dict['protocol'] in ('http', 'https', 'ftp', 'ftps', 'm3u8', 'rtsp', 'rtmp', 'mms')
  234. @classmethod
  235. def available(cls):
  236. return FFmpegPostProcessor().available
  237. def _call_downloader(self, tmpfilename, info_dict):
  238. url = info_dict['url']
  239. ffpp = FFmpegPostProcessor(downloader=self)
  240. if not ffpp.available:
  241. self.report_error('m3u8 download detected but ffmpeg or avconv could not be found. Please install one.')
  242. return False
  243. ffpp.check_version()
  244. args = [ffpp.executable, '-y']
  245. for log_level in ('quiet', 'verbose'):
  246. if self.params.get(log_level, False):
  247. args += ['-loglevel', log_level]
  248. break
  249. seekable = info_dict.get('_seekable')
  250. if seekable is not None:
  251. # setting -seekable prevents ffmpeg from guessing if the server
  252. # supports seeking(by adding the header `Range: bytes=0-`), which
  253. # can cause problems in some cases
  254. # https://github.com/ytdl-org/youtube-dl/issues/11800#issuecomment-275037127
  255. # http://trac.ffmpeg.org/ticket/6125#comment:10
  256. args += ['-seekable', '1' if seekable else '0']
  257. args += self._configuration_args()
  258. # start_time = info_dict.get('start_time') or 0
  259. # if start_time:
  260. # args += ['-ss', compat_str(start_time)]
  261. # end_time = info_dict.get('end_time')
  262. # if end_time:
  263. # args += ['-t', compat_str(end_time - start_time)]
  264. if info_dict['http_headers'] and re.match(r'^https?://', url):
  265. # Trailing \r\n after each HTTP header is important to prevent warning from ffmpeg/avconv:
  266. # [http @ 00000000003d2fa0] No trailing CRLF found in HTTP header.
  267. headers = handle_youtubedl_headers(info_dict['http_headers'])
  268. args += [
  269. '-headers',
  270. ''.join('%s: %s\r\n' % (key, val) for key, val in headers.items())]
  271. env = None
  272. proxy = self.params.get('proxy')
  273. if proxy:
  274. if not re.match(r'^[\da-zA-Z]+://', proxy):
  275. proxy = 'http://%s' % proxy
  276. if proxy.startswith('socks'):
  277. self.report_warning(
  278. '%s does not support SOCKS proxies. Downloading is likely to fail. '
  279. 'Consider adding --hls-prefer-native to your command.' % self.get_basename())
  280. # Since December 2015 ffmpeg supports -http_proxy option (see
  281. # http://git.videolan.org/?p=ffmpeg.git;a=commit;h=b4eb1f29ebddd60c41a2eb39f5af701e38e0d3fd)
  282. # We could switch to the following code if we are able to detect version properly
  283. # args += ['-http_proxy', proxy]
  284. env = os.environ.copy()
  285. compat_setenv('HTTP_PROXY', proxy, env=env)
  286. compat_setenv('http_proxy', proxy, env=env)
  287. protocol = info_dict.get('protocol')
  288. if protocol == 'rtmp':
  289. player_url = info_dict.get('player_url')
  290. page_url = info_dict.get('page_url')
  291. app = info_dict.get('app')
  292. play_path = info_dict.get('play_path')
  293. tc_url = info_dict.get('tc_url')
  294. flash_version = info_dict.get('flash_version')
  295. live = info_dict.get('rtmp_live', False)
  296. conn = info_dict.get('rtmp_conn')
  297. if player_url is not None:
  298. args += ['-rtmp_swfverify', player_url]
  299. if page_url is not None:
  300. args += ['-rtmp_pageurl', page_url]
  301. if app is not None:
  302. args += ['-rtmp_app', app]
  303. if play_path is not None:
  304. args += ['-rtmp_playpath', play_path]
  305. if tc_url is not None:
  306. args += ['-rtmp_tcurl', tc_url]
  307. if flash_version is not None:
  308. args += ['-rtmp_flashver', flash_version]
  309. if live:
  310. args += ['-rtmp_live', 'live']
  311. if isinstance(conn, list):
  312. for entry in conn:
  313. args += ['-rtmp_conn', entry]
  314. elif isinstance(conn, compat_str):
  315. args += ['-rtmp_conn', conn]
  316. args += ['-i', url, '-c', 'copy']
  317. if self.params.get('test', False):
  318. args += ['-fs', compat_str(self._TEST_FILE_SIZE)]
  319. if protocol in ('m3u8', 'm3u8_native'):
  320. if self.params.get('hls_use_mpegts', False) or tmpfilename == '-':
  321. args += ['-f', 'mpegts']
  322. else:
  323. args += ['-f', 'mp4']
  324. if (ffpp.basename == 'ffmpeg' and is_outdated_version(ffpp._versions['ffmpeg'], '3.2', False)) and (not info_dict.get('acodec') or info_dict['acodec'].split('.')[0] in ('aac', 'mp4a')):
  325. args += ['-bsf:a', 'aac_adtstoasc']
  326. elif protocol == 'rtmp':
  327. args += ['-f', 'flv']
  328. else:
  329. args += ['-f', EXT_TO_OUT_FORMATS.get(info_dict['ext'], info_dict['ext'])]
  330. args = [encodeArgument(opt) for opt in args]
  331. args.append(encodeFilename(ffpp._ffmpeg_filename_argument(tmpfilename), True))
  332. self._debug_cmd(args)
  333. proc = subprocess.Popen(args, stdin=subprocess.PIPE, env=env)
  334. try:
  335. retval = proc.wait()
  336. except BaseException as e:
  337. # subprocess.run would send the SIGKILL signal to ffmpeg and the
  338. # mp4 file couldn't be played, but if we ask ffmpeg to quit it
  339. # produces a file that is playable (this is mostly useful for live
  340. # streams). Note that Windows is not affected and produces playable
  341. # files (see https://github.com/ytdl-org/youtube-dl/issues/8300).
  342. if isinstance(e, KeyboardInterrupt) and sys.platform != 'win32':
  343. process_communicate_or_kill(proc, b'q')
  344. else:
  345. proc.kill()
  346. proc.wait()
  347. raise
  348. return retval
  349. class AVconvFD(FFmpegFD):
  350. pass
  351. _BY_NAME = dict(
  352. (klass.get_basename(), klass)
  353. for name, klass in globals().items()
  354. if name.endswith('FD') and name != 'ExternalFD'
  355. )
  356. def list_external_downloaders():
  357. return sorted(_BY_NAME.keys())
  358. def get_external_downloader(external_downloader):
  359. """ Given the name of the executable, see whether we support the given
  360. downloader . """
  361. # Drop .exe extension on Windows
  362. bn = os.path.splitext(os.path.basename(external_downloader))[0]
  363. return _BY_NAME[bn]