common.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386
  1. from __future__ import division, unicode_literals
  2. import os
  3. import re
  4. import sys
  5. import time
  6. import random
  7. from ..compat import compat_os_name
  8. from ..utils import (
  9. decodeArgument,
  10. encodeFilename,
  11. error_to_compat_str,
  12. format_bytes,
  13. shell_quote,
  14. timeconvert,
  15. )
  16. class FileDownloader(object):
  17. """File Downloader class.
  18. File downloader objects are the ones responsible of downloading the
  19. actual video file and writing it to disk.
  20. File downloaders accept a lot of parameters. In order not to saturate
  21. the object constructor with arguments, it receives a dictionary of
  22. options instead.
  23. Available options:
  24. verbose: Print additional info to stdout.
  25. quiet: Do not print messages to stdout.
  26. ratelimit: Download speed limit, in bytes/sec.
  27. retries: Number of times to retry for HTTP error 5xx
  28. buffersize: Size of download buffer in bytes.
  29. noresizebuffer: Do not automatically resize the download buffer.
  30. continuedl: Try to continue downloads if possible.
  31. noprogress: Do not print the progress bar.
  32. logtostderr: Log messages to stderr instead of stdout.
  33. consoletitle: Display progress in console window's titlebar.
  34. nopart: Do not use temporary .part files.
  35. updatetime: Use the Last-modified header to set output file timestamps.
  36. test: Download only first bytes to test the downloader.
  37. min_filesize: Skip files smaller than this size
  38. max_filesize: Skip files larger than this size
  39. xattr_set_filesize: Set ytdl.filesize user xattribute with expected size.
  40. (experimental)
  41. external_downloader_args: A list of additional command-line arguments for the
  42. external downloader.
  43. hls_use_mpegts: Use the mpegts container for HLS videos.
  44. Subclasses of this one must re-define the real_download method.
  45. """
  46. _TEST_FILE_SIZE = 10241
  47. params = None
  48. def __init__(self, ydl, params):
  49. """Create a FileDownloader object with the given options."""
  50. self.ydl = ydl
  51. self._progress_hooks = []
  52. self.params = params
  53. self.add_progress_hook(self.report_progress)
  54. @staticmethod
  55. def format_seconds(seconds):
  56. (mins, secs) = divmod(seconds, 60)
  57. (hours, mins) = divmod(mins, 60)
  58. if hours > 99:
  59. return '--:--:--'
  60. if hours == 0:
  61. return '%02d:%02d' % (mins, secs)
  62. else:
  63. return '%02d:%02d:%02d' % (hours, mins, secs)
  64. @staticmethod
  65. def calc_percent(byte_counter, data_len):
  66. if data_len is None:
  67. return None
  68. return float(byte_counter) / float(data_len) * 100.0
  69. @staticmethod
  70. def format_percent(percent):
  71. if percent is None:
  72. return '---.-%'
  73. return '%6s' % ('%3.1f%%' % percent)
  74. @staticmethod
  75. def calc_eta(start, now, total, current):
  76. if total is None:
  77. return None
  78. if now is None:
  79. now = time.time()
  80. dif = now - start
  81. if current == 0 or dif < 0.001: # One millisecond
  82. return None
  83. rate = float(current) / dif
  84. return int((float(total) - float(current)) / rate)
  85. @staticmethod
  86. def format_eta(eta):
  87. if eta is None:
  88. return '--:--'
  89. return FileDownloader.format_seconds(eta)
  90. @staticmethod
  91. def calc_speed(start, now, bytes):
  92. dif = now - start
  93. if bytes == 0 or dif < 0.001: # One millisecond
  94. return None
  95. return float(bytes) / dif
  96. @staticmethod
  97. def format_speed(speed):
  98. if speed is None:
  99. return '%10s' % '---b/s'
  100. return '%10s' % ('%s/s' % format_bytes(speed))
  101. @staticmethod
  102. def format_retries(retries):
  103. return 'inf' if retries == float('inf') else '%.0f' % retries
  104. @staticmethod
  105. def best_block_size(elapsed_time, bytes):
  106. new_min = max(bytes / 2.0, 1.0)
  107. new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
  108. if elapsed_time < 0.001:
  109. return int(new_max)
  110. rate = bytes / elapsed_time
  111. if rate > new_max:
  112. return int(new_max)
  113. if rate < new_min:
  114. return int(new_min)
  115. return int(rate)
  116. @staticmethod
  117. def parse_bytes(bytestr):
  118. """Parse a string indicating a byte quantity into an integer."""
  119. matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
  120. if matchobj is None:
  121. return None
  122. number = float(matchobj.group(1))
  123. multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
  124. return int(round(number * multiplier))
  125. def to_screen(self, *args, **kargs):
  126. self.ydl.to_screen(*args, **kargs)
  127. def to_stderr(self, message):
  128. self.ydl.to_screen(message)
  129. def to_console_title(self, message):
  130. self.ydl.to_console_title(message)
  131. def trouble(self, *args, **kargs):
  132. self.ydl.trouble(*args, **kargs)
  133. def report_warning(self, *args, **kargs):
  134. self.ydl.report_warning(*args, **kargs)
  135. def report_error(self, *args, **kargs):
  136. self.ydl.report_error(*args, **kargs)
  137. def slow_down(self, start_time, now, byte_counter):
  138. """Sleep if the download speed is over the rate limit."""
  139. rate_limit = self.params.get('ratelimit')
  140. if rate_limit is None or byte_counter == 0:
  141. return
  142. if now is None:
  143. now = time.time()
  144. elapsed = now - start_time
  145. if elapsed <= 0.0:
  146. return
  147. speed = float(byte_counter) / elapsed
  148. if speed > rate_limit:
  149. time.sleep(max((byte_counter // rate_limit) - elapsed, 0))
  150. def temp_name(self, filename):
  151. """Returns a temporary filename for the given filename."""
  152. if self.params.get('nopart', False) or filename == '-' or \
  153. (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
  154. return filename
  155. return filename + '.part'
  156. def undo_temp_name(self, filename):
  157. if filename.endswith('.part'):
  158. return filename[:-len('.part')]
  159. return filename
  160. def ytdl_filename(self, filename):
  161. return filename + '.ytdl'
  162. def try_rename(self, old_filename, new_filename):
  163. try:
  164. if old_filename == new_filename:
  165. return
  166. os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
  167. except (IOError, OSError) as err:
  168. self.report_error('unable to rename file: %s' % error_to_compat_str(err))
  169. def try_utime(self, filename, last_modified_hdr):
  170. """Try to set the last-modified time of the given file."""
  171. if last_modified_hdr is None:
  172. return
  173. if not os.path.isfile(encodeFilename(filename)):
  174. return
  175. timestr = last_modified_hdr
  176. if timestr is None:
  177. return
  178. filetime = timeconvert(timestr)
  179. if filetime is None:
  180. return filetime
  181. # Ignore obviously invalid dates
  182. if filetime == 0:
  183. return
  184. try:
  185. os.utime(filename, (time.time(), filetime))
  186. except Exception:
  187. pass
  188. return filetime
  189. def report_destination(self, filename):
  190. """Report destination filename."""
  191. self.to_screen('[download] Destination: ' + filename)
  192. def _report_progress_status(self, msg, is_last_line=False):
  193. fullmsg = '[download] ' + msg
  194. if self.params.get('progress_with_newline', False):
  195. self.to_screen(fullmsg)
  196. else:
  197. if compat_os_name == 'nt':
  198. prev_len = getattr(self, '_report_progress_prev_line_length',
  199. 0)
  200. if prev_len > len(fullmsg):
  201. fullmsg += ' ' * (prev_len - len(fullmsg))
  202. self._report_progress_prev_line_length = len(fullmsg)
  203. clear_line = '\r'
  204. else:
  205. clear_line = ('\r\x1b[K' if sys.stderr.isatty() else '\r')
  206. self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
  207. self.to_console_title('youtube-dl ' + msg)
  208. def report_progress(self, s):
  209. if s['status'] == 'finished':
  210. if self.params.get('noprogress', False):
  211. self.to_screen('[download] Download completed')
  212. else:
  213. s['_total_bytes_str'] = format_bytes(s['total_bytes'])
  214. if s.get('elapsed') is not None:
  215. s['_elapsed_str'] = self.format_seconds(s['elapsed'])
  216. msg_template = '100%% of %(_total_bytes_str)s in %(_elapsed_str)s'
  217. else:
  218. msg_template = '100%% of %(_total_bytes_str)s'
  219. self._report_progress_status(
  220. msg_template % s, is_last_line=True)
  221. if self.params.get('noprogress'):
  222. return
  223. if s['status'] != 'downloading':
  224. return
  225. if s.get('eta') is not None:
  226. s['_eta_str'] = self.format_eta(s['eta'])
  227. else:
  228. s['_eta_str'] = 'Unknown ETA'
  229. if s.get('total_bytes') and s.get('downloaded_bytes') is not None:
  230. s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes'])
  231. elif s.get('total_bytes_estimate') and s.get('downloaded_bytes') is not None:
  232. s['_percent_str'] = self.format_percent(100 * s['downloaded_bytes'] / s['total_bytes_estimate'])
  233. else:
  234. if s.get('downloaded_bytes') == 0:
  235. s['_percent_str'] = self.format_percent(0)
  236. else:
  237. s['_percent_str'] = 'Unknown %'
  238. if s.get('speed') is not None:
  239. s['_speed_str'] = self.format_speed(s['speed'])
  240. else:
  241. s['_speed_str'] = 'Unknown speed'
  242. if s.get('total_bytes') is not None:
  243. s['_total_bytes_str'] = format_bytes(s['total_bytes'])
  244. msg_template = '%(_percent_str)s of %(_total_bytes_str)s at %(_speed_str)s ETA %(_eta_str)s'
  245. elif s.get('total_bytes_estimate') is not None:
  246. s['_total_bytes_estimate_str'] = format_bytes(s['total_bytes_estimate'])
  247. msg_template = '%(_percent_str)s of ~%(_total_bytes_estimate_str)s at %(_speed_str)s ETA %(_eta_str)s'
  248. else:
  249. if s.get('downloaded_bytes') is not None:
  250. s['_downloaded_bytes_str'] = format_bytes(s['downloaded_bytes'])
  251. if s.get('elapsed'):
  252. s['_elapsed_str'] = self.format_seconds(s['elapsed'])
  253. msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s (%(_elapsed_str)s)'
  254. else:
  255. msg_template = '%(_downloaded_bytes_str)s at %(_speed_str)s'
  256. else:
  257. msg_template = '%(_percent_str)s % at %(_speed_str)s ETA %(_eta_str)s'
  258. self._report_progress_status(msg_template % s)
  259. def report_resuming_byte(self, resume_len):
  260. """Report attempt to resume at given byte."""
  261. self.to_screen('[download] Resuming download at byte %s' % resume_len)
  262. def report_retry(self, count, retries):
  263. """Report retry in case of HTTP error 5xx"""
  264. self.to_screen(
  265. '[download] Got server HTTP error. Retrying (attempt %d of %s)...'
  266. % (count, self.format_retries(retries)))
  267. def report_file_already_downloaded(self, file_name):
  268. """Report file has already been fully downloaded."""
  269. try:
  270. self.to_screen('[download] %s has already been downloaded' % file_name)
  271. except UnicodeEncodeError:
  272. self.to_screen('[download] The file has already been downloaded')
  273. def report_unable_to_resume(self):
  274. """Report it was impossible to resume download."""
  275. self.to_screen('[download] Unable to resume')
  276. def download(self, filename, info_dict):
  277. """Download to a filename using the info from info_dict
  278. Return True on success and False otherwise
  279. """
  280. nooverwrites_and_exists = (
  281. self.params.get('nooverwrites', False) and
  282. os.path.exists(encodeFilename(filename))
  283. )
  284. if not hasattr(filename, 'write'):
  285. continuedl_and_exists = (
  286. self.params.get('continuedl', True) and
  287. os.path.isfile(encodeFilename(filename)) and
  288. not self.params.get('nopart', False)
  289. )
  290. # Check file already present
  291. if filename != '-' and (nooverwrites_and_exists or continuedl_and_exists):
  292. self.report_file_already_downloaded(filename)
  293. self._hook_progress({
  294. 'filename': filename,
  295. 'status': 'finished',
  296. 'total_bytes': os.path.getsize(encodeFilename(filename)),
  297. })
  298. return True
  299. min_sleep_interval = self.params.get('sleep_interval')
  300. if min_sleep_interval:
  301. max_sleep_interval = self.params.get('max_sleep_interval', min_sleep_interval)
  302. sleep_interval = random.uniform(min_sleep_interval, max_sleep_interval)
  303. self.to_screen(
  304. '[download] Sleeping %s seconds...' % (
  305. int(sleep_interval) if sleep_interval.is_integer()
  306. else '%.2f' % sleep_interval))
  307. time.sleep(sleep_interval)
  308. return self.real_download(filename, info_dict)
  309. def real_download(self, filename, info_dict):
  310. """Real download process. Redefine in subclasses."""
  311. raise NotImplementedError('This method must be implemented by subclasses')
  312. def _hook_progress(self, status):
  313. for ph in self._progress_hooks:
  314. ph(status)
  315. def add_progress_hook(self, ph):
  316. # See YoutubeDl.py (search for progress_hooks) for a description of
  317. # this interface
  318. self._progress_hooks.append(ph)
  319. def _debug_cmd(self, args, exe=None):
  320. if not self.params.get('verbose', False):
  321. return
  322. str_args = [decodeArgument(a) for a in args]
  323. if exe is None:
  324. exe = os.path.basename(str_args[0])
  325. self.to_screen('[debug] %s command line: %s' % (
  326. exe, shell_quote(str_args)))