common.py 11 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316
  1. import os
  2. import re
  3. import sys
  4. import time
  5. from ..utils import (
  6. encodeFilename,
  7. timeconvert,
  8. format_bytes,
  9. )
  10. class FileDownloader(object):
  11. """File Downloader class.
  12. File downloader objects are the ones responsible of downloading the
  13. actual video file and writing it to disk.
  14. File downloaders accept a lot of parameters. In order not to saturate
  15. the object constructor with arguments, it receives a dictionary of
  16. options instead.
  17. Available options:
  18. verbose: Print additional info to stdout.
  19. quiet: Do not print messages to stdout.
  20. ratelimit: Download speed limit, in bytes/sec.
  21. retries: Number of times to retry for HTTP error 5xx
  22. buffersize: Size of download buffer in bytes.
  23. noresizebuffer: Do not automatically resize the download buffer.
  24. continuedl: Try to continue downloads if possible.
  25. noprogress: Do not print the progress bar.
  26. logtostderr: Log messages to stderr instead of stdout.
  27. consoletitle: Display progress in console window's titlebar.
  28. nopart: Do not use temporary .part files.
  29. updatetime: Use the Last-modified header to set output file timestamps.
  30. test: Download only first bytes to test the downloader.
  31. min_filesize: Skip files smaller than this size
  32. max_filesize: Skip files larger than this size
  33. Subclasses of this one must re-define the real_download method.
  34. """
  35. params = None
  36. def __init__(self, ydl, params):
  37. """Create a FileDownloader object with the given options."""
  38. self.ydl = ydl
  39. self._progress_hooks = []
  40. self.params = params
  41. @staticmethod
  42. def format_seconds(seconds):
  43. (mins, secs) = divmod(seconds, 60)
  44. (hours, mins) = divmod(mins, 60)
  45. if hours > 99:
  46. return '--:--:--'
  47. if hours == 0:
  48. return '%02d:%02d' % (mins, secs)
  49. else:
  50. return '%02d:%02d:%02d' % (hours, mins, secs)
  51. @staticmethod
  52. def calc_percent(byte_counter, data_len):
  53. if data_len is None:
  54. return None
  55. return float(byte_counter) / float(data_len) * 100.0
  56. @staticmethod
  57. def format_percent(percent):
  58. if percent is None:
  59. return '---.-%'
  60. return '%6s' % ('%3.1f%%' % percent)
  61. @staticmethod
  62. def calc_eta(start, now, total, current):
  63. if total is None:
  64. return None
  65. dif = now - start
  66. if current == 0 or dif < 0.001: # One millisecond
  67. return None
  68. rate = float(current) / dif
  69. return int((float(total) - float(current)) / rate)
  70. @staticmethod
  71. def format_eta(eta):
  72. if eta is None:
  73. return '--:--'
  74. return FileDownloader.format_seconds(eta)
  75. @staticmethod
  76. def calc_speed(start, now, bytes):
  77. dif = now - start
  78. if bytes == 0 or dif < 0.001: # One millisecond
  79. return None
  80. return float(bytes) / dif
  81. @staticmethod
  82. def format_speed(speed):
  83. if speed is None:
  84. return '%10s' % '---b/s'
  85. return '%10s' % ('%s/s' % format_bytes(speed))
  86. @staticmethod
  87. def best_block_size(elapsed_time, bytes):
  88. new_min = max(bytes / 2.0, 1.0)
  89. new_max = min(max(bytes * 2.0, 1.0), 4194304) # Do not surpass 4 MB
  90. if elapsed_time < 0.001:
  91. return int(new_max)
  92. rate = bytes / elapsed_time
  93. if rate > new_max:
  94. return int(new_max)
  95. if rate < new_min:
  96. return int(new_min)
  97. return int(rate)
  98. @staticmethod
  99. def parse_bytes(bytestr):
  100. """Parse a string indicating a byte quantity into an integer."""
  101. matchobj = re.match(r'(?i)^(\d+(?:\.\d+)?)([kMGTPEZY]?)$', bytestr)
  102. if matchobj is None:
  103. return None
  104. number = float(matchobj.group(1))
  105. multiplier = 1024.0 ** 'bkmgtpezy'.index(matchobj.group(2).lower())
  106. return int(round(number * multiplier))
  107. def to_screen(self, *args, **kargs):
  108. self.ydl.to_screen(*args, **kargs)
  109. def to_stderr(self, message):
  110. self.ydl.to_screen(message)
  111. def to_console_title(self, message):
  112. self.ydl.to_console_title(message)
  113. def trouble(self, *args, **kargs):
  114. self.ydl.trouble(*args, **kargs)
  115. def report_warning(self, *args, **kargs):
  116. self.ydl.report_warning(*args, **kargs)
  117. def report_error(self, *args, **kargs):
  118. self.ydl.report_error(*args, **kargs)
  119. def slow_down(self, start_time, byte_counter):
  120. """Sleep if the download speed is over the rate limit."""
  121. rate_limit = self.params.get('ratelimit', None)
  122. if rate_limit is None or byte_counter == 0:
  123. return
  124. now = time.time()
  125. elapsed = now - start_time
  126. if elapsed <= 0.0:
  127. return
  128. speed = float(byte_counter) / elapsed
  129. if speed > rate_limit:
  130. time.sleep((byte_counter - rate_limit * (now - start_time)) / rate_limit)
  131. def temp_name(self, filename):
  132. """Returns a temporary filename for the given filename."""
  133. if self.params.get('nopart', False) or filename == u'-' or \
  134. (os.path.exists(encodeFilename(filename)) and not os.path.isfile(encodeFilename(filename))):
  135. return filename
  136. return filename + u'.part'
  137. def undo_temp_name(self, filename):
  138. if filename.endswith(u'.part'):
  139. return filename[:-len(u'.part')]
  140. return filename
  141. def try_rename(self, old_filename, new_filename):
  142. try:
  143. if old_filename == new_filename:
  144. return
  145. os.rename(encodeFilename(old_filename), encodeFilename(new_filename))
  146. except (IOError, OSError) as err:
  147. self.report_error(u'unable to rename file: %s' % str(err))
  148. def try_utime(self, filename, last_modified_hdr):
  149. """Try to set the last-modified time of the given file."""
  150. if last_modified_hdr is None:
  151. return
  152. if not os.path.isfile(encodeFilename(filename)):
  153. return
  154. timestr = last_modified_hdr
  155. if timestr is None:
  156. return
  157. filetime = timeconvert(timestr)
  158. if filetime is None:
  159. return filetime
  160. # Ignore obviously invalid dates
  161. if filetime == 0:
  162. return
  163. try:
  164. os.utime(filename, (time.time(), filetime))
  165. except:
  166. pass
  167. return filetime
  168. def report_destination(self, filename):
  169. """Report destination filename."""
  170. self.to_screen(u'[download] Destination: ' + filename)
  171. def _report_progress_status(self, msg, is_last_line=False):
  172. fullmsg = u'[download] ' + msg
  173. if self.params.get('progress_with_newline', False):
  174. self.to_screen(fullmsg)
  175. else:
  176. if os.name == 'nt':
  177. prev_len = getattr(self, '_report_progress_prev_line_length',
  178. 0)
  179. if prev_len > len(fullmsg):
  180. fullmsg += u' ' * (prev_len - len(fullmsg))
  181. self._report_progress_prev_line_length = len(fullmsg)
  182. clear_line = u'\r'
  183. else:
  184. clear_line = (u'\r\x1b[K' if sys.stderr.isatty() else u'\r')
  185. self.to_screen(clear_line + fullmsg, skip_eol=not is_last_line)
  186. self.to_console_title(u'youtube-dl ' + msg)
  187. def report_progress(self, percent, data_len_str, speed, eta):
  188. """Report download progress."""
  189. if self.params.get('noprogress', False):
  190. return
  191. if eta is not None:
  192. eta_str = self.format_eta(eta)
  193. else:
  194. eta_str = 'Unknown ETA'
  195. if percent is not None:
  196. percent_str = self.format_percent(percent)
  197. else:
  198. percent_str = 'Unknown %'
  199. speed_str = self.format_speed(speed)
  200. msg = (u'%s of %s at %s ETA %s' %
  201. (percent_str, data_len_str, speed_str, eta_str))
  202. self._report_progress_status(msg)
  203. def report_progress_live_stream(self, downloaded_data_len, speed, elapsed):
  204. if self.params.get('noprogress', False):
  205. return
  206. downloaded_str = format_bytes(downloaded_data_len)
  207. speed_str = self.format_speed(speed)
  208. elapsed_str = FileDownloader.format_seconds(elapsed)
  209. msg = u'%s at %s (%s)' % (downloaded_str, speed_str, elapsed_str)
  210. self._report_progress_status(msg)
  211. def report_finish(self, data_len_str, tot_time):
  212. """Report download finished."""
  213. if self.params.get('noprogress', False):
  214. self.to_screen(u'[download] Download completed')
  215. else:
  216. self._report_progress_status(
  217. (u'100%% of %s in %s' %
  218. (data_len_str, self.format_seconds(tot_time))),
  219. is_last_line=True)
  220. def report_resuming_byte(self, resume_len):
  221. """Report attempt to resume at given byte."""
  222. self.to_screen(u'[download] Resuming download at byte %s' % resume_len)
  223. def report_retry(self, count, retries):
  224. """Report retry in case of HTTP error 5xx"""
  225. self.to_screen(u'[download] Got server HTTP error. Retrying (attempt %d of %d)...' % (count, retries))
  226. def report_file_already_downloaded(self, file_name):
  227. """Report file has already been fully downloaded."""
  228. try:
  229. self.to_screen(u'[download] %s has already been downloaded' % file_name)
  230. except UnicodeEncodeError:
  231. self.to_screen(u'[download] The file has already been downloaded')
  232. def report_unable_to_resume(self):
  233. """Report it was impossible to resume download."""
  234. self.to_screen(u'[download] Unable to resume')
  235. def download(self, filename, info_dict):
  236. """Download to a filename using the info from info_dict
  237. Return True on success and False otherwise
  238. """
  239. # Check file already present
  240. if self.params.get('continuedl', False) and os.path.isfile(encodeFilename(filename)) and not self.params.get('nopart', False):
  241. self.report_file_already_downloaded(filename)
  242. self._hook_progress({
  243. 'filename': filename,
  244. 'status': 'finished',
  245. 'total_bytes': os.path.getsize(encodeFilename(filename)),
  246. })
  247. return True
  248. return self.real_download(filename, info_dict)
  249. def real_download(self, filename, info_dict):
  250. """Real download process. Redefine in subclasses."""
  251. raise NotImplementedError(u'This method must be implemented by sublcasses')
  252. def _hook_progress(self, status):
  253. for ph in self._progress_hooks:
  254. ph(status)
  255. def add_progress_hook(self, ph):
  256. """ ph gets called on download progress, with a dictionary with the entries
  257. * filename: The final filename
  258. * status: One of "downloading" and "finished"
  259. It can also have some of the following entries:
  260. * downloaded_bytes: Bytes on disks
  261. * total_bytes: Total bytes, None if unknown
  262. * tmpfilename: The filename we're currently writing to
  263. * eta: The estimated time in seconds, None if unknown
  264. * speed: The download speed in bytes/second, None if unknown
  265. Hooks are guaranteed to be called at least once (with status "finished")
  266. if the download is successful.
  267. """
  268. self._progress_hooks.append(ph)