create.py

import logging
import os
import pathlib
import shlex
import stat
import textwrap

import borgmatic.borg.pattern
import borgmatic.config.paths
import borgmatic.logger
from borgmatic.borg import environment, feature, flags
from borgmatic.execute import (
    DO_NOT_CAPTURE,
    execute_command,
    execute_command_and_capture_output,
    execute_command_with_processes,
)

logger = logging.getLogger(__name__)


def special_file(path, working_directory=None):
    '''
    Return whether the given path is a special file (character device, block device, or named pipe
    / FIFO). If a working directory is given, take it into account when making the full path to
    check.
    '''
    try:
        mode = os.stat(os.path.join(working_directory or '', path)).st_mode
    except (FileNotFoundError, OSError):
        return False

    return stat.S_ISCHR(mode) or stat.S_ISBLK(mode) or stat.S_ISFIFO(mode)


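# Illustrative note, not part of the original module: special_file() above reports True only for
# character devices, block devices, and named pipes, and returns False for anything it cannot
# stat. For example, on a typical Linux system one would expect:
#
#     special_file('/dev/null')      # True: character device
#     special_file('/etc/hostname')  # False: regular file
#     special_file('missing', '/nonexistent/directory')  # False: the stat() error is swallowed

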
def any_parent_directories(path, candidate_parents):
    '''
    Return whether any of the given candidate parent directories are an actual parent of the given
    path. This includes grandparents, etc.
    '''
    for parent in candidate_parents:
        if pathlib.PurePosixPath(parent) in pathlib.PurePath(path).parents:
            return True

    return False


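# Illustrative note, not part of the original module: any_parent_directories() above is a purely
# lexical check via pathlib; it never touches the filesystem or resolves symlinks. For example:
#
#     any_parent_directories('/etc/ssl/certs/ca.pem', ('/etc/ssl', '/var'))  # True
#     any_parent_directories('/etc/ssl', ('/etc/ssl',))  # False: a path is not its own parent

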
def check_planned_backup_paths(
    dry_run,
    create_command,
    config,
    local_path,
    working_directory,
    borgmatic_runtime_directory,
):
    '''
    Given a dry-run flag, a Borg create command as a tuple, a configuration dict, a local Borg
    path, a working directory, and the borgmatic runtime directory, collect the paths Borg plans
    to back up during a create, so that the caller can check them for special files (character
    devices, block devices, and named pipes / FIFOs) that could cause Borg to hang if its
    --read-special flag is used.

    Omit paths within the given borgmatic runtime directory from the result, as borgmatic creates
    its own special files there for database dumps and we don't want those excluded from the
    backup.

    Additionally, if none of the files Borg plans to back up are within the runtime directory,
    that means the user must have excluded the runtime directory (e.g. via "exclude_patterns" or
    similar). Therefore, raise, because this means Borg won't be able to consume any database
    dumps and therefore borgmatic will hang when it tries to do so.
    '''
    # Omit "--exclude-nodump" from the Borg dry run command, because that flag causes Borg to open
    # files including any named pipe we've created. And omit "--filter" because that can break the
    # paths output parsing below such that path lines no longer start with the expected "- ".
    paths_output = execute_command_and_capture_output(
        (
            *flags.omit_flag_and_value(
                flags.omit_flag(create_command, '--exclude-nodump'), '--filter'
            ),
            '--dry-run',
            '--list',
        ),
        capture_stderr=True,
        working_directory=working_directory,
        environment=environment.make_environment(config),
        borg_local_path=local_path,
        borg_exit_codes=config.get('borg_exit_codes'),
    )

    # These are all the individual files that Borg is planning to back up as determined by the
    # Borg create dry run above.
    paths = tuple(
        path_line.split(' ', 1)[1]
        for path_line in paths_output.split('\n')
        if path_line and path_line.startswith(('- ', '+ '))
    )

    # These are the subset of those files that are within the borgmatic runtime directory.
    paths_within_runtime_directory = {}

    if os.path.exists(borgmatic_runtime_directory):
        paths_within_runtime_directory = {
            path for path in paths if any_parent_directories(path, (borgmatic_runtime_directory,))
        }

    # If no paths to back up are within the runtime directory, it must've been excluded.
    if not paths_within_runtime_directory and not dry_run:
        raise ValueError(
            f'The runtime directory {os.path.normpath(borgmatic_runtime_directory)} overlaps with the configured excludes or patterns with excludes. Please ensure the runtime directory is not excluded.',
        )

    return tuple(
        path
        for path in paths
        if path not in paths_within_runtime_directory
    )


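# Illustrative note, not part of the original module: the parsing in check_planned_backup_paths()
# above assumes "borg create --dry-run --list" output in which each planned path is prefixed by a
# one-character status marker and a space, for example lines like:
#
#     - /etc/hostname
#     + /home/user/notes.txt
#
# Only lines starting with "- " or "+ " are kept, and everything after the first space is treated
# as the path.

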
MAX_SPECIAL_FILE_PATHS_LENGTH = 1000


def make_base_create_command(
    dry_run,
    repository_path,
    config,
    patterns,
    local_borg_version,
    global_arguments,
    borgmatic_runtime_directory,
    archive_suffix='',
    local_path='borg',
    remote_path=None,
    json=False,
    comment=None,
    stream_processes=None,
):
    '''
    Given a dry-run flag, a local or remote repository path, a configuration dict, a sequence of
    patterns as borgmatic.borg.pattern.Pattern instances, the local Borg version, global arguments
    as an argparse.Namespace instance, the borgmatic runtime directory, a string suffix to add to
    the archive name, the local Borg path, the remote Borg path, whether to output JSON, comment
    text to add to the created archive, and a sequence of processes streaming data to Borg, return
    a tuple of (base Borg create command flags, Borg create command positional arguments, open
    pattern file handle).
    '''
    if config.get('source_directories_must_exist', False):
        borgmatic.borg.pattern.check_all_root_patterns_exist(patterns)

    patterns_file = borgmatic.borg.pattern.write_patterns_file(
        patterns,
        borgmatic_runtime_directory,
    )

    checkpoint_interval = config.get('checkpoint_interval', None)
    checkpoint_volume = config.get('checkpoint_volume', None)
    chunker_params = config.get('chunker_params', None)
    compression = config.get('compression', None)
    upload_rate_limit = config.get('upload_rate_limit', None)
    upload_buffer_size = config.get('upload_buffer_size', None)
    umask = config.get('umask', None)
    lock_wait = config.get('lock_wait', None)
    list_filter_flags = flags.make_list_filter_flags(local_borg_version, dry_run)
    files_cache = config.get('files_cache')
    archive_name_format = (
        config.get('archive_name_format', flags.get_default_archive_name_format(local_borg_version))
        + archive_suffix
    )
    extra_borg_options = config.get('extra_borg_options', {}).get('create', '')

    if feature.available(feature.Feature.ATIME, local_borg_version):
        atime_flags = ('--atime',) if config.get('atime') is True else ()
    else:
        atime_flags = ('--noatime',) if config.get('atime') is False else ()

    if feature.available(feature.Feature.NOFLAGS, local_borg_version):
        noflags_flags = ('--noflags',) if config.get('flags') is False else ()
    else:
        noflags_flags = ('--nobsdflags',) if config.get('flags') is False else ()

    if feature.available(feature.Feature.NUMERIC_IDS, local_borg_version):
        numeric_ids_flags = ('--numeric-ids',) if config.get('numeric_ids') else ()
    else:
        numeric_ids_flags = ('--numeric-owner',) if config.get('numeric_ids') else ()

    if feature.available(feature.Feature.UPLOAD_RATELIMIT, local_borg_version):
        upload_ratelimit_flags = (
            ('--upload-ratelimit', str(upload_rate_limit)) if upload_rate_limit else ()
        )
    else:
        upload_ratelimit_flags = (
            ('--remote-ratelimit', str(upload_rate_limit)) if upload_rate_limit else ()
        )

    create_flags = (
        tuple(local_path.split(' '))
        + ('create',)
        + (('--patterns-from', patterns_file.name) if patterns_file else ())
        + flags.make_exclude_flags(config)
        + (('--comment', comment) if comment else ())
        + (('--checkpoint-interval', str(checkpoint_interval)) if checkpoint_interval else ())
        + (('--checkpoint-volume', str(checkpoint_volume)) if checkpoint_volume else ())
        + (('--chunker-params', chunker_params) if chunker_params else ())
        + (('--compression', compression) if compression else ())
        + upload_ratelimit_flags
        + (('--upload-buffer', str(upload_buffer_size)) if upload_buffer_size else ())
        + (('--one-file-system',) if config.get('one_file_system') else ())
        + numeric_ids_flags
        + atime_flags
        + (('--noctime',) if config.get('ctime') is False else ())
        + (('--nobirthtime',) if config.get('birthtime') is False else ())
        + (('--read-special',) if config.get('read_special') or stream_processes else ())
        + noflags_flags
        + (('--files-cache', files_cache) if files_cache else ())
        + (('--remote-path', remote_path) if remote_path else ())
        + (('--umask', str(umask)) if umask else ())
        + (('--log-json',) if config.get('log_json') else ())
        + (('--lock-wait', str(lock_wait)) if lock_wait else ())
        + (
            ('--list', '--filter', list_filter_flags)
            if config.get('list_details') and not json and not config.get('progress')
            else ()
        )
        + (('--dry-run',) if dry_run else ())
        + (tuple(shlex.split(extra_borg_options)) if extra_borg_options else ())
    )

    create_positional_arguments = flags.make_repository_archive_flags(
        repository_path,
        archive_name_format,
        local_borg_version,
    )

    working_directory = borgmatic.config.paths.get_working_directory(config)

    logger.debug('Checking file paths Borg plans to backup')
    planned_backup_paths = check_planned_backup_paths(
        dry_run,
        create_flags + create_positional_arguments,
        config,
        local_path,
        working_directory,
        borgmatic_runtime_directory=borgmatic_runtime_directory,
    )

    # If database hooks are enabled (as indicated by streaming processes), exclude files that might
    # cause Borg to hang. But skip this if the user has explicitly set "read_special" to true.
    if stream_processes and not config.get('read_special'):
        logger.warning(
            'Ignoring configured "read_special" value of false, as true is needed for database hooks.',
        )
        special_file_paths = tuple(
            path
            for path in planned_backup_paths
            if special_file(path, working_directory)
        )

        if special_file_paths:
            truncated_special_file_paths = textwrap.shorten(
                ', '.join(special_file_paths),
                width=MAX_SPECIAL_FILE_PATHS_LENGTH,
                placeholder=' ...',
            )
            logger.warning(
                f'Excluding special files to prevent Borg from hanging: {truncated_special_file_paths}',
            )
            patterns_file = borgmatic.borg.pattern.write_patterns_file(
                tuple(
                    borgmatic.borg.pattern.Pattern(
                        special_file_path,
                        borgmatic.borg.pattern.Pattern_type.NO_RECURSE,
                        borgmatic.borg.pattern.Pattern_style.FNMATCH,
                        source=borgmatic.borg.pattern.Pattern_source.INTERNAL,
                    )
                    for special_file_path in special_file_paths
                ),
                borgmatic_runtime_directory,
                patterns_file=patterns_file,
            )

            if '--patterns-from' not in create_flags:
                create_flags += ('--patterns-from', patterns_file.name)

    return (create_flags, create_positional_arguments, patterns_file)


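# Illustrative note, not part of the original module: the create_flags tuple returned by
# make_base_create_command() above begins with the Borg invocation itself, for example something
# like ('borg', 'create', '--patterns-from', '<temporary patterns file>', ...) when local_path is
# 'borg' and a patterns file was written, while create_positional_arguments carries the repository
# and archive specifier. Callers such as create_archive() below run the final command as
# create_flags + create_positional_arguments.

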
def create_archive(
    dry_run,
    repository_path,
    config,
    patterns,
    local_borg_version,
    global_arguments,
    borgmatic_runtime_directory,
    archive_suffix='',
    local_path='borg',
    remote_path=None,
    json=False,
    comment=None,
    stream_processes=None,
):
    '''
    Given a dry-run flag, a local or remote repository path, a configuration dict, a sequence of
    patterns as borgmatic.borg.pattern.Pattern instances, the local Borg version, global arguments
    as an argparse.Namespace instance, the borgmatic runtime directory, a string suffix to add to
    the archive name, the local Borg path, the remote Borg path, whether to output JSON, comment
    text to add to the created archive, and a sequence of processes streaming data to Borg, create
    a Borg archive and return Borg's JSON output (if any).

    If a sequence of stream processes is given (instances of subprocess.Popen), then execute the
    create command while also triggering the given processes to produce output.
    '''
    borgmatic.logger.add_custom_log_levels()

    working_directory = borgmatic.config.paths.get_working_directory(config)

    (create_flags, create_positional_arguments, _) = make_base_create_command(
        dry_run,
        repository_path,
        config,
        patterns,
        local_borg_version,
        global_arguments,
        borgmatic_runtime_directory,
        archive_suffix,
        local_path,
        remote_path,
        json,
        comment,
        stream_processes,
    )

    if json:
        output_log_level = None
    elif config.get('list_details') or (config.get('statistics') and not dry_run):
        output_log_level = logging.ANSWER
    else:
        output_log_level = logging.INFO

    # The progress output isn't compatible with captured and logged output, as progress messes with
    # the terminal directly.
    output_file = DO_NOT_CAPTURE if config.get('progress') else None

    create_flags += (
        (('--info',) if logger.getEffectiveLevel() == logging.INFO and not json else ())
        + (('--stats',) if config.get('statistics') and not json and not dry_run else ())
        + (('--debug', '--show-rc') if logger.isEnabledFor(logging.DEBUG) and not json else ())
        + (('--progress',) if config.get('progress') else ())
        + (('--json',) if json else ())
    )
    borg_exit_codes = config.get('borg_exit_codes')

    if stream_processes:
        return execute_command_with_processes(
            create_flags + create_positional_arguments,
            stream_processes,
            output_log_level,
            output_file,
            working_directory=working_directory,
            environment=environment.make_environment(config),
            borg_local_path=local_path,
            borg_exit_codes=borg_exit_codes,
        )

    if output_log_level is None:
        return execute_command_and_capture_output(
            create_flags + create_positional_arguments,
            working_directory=working_directory,
            environment=environment.make_environment(config),
            borg_local_path=local_path,
            borg_exit_codes=borg_exit_codes,
        )

    execute_command(
        create_flags + create_positional_arguments,
        output_log_level,
        output_file,
        working_directory=working_directory,
        environment=environment.make_environment(config),
        borg_local_path=local_path,
        borg_exit_codes=borg_exit_codes,
    )

    return None


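# Illustrative sketch, not part of the original module: a hypothetical caller might invoke
# create_archive() roughly like this, where config is a parsed borgmatic configuration dict,
# patterns is a sequence of borgmatic.borg.pattern.Pattern instances, and global_arguments is an
# argparse.Namespace; all values shown are placeholders.
#
#     json_output = create_archive(
#         dry_run=False,
#         repository_path='ssh://user@backup-host/./repo',
#         config=config,
#         patterns=patterns,
#         local_borg_version=local_borg_version,
#         global_arguments=global_arguments,
#         borgmatic_runtime_directory='/run/user/1000/borgmatic',
#         json=True,
#     )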