# borgmatic/borg/create.py
  1. import glob
  2. import itertools
  3. import logging
  4. import os
  5. import pathlib
  6. import stat
  7. import tempfile
  8. import borgmatic.logger
  9. from borgmatic.borg import environment, feature, flags, state
  10. from borgmatic.execute import (
  11. DO_NOT_CAPTURE,
  12. execute_command,
  13. execute_command_and_capture_output,
  14. execute_command_with_processes,
  15. )
  16. logger = logging.getLogger(__name__)
  17. def expand_directory(directory):
  18. '''
  19. Given a directory path, expand any tilde (representing a user's home directory) and any globs
  20. therein. Return a list of one or more resulting paths.
  21. '''
  22. expanded_directory = os.path.expanduser(directory)
  23. return glob.glob(expanded_directory) or [expanded_directory]
  24. def expand_directories(directories):
  25. '''
  26. Given a sequence of directory paths, expand tildes and globs in each one. Return all the
  27. resulting directories as a single flattened tuple.
  28. '''
  29. if directories is None:
  30. return ()
  31. return tuple(
  32. itertools.chain.from_iterable(expand_directory(directory) for directory in directories)
  33. )
  34. def expand_home_directories(directories):
  35. '''
  36. Given a sequence of directory paths, expand tildes in each one. Do not perform any globbing.
  37. Return the results as a tuple.
  38. '''
  39. if directories is None:
  40. return ()
  41. return tuple(os.path.expanduser(directory) for directory in directories)
  42. def map_directories_to_devices(directories):
  43. '''
  44. Given a sequence of directories, return a map from directory to an identifier for the device on
  45. which that directory resides or None if the path doesn't exist.
  46. This is handy for determining whether two different directories are on the same filesystem (have
  47. the same device identifier).
  48. '''
  49. return {
  50. directory: os.stat(directory).st_dev if os.path.exists(directory) else None
  51. for directory in directories
  52. }
  53. def deduplicate_directories(directory_devices, additional_directory_devices):
  54. '''
  55. Given a map from directory to the identifier for the device on which that directory resides,
  56. return the directories as a sorted tuple with all duplicate child directories removed. For
  57. instance, if paths is ('/foo', '/foo/bar'), return just: ('/foo',)
  58. The one exception to this rule is if two paths are on different filesystems (devices). In that
  59. case, they won't get de-duplicated in case they both need to be passed to Borg (e.g. the
  60. location.one_file_system option is true).
  61. The idea is that if Borg is given a parent directory, then it doesn't also need to be given
  62. child directories, because it will naturally spider the contents of the parent directory. And
  63. there are cases where Borg coming across the same file twice will result in duplicate reads and
  64. even hangs, e.g. when a database hook is using a named pipe for streaming database dumps to
  65. Borg.
  66. If any additional directory devices are given, also deduplicate against them, but don't include
  67. them in the returned directories.
  68. '''
  69. deduplicated = set()
  70. directories = sorted(directory_devices.keys())
  71. additional_directories = sorted(additional_directory_devices.keys())
  72. all_devices = {**directory_devices, **additional_directory_devices}
  73. for directory in directories:
  74. deduplicated.add(directory)
  75. parents = pathlib.PurePath(directory).parents
  76. # If another directory in the given list (or the additional list) is a parent of current
  77. # directory (even n levels up) and both are on the same filesystem, then the current
  78. # directory is a duplicate.
  79. for other_directory in directories + additional_directories:
  80. for parent in parents:
  81. if (
  82. pathlib.PurePath(other_directory) == parent
  83. and all_devices[directory] is not None
  84. and all_devices[other_directory] == all_devices[directory]
  85. ):
  86. if directory in deduplicated:
  87. deduplicated.remove(directory)
  88. break
  89. return tuple(sorted(deduplicated))
  90. def write_pattern_file(patterns=None, sources=None, pattern_file=None):
  91. '''
  92. Given a sequence of patterns and an optional sequence of source directories, write them to a
  93. named temporary file (with the source directories as additional roots) and return the file.
  94. If an optional open pattern file is given, overwrite it instead of making a new temporary file.
  95. Return None if no patterns are provided.
  96. '''
  97. if not patterns and not sources:
  98. return None
  99. if pattern_file is None:
  100. pattern_file = tempfile.NamedTemporaryFile('w')
  101. else:
  102. pattern_file.seek(0)
  103. pattern_file.write(
  104. '\n'.join(tuple(patterns or ()) + tuple(f'R {source}' for source in (sources or [])))
  105. )
  106. pattern_file.flush()
  107. return pattern_file
  108. def ensure_files_readable(*filename_lists):
  109. '''
  110. Given a sequence of filename sequences, ensure that each filename is openable. This prevents
  111. unreadable files from being passed to Borg, which in certain situations only warns instead of
  112. erroring.
  113. '''
  114. for file_object in itertools.chain.from_iterable(
  115. filename_list for filename_list in filename_lists if filename_list
  116. ):
  117. open(file_object).close()
  118. def make_pattern_flags(location_config, pattern_filename=None):
  119. '''
  120. Given a location config dict with a potential patterns_from option, and a filename containing
  121. any additional patterns, return the corresponding Borg flags for those files as a tuple.
  122. '''
  123. pattern_filenames = tuple(location_config.get('patterns_from') or ()) + (
  124. (pattern_filename,) if pattern_filename else ()
  125. )
  126. return tuple(
  127. itertools.chain.from_iterable(
  128. ('--patterns-from', pattern_filename) for pattern_filename in pattern_filenames
  129. )
  130. )
  131. def make_exclude_flags(location_config, exclude_filename=None):
  132. '''
  133. Given a location config dict with various exclude options, and a filename containing any exclude
  134. patterns, return the corresponding Borg flags as a tuple.
  135. '''
  136. exclude_filenames = tuple(location_config.get('exclude_from') or ()) + (
  137. (exclude_filename,) if exclude_filename else ()
  138. )
  139. exclude_from_flags = tuple(
  140. itertools.chain.from_iterable(
  141. ('--exclude-from', exclude_filename) for exclude_filename in exclude_filenames
  142. )
  143. )
  144. caches_flag = ('--exclude-caches',) if location_config.get('exclude_caches') else ()
  145. if_present_flags = tuple(
  146. itertools.chain.from_iterable(
  147. ('--exclude-if-present', if_present)
  148. for if_present in location_config.get('exclude_if_present', ())
  149. )
  150. )
  151. keep_exclude_tags_flags = (
  152. ('--keep-exclude-tags',) if location_config.get('keep_exclude_tags') else ()
  153. )
  154. exclude_nodump_flags = ('--exclude-nodump',) if location_config.get('exclude_nodump') else ()
  155. return (
  156. exclude_from_flags
  157. + caches_flag
  158. + if_present_flags
  159. + keep_exclude_tags_flags
  160. + exclude_nodump_flags
  161. )
  162. def make_list_filter_flags(local_borg_version, dry_run):
  163. '''
  164. Given the local Borg version and whether this is a dry run, return the corresponding flags for
  165. passing to "--list --filter". The general idea is that excludes are shown for a dry run or when
  166. the verbosity is debug.
  167. '''
  168. base_flags = 'AME'
  169. show_excludes = logger.isEnabledFor(logging.DEBUG)
  170. if feature.available(feature.Feature.EXCLUDED_FILES_MINUS, local_borg_version):
  171. if show_excludes or dry_run:
  172. return f'{base_flags}+-'
  173. else:
  174. return base_flags
  175. if show_excludes:
  176. return f'{base_flags}x-'
  177. else:
  178. return f'{base_flags}-'
# Default Borg archive name format, used when storage.archive_name_format isn't configured.
# Borg itself interpolates the {hostname} and {now} placeholders at create time.
DEFAULT_ARCHIVE_NAME_FORMAT = '{hostname}-{now:%Y-%m-%dT%H:%M:%S.%f}'  # noqa: FS003
  180. def collect_borgmatic_source_directories(borgmatic_source_directory):
  181. '''
  182. Return a list of borgmatic-specific source directories used for state like database backups.
  183. '''
  184. if not borgmatic_source_directory:
  185. borgmatic_source_directory = state.DEFAULT_BORGMATIC_SOURCE_DIRECTORY
  186. return (
  187. [borgmatic_source_directory]
  188. if os.path.exists(os.path.expanduser(borgmatic_source_directory))
  189. else []
  190. )
  191. ROOT_PATTERN_PREFIX = 'R '
  192. def pattern_root_directories(patterns=None):
  193. '''
  194. Given a sequence of patterns, parse out and return just the root directories.
  195. '''
  196. if not patterns:
  197. return []
  198. return [
  199. pattern.split(ROOT_PATTERN_PREFIX, maxsplit=1)[1]
  200. for pattern in patterns
  201. if pattern.startswith(ROOT_PATTERN_PREFIX)
  202. ]
  203. def special_file(path):
  204. '''
  205. Return whether the given path is a special file (character device, block device, or named pipe
  206. / FIFO).
  207. '''
  208. try:
  209. mode = os.stat(path).st_mode
  210. except (FileNotFoundError, OSError):
  211. return False
  212. return stat.S_ISCHR(mode) or stat.S_ISBLK(mode) or stat.S_ISFIFO(mode)
  213. def any_parent_directories(path, candidate_parents):
  214. '''
  215. Return whether any of the given candidate parent directories are an actual parent of the given
  216. path. This includes grandparents, etc.
  217. '''
  218. for parent in candidate_parents:
  219. if pathlib.PurePosixPath(parent) in pathlib.PurePath(path).parents:
  220. return True
  221. return False
  222. def collect_special_file_paths(
  223. create_command, local_path, working_directory, borg_environment, skip_directories
  224. ):
  225. '''
  226. Given a Borg create command as a tuple, a local Borg path, a working directory, and a dict of
  227. environment variables to pass to Borg, and a sequence of parent directories to skip, collect the
  228. paths for any special files (character devices, block devices, and named pipes / FIFOs) that
  229. Borg would encounter during a create. These are all paths that could cause Borg to hang if its
  230. --read-special flag is used.
  231. '''
  232. paths_output = execute_command_and_capture_output(
  233. create_command + ('--dry-run', '--list'),
  234. capture_stderr=True,
  235. working_directory=working_directory,
  236. extra_environment=borg_environment,
  237. )
  238. paths = tuple(
  239. path_line.split(' ', 1)[1]
  240. for path_line in paths_output.split('\n')
  241. if path_line and path_line.startswith('- ') or path_line.startswith('+ ')
  242. )
  243. return tuple(
  244. path
  245. for path in paths
  246. if special_file(path) and not any_parent_directories(path, skip_directories)
  247. )
  248. def check_all_source_directories_exist(source_directories):
  249. '''
  250. Given a sequence of source directories, check that they all exist. If any do not, raise an
  251. exception.
  252. '''
  253. missing_directories = [
  254. source_directory
  255. for source_directory in source_directories
  256. if not all([os.path.exists(directory) for directory in expand_directory(source_directory)])
  257. ]
  258. if missing_directories:
  259. raise ValueError(f"Source directories do not exist: {', '.join(missing_directories)}")
def create_archive(
    dry_run,
    repository_path,
    location_config,
    storage_config,
    local_borg_version,
    local_path='borg',
    remote_path=None,
    progress=False,
    stats=False,
    json=False,
    list_files=False,
    stream_processes=None,
):
    '''
    Given verbosity/dry-run flags, a local or remote repository path, a location config dict, and a
    storage config dict, create a Borg archive and return Borg's JSON output (if any).

    If a sequence of stream processes is given (instances of subprocess.Popen), then execute the
    create command while also triggering the given processes to produce output.
    '''
    borgmatic.logger.add_custom_log_levels()

    # Expand the borgmatic-specific state directory (database dumps, etc.) so it gets backed up
    # alongside the user's source directories.
    borgmatic_source_directories = expand_directories(
        collect_borgmatic_source_directories(location_config.get('borgmatic_source_directory'))
    )
    if location_config.get('source_directories_must_exist', False):
        check_all_source_directories_exist(location_config.get('source_directories'))
    # Deduplicate child directories against their parents (same device only), also considering
    # any pattern roots — pattern roots influence deduplication but aren't passed as sources.
    sources = deduplicate_directories(
        map_directories_to_devices(
            expand_directories(
                tuple(location_config.get('source_directories', ())) + borgmatic_source_directories
            )
        ),
        additional_directory_devices=map_directories_to_devices(
            expand_directories(pattern_root_directories(location_config.get('patterns')))
        ),
    )

    # Fail early on unreadable pattern/exclude files rather than letting Borg merely warn.
    ensure_files_readable(location_config.get('patterns_from'), location_config.get('exclude_from'))

    # expanduser() raises TypeError on None, which here means "no working directory configured".
    try:
        working_directory = os.path.expanduser(location_config.get('working_directory'))
    except TypeError:
        working_directory = None

    # When patterns are in play, sources get written into the pattern file as roots instead of
    # being passed on the command line (see the final element of create_command below).
    pattern_file = (
        write_pattern_file(location_config.get('patterns'), sources)
        if location_config.get('patterns') or location_config.get('patterns_from')
        else None
    )
    exclude_file = write_pattern_file(
        expand_home_directories(location_config.get('exclude_patterns'))
    )
    checkpoint_interval = storage_config.get('checkpoint_interval', None)
    checkpoint_volume = storage_config.get('checkpoint_volume', None)
    chunker_params = storage_config.get('chunker_params', None)
    compression = storage_config.get('compression', None)
    upload_rate_limit = storage_config.get('upload_rate_limit', None)
    umask = storage_config.get('umask', None)
    lock_wait = storage_config.get('lock_wait', None)
    list_filter_flags = make_list_filter_flags(local_borg_version, dry_run)
    files_cache = location_config.get('files_cache')
    archive_name_format = storage_config.get('archive_name_format', DEFAULT_ARCHIVE_NAME_FORMAT)
    extra_borg_options = storage_config.get('extra_borg_options', {}).get('create', '')

    # Several flags were renamed across Borg versions; pick the spelling the local Borg supports.
    if feature.available(feature.Feature.ATIME, local_borg_version):
        atime_flags = ('--atime',) if location_config.get('atime') is True else ()
    else:
        atime_flags = ('--noatime',) if location_config.get('atime') is False else ()

    if feature.available(feature.Feature.NOFLAGS, local_borg_version):
        noflags_flags = ('--noflags',) if location_config.get('flags') is False else ()
    else:
        noflags_flags = ('--nobsdflags',) if location_config.get('flags') is False else ()

    if feature.available(feature.Feature.NUMERIC_IDS, local_borg_version):
        numeric_ids_flags = ('--numeric-ids',) if location_config.get('numeric_ids') else ()
    else:
        numeric_ids_flags = ('--numeric-owner',) if location_config.get('numeric_ids') else ()

    if feature.available(feature.Feature.UPLOAD_RATELIMIT, local_borg_version):
        upload_ratelimit_flags = (
            ('--upload-ratelimit', str(upload_rate_limit)) if upload_rate_limit else ()
        )
    else:
        upload_ratelimit_flags = (
            ('--remote-ratelimit', str(upload_rate_limit)) if upload_rate_limit else ()
        )

    # Database hooks stream dumps through named pipes, which requires --read-special; a
    # configured "read_special: false" is therefore overridden (with a warning) below.
    if stream_processes and location_config.get('read_special') is False:
        logger.warning(
            f'{repository_path}: Ignoring configured "read_special" value of false, as true is needed for database hooks.'
        )

    create_command = (
        tuple(local_path.split(' '))
        + ('create',)
        + make_pattern_flags(location_config, pattern_file.name if pattern_file else None)
        + make_exclude_flags(location_config, exclude_file.name if exclude_file else None)
        + (('--checkpoint-interval', str(checkpoint_interval)) if checkpoint_interval else ())
        + (('--checkpoint-volume', str(checkpoint_volume)) if checkpoint_volume else ())
        + (('--chunker-params', chunker_params) if chunker_params else ())
        + (('--compression', compression) if compression else ())
        + upload_ratelimit_flags
        + (
            ('--one-file-system',)
            if location_config.get('one_file_system') or stream_processes
            else ()
        )
        + numeric_ids_flags
        + atime_flags
        + (('--noctime',) if location_config.get('ctime') is False else ())
        + (('--nobirthtime',) if location_config.get('birthtime') is False else ())
        + (('--read-special',) if location_config.get('read_special') or stream_processes else ())
        + noflags_flags
        + (('--files-cache', files_cache) if files_cache else ())
        + (('--remote-path', remote_path) if remote_path else ())
        + (('--umask', str(umask)) if umask else ())
        + (('--lock-wait', str(lock_wait)) if lock_wait else ())
        + (
            ('--list', '--filter', list_filter_flags)
            if list_files and not json and not progress
            else ()
        )
        + (('--dry-run',) if dry_run else ())
        + (tuple(extra_borg_options.split(' ')) if extra_borg_options else ())
        + flags.make_repository_archive_flags(
            repository_path, archive_name_format, local_borg_version
        )
        # When a pattern file exists, the sources are its "R" roots, not command-line arguments.
        + (sources if not pattern_file else ())
    )

    if json:
        output_log_level = None
    elif list_files or (stats and not dry_run):
        # ANSWER is a borgmatic custom log level (registered by add_custom_log_levels above).
        output_log_level = logging.ANSWER
    else:
        output_log_level = logging.INFO

    # The progress output isn't compatible with captured and logged output, as progress messes with
    # the terminal directly.
    output_file = DO_NOT_CAPTURE if progress else None

    borg_environment = environment.make_environment(storage_config)

    # If database hooks are enabled (as indicated by streaming processes), exclude files that might
    # cause Borg to hang. But skip this if the user has explicitly set the "read_special" to True.
    if stream_processes and not location_config.get('read_special'):
        logger.debug(f'{repository_path}: Collecting special file paths')
        special_file_paths = collect_special_file_paths(
            create_command,
            local_path,
            working_directory,
            borg_environment,
            skip_directories=borgmatic_source_directories,
        )

        if special_file_paths:
            logger.warning(
                f'{repository_path}: Excluding special files to prevent Borg from hanging: {", ".join(special_file_paths)}'
            )
            # Rewrite the exclude file in place with the special files appended, then add the
            # matching --exclude-from flag(s) to the command.
            exclude_file = write_pattern_file(
                expand_home_directories(
                    tuple(location_config.get('exclude_patterns') or ()) + special_file_paths
                ),
                pattern_file=exclude_file,
            )
            create_command += make_exclude_flags(location_config, exclude_file.name)

    create_command += (
        (('--info',) if logger.getEffectiveLevel() == logging.INFO and not json else ())
        + (('--stats',) if stats and not json and not dry_run else ())
        + (('--debug', '--show-rc') if logger.isEnabledFor(logging.DEBUG) and not json else ())
        + (('--progress',) if progress else ())
        + (('--json',) if json else ())
    )

    if stream_processes:
        return execute_command_with_processes(
            create_command,
            stream_processes,
            output_log_level,
            output_file,
            borg_local_path=local_path,
            working_directory=working_directory,
            extra_environment=borg_environment,
        )
    elif output_log_level is None:
        # JSON mode: capture and return Borg's output for the caller to parse.
        return execute_command_and_capture_output(
            create_command,
            working_directory=working_directory,
            extra_environment=borg_environment,
        )
    else:
        # Output goes to the logger; nothing to return (implicitly returns None).
        execute_command(
            create_command,
            output_log_level,
            output_file,
            borg_local_path=local_path,
            working_directory=working_directory,
            extra_environment=borg_environment,
        )