# create.py — borgmatic "create" action implementation.
  1. import glob
  2. import importlib.metadata
  3. import itertools
  4. import json
  5. import logging
  6. import os
  7. import pathlib
  8. import borgmatic.actions.json
  9. import borgmatic.borg.create
  10. import borgmatic.config.paths
  11. import borgmatic.config.validate
  12. import borgmatic.hooks.command
  13. import borgmatic.hooks.dispatch
  14. import borgmatic.hooks.dump
  15. logger = logging.getLogger(__name__)
  16. def create_borgmatic_manifest(config, config_paths, borgmatic_runtime_directory, dry_run):
  17. '''
  18. Given a configuration dict, a sequence of config file paths, the borgmatic runtime directory,
  19. and whether this is a dry run, create a borgmatic manifest file to store the paths to the
  20. configuration files used to create the archive.
  21. '''
  22. if dry_run:
  23. return
  24. borgmatic_manifest_path = os.path.join(
  25. borgmatic_runtime_directory, 'bootstrap', 'manifest.json'
  26. )
  27. if not os.path.exists(borgmatic_manifest_path):
  28. os.makedirs(os.path.dirname(borgmatic_manifest_path), exist_ok=True)
  29. with open(borgmatic_manifest_path, 'w') as config_list_file:
  30. json.dump(
  31. {
  32. 'borgmatic_version': importlib.metadata.version('borgmatic'),
  33. 'config_paths': config_paths,
  34. },
  35. config_list_file,
  36. )
  37. def expand_directory(directory, working_directory):
  38. '''
  39. Given a directory path, expand any tilde (representing a user's home directory) and any globs
  40. therein. Return a list of one or more resulting paths.
  41. '''
  42. expanded_directory = os.path.join(working_directory or '', os.path.expanduser(directory))
  43. return glob.glob(expanded_directory) or [expanded_directory]
  44. def expand_directories(directories, working_directory=None):
  45. '''
  46. Given a sequence of directory paths and an optional working directory, expand tildes and globs
  47. in each one. Return all the resulting directories as a single flattened tuple.
  48. '''
  49. if directories is None:
  50. return ()
  51. return tuple(
  52. itertools.chain.from_iterable(
  53. expand_directory(directory, working_directory) for directory in directories
  54. )
  55. )
  56. def map_directories_to_devices(directories, working_directory=None):
  57. '''
  58. Given a sequence of directories and an optional working directory, return a map from directory
  59. to an identifier for the device on which that directory resides or None if the path doesn't
  60. exist.
  61. This is handy for determining whether two different directories are on the same filesystem (have
  62. the same device identifier).
  63. '''
  64. return {
  65. directory: os.stat(full_directory).st_dev if os.path.exists(full_directory) else None
  66. for directory in directories
  67. for full_directory in (os.path.join(working_directory or '', directory),)
  68. }
  69. def deduplicate_directories(directory_devices, additional_directory_devices):
  70. '''
  71. Given a map from directory to the identifier for the device on which that directory resides,
  72. return the directories as a sorted sequence with all duplicate child directories removed. For
  73. instance, if paths is ['/foo', '/foo/bar'], return just: ['/foo']
  74. The one exception to this rule is if two paths are on different filesystems (devices). In that
  75. case, they won't get de-duplicated in case they both need to be passed to Borg (e.g. the
  76. location.one_file_system option is true).
  77. The idea is that if Borg is given a parent directory, then it doesn't also need to be given
  78. child directories, because it will naturally spider the contents of the parent directory. And
  79. there are cases where Borg coming across the same file twice will result in duplicate reads and
  80. even hangs, e.g. when a database hook is using a named pipe for streaming database dumps to
  81. Borg.
  82. If any additional directory devices are given, also deduplicate against them, but don't include
  83. them in the returned directories.
  84. '''
  85. deduplicated = set()
  86. directories = sorted(directory_devices.keys())
  87. additional_directories = sorted(additional_directory_devices.keys())
  88. all_devices = {**directory_devices, **additional_directory_devices}
  89. for directory in directories:
  90. deduplicated.add(directory)
  91. parents = pathlib.PurePath(directory).parents
  92. # If another directory in the given list (or the additional list) is a parent of current
  93. # directory (even n levels up) and both are on the same filesystem, then the current
  94. # directory is a duplicate.
  95. for other_directory in directories + additional_directories:
  96. for parent in parents:
  97. if (
  98. pathlib.PurePath(other_directory) == parent
  99. and all_devices[directory] is not None
  100. and all_devices[other_directory] == all_devices[directory]
  101. ):
  102. if directory in deduplicated:
  103. deduplicated.remove(directory)
  104. break
  105. return sorted(deduplicated)
  106. def pattern_root_directories(patterns=None):
  107. '''
  108. Given a sequence of patterns, parse out and return just the root directories.
  109. '''
  110. if not patterns:
  111. return []
  112. return [
  113. pattern.split(ROOT_PATTERN_PREFIX, maxsplit=1)[1]
  114. for pattern in patterns
  115. if pattern.startswith(ROOT_PATTERN_PREFIX)
  116. ]
  117. def process_source_directories(config, config_paths):
  118. '''
  119. Given a configuration dict and a sequence of configuration paths, expand and deduplicate the
  120. source directories from them.
  121. '''
  122. working_directory = borgmatic.config.paths.get_working_directory(config)
  123. return deduplicate_directories(
  124. map_directories_to_devices(
  125. expand_directories(
  126. tuple(config.get('source_directories', ()))
  127. + tuple(config_paths if config.get('store_config_files', True) else ()),
  128. working_directory=working_directory,
  129. )
  130. ),
  131. additional_directory_devices=map_directories_to_devices(
  132. expand_directories(
  133. pattern_root_directories(config.get('patterns')),
  134. working_directory=working_directory,
  135. )
  136. ),
  137. )
def run_create(
    config_filename,
    repository,
    config,
    config_paths,
    hook_context,
    local_borg_version,
    create_arguments,
    global_arguments,
    dry_run_label,
    local_path,
    remote_path,
):
    '''
    Run the "create" action for the given repository.

    If create_arguments.json is True, yield the JSON output from creating the archive.
    '''
    # Skip this repository entirely when the user asked for a specific, different repository.
    if create_arguments.repository and not borgmatic.config.validate.repositories_match(
        repository, create_arguments.repository
    ):
        return

    borgmatic.hooks.command.execute_hook(
        config.get('before_backup'),
        config.get('umask'),
        config_filename,
        'pre-backup',
        global_arguments.dry_run,
        **hook_context,
    )
    log_prefix = repository.get('label', repository['path'])
    logger.info(f'{log_prefix}: Creating archive{dry_run_label}')

    # The runtime directory (for data source dumps, the bootstrap manifest, etc.) lives only for
    # the duration of this with-block.
    with borgmatic.config.paths.Runtime_directory(
        config, log_prefix
    ) as borgmatic_runtime_directory:
        # Clear out any stale data source dumps left over from a previous (e.g. interrupted) run
        # before dumping anew.
        borgmatic.hooks.dispatch.call_hooks_even_if_unconfigured(
            'remove_data_source_dumps',
            config,
            repository['path'],
            borgmatic.hooks.dump.DATA_SOURCE_HOOK_NAMES,
            borgmatic_runtime_directory,
            global_arguments.dry_run,
        )
        source_directories = process_source_directories(config, config_paths)
        active_dumps = borgmatic.hooks.dispatch.call_hooks(
            'dump_data_sources',
            config,
            repository['path'],
            borgmatic.hooks.dump.DATA_SOURCE_HOOK_NAMES,
            borgmatic_runtime_directory,
            source_directories,
            global_arguments.dry_run,
        )
        # Flatten the per-hook lists of dump processes into one list for Borg to consume.
        stream_processes = [process for processes in active_dumps.values() for process in processes]

        if config.get('store_config_files', True):
            create_borgmatic_manifest(
                config,
                config_paths,
                borgmatic_runtime_directory,
                global_arguments.dry_run,
            )
            # On a real run, also back up the freshly written manifest ("bootstrap" directory).
            if not global_arguments.dry_run:
                source_directories.append(os.path.join(borgmatic_runtime_directory, 'bootstrap'))

        json_output = borgmatic.borg.create.create_archive(
            global_arguments.dry_run,
            repository['path'],
            config,
            config_paths,
            source_directories,
            local_borg_version,
            global_arguments,
            borgmatic_runtime_directory,
            local_path=local_path,
            remote_path=remote_path,
            progress=create_arguments.progress,
            stats=create_arguments.stats,
            json=create_arguments.json,
            list_files=create_arguments.list_files,
            stream_processes=stream_processes,
        )

        if json_output:
            yield borgmatic.actions.json.parse_json(json_output, repository.get('label'))

        # Clean up this run's data source dumps now that they're safely in the archive.
        # NOTE(review): this call passes config_filename where the pre-dump cleanup above passes
        # repository['path'] — looks inconsistent; confirm which is intended as the log prefix.
        borgmatic.hooks.dispatch.call_hooks_even_if_unconfigured(
            'remove_data_source_dumps',
            config,
            config_filename,
            borgmatic.hooks.dump.DATA_SOURCE_HOOK_NAMES,
            borgmatic_runtime_directory,
            global_arguments.dry_run,
        )

    borgmatic.hooks.command.execute_hook(
        config.get('after_backup'),
        config.get('umask'),
        config_filename,
        'post-backup',
        global_arguments.dry_run,
        **hook_context,
    )