create.py 14 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365
import glob
import itertools
import logging
import os
import pathlib
import tempfile

from borgmatic.borg import environment, feature, flags, state
from borgmatic.execute import DO_NOT_CAPTURE, execute_command, execute_command_with_processes

# Module-level logger, named after this module per standard logging convention.
logger = logging.getLogger(__name__)
  10. def expand_directory(directory):
  11. '''
  12. Given a directory path, expand any tilde (representing a user's home directory) and any globs
  13. therein. Return a list of one or more resulting paths.
  14. '''
  15. expanded_directory = os.path.expanduser(directory)
  16. return glob.glob(expanded_directory) or [expanded_directory]
  17. def expand_directories(directories):
  18. '''
  19. Given a sequence of directory paths, expand tildes and globs in each one. Return all the
  20. resulting directories as a single flattened tuple.
  21. '''
  22. if directories is None:
  23. return ()
  24. return tuple(
  25. itertools.chain.from_iterable(expand_directory(directory) for directory in directories)
  26. )
  27. def expand_home_directories(directories):
  28. '''
  29. Given a sequence of directory paths, expand tildes in each one. Do not perform any globbing.
  30. Return the results as a tuple.
  31. '''
  32. if directories is None:
  33. return ()
  34. return tuple(os.path.expanduser(directory) for directory in directories)
  35. def map_directories_to_devices(directories):
  36. '''
  37. Given a sequence of directories, return a map from directory to an identifier for the device on
  38. which that directory resides or None if the path doesn't exist.
  39. This is handy for determining whether two different directories are on the same filesystem (have
  40. the same device identifier).
  41. '''
  42. return {
  43. directory: os.stat(directory).st_dev if os.path.exists(directory) else None
  44. for directory in directories
  45. }
  46. def deduplicate_directories(directory_devices, additional_directory_devices):
  47. '''
  48. Given a map from directory to the identifier for the device on which that directory resides,
  49. return the directories as a sorted tuple with all duplicate child directories removed. For
  50. instance, if paths is ('/foo', '/foo/bar'), return just: ('/foo',)
  51. The one exception to this rule is if two paths are on different filesystems (devices). In that
  52. case, they won't get de-duplicated in case they both need to be passed to Borg (e.g. the
  53. location.one_file_system option is true).
  54. The idea is that if Borg is given a parent directory, then it doesn't also need to be given
  55. child directories, because it will naturally spider the contents of the parent directory. And
  56. there are cases where Borg coming across the same file twice will result in duplicate reads and
  57. even hangs, e.g. when a database hook is using a named pipe for streaming database dumps to
  58. Borg.
  59. If any additional directory devices are given, also deduplicate against them, but don't include
  60. them in the returned directories.
  61. '''
  62. deduplicated = set()
  63. directories = sorted(directory_devices.keys())
  64. additional_directories = sorted(additional_directory_devices.keys())
  65. all_devices = {**directory_devices, **additional_directory_devices}
  66. for directory in directories:
  67. deduplicated.add(directory)
  68. parents = pathlib.PurePath(directory).parents
  69. # If another directory in the given list (or the additional list) is a parent of current
  70. # directory (even n levels up) and both are on the same filesystem, then the current
  71. # directory is a duplicate.
  72. for other_directory in directories + additional_directories:
  73. for parent in parents:
  74. if (
  75. pathlib.PurePath(other_directory) == parent
  76. and all_devices[directory] is not None
  77. and all_devices[other_directory] == all_devices[directory]
  78. ):
  79. if directory in deduplicated:
  80. deduplicated.remove(directory)
  81. break
  82. return tuple(sorted(deduplicated))
  83. def write_pattern_file(patterns=None, sources=None):
  84. '''
  85. Given a sequence of patterns and an optional sequence of source directories, write them to a
  86. named temporary file (with the source directories as additional roots) and return the file.
  87. Return None if no patterns are provided.
  88. '''
  89. if not patterns:
  90. return None
  91. pattern_file = tempfile.NamedTemporaryFile('w')
  92. pattern_file.write(
  93. '\n'.join(tuple(patterns) + tuple(f'R {source}' for source in (sources or [])))
  94. )
  95. pattern_file.flush()
  96. return pattern_file
  97. def ensure_files_readable(*filename_lists):
  98. '''
  99. Given a sequence of filename sequences, ensure that each filename is openable. This prevents
  100. unreadable files from being passed to Borg, which in certain situations only warns instead of
  101. erroring.
  102. '''
  103. for file_object in itertools.chain.from_iterable(
  104. filename_list for filename_list in filename_lists if filename_list
  105. ):
  106. open(file_object).close()
  107. def make_pattern_flags(location_config, pattern_filename=None):
  108. '''
  109. Given a location config dict with a potential patterns_from option, and a filename containing
  110. any additional patterns, return the corresponding Borg flags for those files as a tuple.
  111. '''
  112. pattern_filenames = tuple(location_config.get('patterns_from') or ()) + (
  113. (pattern_filename,) if pattern_filename else ()
  114. )
  115. return tuple(
  116. itertools.chain.from_iterable(
  117. ('--patterns-from', pattern_filename) for pattern_filename in pattern_filenames
  118. )
  119. )
  120. def make_exclude_flags(location_config, exclude_filename=None):
  121. '''
  122. Given a location config dict with various exclude options, and a filename containing any exclude
  123. patterns, return the corresponding Borg flags as a tuple.
  124. '''
  125. exclude_filenames = tuple(location_config.get('exclude_from') or ()) + (
  126. (exclude_filename,) if exclude_filename else ()
  127. )
  128. exclude_from_flags = tuple(
  129. itertools.chain.from_iterable(
  130. ('--exclude-from', exclude_filename) for exclude_filename in exclude_filenames
  131. )
  132. )
  133. caches_flag = ('--exclude-caches',) if location_config.get('exclude_caches') else ()
  134. if_present_flags = tuple(
  135. itertools.chain.from_iterable(
  136. ('--exclude-if-present', if_present)
  137. for if_present in location_config.get('exclude_if_present', ())
  138. )
  139. )
  140. keep_exclude_tags_flags = (
  141. ('--keep-exclude-tags',) if location_config.get('keep_exclude_tags') else ()
  142. )
  143. exclude_nodump_flags = ('--exclude-nodump',) if location_config.get('exclude_nodump') else ()
  144. return (
  145. exclude_from_flags
  146. + caches_flag
  147. + if_present_flags
  148. + keep_exclude_tags_flags
  149. + exclude_nodump_flags
  150. )
  151. DEFAULT_ARCHIVE_NAME_FORMAT = '{hostname}-{now:%Y-%m-%dT%H:%M:%S.%f}'
  152. def borgmatic_source_directories(borgmatic_source_directory):
  153. '''
  154. Return a list of borgmatic-specific source directories used for state like database backups.
  155. '''
  156. if not borgmatic_source_directory:
  157. borgmatic_source_directory = state.DEFAULT_BORGMATIC_SOURCE_DIRECTORY
  158. return (
  159. [borgmatic_source_directory]
  160. if os.path.exists(os.path.expanduser(borgmatic_source_directory))
  161. else []
  162. )
  163. ROOT_PATTERN_PREFIX = 'R '
  164. def pattern_root_directories(patterns=None):
  165. '''
  166. Given a sequence of patterns, parse out and return just the root directories.
  167. '''
  168. if not patterns:
  169. return []
  170. return [
  171. pattern.split(ROOT_PATTERN_PREFIX, maxsplit=1)[1]
  172. for pattern in patterns
  173. if pattern.startswith(ROOT_PATTERN_PREFIX)
  174. ]
def create_archive(
    dry_run,
    repository,
    location_config,
    storage_config,
    local_borg_version,
    local_path='borg',
    remote_path=None,
    progress=False,
    stats=False,
    json=False,
    list_files=False,
    stream_processes=None,
):
    '''
    Given verbosity/dry-run flags, a local or remote repository path, a location config dict, and a
    storage config dict, create a Borg archive and return Borg's JSON output (if any).

    If a sequence of stream processes is given (instances of subprocess.Popen), then execute the
    create command while also triggering the given processes to produce output.
    '''
    # Expand globs/tildes in the configured source directories (plus borgmatic's own state
    # directory and any pattern root directories), then drop duplicate child directories so
    # Borg doesn't read the same files twice.
    sources = deduplicate_directories(
        map_directories_to_devices(
            expand_directories(
                location_config.get('source_directories', [])
                + borgmatic_source_directories(location_config.get('borgmatic_source_directory'))
            )
        ),
        additional_directory_devices=map_directories_to_devices(
            expand_directories(pattern_root_directories(location_config.get('patterns')))
        ),
    )

    # expanduser() raises TypeError when given None, i.e. when no working directory is
    # configured.
    try:
        working_directory = os.path.expanduser(location_config.get('working_directory'))
    except TypeError:
        working_directory = None

    # Temporary files that Borg reads patterns/excludes from. When a pattern file is in use,
    # the sources are written into it as root patterns instead of being passed on the command
    # line (see the end of full_command below).
    pattern_file = write_pattern_file(location_config.get('patterns'), sources)
    exclude_file = write_pattern_file(
        expand_home_directories(location_config.get('exclude_patterns'))
    )

    checkpoint_interval = storage_config.get('checkpoint_interval', None)
    chunker_params = storage_config.get('chunker_params', None)
    compression = storage_config.get('compression', None)
    upload_rate_limit = storage_config.get('upload_rate_limit', None)
    umask = storage_config.get('umask', None)
    lock_wait = storage_config.get('lock_wait', None)
    files_cache = location_config.get('files_cache')
    archive_name_format = storage_config.get('archive_name_format', DEFAULT_ARCHIVE_NAME_FORMAT)
    extra_borg_options = storage_config.get('extra_borg_options', {}).get('create', '')

    # Pick flag spellings based on what the local Borg version supports.
    if feature.available(feature.Feature.ATIME, local_borg_version):
        atime_flags = ('--atime',) if location_config.get('atime') is True else ()
    else:
        atime_flags = ('--noatime',) if location_config.get('atime') is False else ()

    if feature.available(feature.Feature.NOFLAGS, local_borg_version):
        noflags_flags = ('--noflags',) if location_config.get('flags') is False else ()
    else:
        noflags_flags = ('--nobsdflags',) if location_config.get('flags') is False else ()

    if feature.available(feature.Feature.NUMERIC_IDS, local_borg_version):
        numeric_ids_flags = ('--numeric-ids',) if location_config.get('numeric_ids') else ()
    else:
        numeric_ids_flags = ('--numeric-owner',) if location_config.get('numeric_ids') else ()

    if feature.available(feature.Feature.UPLOAD_RATELIMIT, local_borg_version):
        upload_ratelimit_flags = (
            ('--upload-ratelimit', str(upload_rate_limit)) if upload_rate_limit else ()
        )
    else:
        upload_ratelimit_flags = (
            ('--remote-ratelimit', str(upload_rate_limit)) if upload_rate_limit else ()
        )

    # Fail early on unreadable pattern/exclude files, since Borg itself may only warn.
    ensure_files_readable(location_config.get('patterns_from'), location_config.get('exclude_from'))

    full_command = (
        tuple(local_path.split(' '))
        + ('create',)
        + make_pattern_flags(location_config, pattern_file.name if pattern_file else None)
        + make_exclude_flags(location_config, exclude_file.name if exclude_file else None)
        + (('--checkpoint-interval', str(checkpoint_interval)) if checkpoint_interval else ())
        + (('--chunker-params', chunker_params) if chunker_params else ())
        + (('--compression', compression) if compression else ())
        + upload_ratelimit_flags
        # Streaming dumps (named pipes) also force --one-file-system and --read-special.
        + (
            ('--one-file-system',)
            if location_config.get('one_file_system') or stream_processes
            else ()
        )
        + numeric_ids_flags
        + atime_flags
        + (('--noctime',) if location_config.get('ctime') is False else ())
        + (('--nobirthtime',) if location_config.get('birthtime') is False else ())
        + (('--read-special',) if (location_config.get('read_special') or stream_processes) else ())
        + noflags_flags
        + (('--files-cache', files_cache) if files_cache else ())
        + (('--remote-path', remote_path) if remote_path else ())
        + (('--umask', str(umask)) if umask else ())
        + (('--lock-wait', str(lock_wait)) if lock_wait else ())
        # Verbosity-related flags are suppressed under --json to keep its output parseable.
        + (('--list', '--filter', 'AME-') if list_files and not json and not progress else ())
        + (('--info',) if logger.getEffectiveLevel() == logging.INFO and not json else ())
        + (('--stats',) if stats and not json and not dry_run else ())
        + (('--debug', '--show-rc') if logger.isEnabledFor(logging.DEBUG) and not json else ())
        + (('--dry-run',) if dry_run else ())
        + (('--progress',) if progress else ())
        + (('--json',) if json else ())
        + (tuple(extra_borg_options.split(' ')) if extra_borg_options else ())
        + flags.make_repository_archive_flags(repository, archive_name_format, local_borg_version)
        # When a pattern file exists, the sources are already listed in it as roots, so don't
        # also pass them as command-line arguments.
        + (sources if not pattern_file else ())
    )

    if json:
        output_log_level = None
    elif (stats or list_files) and logger.getEffectiveLevel() == logging.WARNING:
        output_log_level = logging.WARNING
    else:
        output_log_level = logging.INFO

    # The progress output isn't compatible with captured and logged output, as progress messes with
    # the terminal directly.
    output_file = DO_NOT_CAPTURE if progress else None

    borg_environment = environment.make_environment(storage_config)

    if stream_processes:
        return execute_command_with_processes(
            full_command,
            stream_processes,
            output_log_level,
            output_file,
            borg_local_path=local_path,
            working_directory=working_directory,
            extra_environment=borg_environment,
        )

    return execute_command(
        full_command,
        output_log_level,
        output_file,
        borg_local_path=local_path,
        working_directory=working_directory,
        extra_environment=borg_environment,
    )