import glob
import itertools
import logging
import os
import pathlib
import tempfile

from borgmatic.borg import environment, feature, flags, state
from borgmatic.execute import DO_NOT_CAPTURE, execute_command, execute_command_with_processes

logger = logging.getLogger(__name__)


def expand_directory(directory):
    '''
    Given a directory path, expand any tilde (representing a user's home directory) and any globs
    therein. Return a list of one or more resulting paths.
    '''
    expanded_directory = os.path.expanduser(directory)

    return glob.glob(expanded_directory) or [expanded_directory]
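

# A sketch of the expansion behavior (hypothetical home directory and glob matches;
# actual results depend on the filesystem):
#
#     expand_directory('~/sour*')
#     # => ['/home/user/source', '/home/user/sources'] if those paths exist, or
#     # => ['/home/user/sour*'] if nothing matches, since glob.glob() returns [] then.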


def expand_directories(directories):
    '''
    Given a sequence of directory paths, expand tildes and globs in each one. Return all the
    resulting directories as a single flattened tuple.
    '''
    if directories is None:
        return ()

    return tuple(
        itertools.chain.from_iterable(expand_directory(directory) for directory in directories)
    )
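

# For instance (hypothetical paths), two globs that each match two directories get
# flattened into a single four-element tuple:
#
#     expand_directories(('/var/log/app*', '~/data*'))
#     # => ('/var/log/app1', '/var/log/app2', '/home/user/data', '/home/user/data.old')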


def expand_home_directories(directories):
    '''
    Given a sequence of directory paths, expand tildes in each one. Do not perform any globbing.
    Return the results as a tuple.
    '''
    if directories is None:
        return ()

    return tuple(os.path.expanduser(directory) for directory in directories)
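

# Unlike expand_directories(), globs pass through untouched here, so Borg can
# interpret them later (hypothetical home directory):
#
#     expand_home_directories(('~/exclude*',))
#     # => ('/home/user/exclude*',)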


def map_directories_to_devices(directories):
    '''
    Given a sequence of directories, return a map from directory to an identifier for the device
    on which that directory resides or None if the path doesn't exist.

    This is handy for determining whether two different directories are on the same filesystem
    (have the same device identifier).
    '''
    return {
        directory: os.stat(directory).st_dev if os.path.exists(directory) else None
        for directory in directories
    }
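

# Equal st_dev values mean two paths share a filesystem. A hypothetical result with
# /home mounted on its own partition and one nonexistent path:
#
#     map_directories_to_devices(('/etc', '/home/user', '/nope'))
#     # => {'/etc': 2049, '/home/user': 2065, '/nope': None}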


def deduplicate_directories(directory_devices, additional_directory_devices):
    '''
    Given a map from directory to the identifier for the device on which that directory resides,
    return the directories as a sorted tuple with all duplicate child directories removed. For
    instance, if the given directories are ('/foo', '/foo/bar'), return just: ('/foo',)

    The one exception to this rule is if two paths are on different filesystems (devices). In
    that case, they won't get de-duplicated, in case they both need to be passed to Borg (e.g.
    when the location.one_file_system option is true).

    The idea is that if Borg is given a parent directory, then it doesn't also need to be given
    child directories, because it will naturally spider the contents of the parent directory. And
    there are cases where Borg coming across the same file twice will result in duplicate reads
    and even hangs, e.g. when a database hook is using a named pipe for streaming database dumps
    to Borg.

    If any additional directory devices are given, also deduplicate against them, but don't
    include them in the returned directories.
    '''
    deduplicated = set()
    directories = sorted(directory_devices.keys())
    additional_directories = sorted(additional_directory_devices.keys())
    all_devices = {**directory_devices, **additional_directory_devices}

    for directory in directories:
        deduplicated.add(directory)
        parents = pathlib.PurePath(directory).parents

        # If another directory in the given list (or the additional list) is a parent of the
        # current directory (even n levels up) and both are on the same filesystem, then the
        # current directory is a duplicate.
        for other_directory in directories + additional_directories:
            for parent in parents:
                if (
                    pathlib.PurePath(other_directory) == parent
                    and all_devices[directory] is not None
                    and all_devices[other_directory] == all_devices[directory]
                ):
                    if directory in deduplicated:
                        deduplicated.remove(directory)
                    break

    return tuple(sorted(deduplicated))
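

# Worked example with made-up device identifiers: '/foo/bar' collapses into its
# parent because both are on device 1, while '/foo/baz' survives because it's on a
# different filesystem and may need to be passed to Borg separately:
#
#     deduplicate_directories({'/foo': 1, '/foo/bar': 1, '/foo/baz': 2}, {})
#     # => ('/foo', '/foo/baz')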


def write_pattern_file(patterns=None, sources=None):
    '''
    Given a sequence of patterns and an optional sequence of source directories, write them to a
    named temporary file (with the source directories as additional roots) and return the file.
    Return None if no patterns are provided.
    '''
    if not patterns:
        return None

    pattern_file = tempfile.NamedTemporaryFile('w')
    pattern_file.write(
        '\n'.join(tuple(patterns) + tuple(f'R {source}' for source in (sources or [])))
    )
    pattern_file.flush()

    return pattern_file
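

# The temporary file lists the given patterns first, then an "R" (root) line per
# source directory. For write_pattern_file(['+ home/*/.config'], ['/home']), the
# file contents would be:
#
#     + home/*/.config
#     R /home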


def ensure_files_readable(*filename_lists):
    '''
    Given a sequence of filename sequences, ensure that each filename is openable. This prevents
    unreadable files from being passed to Borg, which in certain situations only warns instead of
    erroring.
    '''
    for filename in itertools.chain.from_iterable(
        filename_list for filename_list in filename_lists if filename_list
    ):
        open(filename).close()
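

# Any unreadable file surfaces as an immediate OSError rather than a mid-backup Borg
# warning, e.g. (hypothetical path and permissions):
#
#     ensure_files_readable(['/etc/borgmatic/patterns'], None)
#     # PermissionError: [Errno 13] Permission denied: '/etc/borgmatic/patterns'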


def make_pattern_flags(location_config, pattern_filename=None):
    '''
    Given a location config dict with a potential patterns_from option, and a filename containing
    any additional patterns, return the corresponding Borg flags for those files as a tuple.
    '''
    pattern_filenames = tuple(location_config.get('patterns_from') or ()) + (
        (pattern_filename,) if pattern_filename else ()
    )

    return tuple(
        itertools.chain.from_iterable(
            ('--patterns-from', pattern_filename) for pattern_filename in pattern_filenames
        )
    )
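

# Each file becomes its own --patterns-from flag pair (hypothetical paths):
#
#     make_pattern_flags({'patterns_from': ['/etc/borgmatic/patterns']}, '/tmp/tmpf3a1')
#     # => ('--patterns-from', '/etc/borgmatic/patterns', '--patterns-from', '/tmp/tmpf3a1')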


def make_exclude_flags(location_config, exclude_filename=None):
    '''
    Given a location config dict with various exclude options, and a filename containing any
    exclude patterns, return the corresponding Borg flags as a tuple.
    '''
    exclude_filenames = tuple(location_config.get('exclude_from') or ()) + (
        (exclude_filename,) if exclude_filename else ()
    )
    exclude_from_flags = tuple(
        itertools.chain.from_iterable(
            ('--exclude-from', exclude_filename) for exclude_filename in exclude_filenames
        )
    )
    caches_flag = ('--exclude-caches',) if location_config.get('exclude_caches') else ()
    if_present_flags = tuple(
        itertools.chain.from_iterable(
            ('--exclude-if-present', if_present)
            for if_present in location_config.get('exclude_if_present', ())
        )
    )
    keep_exclude_tags_flags = (
        ('--keep-exclude-tags',) if location_config.get('keep_exclude_tags') else ()
    )
    exclude_nodump_flags = ('--exclude-nodump',) if location_config.get('exclude_nodump') else ()

    return (
        exclude_from_flags
        + caches_flag
        + if_present_flags
        + keep_exclude_tags_flags
        + exclude_nodump_flags
    )
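

# A hypothetical config exercising a couple of the exclude options at once:
#
#     make_exclude_flags({'exclude_caches': True, 'exclude_if_present': ['.nobackup']})
#     # => ('--exclude-caches', '--exclude-if-present', '.nobackup')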


DEFAULT_ARCHIVE_NAME_FORMAT = '{hostname}-{now:%Y-%m-%dT%H:%M:%S.%f}'
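# Borg expands these placeholders itself at archive creation time, so a resulting
# archive name looks something like (illustrative):
#
#     myhost-2023-04-01T12:30:45.123456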


def borgmatic_source_directories(borgmatic_source_directory):
    '''
    Return a list of borgmatic-specific source directories used for state like database backups.
    '''
    if not borgmatic_source_directory:
        borgmatic_source_directory = state.DEFAULT_BORGMATIC_SOURCE_DIRECTORY

    return (
        [borgmatic_source_directory]
        if os.path.exists(os.path.expanduser(borgmatic_source_directory))
        else []
    )


ROOT_PATTERN_PREFIX = 'R '


def pattern_root_directories(patterns=None):
    '''
    Given a sequence of patterns, parse out and return just the root directories.
    '''
    if not patterns:
        return []

    return [
        pattern.split(ROOT_PATTERN_PREFIX, maxsplit=1)[1]
        for pattern in patterns
        if pattern.startswith(ROOT_PATTERN_PREFIX)
    ]
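

# Only "R"-prefixed lines count as roots; other pattern styles are ignored:
#
#     pattern_root_directories(['R /home', '+ home/*/.config', '- home/*/junk'])
#     # => ['/home']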


def create_archive(
    dry_run,
    repository,
    location_config,
    storage_config,
    local_borg_version,
    local_path='borg',
    remote_path=None,
    progress=False,
    stats=False,
    json=False,
    list_files=False,
    stream_processes=None,
):
    '''
    Given verbosity/dry-run flags, a local or remote repository path, a location config dict, and
    a storage config dict, create a Borg archive and return Borg's JSON output (if any).

    If a sequence of stream processes is given (instances of subprocess.Popen), then execute the
    create command while also triggering the given processes to produce output.
    '''
    sources = deduplicate_directories(
        map_directories_to_devices(
            expand_directories(
                location_config.get('source_directories', [])
                + borgmatic_source_directories(location_config.get('borgmatic_source_directory'))
            )
        ),
        additional_directory_devices=map_directories_to_devices(
            expand_directories(pattern_root_directories(location_config.get('patterns')))
        ),
    )

    try:
        working_directory = os.path.expanduser(location_config.get('working_directory'))
    except TypeError:
        working_directory = None

    pattern_file = write_pattern_file(location_config.get('patterns'), sources)
    exclude_file = write_pattern_file(
        expand_home_directories(location_config.get('exclude_patterns'))
    )
    checkpoint_interval = storage_config.get('checkpoint_interval', None)
    chunker_params = storage_config.get('chunker_params', None)
    compression = storage_config.get('compression', None)
    upload_rate_limit = storage_config.get('upload_rate_limit', None)
    umask = storage_config.get('umask', None)
    lock_wait = storage_config.get('lock_wait', None)
    files_cache = location_config.get('files_cache')
    archive_name_format = storage_config.get('archive_name_format', DEFAULT_ARCHIVE_NAME_FORMAT)
    extra_borg_options = storage_config.get('extra_borg_options', {}).get('create', '')

    if feature.available(feature.Feature.ATIME, local_borg_version):
        atime_flags = ('--atime',) if location_config.get('atime') is True else ()
    else:
        atime_flags = ('--noatime',) if location_config.get('atime') is False else ()

    if feature.available(feature.Feature.NOFLAGS, local_borg_version):
        noflags_flags = ('--noflags',) if location_config.get('flags') is False else ()
    else:
        noflags_flags = ('--nobsdflags',) if location_config.get('flags') is False else ()

    if feature.available(feature.Feature.NUMERIC_IDS, local_borg_version):
        numeric_ids_flags = ('--numeric-ids',) if location_config.get('numeric_ids') else ()
    else:
        numeric_ids_flags = ('--numeric-owner',) if location_config.get('numeric_ids') else ()

    if feature.available(feature.Feature.UPLOAD_RATELIMIT, local_borg_version):
        upload_ratelimit_flags = (
            ('--upload-ratelimit', str(upload_rate_limit)) if upload_rate_limit else ()
        )
    else:
        upload_ratelimit_flags = (
            ('--remote-ratelimit', str(upload_rate_limit)) if upload_rate_limit else ()
        )

    ensure_files_readable(location_config.get('patterns_from'), location_config.get('exclude_from'))
    full_command = (
        tuple(local_path.split(' '))
        + ('create',)
        + make_pattern_flags(location_config, pattern_file.name if pattern_file else None)
        + make_exclude_flags(location_config, exclude_file.name if exclude_file else None)
        + (('--checkpoint-interval', str(checkpoint_interval)) if checkpoint_interval else ())
        + (('--chunker-params', chunker_params) if chunker_params else ())
        + (('--compression', compression) if compression else ())
        + upload_ratelimit_flags
        + (
            ('--one-file-system',)
            if location_config.get('one_file_system') or stream_processes
            else ()
        )
        + numeric_ids_flags
        + atime_flags
        + (('--noctime',) if location_config.get('ctime') is False else ())
        + (('--nobirthtime',) if location_config.get('birthtime') is False else ())
        + (('--read-special',) if location_config.get('read_special') or stream_processes else ())
        + noflags_flags
        + (('--files-cache', files_cache) if files_cache else ())
        + (('--remote-path', remote_path) if remote_path else ())
        + (('--umask', str(umask)) if umask else ())
        + (('--lock-wait', str(lock_wait)) if lock_wait else ())
        + (('--list', '--filter', 'AME-') if list_files and not json and not progress else ())
        + (('--info',) if logger.getEffectiveLevel() == logging.INFO and not json else ())
        + (('--stats',) if stats and not json and not dry_run else ())
        + (('--debug', '--show-rc') if logger.isEnabledFor(logging.DEBUG) and not json else ())
        + (('--dry-run',) if dry_run else ())
        + (('--progress',) if progress else ())
        + (('--json',) if json else ())
        + (tuple(extra_borg_options.split(' ')) if extra_borg_options else ())
        + flags.make_repository_archive_flags(repository, archive_name_format, local_borg_version)
        + (sources if not pattern_file else ())
    )
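
    # With defaults plus 'lz4' compression and one_file_system set, the assembled
    # command might look like this, assuming a Borg 1.x "repository::archive"
    # positional from make_repository_archive_flags() (illustrative):
    #
    #     ('borg', 'create', '--compression', 'lz4', '--one-file-system',
    #      'ssh://user@host/./repo::{hostname}-{now:%Y-%m-%dT%H:%M:%S.%f}',
    #      '/etc', '/home')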

    if json:
        output_log_level = None
    elif (stats or list_files) and logger.getEffectiveLevel() == logging.WARNING:
        output_log_level = logging.WARNING
    else:
        output_log_level = logging.INFO

    # The progress output isn't compatible with captured and logged output, as progress messes
    # with the terminal directly.
    output_file = DO_NOT_CAPTURE if progress else None

    borg_environment = environment.make_environment(storage_config)

    if stream_processes:
        return execute_command_with_processes(
            full_command,
            stream_processes,
            output_log_level,
            output_file,
            borg_local_path=local_path,
            working_directory=working_directory,
            extra_environment=borg_environment,
        )

    return execute_command(
        full_command,
        output_log_level,
        output_file,
        borg_local_path=local_path,
        working_directory=working_directory,
        extra_environment=borg_environment,
    )
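

# A minimal usage sketch (hypothetical repository and config dicts; real callers
# pass dicts parsed from borgmatic's configuration files):
#
#     json_output = create_archive(
#         dry_run=False,
#         repository='ssh://user@host/./repo',
#         location_config={'source_directories': ['/etc', '/home']},
#         storage_config={'compression': 'lz4'},
#         local_borg_version='1.2.0',
#         json=True,
#     )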