
Initial work on applying snapshot path rewriting to excludes and patterns (#962).

Dan Helfman 5 months ago
parent
commit
120a29ab4d

+ 5 - 0
NEWS

@@ -4,6 +4,11 @@
  * #960: Fix for archives storing relative source directory paths such that they contain the working
    directory.
  * #960: Fix the "spot" check to support relative source directory paths.
+ * #962: For the ZFS, Btrfs, and LVM hooks, perform path rewriting for excludes and patterns in
+   addition to the existing source directories rewriting.
+ * #962: Under the hood, merge all configured source directories, excludes, and patterns into a
+   unified temporary patterns file for passing to Borg. The borgmatic configuration options remain
+   unchanged.
  * Fix the "spot" check to no longer consider pipe files within an archive for file comparisons.
  * Fix auto-excluding of special files (when databases are configured) to support relative source
    directory paths.
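
For context, the unified temporary file mentioned above uses Borg's native pattern syntax
(see "borg help patterns"), so a merged file might look roughly like this (hypothetical
paths, for illustration only):

    R /home
    R /etc
    - fm:/home/*/.cache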

+ 1 - 1
borgmatic/actions/check.py

@@ -368,7 +368,7 @@ def collect_spot_check_source_paths(
         ).values()
     )
 
-    (create_flags, create_positional_arguments, pattern_file, exclude_file) = (
+    (create_flags, create_positional_arguments, pattern_file) = (
         borgmatic.borg.create.make_base_create_command(
             dry_run=True,
             repository_path=repository['path'],

+ 161 - 92
borgmatic/actions/create.py

@@ -1,3 +1,5 @@
+import collections
+import enum
 import glob
 import itertools
 import logging
@@ -6,6 +8,7 @@ import pathlib
 
 import borgmatic.actions.json
 import borgmatic.borg.create
+import borgmatic.borg.pattern
 import borgmatic.config.paths
 import borgmatic.config.validate
 import borgmatic.hooks.command
@@ -14,6 +17,69 @@ import borgmatic.hooks.dispatch
 logger = logging.getLogger(__name__)
 
 
+def parse_pattern(pattern_line):
+    '''
+    Given a Borg pattern as a string, parse it into a borgmatic.borg.pattern.Pattern instance and
+    return it.
+    '''
+    try:
+        (pattern_type, remainder) = pattern_line.split(' ', maxsplit=1)
+    except ValueError:
+        raise ValueError(f'Invalid pattern: {pattern_line}')
+
+    try:
+        (pattern_style, path) = remainder.split(':', maxsplit=1)
+    except ValueError:
+        pattern_style = ''
+        path = remainder
+
+    return borgmatic.borg.pattern.Pattern(
+        path,
+        borgmatic.borg.pattern.Pattern_type(pattern_type),
+        borgmatic.borg.pattern.Pattern_style(pattern_style),
+    )
+
+
+def collect_patterns(config):
+    '''
+    Given a configuration dict, produce a single sequence of patterns comprised of the configured
+    source directories, patterns, excludes, pattern files, and exclude files.
+
+    The idea is that Borg has all these different ways of specifying includes, excludes, source
+    directories, etc., but we'd like to collapse them all down to one common format (patterns) for
+    ease of manipulation within borgmatic.
+    '''
+    return (
+        tuple(
+            borgmatic.borg.pattern.Pattern(source_directory)
+            for source_directory in config.get('source_directories', ())
+        )
+        + tuple(parse_pattern(pattern_line) for pattern_line in config.get('patterns', ()))
+        + tuple(
+            borgmatic.borg.pattern.Pattern(
+                exclude_line,
+                borgmatic.borg.pattern.Pattern_type.EXCLUDE,
+                borgmatic.borg.pattern.Pattern_style.FNMATCH,
+            )
+            for exclude_line in config.get('exclude_patterns', ())
+        )
+        + tuple(
+            parse_pattern(pattern_line)
+            for filename in config.get('patterns_from', ())
+            for pattern_line in open(filename).readlines()
+        )
+        + tuple(
+            borgmatic.borg.pattern.Pattern(
+                exclude_line,
+                borgmatic.borg.pattern.Pattern_type.EXCLUDE,
+                borgmatic.borg.pattern.Pattern_style.FNMATCH,
+            )
+            for filename in config.get('exclude_from', ())
+            for exclude_line in open(filename).readlines()
+        )
+    )
+
+
 def expand_directory(directory, working_directory):
     '''
     Given a directory path, expand any tilde (representing a user's home directory) and any globs
@@ -40,126 +106,130 @@ def expand_directory(directory, working_directory):
     ]
 
 
-def expand_directories(directories, working_directory=None):
+def expand_patterns(patterns, working_directory=None, skip_paths=None):
     '''
-    Given a sequence of directory paths and an optional working directory, expand tildes and globs
-    in each one. Return all the resulting directories as a single flattened tuple.
+    Given a sequence of borgmatic.borg.pattern.Pattern instances and an optional working directory,
+    expand tildes and globs in each root pattern. Return all the resulting patterns (not just the
+    root patterns) as a tuple.
+
+    If a set of paths to skip is given, don't expand any root patterns whose paths match them.
     '''
-    if directories is None:
+    if patterns is None:
         return ()
 
     return tuple(
         itertools.chain.from_iterable(
-            expand_directory(directory, working_directory) for directory in directories
+            (
+                (
+                    borgmatic.borg.pattern.Pattern(
+                        expanded_path,
+                        pattern.type,
+                        pattern.style,
+                        pattern.device,
+                    )
+                    for expanded_path in expand_directory(pattern.path, working_directory)
+                )
+                if pattern.type == borgmatic.borg.pattern.Pattern_type.ROOT
+                and pattern.path not in (skip_paths or ())
+                else (pattern,)
+            )
+            for pattern in patterns
         )
     )
 
 
-def map_directories_to_devices(directories, working_directory=None):
+def device_map_patterns(patterns, working_directory=None):
     '''
-    Given a sequence of directories and an optional working directory, return a map from directory
-    to an identifier for the device on which that directory resides or None if the path doesn't
-    exist.
+    Given a sequence of borgmatic.borg.pattern.Pattern instances and an optional working directory,
+    determine the identifier for the device on which the pattern's path resides—or None if the path
+    doesn't exist or is from a non-root pattern. Return an updated sequence of patterns with the
+    device field populated. But if the device field is already set, don't bother setting it again.
 
-    This is handy for determining whether two different directories are on the same filesystem (have
-    the same device identifier).
+    This is handy for determining whether two different pattern paths are on the same filesystem
+    (have the same device identifier).
     '''
-    return {
-        directory: os.stat(full_directory).st_dev if os.path.exists(full_directory) else None
-        for directory in directories
-        for full_directory in (os.path.join(working_directory or '', directory),)
-    }
+    return tuple(
+        borgmatic.borg.pattern.Pattern(
+            pattern.path,
+            pattern.type,
+            pattern.style,
+            device=pattern.device
+            or (
+                os.stat(full_path).st_dev
+                if pattern.type == borgmatic.borg.pattern.Pattern_type.ROOT
+                and os.path.exists(full_path)
+                else None
+            ),
+        )
+        for pattern in patterns
+        for full_path in (os.path.join(working_directory or '', pattern.path),)
+    )
 
 
-def deduplicate_directories(directory_devices, additional_directory_devices):
+def deduplicate_patterns(patterns):
     '''
-    Given a map from directory to the identifier for the device on which that directory resides,
-    return the directories as a sorted sequence with all duplicate child directories removed. For
-    instance, if paths is ['/foo', '/foo/bar'], return just: ['/foo']
+    Given a sequence of borgmatic.borg.pattern.Pattern instances, return them with all duplicate
+    root child patterns removed. For instance, if two root patterns are given with paths "/foo" and
+    "/foo/bar", return just the one with "/foo". Non-root patterns are passed through with
+    modification.
 
     The one exception to this rule is if two paths are on different filesystems (devices). In that
-    case, they won't get de-duplicated in case they both need to be passed to Borg (e.g. the
-    location.one_file_system option is true).
+    case, they won't get de-duplicated, in case they both need to be passed to Borg (e.g. the
+    one_file_system option is true).
 
-    The idea is that if Borg is given a parent directory, then it doesn't also need to be given
-    child directories, because it will naturally spider the contents of the parent directory. And
+    The idea is that if Borg is given a root parent pattern, then it doesn't also need to be given
+    child patterns, because it will naturally spider the contents of the parent pattern's path. And
     there are cases where Borg coming across the same file twice will result in duplicate reads and
     even hangs, e.g. when a database hook is using a named pipe for streaming database dumps to
     Borg.
-
-    If any additional directory devices are given, also deduplicate against them, but don't include
-    them in the returned directories.
     '''
-    deduplicated = set()
-    directories = sorted(directory_devices.keys())
-    additional_directories = sorted(additional_directory_devices.keys())
-    all_devices = {**directory_devices, **additional_directory_devices}
-
-    for directory in directories:
-        deduplicated.add(directory)
-        parents = pathlib.PurePath(directory).parents
-
-        # If another directory in the given list (or the additional list) is a parent of current
-        # directory (even n levels up) and both are on the same filesystem, then the current
-        # directory is a duplicate.
-        for other_directory in directories + additional_directories:
-            for parent in parents:
-                if (
-                    pathlib.PurePath(other_directory) == parent
-                    and all_devices[directory] is not None
-                    and all_devices[other_directory] == all_devices[directory]
-                ):
-                    if directory in deduplicated:
-                        deduplicated.remove(directory)
-                    break
-
-    return sorted(deduplicated)
+    deduplicated = []
 
+    for pattern in patterns:
+        if pattern.type != borgmatic.borg.pattern.Pattern_type.ROOT:
+            deduplicated.append(pattern)
+            continue
 
-ROOT_PATTERN_PREFIX = 'R '
+        parents = pathlib.PurePath(pattern.path).parents
 
+        # If another root pattern in the given list is a parent of the current pattern's path (even
+        # n levels up) and both are on the same filesystem, then the current pattern is a duplicate.
+        for other_pattern in patterns:
+            if other_pattern.type != borgmatic.borg.pattern.Pattern_type.ROOT:
+                continue
 
-def pattern_root_directories(patterns=None):
-    '''
-    Given a sequence of patterns, parse out and return just the root directories.
-    '''
-    if not patterns:
-        return []
+            if any(
+                pathlib.PurePath(other_pattern.path) == parent
+                and pattern.device is not None
+                and other_pattern.device == pattern.device
+                for parent in parents
+            ):
+                break
+        else:
+            deduplicated.append(pattern)
 
-    return [
-        pattern.split(ROOT_PATTERN_PREFIX, maxsplit=1)[1]
-        for pattern in patterns
-        if pattern.startswith(ROOT_PATTERN_PREFIX)
-    ]
+    return tuple(deduplicated)
 
 
-def process_source_directories(config, source_directories=None, skip_expand_paths=None):
+def process_patterns(patterns, working_directory, skip_expand_paths=None):
     '''
-    Given a sequence of source directories (either in the source_directories argument or, lacking
-    that, from config), expand and deduplicate the source directories, returning the result.
+    Given a sequence of Borg patterns and a configured working directory, expand and deduplicate any
+    "root" patterns, returning the resulting root and non-root patterns as a list.
 
     If any paths are given to skip, don't expand them.
     '''
-    working_directory = borgmatic.config.paths.get_working_directory(config)
     skip_paths = set(skip_expand_paths or ())
 
-    if source_directories is None:
-        source_directories = tuple(config.get('source_directories', ()))
-
-    return deduplicate_directories(
-        map_directories_to_devices(
-            expand_directories(
-                tuple(source for source in source_directories if source not in skip_paths),
-                working_directory=working_directory,
-            )
-            + tuple(skip_paths)
-        ),
-        additional_directory_devices=map_directories_to_devices(
-            expand_directories(
-                pattern_root_directories(config.get('patterns')),
-                working_directory=working_directory,
+    return list(
+        deduplicate_patterns(
+            device_map_patterns(
+                expand_patterns(
+                    patterns,
+                    working_directory=working_directory,
+                    skip_paths=skip_paths,
+                )
             )
-        ),
+        )
     )
 
 
@@ -197,6 +267,7 @@ def run_create(
 
     log_prefix = repository.get('label', repository['path'])
     logger.info(f'{log_prefix}: Creating archive{dry_run_label}')
+    working_directory = borgmatic.config.paths.get_working_directory(config)
 
     with borgmatic.config.paths.Runtime_directory(
         config, log_prefix
@@ -209,7 +280,7 @@ def run_create(
             borgmatic_runtime_directory,
             global_arguments.dry_run,
         )
-        source_directories = process_source_directories(config)
+        patterns = process_patterns(collect_patterns(config), working_directory)
         active_dumps = borgmatic.hooks.dispatch.call_hooks(
             'dump_data_sources',
             config,
@@ -217,23 +288,21 @@ def run_create(
             borgmatic.hooks.dispatch.Hook_type.DATA_SOURCE,
             config_paths,
             borgmatic_runtime_directory,
-            source_directories,
+            patterns,
             global_arguments.dry_run,
         )
 
-        # Process source directories again in case any data source hooks updated them. Without this
-        # step, we could end up with duplicate paths that cause Borg to hang when it tries to read
-        # from the same named pipe twice.
-        source_directories = process_source_directories(
-            config, source_directories, skip_expand_paths=config_paths
-        )
+        # Process the patterns again in case any data source hooks updated them. Without this step,
+        # we could end up with duplicate paths that cause Borg to hang when it tries to read from
+        # the same named pipe twice.
+        patterns = process_patterns(patterns, working_directory, skip_expand_paths=config_paths)
         stream_processes = [process for processes in active_dumps.values() for process in processes]
 
         json_output = borgmatic.borg.create.create_archive(
             global_arguments.dry_run,
             repository['path'],
             config,
-            source_directories,
+            patterns,
             local_borg_version,
             global_arguments,
             borgmatic_runtime_directory,
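
A minimal sketch of the resulting flow, assuming the functions above and hypothetical
configuration values (not part of the commit):

    from borgmatic.actions.create import collect_patterns, process_patterns
    from borgmatic.borg.pattern import Pattern, Pattern_type, Pattern_style

    config = {
        'source_directories': ['/home', '/home/user'],
        'exclude_patterns': ['*.cache'],
    }

    patterns = collect_patterns(config)
    # (Pattern('/home'), Pattern('/home/user'),
    #  Pattern('*.cache', Pattern_type.EXCLUDE, Pattern_style.FNMATCH))

    # Expand globs/tildes, populate devices, and deduplicate: if /home and
    # /home/user are on the same filesystem, the child root pattern is dropped.
    patterns = process_patterns(patterns, working_directory=None)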

+ 73 - 115
borgmatic/borg/create.py

@@ -6,6 +6,7 @@ import stat
 import tempfile
 import textwrap
 
+import borgmatic.borg.pattern
 import borgmatic.config.paths
 import borgmatic.logger
 from borgmatic.borg import environment, feature, flags
@@ -19,81 +20,41 @@ from borgmatic.execute import (
 logger = logging.getLogger(__name__)
 
 
-def expand_home_directories(directories):
+def write_patterns_file(patterns, borgmatic_runtime_directory, log_prefix, patterns_file=None):
     '''
-    Given a sequence of directory paths, expand tildes in each one. Do not perform any globbing.
-    Return the results as a tuple.
-    '''
-    if directories is None:
-        return ()
-
-    return tuple(os.path.expanduser(directory) for directory in directories)
+    Given a sequence of patterns as borgmatic.borg.pattern.Pattern instances, write them to a
+    named temporary file in the given borgmatic runtime directory and return the file object.
 
+    Use the given log prefix in any logging.
 
-def write_pattern_file(patterns=None, sources=None, pattern_file=None):
-    '''
-    Given a sequence of patterns and an optional sequence of source directories, write them to a
-    named temporary file (with the source directories as additional roots) and return the file.
-    If an optional open pattern file is given, overwrite it instead of making a new temporary file.
+    If an optional open pattern file is given, append to it instead of making a new temporary file.
     Return None if no patterns are provided.
     '''
-    if not patterns and not sources:
+    if not patterns:
         return None
 
-    if pattern_file is None:
-        pattern_file = tempfile.NamedTemporaryFile('w')
+    if patterns_file is None:
+        patterns_file = tempfile.NamedTemporaryFile('w', dir=borgmatic_runtime_directory)
     else:
-        pattern_file.seek(0)
+        patterns_file.write('\n')
 
-    pattern_file.write(
-        '\n'.join(tuple(patterns or ()) + tuple(f'R {source}' for source in (sources or [])))
+    patterns_output = '\n'.join(
+        f'{pattern.type.value} {pattern.style.value}{":" if pattern.style.value else ""}{pattern.path}'
+        for pattern in patterns
     )
-    pattern_file.flush()
-
-    return pattern_file
-
-
-def ensure_files_readable(*filename_lists):
-    '''
-    Given a sequence of filename sequences, ensure that each filename is openable. This prevents
-    unreadable files from being passed to Borg, which in certain situations only warns instead of
-    erroring.
-    '''
-    for file_object in itertools.chain.from_iterable(
-        filename_list for filename_list in filename_lists if filename_list
-    ):
-        open(file_object).close()
+    logger.debug(f'{log_prefix}: Writing patterns to {patterns_file.name}:\n{patterns_output}')
 
+    patterns_file.write(patterns_output)
+    patterns_file.flush()
 
-def make_pattern_flags(config, pattern_filename=None):
-    '''
-    Given a configuration dict with a potential patterns_from option, and a filename containing any
-    additional patterns, return the corresponding Borg flags for those files as a tuple.
-    '''
-    pattern_filenames = tuple(config.get('patterns_from') or ()) + (
-        (pattern_filename,) if pattern_filename else ()
-    )
-
-    return tuple(
-        itertools.chain.from_iterable(
-            ('--patterns-from', pattern_filename) for pattern_filename in pattern_filenames
-        )
-    )
+    return patterns_file
 
 
-def make_exclude_flags(config, exclude_filename=None):
+def make_exclude_flags(config):
     '''
-    Given a configuration dict with various exclude options, and a filename containing any exclude
-    patterns, return the corresponding Borg flags as a tuple.
+    Given a configuration dict with various exclude options, return the corresponding Borg flags as
+    a tuple.
     '''
-    exclude_filenames = tuple(config.get('exclude_from') or ()) + (
-        (exclude_filename,) if exclude_filename else ()
-    )
-    exclude_from_flags = tuple(
-        itertools.chain.from_iterable(
-            ('--exclude-from', exclude_filename) for exclude_filename in exclude_filenames
-        )
-    )
     caches_flag = ('--exclude-caches',) if config.get('exclude_caches') else ()
     if_present_flags = tuple(
         itertools.chain.from_iterable(
@@ -104,13 +65,7 @@ def make_exclude_flags(config, exclude_filename=None):
     keep_exclude_tags_flags = ('--keep-exclude-tags',) if config.get('keep_exclude_tags') else ()
     exclude_nodump_flags = ('--exclude-nodump',) if config.get('exclude_nodump') else ()
 
-    return (
-        exclude_from_flags
-        + caches_flag
-        + if_present_flags
-        + keep_exclude_tags_flags
-        + exclude_nodump_flags
-    )
+    return caches_flag + if_present_flags + keep_exclude_tags_flags + exclude_nodump_flags
 
 
 def make_list_filter_flags(local_borg_version, dry_run):
@@ -214,18 +169,22 @@ def collect_special_file_paths(
     )
 
 
-def check_all_source_directories_exist(source_directories):
+def check_all_root_patterns_exist(patterns):
     '''
-    Given a sequence of source directories, check that the source directories all exist. If any do
-    not, raise an exception.
+    Given a sequence of borgmatic.borg.pattern.Pattern instances, check that all root pattern
+    paths exist. If any don't, raise an exception.
     '''
-    missing_directories = [
-        source_directory
-        for source_directory in source_directories
-        if not os.path.exists(source_directory)
+    missing_paths = [
+        pattern.path
+        for pattern in patterns
+        if pattern.type == borgmatic.borg.pattern.Pattern_type.ROOT
+        if not os.path.exists(pattern.path)
     ]
-    if missing_directories:
-        raise ValueError(f"Source directories do not exist: {', '.join(missing_directories)}")
+
+    if missing_paths:
+        raise ValueError(
+            f"Source directories / root pattern paths do not exist: {', '.join(missing_paths)}"
+        )
 
 
 MAX_SPECIAL_FILE_PATHS_LENGTH = 1000
@@ -235,7 +194,7 @@ def make_base_create_command(
     dry_run,
     repository_path,
     config,
-    source_directories,
+    patterns,
     local_borg_version,
     global_arguments,
     borgmatic_runtime_directory,
@@ -248,22 +207,15 @@ def make_base_create_command(
 ):
     '''
     Given verbosity/dry-run flags, a local or remote repository path, a configuration dict, a
-    sequence of loaded configuration paths, the local Borg version, global arguments as an
-    argparse.Namespace instance, and a sequence of borgmatic source directories, return a tuple of
-    (base Borg create command flags, Borg create command positional arguments, open pattern file
-    handle, open exclude file handle).
+    sequence of patterns as borgmatic.borg.pattern.Pattern instances, the local Borg version,
+    global arguments as an argparse.Namespace instance, and the borgmatic runtime directory,
+    return a tuple of (base Borg create command flags, Borg create command positional
+    arguments, open pattern file handle).
     '''
     if config.get('source_directories_must_exist', False):
-        check_all_source_directories_exist(source_directories)
+        check_all_root_patterns_exist(patterns)
 
-    ensure_files_readable(config.get('patterns_from'), config.get('exclude_from'))
-
-    pattern_file = (
-        write_pattern_file(config.get('patterns'), source_directories)
-        if config.get('patterns') or config.get('patterns_from')
-        else None
-    )
-    exclude_file = write_pattern_file(expand_home_directories(config.get('exclude_patterns')))
+    patterns_file = write_patterns_file(
+        patterns, borgmatic_runtime_directory, log_prefix=repository_path
+    )
     checkpoint_interval = config.get('checkpoint_interval', None)
     checkpoint_volume = config.get('checkpoint_volume', None)
     chunker_params = config.get('chunker_params', None)
@@ -306,8 +258,8 @@ def make_base_create_command(
     create_flags = (
         tuple(local_path.split(' '))
         + ('create',)
-        + make_pattern_flags(config, pattern_file.name if pattern_file else None)
-        + make_exclude_flags(config, exclude_file.name if exclude_file else None)
+        + (('--patterns-from', patterns_file.name) if patterns_file else ())
+        + make_exclude_flags(config)
         + (('--checkpoint-interval', str(checkpoint_interval)) if checkpoint_interval else ())
         + (('--checkpoint-volume', str(checkpoint_volume)) if checkpoint_volume else ())
         + (('--chunker-params', chunker_params) if chunker_params else ())
@@ -337,7 +289,7 @@ def make_base_create_command(
 
     create_positional_arguments = flags.make_repository_archive_flags(
         repository_path, archive_name_format, local_borg_version
-    ) + (tuple(source_directories) if not pattern_file else ())
+    )
 
     # If database hooks are enabled (as indicated by streaming processes), exclude files that might
     # cause Borg to hang. But skip this if the user has explicitly set the "read_special" to True.
@@ -367,22 +319,30 @@ def make_base_create_command(
             logger.warning(
                 f'{repository_path}: Excluding special files to prevent Borg from hanging: {truncated_special_file_paths}'
             )
-            exclude_file = write_pattern_file(
-                expand_home_directories(
-                    tuple(config.get('exclude_patterns') or ()) + special_file_paths
+            patterns_file = write_patterns_file(
+                tuple(
+                    borgmatic.borg.pattern.Pattern(
+                        special_file_path,
+                        borgmatic.borg.pattern.Pattern_type.EXCLUDE,
+                        borgmatic.borg.pattern.Pattern_style.FNMATCH,
+                    )
+                    for special_file_path in special_file_paths
                 ),
-                pattern_file=exclude_file,
+                borgmatic_runtime_directory,
+                log_prefix=repository_path,
+                patterns_file=patterns_file,
             )
-            create_flags += make_exclude_flags(config, exclude_file.name)
+            if '--patterns-from' not in create_flags:
+                create_flags += ('--patterns-from', patterns_file.name)
 
-    return (create_flags, create_positional_arguments, pattern_file, exclude_file)
+    return (create_flags, create_positional_arguments, patterns_file)
 
 
 def create_archive(
     dry_run,
     repository_path,
     config,
-    source_directories,
+    patterns,
     local_borg_version,
     global_arguments,
     borgmatic_runtime_directory,
@@ -406,22 +366,20 @@ def create_archive(
 
     working_directory = borgmatic.config.paths.get_working_directory(config)
 
-    (create_flags, create_positional_arguments, pattern_file, exclude_file) = (
-        make_base_create_command(
-            dry_run,
-            repository_path,
-            config,
-            source_directories,
-            local_borg_version,
-            global_arguments,
-            borgmatic_runtime_directory,
-            local_path,
-            remote_path,
-            progress,
-            json,
-            list_files,
-            stream_processes,
-        )
+    (create_flags, create_positional_arguments, patterns_file) = make_base_create_command(
+        dry_run,
+        repository_path,
+        config,
+        patterns,
+        local_borg_version,
+        global_arguments,
+        borgmatic_runtime_directory,
+        local_path,
+        remote_path,
+        progress,
+        json,
+        list_files,
+        stream_processes,
     )
 
     if json:
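
For reference, each Pattern is serialized to one Borg pattern line by the f-string in
write_patterns_file() above. A sketch of the resulting lines (assuming the Pattern type
from borgmatic/borg/pattern.py below):

    from borgmatic.borg.pattern import Pattern, Pattern_type, Pattern_style

    def serialize(pattern):
        # Mirrors the f-string in write_patterns_file().
        style = pattern.style.value
        return f'{pattern.type.value} {style}{":" if style else ""}{pattern.path}'

    serialize(Pattern('/home'))  # 'R /home'
    serialize(Pattern('/etc/ssl/private', Pattern_type.EXCLUDE, Pattern_style.FNMATCH))
    # '- fm:/etc/ssl/private'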

+ 31 - 0
borgmatic/borg/pattern.py

@@ -0,0 +1,31 @@
+import collections
+import enum
+
+
+# See https://borgbackup.readthedocs.io/en/stable/usage/help.html#borg-help-patterns
+class Pattern_type(enum.Enum):
+    ROOT = 'R'
+    PATTERN_STYLE = 'P'
+    EXCLUDE = '-'
+    NO_RECURSE = '!'
+    INCLUDE = '+'
+
+
+class Pattern_style(enum.Enum):
+    NONE = ''
+    FNMATCH = 'fm'
+    SHELL = 'sh'
+    REGULAR_EXPRESSION = 're'
+    PATH_PREFIX = 'pp'
+    PATH_FULL_MATCH = 'pf'
+
+
+Pattern = collections.namedtuple(
+    'Pattern',
+    ('path', 'type', 'style', 'device'),
+    defaults=(
+        Pattern_type.ROOT,
+        Pattern_style.NONE,
+        None,
+    ),
+)
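
A quick sketch of how the new Pattern namedtuple behaves, using only the defaults defined
above:

    from borgmatic.borg.pattern import Pattern, Pattern_type, Pattern_style

    root = Pattern('/var/log')
    assert root.type == Pattern_type.ROOT    # defaulted
    assert root.style == Pattern_style.NONE  # defaulted
    assert root.device is None               # defaulted

    # Namedtuples are immutable, so "rewriting" a pattern means constructing a
    # replacement, as the snapshot hooks below do.
    rewritten = Pattern('/mnt/snapshot/var/log', root.type, root.style, root.device)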

+ 3 - 4
borgmatic/config/schema.yaml

@@ -144,8 +144,7 @@ properties:
             type: string
         description: |
             Read include/exclude patterns from one or more separate named files,
-            one pattern per line. Note that Borg considers this option
-            experimental. See the output of "borg help patterns" for more
+            one pattern per line. See the output of "borg help patterns" for more
             details.
         example:
             - /etc/borgmatic/patterns
@@ -230,8 +229,8 @@ properties:
     source_directories_must_exist:
         type: boolean
         description: |
-            If true, then source directories must exist, otherwise an error is
-            raised. Defaults to false.
+            If true, then source directories (and root pattern paths) must
+            exist. If they don't, an error is raised. Defaults to false.
         example: true
     encryption_passcommand:
         type: string

+ 10 - 7
borgmatic/hooks/data_source/bootstrap.py

@@ -4,6 +4,7 @@ import json
 import logging
 import os
 
+import borgmatic.borg.pattern
 import borgmatic.config.paths
 
 logger = logging.getLogger(__name__)
@@ -22,15 +23,15 @@ def dump_data_sources(
     log_prefix,
     config_paths,
     borgmatic_runtime_directory,
-    source_directories,
+    patterns,
     dry_run,
 ):
     '''
     Given a bootstrap configuration dict, a configuration dict, a log prefix, the borgmatic
-    configuration file paths, the borgmatic runtime directory, the configured source directories,
-    and whether this is a dry run, create a borgmatic manifest file to store the paths of the
-    configuration files used to create the archive. But skip this if the bootstrap
-    store_config_files option is False or if this is a dry run.
+    configuration file paths, the borgmatic runtime directory, the configured patterns, and whether
+    this is a dry run, create a borgmatic manifest file to store the paths of the configuration
+    files used to create the archive. But skip this if the bootstrap store_config_files option is
+    False or if this is a dry run.
 
     Return an empty sequence, since there are no ongoing dump processes from this hook.
     '''
@@ -55,8 +56,10 @@ def dump_data_sources(
             manifest_file,
         )
 
-    source_directories.extend(config_paths)
-    source_directories.append(os.path.join(borgmatic_runtime_directory, 'bootstrap'))
+    patterns.extend(borgmatic.borg.pattern.Pattern(config_path) for config_path in config_paths)
+    patterns.append(
+        borgmatic.borg.pattern.Pattern(os.path.join(borgmatic_runtime_directory, 'bootstrap'))
+    )
 
     return []
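
The hook-side contract is the same across the data source hooks below: dump_data_sources()
now receives a mutable list of Pattern instances instead of source directory strings and
appends any extra paths it needs backed up. A sketch with a hypothetical runtime directory:

    patterns = [borgmatic.borg.pattern.Pattern('/home')]
    patterns.append(
        borgmatic.borg.pattern.Pattern('/run/user/1000/borgmatic/bootstrap')
    )
    # Both paths end up in the unified patterns file as "R ..." root lines.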
 

+ 71 - 51
borgmatic/hooks/data_source/btrfs.py

@@ -6,6 +6,7 @@ import os
 import shutil
 import subprocess
 
+import borgmatic.borg.pattern
 import borgmatic.config.paths
 import borgmatic.execute
 import borgmatic.hooks.data_source.snapshot
@@ -73,40 +74,40 @@ def get_subvolumes_for_filesystem(btrfs_command, filesystem_mount_point):
 
 
 Subvolume = collections.namedtuple(
-    'Subvolume', ('path', 'contained_source_directories'), defaults=((),)
+    'Subvolume', ('path', 'contained_patterns'), defaults=((),)
 )
 
 
-def get_subvolumes(btrfs_command, findmnt_command, source_directories=None):
+def get_subvolumes(btrfs_command, findmnt_command, patterns=None):
     '''
-    Given a Btrfs command to run and a sequence of configured source directories, find the
-    intersection between the current Btrfs filesystem and subvolume mount points and the configured
-    borgmatic source directories. The idea is that these are the requested subvolumes to snapshot.
+    Given a Btrfs command to run and a sequence of configured patterns, find the intersection
+    between the current Btrfs filesystem and subvolume mount points and the paths of any patterns.
+    The idea is that these pattern paths represent the requested subvolumes to snapshot.
 
-    If the source directories is None, then return all subvolumes, sorted by path.
+    If patterns is None, then return all subvolumes, sorted by path.
 
     Return the result as a sequence of matching subvolume mount points.
     '''
-    candidate_source_directories = set(source_directories or ())
+    candidate_patterns = set(patterns or ())
     subvolumes = []
 
-    # For each filesystem mount point, find its subvolumes and match them against the given source
-    # directories to find the subvolumes to backup. And within this loop, sort the subvolumes from
-    # longest to shortest mount points, so longer mount points get a whack at the candidate source
-    # directory piñata before their parents do. (Source directories are consumed during this
-    # process, so no two datasets get the same contained source directories.)
+    # For each filesystem mount point, find its subvolumes and match them against the given patterns
+    # to find the subvolumes to backup. And within this loop, sort the subvolumes from longest to
+    # shortest mount points, so longer mount points get a whack at the candidate pattern piñata
+    # before their parents do. (Patterns are consumed during this process, so no two subvolumes end
+    # up with the same contained patterns.)
     for mount_point in get_filesystem_mount_points(findmnt_command):
         subvolumes.extend(
-            Subvolume(subvolume_path, contained_source_directories)
+            Subvolume(subvolume_path, contained_patterns)
             for subvolume_path in reversed(
                 get_subvolumes_for_filesystem(btrfs_command, mount_point)
             )
-            for contained_source_directories in (
-                borgmatic.hooks.data_source.snapshot.get_contained_directories(
-                    subvolume_path, candidate_source_directories
+            for contained_patterns in (
+                borgmatic.hooks.data_source.snapshot.get_contained_patterns(
+                    subvolume_path, candidate_patterns
                 ),
             )
-            if source_directories is None or contained_source_directories
+            if patterns is None or contained_patterns
         )
 
     return tuple(sorted(subvolumes, key=lambda subvolume: subvolume.path))
@@ -126,13 +127,13 @@ def make_snapshot_path(subvolume_path):
     ) + subvolume_path.rstrip(os.path.sep)
 
 
-def make_snapshot_exclude_path(subvolume_path):  # pragma: no cover
+def make_snapshot_exclude_pattern(subvolume_path):  # pragma: no cover
     '''
-    Given the path to a subvolume, make a corresponding exclude path for its embedded snapshot path.
-    This is to work around a quirk of Btrfs: If you make a snapshot path as a child directory of a
-    subvolume, then the snapshot's own initial directory component shows up as an empty directory
-    within the snapshot itself. For instance, if you have a Btrfs subvolume at /mnt and make a
-    snapshot of it at:
+    Given the path to a subvolume, make a corresponding exclude pattern for its embedded snapshot
+    path. This is to work around a quirk of Btrfs: If you make a snapshot path as a child directory
+    of a subvolume, then the snapshot's own initial directory component shows up as an empty
+    directory within the snapshot itself. For instance, if you have a Btrfs subvolume at /mnt and
+    make a snapshot of it at:
 
         /mnt/.borgmatic-snapshot-1234/mnt
 
@@ -140,30 +141,52 @@ def make_snapshot_exclude_path(subvolume_path):  # pragma: no cover
 
         /mnt/.borgmatic-snapshot-1234/mnt/.borgmatic-snapshot-1234
 
-    So to prevent that from ending up in the Borg archive, this function produces its path for
-    exclusion.
+    So to prevent that from ending up in the Borg archive, this function produces an exclude pattern
+    to exclude that path.
     '''
     snapshot_directory = f'{BORGMATIC_SNAPSHOT_PREFIX}{os.getpid()}'
 
-    return os.path.join(
-        subvolume_path,
-        snapshot_directory,
-        subvolume_path.lstrip(os.path.sep),
-        snapshot_directory,
+    return borgmatic.borg.pattern.Pattern(
+        os.path.join(
+            subvolume_path,
+            snapshot_directory,
+            subvolume_path.lstrip(os.path.sep),
+            snapshot_directory,
+        ),
+        borgmatic.borg.pattern.Pattern_type.EXCLUDE,
+        borgmatic.borg.pattern.Pattern_style.FNMATCH,
     )
 
 
-def make_borg_source_directory_path(subvolume_path, source_directory):
+def make_borg_snapshot_pattern(subvolume_path, pattern):
     '''
-    Given the path to a subvolume and a source directory inside it, make a corresponding path for
-    the source directory within a snapshot path intended for giving to Borg.
+    Given the path to a subvolume and a pattern as a borgmatic.borg.pattern.Pattern instance whose
+    path is inside the subvolume, return a new Pattern with its path rewritten to be in a snapshot
+    path intended for giving to Borg.
+
+    If a regular expression pattern path starts with a caret, preserve it at the start of the
+    rewritten path so as not to break the regular expression.
     '''
-    return os.path.join(
+    initial_caret = (
+        '^'
+        if pattern.style == borgmatic.borg.pattern.Pattern_style.REGULAR_EXPRESSION
+        and pattern.path.startswith('^')
+        else ''
+    )
+
+    rewritten_path = initial_caret + os.path.join(
         subvolume_path,
         f'{BORGMATIC_SNAPSHOT_PREFIX}{os.getpid()}',
         '.',  # Borg 1.4+ "slashdot" hack.
         # Included so that the source directory ends up in the Borg archive at its "original" path.
-        source_directory.lstrip(os.path.sep),
+        pattern.path.lstrip('^').lstrip(os.path.sep),
+    )
+
+    return borgmatic.borg.pattern.Pattern(
+        rewritten_path,
+        pattern.type,
+        pattern.style,
+        pattern.device,
     )
 
 
@@ -193,16 +216,16 @@ def dump_data_sources(
     log_prefix,
     config_paths,
     borgmatic_runtime_directory,
-    source_directories,
+    patterns,
     dry_run,
 ):
     '''
     Given a Btrfs configuration dict, a configuration dict, a log prefix, the borgmatic
-    configuration file paths, the borgmatic runtime directory, the configured source directories,
-    and whether this is a dry run, auto-detect and snapshot any Btrfs subvolume mount points listed
-    in the given source directories. Also update those source directories, replacing subvolume mount
-    points with corresponding snapshot directories so they get stored in the Borg archive instead.
-    Use the log prefix in any log entries.
+    configuration file paths, the borgmatic runtime directory, the configured patterns, and whether
+    this is a dry run, auto-detect and snapshot any Btrfs subvolume mount points listed in the given
+    patterns. Also update those patterns, replacing subvolume mount points with corresponding
+    snapshot directories so they get stored in the Borg archive instead. Use the log prefix in any
+    log entries.
 
     Return an empty sequence, since there are no ongoing dump processes from this hook.
 
@@ -211,15 +234,15 @@ def dump_data_sources(
     dry_run_label = ' (dry run; not actually snapshotting anything)' if dry_run else ''
     logger.info(f'{log_prefix}: Snapshotting Btrfs subvolumes{dry_run_label}')
 
-    # Based on the configured source directories, determine Btrfs subvolumes to backup.
+    # Based on the configured patterns, determine Btrfs subvolumes to backup.
     btrfs_command = hook_config.get('btrfs_command', 'btrfs')
     findmnt_command = hook_config.get('findmnt_command', 'findmnt')
-    subvolumes = get_subvolumes(btrfs_command, findmnt_command, source_directories)
+    subvolumes = get_subvolumes(btrfs_command, findmnt_command, patterns)
 
     if not subvolumes:
         logger.warning(f'{log_prefix}: No Btrfs subvolumes found to snapshot{dry_run_label}')
 
-    # Snapshot each subvolume, rewriting source directories to use their snapshot paths.
+    # Snapshot each subvolume, rewriting patterns to use their snapshot paths.
     for subvolume in subvolumes:
         logger.debug(f'{log_prefix}: Creating Btrfs snapshot for {subvolume.path} subvolume')
 
@@ -230,17 +253,14 @@ def dump_data_sources(
 
         snapshot_subvolume(btrfs_command, subvolume.path, snapshot_path)
 
-        for source_directory in subvolume.contained_source_directories:
+        for pattern in subvolume.contained_patterns:
+            # Update the pattern in place, since pattern order matters to Borg.
             try:
-                source_directories.remove(source_directory)
+                patterns[patterns.index(pattern)] = make_borg_snapshot_pattern(
+                    subvolume.path, pattern
+                )
             except ValueError:
                 pass
 
-            source_directories.append(
-                make_borg_source_directory_path(subvolume.path, source_directory)
-            )
-
-        config.setdefault('exclude_patterns', []).append(make_snapshot_exclude_path(subvolume.path))
+        patterns.append(make_snapshot_exclude_pattern(subvolume.path))
 
     return []
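
To make the rewriting concrete: for a subvolume mounted at /mnt that contains a root
pattern for /mnt/data, and assuming os.getpid() returns 1234, make_borg_snapshot_pattern()
above produces roughly:

    make_borg_snapshot_pattern('/mnt', Pattern('/mnt/data'))
    # -> Pattern('/mnt/.borgmatic-snapshot-1234/./mnt/data', Pattern_type.ROOT, ...)
    #
    # The "." component is the Borg 1.4+ "slashdot" hack, so the file still lands
    # in the archive at its original path of mnt/data.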
 

+ 59 - 33
borgmatic/hooks/data_source/lvm.py

@@ -6,6 +6,7 @@ import os
 import shutil
 import subprocess
 
+import borgmatic.borg.pattern
 import borgmatic.config.paths
 import borgmatic.execute
 import borgmatic.hooks.data_source.snapshot
@@ -22,18 +23,17 @@ def use_streaming(hook_config, config, log_prefix):  # pragma: no cover
 
 BORGMATIC_SNAPSHOT_PREFIX = 'borgmatic-'
 Logical_volume = collections.namedtuple(
-    'Logical_volume', ('name', 'device_path', 'mount_point', 'contained_source_directories')
+    'Logical_volume', ('name', 'device_path', 'mount_point', 'contained_patterns')
 )
 
 
-def get_logical_volumes(lsblk_command, source_directories=None):
+def get_logical_volumes(lsblk_command, patterns=None):
     '''
-    Given an lsblk command to run and a sequence of configured source directories, find the
-    intersection between the current LVM logical volume mount points and the configured borgmatic
-    source directories. The idea is that these are the requested logical volumes to snapshot.
+    Given an lsblk command to run and a sequence of configured patterns, find the intersection
+    between the current LVM logical volume mount points and the paths of any patterns. The idea is
+    that these pattern paths represent the requested logical volumes to snapshot.
 
-    If source directories is None, include all logical volume mounts points, not just those in
-    source directories.
+    If patterns is None, include all logical volume mount points, not just those in patterns.
 
     Return the result as a sequence of Logical_volume instances.
     '''
@@ -53,21 +53,21 @@ def get_logical_volumes(lsblk_command, source_directories=None):
     except json.JSONDecodeError as error:
         raise ValueError(f'Invalid {lsblk_command} JSON output: {error}')
 
-    candidate_source_directories = set(source_directories or ())
+    candidate_patterns = set(patterns or ())
 
     try:
         return tuple(
             Logical_volume(
-                device['name'], device['path'], device['mountpoint'], contained_source_directories
+                device['name'], device['path'], device['mountpoint'], contained_patterns
             )
             for device in devices_info['blockdevices']
             if device['mountpoint'] and device['type'] == 'lvm'
-            for contained_source_directories in (
-                borgmatic.hooks.data_source.snapshot.get_contained_directories(
-                    device['mountpoint'], candidate_source_directories
+            for contained_patterns in (
+                borgmatic.hooks.data_source.snapshot.get_contained_patterns(
+                    device['mountpoint'], candidate_patterns
                 ),
             )
-            if not source_directories or contained_source_directories
+            if not patterns or contained_patterns
         )
     except KeyError as error:
         raise ValueError(f'Invalid {lsblk_command} output: Missing key "{error}"')
@@ -119,6 +119,37 @@ def mount_snapshot(mount_command, snapshot_device, snapshot_mount_path):  # prag
     )
 
 
+def make_borg_snapshot_pattern(pattern, normalized_runtime_directory):
+    '''
+    Given a Borg pattern as a borgmatic.borg.pattern.Pattern instance, return a new Pattern with its
+    path rewritten to be in a snapshot directory based on the given runtime directory.
+
+    If a regular expression pattern path starts with a caret, preserve it at the start of the
+    rewritten path so as not to break the regular expression.
+    '''
+    initial_caret = (
+        '^'
+        if pattern.style == borgmatic.borg.pattern.Pattern_style.REGULAR_EXPRESSION
+        and pattern.path.startswith('^')
+        else ''
+    )
+
+    rewritten_path = initial_caret + os.path.join(
+        normalized_runtime_directory,
+        'lvm_snapshots',
+        '.',  # Borg 1.4+ "slashdot" hack.
+        # Included so that the source directory ends up in the Borg archive at its "original" path.
+        pattern.path.lstrip('^').lstrip(os.path.sep),
+    )
+
+    return borgmatic.borg.pattern.Pattern(
+        rewritten_path,
+        pattern.type,
+        pattern.style,
+        pattern.device,
+    )
+
+
 DEFAULT_SNAPSHOT_SIZE = '10%ORIGIN'
 
 
@@ -128,16 +159,16 @@ def dump_data_sources(
     log_prefix,
     config_paths,
     borgmatic_runtime_directory,
-    source_directories,
+    patterns,
     dry_run,
 ):
     '''
     Given an LVM configuration dict, a configuration dict, a log prefix, the borgmatic configuration
-    file paths, the borgmatic runtime directory, the configured source directories, and whether this
-    is a dry run, auto-detect and snapshot any LVM logical volume mount points listed in the given
-    source directories. Also update those source directories, replacing logical volume mount points
-    with corresponding snapshot directories so they get stored in the Borg archive instead. Use the
-    log prefix in any log entries.
+    file paths, the borgmatic runtime directory, the configured patterns, and whether this is a dry
+    run, auto-detect and snapshot any LVM logical volume mount points listed in the given patterns.
+    Also update those patterns, replacing logical volume mount points with corresponding snapshot
+    directories so they get stored in the Borg archive instead. Use the log prefix in any log
+    entries.
 
     Return an empty sequence, since there are no ongoing dump processes from this hook.
 
@@ -148,7 +179,7 @@ def dump_data_sources(
 
     # List logical volumes to get their mount points.
     lsblk_command = hook_config.get('lsblk_command', 'lsblk')
-    requested_logical_volumes = get_logical_volumes(lsblk_command, source_directories)
+    requested_logical_volumes = get_logical_volumes(lsblk_command, patterns)
 
     # Snapshot each logical volume, rewriting source directories to use the snapshot paths.
     snapshot_suffix = f'{BORGMATIC_SNAPSHOT_PREFIX}{os.getpid()}'
@@ -198,23 +229,18 @@ def dump_data_sources(
             hook_config.get('mount_command', 'mount'), snapshot.device_path, snapshot_mount_path
         )
 
-        # Update the path for each contained source directory, so Borg sees it within the
-        # mounted snapshot.
-        for source_directory in logical_volume.contained_source_directories:
+        for pattern in logical_volume.contained_patterns:
+            # Update the pattern in place, since pattern order matters to Borg.
             try:
-                source_directories.remove(source_directory)
+                patterns[patterns.index(pattern)] = make_borg_snapshot_pattern(
+                    pattern, normalized_runtime_directory
+                )
             except ValueError:
                 pass
 
-            source_directories.append(
-                os.path.join(
-                    normalized_runtime_directory,
-                    'lvm_snapshots',
-                    '.',  # Borg 1.4+ "slashdot" hack.
-                    source_directory.lstrip(os.path.sep),
-                )
-            )
-
     return []
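
The caret handling above only matters for regular expression patterns. A sketch with a
hypothetical runtime directory of /run/borgmatic:

    pattern = Pattern(
        '^var/log/.*', Pattern_type.EXCLUDE, Pattern_style.REGULAR_EXPRESSION
    )
    make_borg_snapshot_pattern(pattern, '/run/borgmatic')
    # -> Pattern('^/run/borgmatic/lvm_snapshots/./var/log/.*',
    #            Pattern_type.EXCLUDE, Pattern_style.REGULAR_EXPRESSION)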
 
 

+ 9 - 3
borgmatic/hooks/data_source/mariadb.py

@@ -3,6 +3,7 @@ import logging
 import os
 import shlex
 
+import borgmatic.borg.pattern
 import borgmatic.config.paths
 from borgmatic.execute import (
     execute_command,
@@ -131,7 +132,7 @@ def dump_data_sources(
     log_prefix,
     config_paths,
     borgmatic_runtime_directory,
-    source_directories,
+    patterns,
     dry_run,
 ):
     '''
@@ -142,7 +143,8 @@ def dump_data_sources(
 
     Return a sequence of subprocess.Popen instances for the dump processes ready to spew to a named
     pipe. But if this is a dry run, then don't actually dump anything and return an empty sequence.
-    Also append the given source directories with the parent directory of the database dumps.
+    Also append the parent directory of the database dumps to the given patterns list, so the
+    dumps actually get backed up.
     '''
     dry_run_label = ' (dry run; not actually dumping anything)' if dry_run else ''
     processes = []
@@ -191,7 +193,11 @@ def dump_data_sources(
             )
 
     if not dry_run:
-        source_directories.append(os.path.join(borgmatic_runtime_directory, 'mariadb_databases'))
+        patterns.append(
+            borgmatic.borg.pattern.Pattern(
+                os.path.join(borgmatic_runtime_directory, 'mariadb_databases')
+            )
+        )
 
     return [process for process in processes if process]
 

+ 9 - 3
borgmatic/hooks/data_source/mongodb.py

@@ -2,6 +2,7 @@ import logging
 import os
 import shlex
 
+import borgmatic.borg.pattern
 import borgmatic.config.paths
 from borgmatic.execute import execute_command, execute_command_with_processes
 from borgmatic.hooks.data_source import dump
@@ -30,7 +31,7 @@ def dump_data_sources(
     log_prefix,
     config_paths,
     borgmatic_runtime_directory,
-    source_directories,
+    patterns,
     dry_run,
 ):
     '''
@@ -41,7 +42,8 @@ def dump_data_sources(
 
     Return a sequence of subprocess.Popen instances for the dump processes ready to spew to a named
     pipe. But if this is a dry run, then don't actually dump anything and return an empty sequence.
-    Also append the given source directories with the parent directory of the database dumps.
+    Also append the parent directory of the database dumps to the given patterns list, so the
+    dumps actually get backed up.
     '''
     dry_run_label = ' (dry run; not actually dumping anything)' if dry_run else ''
 
@@ -74,7 +76,11 @@ def dump_data_sources(
             processes.append(execute_command(command, shell=True, run_to_completion=False))
 
     if not dry_run:
-        source_directories.append(os.path.join(borgmatic_runtime_directory, 'mongodb_databases'))
+        patterns.append(
+            borgmatic.borg.pattern.Pattern(
+                os.path.join(borgmatic_runtime_directory, 'mongodb_databases')
+            )
+        )
 
     return processes
 

+ 9 - 3
borgmatic/hooks/data_source/mysql.py

@@ -3,6 +3,7 @@ import logging
 import os
 import shlex
 
+import borgmatic.borg.pattern
 import borgmatic.config.paths
 from borgmatic.execute import (
     execute_command,
@@ -130,7 +131,7 @@ def dump_data_sources(
     log_prefix,
     config_paths,
     borgmatic_runtime_directory,
-    source_directories,
+    patterns,
     dry_run,
 ):
     '''
@@ -141,7 +142,8 @@ def dump_data_sources(
 
     Return a sequence of subprocess.Popen instances for the dump processes ready to spew to a named
     pipe. But if this is a dry run, then don't actually dump anything and return an empty sequence.
-    Also append the given source directories with the parent directory of the database dumps.
+    Also append the parent directory of the database dumps to the given patterns list, so the
+    dumps actually get backed up.
     '''
     dry_run_label = ' (dry run; not actually dumping anything)' if dry_run else ''
     processes = []
@@ -190,7 +192,11 @@ def dump_data_sources(
             )
 
     if not dry_run:
-        source_directories.append(os.path.join(borgmatic_runtime_directory, 'mysql_databases'))
+        patterns.append(
+            borgmatic.borg.pattern.Pattern(
+                os.path.join(borgmatic_runtime_directory, 'mysql_databases')
+            )
+        )
 
     return [process for process in processes if process]
 

+ 9 - 3
borgmatic/hooks/data_source/postgresql.py

@@ -5,6 +5,7 @@ import os
 import pathlib
 import shlex
 
+import borgmatic.borg.pattern
 import borgmatic.config.paths
 from borgmatic.execute import (
     execute_command,
@@ -110,7 +111,7 @@ def dump_data_sources(
     log_prefix,
     config_paths,
     borgmatic_runtime_directory,
-    source_directories,
+    patterns,
     dry_run,
 ):
     '''
@@ -121,7 +122,8 @@ def dump_data_sources(
 
     Return a sequence of subprocess.Popen instances for the dump processes ready to spew to a named
     pipe. But if this is a dry run, then don't actually dump anything and return an empty sequence.
-    Also append the given source directories with the parent directory of the database dumps.
+    Also append the parent directory of the database dumps to the given patterns list, so the
+    dumps actually get backed up.
 
     Raise ValueError if the databases to dump cannot be determined.
     '''
@@ -216,7 +218,11 @@ def dump_data_sources(
                 )
 
     if not dry_run:
-        source_directories.append(os.path.join(borgmatic_runtime_directory, 'postgresql_databases'))
+        patterns.append(
+            borgmatic.borg.pattern.Pattern(
+                os.path.join(borgmatic_runtime_directory, 'postgresql_databases')
+            )
+        )
 
     return processes
 

+ 30 - 18
borgmatic/hooks/data_source/snapshot.py

@@ -1,30 +1,42 @@
 import pathlib
 
+import borgmatic.borg.pattern
+
+
 IS_A_HOOK = False
 
 
-def get_contained_directories(parent_directory, candidate_contained_directories):
+def get_contained_patterns(parent_directory, candidate_patterns):
     '''
-    Given a parent directory and a set of candidate directories potentially inside it, get the
-    subset of contained directories for which the parent directory is actually the parent, a
-    grandparent, the very same directory, etc. The idea is if, say, /var/log and /var/lib are
-    candidate contained directories, but there's a parent directory (logical volume, dataset,
-    subvolume, etc.) at /var, then /var is what we want to snapshot.
-
-    Also mutate the given set of candidate contained directories to remove any actually contained
-    directories from it. That way, this function can be called multiple times, successively
-    processing candidate directories until none are left—and avoiding assigning any candidate
-    directory to more than one parent directory.
+    Given a parent directory and a set of candidate patterns potentially inside it, get the subset
+    of contained patterns for which the parent directory is actually the parent, a grandparent, the
+    very same directory, etc. The idea is if, say, /var/log and /var/lib are candidate pattern
+    paths, but there's a parent directory (logical volume, dataset, subvolume, etc.) at /var, then
+    /var is what we want to snapshot.
+
+    For this to work, a candidate pattern path can't have any globs or other non-literal characters
+    in the initial portion of the path that matches the parent directory. For instance, a parent
+    directory of /var would match a candidate pattern path of /var/log/*/data, but not a pattern
+    path like /v*/log/*/data.
+
+    The one exception is that if a regular expression pattern path starts with "^", that will get
+    stripped off for purposes of matching against a parent directory.
+
+    As part of this, also mutate the given set of candidate patterns to remove any actually
+    contained patterns from it. That way, this function can be called multiple times, successively
+    processing candidate patterns until none are left—and avoiding assigning any candidate pattern
+    to more than one parent directory.
     '''
-    if not candidate_contained_directories:
+    if not candidate_patterns:
         return ()
 
-    contained = tuple(
+    contained_patterns = tuple(
         candidate
-        for candidate in candidate_contained_directories
-        if pathlib.PurePath(parent_directory) == pathlib.PurePath(candidate)
-        or pathlib.PurePath(parent_directory) in pathlib.PurePath(candidate).parents
+        for candidate in candidate_patterns
+        for candidate_path in (pathlib.PurePath(candidate.path.lstrip('^')),)
+        if pathlib.PurePath(parent_directory) == candidate_path
+        or pathlib.PurePath(parent_directory) in candidate_path.parents
     )
-    candidate_contained_directories -= set(contained)
+    candidate_patterns -= set(contained_patterns)
 
-    return contained
+    return contained_patterns
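
A minimal usage sketch of the new signature (paths are hypothetical; Pattern is the namedtuple from borgmatic.borg.pattern):

    import borgmatic.borg.pattern
    from borgmatic.hooks.data_source import snapshot

    candidates = {
        borgmatic.borg.pattern.Pattern('/var/log'),
        borgmatic.borg.pattern.Pattern('/var/lib'),
        borgmatic.borg.pattern.Pattern('/home/user'),
    }

    # /var is the same directory as, or an ancestor of, the first two candidate
    # paths, so both get returned and removed from the candidate set.
    contained = snapshot.get_contained_patterns('/var', candidates)
    assert {pattern.path for pattern in contained} == {'/var/log', '/var/lib'}

    # Only /home/user remains for a later parent directory to claim.
    assert {pattern.path for pattern in candidates} == {'/home/user'}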

+ 9 - 3
borgmatic/hooks/data_source/sqlite.py

@@ -2,6 +2,7 @@ import logging
 import os
 import shlex
 
+import borgmatic.borg.pattern
 import borgmatic.config.paths
 from borgmatic.execute import execute_command, execute_command_with_processes
 from borgmatic.hooks.data_source import dump
@@ -30,7 +31,7 @@ def dump_data_sources(
     log_prefix,
     config_paths,
     borgmatic_runtime_directory,
-    source_directories,
+    patterns,
     dry_run,
 ):
     '''
@@ -40,7 +41,8 @@ def dump_data_sources(
 
     Return a sequence of subprocess.Popen instances for the dump processes ready to spew to a named
     pipe. But if this is a dry run, then don't actually dump anything and return an empty sequence.
-    Also append the given source directories with the parent directory of the database dumps.
+    Also append the parent directory of the database dumps to the given patterns list, so the
+    dumps actually get backed up.
     '''
     dry_run_label = ' (dry run; not actually dumping anything)' if dry_run else ''
     processes = []
@@ -83,7 +85,11 @@ def dump_data_sources(
         processes.append(execute_command(command, shell=True, run_to_completion=False))
 
     if not dry_run:
-        source_directories.append(os.path.join(borgmatic_runtime_directory, 'sqlite_databases'))
+        patterns.append(
+            borgmatic.borg.pattern.Pattern(
+                os.path.join(borgmatic_runtime_directory, 'sqlite_databases')
+            )
+        )
 
     return processes
 

+ 67 - 37
borgmatic/hooks/data_source/zfs.py

@@ -5,6 +5,7 @@ import os
 import shutil
 import subprocess
 
+import borgmatic.borg.pattern
 import borgmatic.config.paths
 import borgmatic.execute
 import borgmatic.hooks.data_source.snapshot
@@ -25,18 +26,17 @@ BORGMATIC_USER_PROPERTY = 'org.torsion.borgmatic:backup'
 
 Dataset = collections.namedtuple(
     'Dataset',
-    ('name', 'mount_point', 'auto_backup', 'contained_source_directories'),
+    ('name', 'mount_point', 'auto_backup', 'contained_patterns'),
     defaults=(False, ()),
 )
 
 
-def get_datasets_to_backup(zfs_command, source_directories):
+def get_datasets_to_backup(zfs_command, patterns):
     '''
-    Given a ZFS command to run and a sequence of configured source directories, find the
-    intersection between the current ZFS dataset mount points and the configured borgmatic source
-    directories. The idea is that these are the requested datasets to snapshot. But also include any
-    datasets tagged with a borgmatic-specific user property, whether or not they appear in source
-    directories.
+    Given a ZFS command to run and a sequence of configured patterns, find the intersection between
+    the current ZFS dataset mount points and the paths of any patterns. The idea is that these
+    pattern paths represent the requested datasets to snapshot. But also include any datasets tagged
+    with a borgmatic-specific user property, whether or not they appear in the patterns.
 
     Return the result as a sequence of Dataset instances, sorted by mount point.
     '''
@@ -54,9 +54,8 @@ def get_datasets_to_backup(zfs_command, source_directories):
 
     try:
         # Sort from longest to shortest mount points, so longer mount points get a whack at the
-        # candidate source directory piñata before their parents do. (Source directories are
-        # consumed during the second loop below, so no two datasets get the same contained source
-        # directories.)
+        # candidate pattern piñata before their parents do. (Patterns are consumed during the second
+        # loop below, so no two datasets end up with the same contained patterns.)
         datasets = sorted(
             (
                 Dataset(dataset_name, mount_point, (user_property_value == 'auto'), ())
@@ -69,7 +68,7 @@ def get_datasets_to_backup(zfs_command, source_directories):
     except ValueError:
         raise ValueError(f'Invalid {zfs_command} list output')
 
-    candidate_source_directories = set(source_directories)
+    candidate_patterns = set(patterns)
 
     return tuple(
         sorted(
@@ -78,19 +77,22 @@ def get_datasets_to_backup(zfs_command, source_directories):
                     dataset.name,
                     dataset.mount_point,
                     dataset.auto_backup,
-                    contained_source_directories,
+                    contained_patterns,
                 )
                 for dataset in datasets
-                for contained_source_directories in (
+                for contained_patterns in (
                     (
-                        (dataset.mount_point,)
-                        if dataset.auto_backup
-                        else borgmatic.hooks.data_source.snapshot.get_contained_directories(
-                            dataset.mount_point, candidate_source_directories
+                        (
+                            (borgmatic.borg.pattern.Pattern(dataset.mount_point),)
+                            if dataset.auto_backup
+                            else ()
+                        )
+                        + borgmatic.hooks.data_source.snapshot.get_contained_patterns(
+                            dataset.mount_point, candidate_patterns
                         )
                     ),
                 )
-                if contained_source_directories
+                if contained_patterns
             ),
             key=lambda dataset: dataset.mount_point,
         )
@@ -153,22 +155,53 @@ def mount_snapshot(mount_command, full_snapshot_name, snapshot_mount_path):  # p
     )
 
 
+def make_borg_snapshot_pattern(pattern, normalized_runtime_directory):
+    '''
+    Given a Borg pattern as a borgmatic.borg.pattern.Pattern instance, return a new Pattern with its
+    path rewritten to be in a snapshot directory based on the given runtime directory.
+
+    Keep any initial caret in a regular expression pattern path at the very start of the rewritten
+    path, so that prepending the snapshot directory doesn't break the regular expression.
+    '''
+    initial_caret = (
+        '^'
+        if pattern.style == borgmatic.borg.pattern.Pattern_style.REGULAR_EXPRESSION
+        and pattern.path.startswith('^')
+        else ''
+    )
+
+    rewritten_path = initial_caret + os.path.join(
+        normalized_runtime_directory,
+        'zfs_snapshots',
+        '.',  # Borg 1.4+ "slashdot" hack.
+        # Included so that the source directory ends up in the Borg archive at its "original" path.
+        pattern.path.lstrip('^').lstrip(os.path.sep),
+    )
+
+    return borgmatic.borg.pattern.Pattern(
+        rewritten_path,
+        pattern.type,
+        pattern.style,
+        pattern.device,
+    )
+
+
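A worked example of the rewrite, using a hypothetical runtime directory and pattern:

    import borgmatic.borg.pattern
    from borgmatic.hooks.data_source import zfs

    pattern = borgmatic.borg.pattern.Pattern(
        '^/var/log/.*',
        borgmatic.borg.pattern.Pattern_type.EXCLUDE,
        borgmatic.borg.pattern.Pattern_style.REGULAR_EXPRESSION,
    )

    rewritten = zfs.make_borg_snapshot_pattern(pattern, '/run/borgmatic')

    # The caret stays at the front of the rewritten path, and the "slashdot"
    # component keeps the archived path at its original location.
    assert rewritten.path == '^/run/borgmatic/zfs_snapshots/./var/log/.*'
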
 def dump_data_sources(
     hook_config,
     config,
     log_prefix,
     config_paths,
     borgmatic_runtime_directory,
-    source_directories,
+    patterns,
     dry_run,
 ):
     '''
     Given a ZFS configuration dict, a configuration dict, a log prefix, the borgmatic configuration
-    file paths, the borgmatic runtime directory, the configured source directories, and whether this
-    is a dry run, auto-detect and snapshot any ZFS dataset mount points listed in the given source
-    directories and any dataset with a borgmatic-specific user property. Also update those source
-    directories, replacing dataset mount points with corresponding snapshot directories so they get
-    stored in the Borg archive instead. Use the log prefix in any log entries.
+    file paths, the borgmatic runtime directory, the configured patterns, and whether this is a dry
+    run, auto-detect and snapshot any ZFS dataset mount points listed in the given patterns and any
+    dataset with a borgmatic-specific user property. Also update those patterns, replacing dataset
+    mount points with corresponding snapshot directories so they get stored in the Borg archive
+    instead. Use the log prefix in any log entries.
 
     Return an empty sequence, since there are no ongoing dump processes from this hook.
 
@@ -179,9 +212,9 @@ def dump_data_sources(
 
     # List ZFS datasets to get their mount points.
     zfs_command = hook_config.get('zfs_command', 'zfs')
-    requested_datasets = get_datasets_to_backup(zfs_command, source_directories)
+    requested_datasets = get_datasets_to_backup(zfs_command, patterns)
 
-    # Snapshot each dataset, rewriting source directories to use the snapshot paths.
+    # Snapshot each dataset, rewriting patterns to use the snapshot paths.
     snapshot_name = f'{BORGMATIC_SNAPSHOT_PREFIX}{os.getpid()}'
     normalized_runtime_directory = os.path.normpath(borgmatic_runtime_directory)
 
@@ -216,21 +249,16 @@ def dump_data_sources(
             hook_config.get('mount_command', 'mount'), full_snapshot_name, snapshot_mount_path
         )
 
-        for source_directory in dataset.contained_source_directories:
+        for pattern in dataset.contained_patterns:
+            # Update the pattern in place, since pattern order matters to Borg.
             try:
-                source_directories.remove(source_directory)
+                patterns[patterns.index(pattern)] = make_borg_snapshot_pattern(
+                    pattern,
+                    normalized_runtime_directory,
+                )
             except ValueError:
                 pass
 
-            source_directories.append(
-                os.path.join(
-                    normalized_runtime_directory,
-                    'zfs_snapshots',
-                    '.',  # Borg 1.4+ "slashdot" hack.
-                    source_directory.lstrip(os.path.sep),
-                )
-            )
-
     return []
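
The net effect on the configured patterns, shown as a hypothetical before and after for a dataset mounted at /var/log and a runtime directory of /run/borgmatic:

    # Before: patterns as collected from the configuration.
    #   Pattern('/var/log')
    #   Pattern('/var/log/*.tmp', Pattern_type.EXCLUDE, Pattern_style.FNMATCH)
    #
    # After: rewritten in place, preserving order, to point at the snapshot.
    #   Pattern('/run/borgmatic/zfs_snapshots/./var/log')
    #   Pattern('/run/borgmatic/zfs_snapshots/./var/log/*.tmp',
    #           Pattern_type.EXCLUDE, Pattern_style.FNMATCH)

Borg then reads from the stable snapshot, and because the exclude is rewritten alongside the root pattern, it still applies to the same files it matched at the original location.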