
Initial work on fixing ZFS mount errors (#1001).

Dan Helfman, 3 months ago
parent commit: 58aed0892c

+ 6 - 1
borgmatic/actions/create.py

@@ -36,6 +36,7 @@ def parse_pattern(pattern_line, default_style=borgmatic.borg.pattern.Pattern_sty
         path,
         borgmatic.borg.pattern.Pattern_type(pattern_type),
         borgmatic.borg.pattern.Pattern_style(pattern_style),
+        source=borgmatic.borg.pattern.Pattern_source.CONFIG,
     )
 
 
@@ -51,7 +52,9 @@ def collect_patterns(config):
     try:
         return (
             tuple(
-                borgmatic.borg.pattern.Pattern(source_directory)
+                borgmatic.borg.pattern.Pattern(
+                    source_directory, source=borgmatic.borg.pattern.Pattern_source.CONFIG
+                )
                 for source_directory in config.get('source_directories', ())
             )
             + tuple(
@@ -144,6 +147,7 @@ def expand_patterns(patterns, working_directory=None, skip_paths=None):
                         pattern.type,
                         pattern.style,
                         pattern.device,
+                        pattern.source,
                     )
                     for expanded_path in expand_directory(pattern.path, working_directory)
                 )
@@ -178,6 +182,7 @@ def device_map_patterns(patterns, working_directory=None):
                 and os.path.exists(full_path)
                 else None
             ),
+            source=pattern.source,
         )
         for pattern in patterns
         for full_path in (os.path.join(working_directory or '', pattern.path),)

+ 2 - 1
borgmatic/borg/create.py

@@ -167,7 +167,7 @@ def collect_special_file_paths(
             path for path in paths if any_parent_directories(path, (borgmatic_runtime_directory,))
         }
 
-        # No paths to backup contain the runtime directory, so therefore it must've been excluded.
+        # If no paths to back up contain the runtime directory, it must've been excluded.
         if not paths_containing_runtime_directory and not dry_run:
             raise ValueError(
                 f'The runtime directory {os.path.normpath(borgmatic_runtime_directory)} overlaps with the configured excludes or patterns with excludes. Please ensure the runtime directory is not excluded.'
@@ -336,6 +336,7 @@ def make_base_create_command(
                         special_file_path,
                         borgmatic.borg.pattern.Pattern_type.NO_RECURSE,
                         borgmatic.borg.pattern.Pattern_style.FNMATCH,
+                        source=borgmatic.borg.pattern.Pattern_source.INTERNAL,
                     )
                     for special_file_path in special_file_paths
                 ),

+ 20 - 1
borgmatic/borg/pattern.py

@@ -20,12 +20,31 @@ class Pattern_style(enum.Enum):
     PATH_FULL_MATCH = 'pf'
 
 
+class Pattern_source(enum.Enum):
+    '''
+    Where the pattern came from within borgmatic. This is important because certain use cases (like
+    filesystem snapshotting) only want to consider patterns that the user actually put in a
+    configuration file and not patterns from other sources.
+    '''
+
+    # The pattern is from a borgmatic configuration option, e.g. listed in "source_directories".
+    CONFIG = 'config'
+
+    # The pattern is generated internally within borgmatic, e.g. for special file excludes.
+    INTERNAL = 'internal'
+
+    # The pattern originates from within a borgmatic hook, e.g. a database hook that adds its dump
+    # directory.
+    HOOK = 'hook'
+
+
 Pattern = collections.namedtuple(
     'Pattern',
-    ('path', 'type', 'style', 'device'),
+    ('path', 'type', 'style', 'device', 'source'),
     defaults=(
         Pattern_type.ROOT,
         Pattern_style.NONE,
         None,
+        Pattern_source.HOOK,
     ),
 )
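
For reference, a minimal sketch (not part of this commit; the example paths are hypothetical) of how the expanded namedtuple behaves, assuming only the enums and defaults shown above:

    import borgmatic.borg.pattern as pattern_module

    # With only a path, the defaults shown above apply: ROOT type, NONE style, no device,
    # and a source of Pattern_source.HOOK.
    hook_pattern = pattern_module.Pattern('/run/borgmatic/bootstrap')
    assert hook_pattern.source == pattern_module.Pattern_source.HOOK

    # Patterns built from user configuration now pass the source explicitly.
    config_pattern = pattern_module.Pattern(
        '/home', source=pattern_module.Pattern_source.CONFIG
    )
    assert config_pattern.source == pattern_module.Pattern_source.CONFIG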

+ 10 - 2
borgmatic/hooks/data_source/bootstrap.py

@@ -55,9 +55,17 @@ def dump_data_sources(
             manifest_file,
         )
 
-    patterns.extend(borgmatic.borg.pattern.Pattern(config_path) for config_path in config_paths)
+    patterns.extend(
+        borgmatic.borg.pattern.Pattern(
+            config_path, source=borgmatic.borg.pattern.Pattern_source.HOOK
+        )
+        for config_path in config_paths
+    )
     patterns.append(
-        borgmatic.borg.pattern.Pattern(os.path.join(borgmatic_runtime_directory, 'bootstrap'))
+        borgmatic.borg.pattern.Pattern(
+            os.path.join(borgmatic_runtime_directory, 'bootstrap'),
+            source=borgmatic.borg.pattern.Pattern_source.HOOK,
+        )
     )
 
     return []

+ 13 - 3
borgmatic/hooks/data_source/btrfs.py

@@ -54,7 +54,9 @@ def get_subvolumes(btrfs_command, findmnt_command, patterns=None):
     between the current Btrfs filesystem and subvolume mount points and the paths of any patterns.
     The idea is that these pattern paths represent the requested subvolumes to snapshot.
 
-    If patterns is None, then return all subvolumes, sorted by path.
+    Only include subvolumes that contain at least one root pattern sourced from borgmatic
+    configuration (as opposed to generated elsewhere in borgmatic). But if patterns is None, then
+    return all subvolumes instead, sorted by path.
 
     Return the result as a sequence of matching subvolume mount points.
     '''
@@ -73,7 +75,12 @@ def get_subvolumes(btrfs_command, findmnt_command, patterns=None):
                     mount_point, candidate_patterns
                 ),
             )
-            if patterns is None or contained_patterns
+            if patterns is None
+            or any(
+                pattern.type == borgmatic.borg.pattern.Pattern_type.ROOT
+                and pattern.source == borgmatic.borg.pattern.Pattern_source.CONFIG
+                for pattern in contained_patterns
+            )
         )
 
     return tuple(sorted(subvolumes, key=lambda subvolume: subvolume.path))
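
A small sketch of the new filter in isolation (the helper name and example paths are hypothetical), showing that only root patterns sourced from configuration count toward selecting a mount point:

    import borgmatic.borg.pattern as pattern_module

    def has_config_root_pattern(contained_patterns):
        # Mirrors the any(...) condition above: at least one contained pattern must be a
        # root pattern that came from the user's configuration, not from another hook.
        return any(
            pattern.type == pattern_module.Pattern_type.ROOT
            and pattern.source == pattern_module.Pattern_source.CONFIG
            for pattern in contained_patterns
        )

    config_pattern = pattern_module.Pattern(
        '/mnt/subvolume/data', source=pattern_module.Pattern_source.CONFIG
    )
    hook_pattern = pattern_module.Pattern(
        '/mnt/subvolume/dumps', source=pattern_module.Pattern_source.HOOK
    )

    assert has_config_root_pattern((config_pattern, hook_pattern))
    assert not has_config_root_pattern((hook_pattern,))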
@@ -118,6 +125,7 @@ def make_snapshot_exclude_pattern(subvolume_path):  # pragma: no cover
             snapshot_directory,
             subvolume_path.lstrip(os.path.sep),
             snapshot_directory,
+            source=borgmatic.borg.pattern.Pattern_source.HOOK,
         ),
         borgmatic.borg.pattern.Pattern_type.NO_RECURSE,
         borgmatic.borg.pattern.Pattern_style.FNMATCH,
@@ -153,6 +161,7 @@ def make_borg_snapshot_pattern(subvolume_path, pattern):
         pattern.type,
         pattern.style,
         pattern.device,
+        source=borgmatic.borg.pattern.Pattern_source.HOOK,
     )
 
 
@@ -198,7 +207,8 @@ def dump_data_sources(
     dry_run_label = ' (dry run; not actually snapshotting anything)' if dry_run else ''
     logger.info(f'Snapshotting Btrfs subvolumes{dry_run_label}')
 
-    # Based on the configured patterns, determine Btrfs subvolumes to backup.
+    # Based on the configured patterns, determine Btrfs subvolumes to backup. Only consider those
+    # patterns that came from actual user configuration (as opposed to, say, other hooks).
     btrfs_command = hook_config.get('btrfs_command', 'btrfs')
     findmnt_command = hook_config.get('findmnt_command', 'findmnt')
     subvolumes = get_subvolumes(btrfs_command, findmnt_command, patterns)

+ 30 - 7
borgmatic/hooks/data_source/lvm.py

@@ -1,5 +1,6 @@
 import collections
 import glob
+import hashlib
 import json
 import logging
 import os
@@ -33,7 +34,9 @@ def get_logical_volumes(lsblk_command, patterns=None):
     between the current LVM logical volume mount points and the paths of any patterns. The idea is
     that these pattern paths represent the requested logical volumes to snapshot.
 
-    If patterns is None, include all logical volume mounts points, not just those in patterns.
+    Only include logical volumes that contain at least one root pattern sourced from borgmatic
+    configuration (as opposed to generated elsewhere in borgmatic). But if patterns is None, include
+    all logical volume mount points instead, not just those in patterns.
 
     Return the result as a sequence of Logical_volume instances.
     '''
@@ -72,7 +75,12 @@ def get_logical_volumes(lsblk_command, patterns=None):
                     device['mountpoint'], candidate_patterns
                 ),
             )
-            if not patterns or contained_patterns
+            if not patterns
+            or any(
+                pattern.type == borgmatic.borg.pattern.Pattern_type.ROOT
+                and pattern.source == borgmatic.borg.pattern.Pattern_source.CONFIG
+                for pattern in contained_patterns
+            )
         )
     except KeyError as error:
         raise ValueError(f'Invalid {lsblk_command} output: Missing key "{error}"')
@@ -124,10 +132,14 @@ def mount_snapshot(mount_command, snapshot_device, snapshot_mount_path):  # prag
     )
 
 
-def make_borg_snapshot_pattern(pattern, normalized_runtime_directory):
+MOUNT_POINT_HASH_LENGTH = 10
+
+
+def make_borg_snapshot_pattern(pattern, logical_volume, normalized_runtime_directory):
     '''
-    Given a Borg pattern as a borgmatic.borg.pattern.Pattern instance, return a new Pattern with its
-    path rewritten to be in a snapshot directory based on the given runtime directory.
+    Given a Borg pattern as a borgmatic.borg.pattern.Pattern instance and a Logical_volume
+    containing it, return a new Pattern with its path rewritten to be in a snapshot directory based
+    on both the given runtime directory and the given Logical_volume's mount point.
 
     Move any initial caret in a regular expression pattern path to the beginning, so as not to break
     the regular expression.
@@ -142,6 +154,11 @@ def make_borg_snapshot_pattern(pattern, normalized_runtime_directory):
     rewritten_path = initial_caret + os.path.join(
         normalized_runtime_directory,
         'lvm_snapshots',
+        # Including this hash prevents conflicts between snapshot patterns for different logical
+        # volumes. For instance, without this, snapshotting a logical volume at /var and another at
+        # /var/spool would result in overlapping snapshot patterns and therefore colliding mount
+        # attempts.
+        hashlib.shake_256(logical_volume.mount_point.encode('utf-8')).hexdigest(MOUNT_POINT_HASH_LENGTH),
         '.',  # Borg 1.4+ "slashdot" hack.
         # Included so that the source directory ends up in the Borg archive at its "original" path.
         pattern.path.lstrip('^').lstrip(os.path.sep),
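
To illustrate the hashed path component (a sketch with hypothetical mount points; only hashlib from the standard library is assumed):

    import hashlib

    MOUNT_POINT_HASH_LENGTH = 10

    def mount_point_hash(mount_point):
        # shake_256 is a variable-length digest; hexdigest(10) yields a stable
        # 20-character hex string per mount point.
        return hashlib.shake_256(mount_point.encode('utf-8')).hexdigest(MOUNT_POINT_HASH_LENGTH)

    # Nested mount points now land in distinct snapshot directories instead of colliding:
    assert mount_point_hash('/var') != mount_point_hash('/var/spool')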
@@ -152,6 +169,7 @@ def make_borg_snapshot_pattern(pattern, normalized_runtime_directory):
         pattern.type,
         pattern.style,
         pattern.device,
+        source=borgmatic.borg.pattern.Pattern_source.HOOK,
     )
 
 
@@ -180,7 +198,8 @@ def dump_data_sources(
     dry_run_label = ' (dry run; not actually snapshotting anything)' if dry_run else ''
     logger.info(f'Snapshotting LVM logical volumes{dry_run_label}')
 
-    # List logical volumes to get their mount points.
+    # List logical volumes to get their mount points, but only consider those patterns that came
+    # from actual user configuration (as opposed to, say, other hooks).
     lsblk_command = hook_config.get('lsblk_command', 'lsblk')
     requested_logical_volumes = get_logical_volumes(lsblk_command, patterns)
 
@@ -218,6 +237,7 @@ def dump_data_sources(
         snapshot_mount_path = os.path.join(
             normalized_runtime_directory,
             'lvm_snapshots',
+            hashlib.shake_256(logical_volume.mount_point.encode('utf-8')).hexdigest(MOUNT_POINT_HASH_LENGTH),
             logical_volume.mount_point.lstrip(os.path.sep),
         )
 
@@ -233,7 +253,9 @@ def dump_data_sources(
         )
 
         for pattern in logical_volume.contained_patterns:
-            snapshot_pattern = make_borg_snapshot_pattern(pattern, normalized_runtime_directory)
+            snapshot_pattern = make_borg_snapshot_pattern(
+                pattern, logical_volume, normalized_runtime_directory
+            )
 
             # Attempt to update the pattern in place, since pattern order matters to Borg.
             try:
@@ -337,6 +359,7 @@ def remove_data_source_dumps(hook_config, config, borgmatic_runtime_directory, d
             os.path.normpath(borgmatic_runtime_directory),
         ),
         'lvm_snapshots',
+        '*',
     )
     logger.debug(f'Looking for snapshots to remove in {snapshots_glob}{dry_run_label}')
     umount_command = hook_config.get('umount_command', 'umount')

+ 2 - 1
borgmatic/hooks/data_source/mariadb.py

@@ -215,7 +215,8 @@ def dump_data_sources(
     if not dry_run:
         patterns.append(
             borgmatic.borg.pattern.Pattern(
-                os.path.join(borgmatic_runtime_directory, 'mariadb_databases')
+                os.path.join(borgmatic_runtime_directory, 'mariadb_databases'),
+                source=borgmatic.borg.pattern.Pattern_source.HOOK,
             )
         )
 

+ 2 - 1
borgmatic/hooks/data_source/mongodb.py

@@ -81,7 +81,8 @@ def dump_data_sources(
     if not dry_run:
         patterns.append(
             borgmatic.borg.pattern.Pattern(
-                os.path.join(borgmatic_runtime_directory, 'mongodb_databases')
+                os.path.join(borgmatic_runtime_directory, 'mongodb_databases'),
+                source=borgmatic.borg.pattern.Pattern_source.HOOK,
             )
         )
 

+ 2 - 1
borgmatic/hooks/data_source/mysql.py

@@ -214,7 +214,8 @@ def dump_data_sources(
     if not dry_run:
         patterns.append(
             borgmatic.borg.pattern.Pattern(
-                os.path.join(borgmatic_runtime_directory, 'mysql_databases')
+                os.path.join(borgmatic_runtime_directory, 'mysql_databases'),
+                source=borgmatic.borg.pattern.Pattern_source.HOOK,
             )
         )
 

+ 2 - 1
borgmatic/hooks/data_source/postgresql.py

@@ -241,7 +241,8 @@ def dump_data_sources(
     if not dry_run:
         patterns.append(
             borgmatic.borg.pattern.Pattern(
-                os.path.join(borgmatic_runtime_directory, 'postgresql_databases')
+                os.path.join(borgmatic_runtime_directory, 'postgresql_databases'),
+                source=borgmatic.borg.pattern.Pattern_source.HOOK,
             )
         )
 

+ 10 - 6
borgmatic/hooks/data_source/snapshot.py

@@ -1,3 +1,5 @@
+import logging
+
 import pathlib
 
 IS_A_HOOK = False
@@ -11,10 +13,10 @@ def get_contained_patterns(parent_directory, candidate_patterns):
     paths, but there's a parent directory (logical volume, dataset, subvolume, etc.) at /var, then
     /var is what we want to snapshot.
 
-    For this to work, a candidate pattern path can't have any globs or other non-literal characters
-    in the initial portion of the path that matches the parent directory. For instance, a parent
-    directory of /var would match a candidate pattern path of /var/log/*/data, but not a pattern
-    path like /v*/log/*/data.
+    For this function to work, a candidate pattern path can't have any globs or other non-literal
+    characters in the initial portion of the path that matches the parent directory. For instance, a
+    parent directory of /var would match a candidate pattern path of /var/log/*/data, but not a
+    pattern path like /v*/log/*/data.
 
     The one exception is that if a regular expression pattern path starts with "^", that will get
     stripped off for purposes of matching against a parent directory.
@@ -31,8 +33,10 @@ def get_contained_patterns(parent_directory, candidate_patterns):
         candidate
         for candidate in candidate_patterns
         for candidate_path in (pathlib.PurePath(candidate.path.lstrip('^')),)
-        if pathlib.PurePath(parent_directory) == candidate_path
-        or pathlib.PurePath(parent_directory) in candidate_path.parents
+        if (
+            pathlib.PurePath(parent_directory) == candidate_path
+            or pathlib.PurePath(parent_directory) in candidate_path.parents
+        )
     )
     candidate_patterns -= set(contained_patterns)
 

+ 2 - 1
borgmatic/hooks/data_source/sqlite.py

@@ -90,7 +90,8 @@ def dump_data_sources(
     if not dry_run:
         patterns.append(
             borgmatic.borg.pattern.Pattern(
-                os.path.join(borgmatic_runtime_directory, 'sqlite_databases')
+                os.path.join(borgmatic_runtime_directory, 'sqlite_databases'),
+                source=borgmatic.borg.pattern.Pattern_source.HOOK,
             )
         )
 

+ 42 - 12
borgmatic/hooks/data_source/zfs.py

@@ -1,5 +1,6 @@
 import collections
 import glob
+import hashlib
 import logging
 import os
 import shutil
@@ -38,6 +39,9 @@ def get_datasets_to_backup(zfs_command, patterns):
     pattern paths represent the requested datasets to snapshot. But also include any datasets tagged
     with a borgmatic-specific user property, whether or not they appear in the patterns.
 
+    Only include datasets that contain at least one root pattern sourced from borgmatic
+    configuration (as opposed to generated elsewhere in borgmatic).
+
     Return the result as a sequence of Dataset instances, sorted by mount point.
     '''
     list_output = borgmatic.execute.execute_command_and_capture_output(
@@ -48,7 +52,7 @@ def get_datasets_to_backup(zfs_command, patterns):
             '-t',
             'filesystem',
             '-o',
-            f'name,mountpoint,{BORGMATIC_USER_PROPERTY}',
+            f'name,mountpoint,canmount,{BORGMATIC_USER_PROPERTY}',
         )
     )
 
@@ -60,7 +64,10 @@ def get_datasets_to_backup(zfs_command, patterns):
             (
                 Dataset(dataset_name, mount_point, (user_property_value == 'auto'), ())
                 for line in list_output.splitlines()
-                for (dataset_name, mount_point, user_property_value) in (line.rstrip().split('\t'),)
+                for (dataset_name, mount_point, can_mount, user_property_value) in (line.rstrip().split('\t'),)
+                # Skip datasets that are marked "canmount=off", because mounting their snapshots will
+                # result in completely empty mount points—thereby preventing us from backing them up.
+                if can_mount == 'on'
             ),
             key=lambda dataset: dataset.mount_point,
             reverse=True,
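
A sketch of the tab-separated parse and the new canmount filter, using made-up 'zfs list -H' output (the user property column is stubbed with '-'):

    # Hypothetical output of:
    #   zfs list -H -t filesystem -o name,mountpoint,canmount,<BORGMATIC_USER_PROPERTY>
    list_output = 'pool/root\t/\ton\t-\npool/noauto\t/srv/noauto\toff\t-\npool/data\t/srv/data\ton\tauto\n'

    datasets = [
        (dataset_name, mount_point, user_property_value == 'auto')
        for line in list_output.splitlines()
        for (dataset_name, mount_point, can_mount, user_property_value) in (line.rstrip().split('\t'),)
        # canmount=off datasets are skipped: mounting their snapshots yields empty directories.
        if can_mount == 'on'
    ]

    assert datasets == [('pool/root', '/', False), ('pool/data', '/srv/data', True)]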
@@ -83,7 +90,12 @@ def get_datasets_to_backup(zfs_command, patterns):
                 for contained_patterns in (
                     (
                         (
-                            (borgmatic.borg.pattern.Pattern(dataset.mount_point),)
+                            (
+                                borgmatic.borg.pattern.Pattern(
+                                    dataset.mount_point,
+                                    source=borgmatic.borg.pattern.Pattern_source.HOOK,
+                                ),
+                            )
                             if dataset.auto_backup
                             else ()
                         )
@@ -92,7 +104,11 @@ def get_datasets_to_backup(zfs_command, patterns):
                         )
                     ),
                 )
-                if contained_patterns
+                if any(
+                    pattern.type == borgmatic.borg.pattern.Pattern_type.ROOT
+                    and pattern.source == borgmatic.borg.pattern.Pattern_source.CONFIG
+                    for pattern in contained_patterns
+                )
             ),
             key=lambda dataset: dataset.mount_point,
         )
@@ -155,10 +171,14 @@ def mount_snapshot(mount_command, full_snapshot_name, snapshot_mount_path):  # p
     )
 
 
-def make_borg_snapshot_pattern(pattern, normalized_runtime_directory):
+MOUNT_POINT_HASH_LENGTH = 10
+
+
+def make_borg_snapshot_pattern(pattern, dataset, normalized_runtime_directory):
     '''
-    Given a Borg pattern as a borgmatic.borg.pattern.Pattern instance, return a new Pattern with its
-    path rewritten to be in a snapshot directory based on the given runtime directory.
+    Given a Borg pattern as a borgmatic.borg.pattern.Pattern instance and the Dataset containing it,
+    return a new Pattern with its path rewritten to be in a snapshot directory based on both the
+    given runtime directory and the given Dataset's mount point.
 
     Move any initial caret in a regular expression pattern path to the beginning, so as not to break
     the regular expression.
@@ -173,6 +193,10 @@ def make_borg_snapshot_pattern(pattern, normalized_runtime_directory):
     rewritten_path = initial_caret + os.path.join(
         normalized_runtime_directory,
         'zfs_snapshots',
+        # Including this hash prevents conflicts between snapshot patterns for different datasets.
+        # For instance, without this, snapshotting a dataset at /var and another at /var/spool would
+        # result in overlapping snapshot patterns and therefore colliding mount attempts.
+        hashlib.shake_256(dataset.mount_point.encode('utf-8')).hexdigest(MOUNT_POINT_HASH_LENGTH),
         '.',  # Borg 1.4+ "slashdot" hack.
         # Included so that the source directory ends up in the Borg archive at its "original" path.
         pattern.path.lstrip('^').lstrip(os.path.sep),
@@ -183,6 +207,7 @@ def make_borg_snapshot_pattern(pattern, normalized_runtime_directory):
         pattern.type,
         pattern.style,
         pattern.device,
+        source=borgmatic.borg.pattern.Pattern_source.HOOK,
     )
 
 
@@ -209,7 +234,8 @@ def dump_data_sources(
     dry_run_label = ' (dry run; not actually snapshotting anything)' if dry_run else ''
     logger.info(f'Snapshotting ZFS datasets{dry_run_label}')
 
-    # List ZFS datasets to get their mount points.
+    # List ZFS datasets to get their mount points, but only consider those patterns that came from
+    # actual user configuration (as opposed to, say, other hooks).
     zfs_command = hook_config.get('zfs_command', 'zfs')
     requested_datasets = get_datasets_to_backup(zfs_command, patterns)
 
@@ -234,6 +260,7 @@ def dump_data_sources(
         snapshot_mount_path = os.path.join(
             normalized_runtime_directory,
             'zfs_snapshots',
+            hashlib.shake_256(dataset.mount_point.encode('utf-8')).hexdigest(MOUNT_POINT_HASH_LENGTH),
             dataset.mount_point.lstrip(os.path.sep),
         )
 
@@ -249,7 +276,9 @@ def dump_data_sources(
         )
 
         for pattern in dataset.contained_patterns:
-            snapshot_pattern = make_borg_snapshot_pattern(pattern, normalized_runtime_directory)
+            snapshot_pattern = make_borg_snapshot_pattern(
+                pattern, dataset, normalized_runtime_directory
+            )
 
             # Attempt to update the pattern in place, since pattern order matters to Borg.
             try:
@@ -334,6 +363,7 @@ def remove_data_source_dumps(hook_config, config, borgmatic_runtime_directory, d
             os.path.normpath(borgmatic_runtime_directory),
         ),
         'zfs_snapshots',
+        '*',
     )
     logger.debug(f'Looking for snapshots to remove in {snapshots_glob}{dry_run_label}')
     umount_command = hook_config.get('umount_command', 'umount')
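
Roughly why the extra '*' component is needed (a sketch; the runtime directory path is hypothetical): snapshot mounts now live one level deeper, under the per-mount-point hash directory.

    import glob
    import os

    # Before: <runtime>/zfs_snapshots/<mount point>
    # After:  <runtime>/zfs_snapshots/<hash>/<mount point>
    snapshots_glob = os.path.join('/run/user/1000/borgmatic', 'zfs_snapshots', '*')

    # Each match is one hashed snapshot directory to unmount and then remove.
    snapshot_directories = glob.glob(snapshots_glob)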
@@ -367,13 +397,13 @@ def remove_data_source_dumps(hook_config, config, borgmatic_runtime_directory, d
                     unmount_snapshot(umount_command, snapshot_mount_path)
                 except FileNotFoundError:
                     logger.debug(f'Could not find "{umount_command}" command')
-                    return
+                    continue
                 except subprocess.CalledProcessError as error:
                     logger.debug(error)
-                    return
+                    continue
 
         if not dry_run:
-            shutil.rmtree(snapshots_directory)
+            shutil.rmtree(snapshots_directory, ignore_errors=True)
 
     # Destroy snapshots.
     full_snapshot_names = get_all_snapshots(zfs_command)