Browse Source

More accurately collect Btrfs subvolumes to snapshot by using the "btrfs" command rather than "findmnt" (#1105).

Dan Helfman 1 month ago
parent
commit
339186b579

+ 2 - 0
NEWS

@@ -1,4 +1,6 @@
 2.0.9.dev0
+ * #1105: More accurately collect Btrfs subvolumes to snapshot by using the "btrfs" command rather
+   than "findmnt". The Btrfs "findmnt_command" option is now deprecated.
  * #1123: Add loading of systemd credentials even when running borgmatic outside of a systemd
    service.
  * #1149: Add support for Python 3.14.

+ 7 - 5
borgmatic/actions/create.py

@@ -50,18 +50,19 @@ def run_create(
     working_directory = borgmatic.config.paths.get_working_directory(config)
 
     with borgmatic.config.paths.Runtime_directory(config) as borgmatic_runtime_directory:
+        patterns = pattern.process_patterns(
+            pattern.collect_patterns(config),
+            config,
+            working_directory,
+        )
         borgmatic.hooks.dispatch.call_hooks_even_if_unconfigured(
             'remove_data_source_dumps',
             config,
             borgmatic.hooks.dispatch.Hook_type.DATA_SOURCE,
             borgmatic_runtime_directory,
+            patterns,
             global_arguments.dry_run,
         )
-        patterns = pattern.process_patterns(
-            pattern.collect_patterns(config),
-            config,
-            working_directory,
-        )
         active_dumps = borgmatic.hooks.dispatch.call_hooks(
             'dump_data_sources',
             config,
@@ -138,6 +139,7 @@ def run_create(
             config,
             borgmatic.hooks.dispatch.Hook_type.DATA_SOURCE,
             borgmatic_runtime_directory,
+            patterns,
             global_arguments.dry_run,
         )
 

+ 1 - 1
borgmatic/hooks/data_source/bootstrap.py

@@ -75,7 +75,7 @@ def dump_data_sources(
     return []
 
 
-def remove_data_source_dumps(hook_config, config, borgmatic_runtime_directory, dry_run):
+def remove_data_source_dumps(hook_config, config, borgmatic_runtime_directory, patterns, dry_run):
     '''
     Given a bootstrap configuration dict, a configuration dict, the borgmatic runtime directory, and
     whether this is a dry run, then remove the manifest file created above. If this is a dry run,

+ 66 - 72
borgmatic/hooks/data_source/btrfs.py

@@ -1,9 +1,11 @@
 import collections
+import functools
 import glob
 import itertools
 import json
 import logging
 import os
+import pathlib
 import shutil
 import subprocess
 
@@ -22,83 +24,73 @@ def use_streaming(hook_config, config):  # pragma: no cover
     return False
 
 
-def get_contained_subvolume_paths(btrfs_command, subvolume_path):
+@functools.cache
+def path_is_a_subvolume(btrfs_command, path):
     '''
-    Given the path of a Btrfs subvolume, return it in a sequence along with the paths of its
-    contained subvolumes.
+    Given a btrfs command and a path, return whether the path is a Btrfs subvolume.
 
-    If the btrfs command errors, log that error and return an empty sequence.
+    As a performance optimization, multiple calls to this function with the same arguments are
+    cached.
     '''
     try:
-        btrfs_output = borgmatic.execute.execute_command_and_capture_output(
+        borgmatic.execute.execute_command(
             (
                 *btrfs_command.split(' '),
                 'subvolume',
-                'list',
-                subvolume_path,
+                'show',
+                path,
             ),
+            output_log_level=None,
             close_fds=True,
         )
-    except subprocess.CalledProcessError as error:
-        logger.debug(
-            f'Ignoring Btrfs subvolume {subvolume_path} because of error listing its subvolumes: {error}',
-        )
+    # An error from the command (probably) indicates that the path is not actually a subvolume.
+    except subprocess.CalledProcessError:
+        return False
 
-        return ()
+    return True
 
-    return (
-        subvolume_path,
-        *tuple(
-            os.path.join(subvolume_path, line.split(' ')[-1])
-            for line in btrfs_output.splitlines()
-            if line.strip()
-        ),
-    )
 
+def get_containing_subvolume_path(btrfs_command, pattern):
+    '''
+    Given a btrfs command and a pattern as a borgmatic.borg.pattern.Pattern instance, return the
+    subvolume path that contains the pattern's path (or is the same as the pattern's path).
+
+    Return None if the btrfs command errors, which probably indicates there isn't a containing Btrfs
+    subvolume for the given pattern.
+    '''
+    pattern_path = pattern.path.lstrip('^')
+
+    # Probe the given pattern's path and all of its parents, grandparents, etc. to try to find a
+    # Btrfs subvolume.
+    for candidate_path in (
+        pattern_path,
+        *tuple(str(path) for path in pathlib.PurePath(pattern_path).parents),
+    ):
+        if path_is_a_subvolume(btrfs_command, candidate_path):
+            logger.debug(f'Path {candidate_path} is a Btrfs subvolume')
+            return candidate_path
 
-FINDMNT_BTRFS_ROOT_SUBVOLUME_OPTION = 'subvolid=5'
+    return None
 
 
-def get_all_subvolume_paths(btrfs_command, findmnt_command):
+def get_all_subvolume_paths(btrfs_command, patterns):
     '''
-    Given btrfs and findmnt commands to run, get the sorted paths for all Btrfs subvolumes on the
-    system.
+    Given a btrfs command and a sequence of patterns, get the sorted paths for all Btrfs subvolumes
+    containing those patterns.
     '''
-    findmnt_output = borgmatic.execute.execute_command_and_capture_output(
-        (
-            *findmnt_command.split(' '),
-            '-t',  # Filesystem type.
-            'btrfs',
-            '--json',
-            '--list',  # Request a flat list instead of a nested subvolume hierarchy.
+    return tuple(
+        sorted(
+            {
+                subvolume_path
+                for pattern in patterns
+                if pattern.type == borgmatic.borg.pattern.Pattern_type.ROOT
+                if pattern.source == borgmatic.borg.pattern.Pattern_source.CONFIG
+                for subvolume_path in (get_containing_subvolume_path(btrfs_command, pattern),)
+                if subvolume_path
+            }
         ),
-        close_fds=True,
     )
 
-    try:
-        return tuple(
-            sorted(
-                itertools.chain.from_iterable(
-                    # If findmnt gave us a Btrfs root filesystem, list the subvolumes within it.
-                    # This is necessary because findmnt only returns a subvolume's mount point
-                    # rather than its original subvolume path (which can differ). For instance,
-                    # a subvolume might exist at /mnt/subvolume but be mounted at /home/myuser.
-                    # findmnt is still useful though because it's a global way to discover all
-                    # Btrfs subvolumes—even if we have to do some additional legwork ourselves.
-                    (
-                        get_contained_subvolume_paths(btrfs_command, filesystem['target'])
-                        if FINDMNT_BTRFS_ROOT_SUBVOLUME_OPTION in filesystem['options'].split(',')
-                        else (filesystem['target'],)
-                    )
-                    for filesystem in json.loads(findmnt_output)['filesystems']
-                ),
-            ),
-        )
-    except json.JSONDecodeError as error:
-        raise ValueError(f'Invalid {findmnt_command} JSON output: {error}')
-    except KeyError as error:
-        raise ValueError(f'Invalid {findmnt_command} output: Missing key "{error}"')
-
 
 Subvolume = collections.namedtuple('Subvolume', ('path', 'contained_patterns'), defaults=((),))
 
@@ -156,15 +148,14 @@ def omit_read_only_subvolume_paths(btrfs_command, subvolume_paths):
     return tuple(retained_subvolume_paths)
 
 
-def get_subvolumes(btrfs_command, findmnt_command, patterns=None):
+def get_subvolumes(btrfs_command, patterns):
     '''
     Given a Btrfs command to run and a sequence of configured patterns, find the intersection
-    between the current Btrfs filesystem and subvolume paths and the paths of any patterns.  The
+    between the current Btrfs filesystem and subvolume paths and the paths of any patterns. The
     idea is that these pattern paths represent the requested subvolumes to snapshot.
 
     Only include subvolumes that contain at least one root pattern sourced from borgmatic
-    configuration (as opposed to generated elsewhere in borgmatic). But if patterns is None, then
-    return all subvolumes instead, sorted by path.
+    configuration (as opposed to generated elsewhere in borgmatic).
 
     Return the result as a sequence of matching Subvolume instances.
     '''
@@ -172,13 +163,13 @@ def get_subvolumes(btrfs_command, findmnt_command, patterns=None):
     subvolumes = []
 
     # For each subvolume path, match it against the given patterns to find the subvolumes to
-    # backup. Sort the subvolumes from longest to shortest mount points, so longer mount points get
+    # backup. Sort the subvolumes from longest to shortest mount points, so longer subvolumes get
     # a whack at the candidate pattern piñata before their parents do. (Patterns are consumed during
     # this process, so no two subvolumes end up with the same contained patterns.)
     for subvolume_path in reversed(
         omit_read_only_subvolume_paths(
             btrfs_command,
-            get_all_subvolume_paths(btrfs_command, findmnt_command),
+            get_all_subvolume_paths(btrfs_command, patterns),
         ),
     ):
         subvolumes.extend(
@@ -189,8 +180,7 @@ def get_subvolumes(btrfs_command, findmnt_command, patterns=None):
                     candidate_patterns,
                 ),
             )
-            if patterns is None
-            or any(
+            if any(
                 pattern.type == borgmatic.borg.pattern.Pattern_type.ROOT
                 and pattern.source == borgmatic.borg.pattern.Pattern_source.CONFIG
                 for pattern in contained_patterns
@@ -325,11 +315,15 @@ def dump_data_sources(
     dry_run_label = ' (dry run; not actually snapshotting anything)' if dry_run else ''
     logger.info(f'Snapshotting Btrfs subvolumes{dry_run_label}')
 
+    if 'findmnt_command' in hook_config:
+        logger.warning(
+            'The Btrfs "findmnt_command" option is deprecated and will be removed from a future release; findmnt is no longer used',
+        )
+
     # Based on the configured patterns, determine Btrfs subvolumes to backup. Only consider those
     # patterns that came from actual user configuration (as opposed to, say, other hooks).
     btrfs_command = hook_config.get('btrfs_command', 'btrfs')
-    findmnt_command = hook_config.get('findmnt_command', 'findmnt')
-    subvolumes = get_subvolumes(btrfs_command, findmnt_command, patterns)
+    subvolumes = get_subvolumes(btrfs_command, patterns)
 
     if not subvolumes:
         logger.warning(f'No Btrfs subvolumes found to snapshot{dry_run_label}')
@@ -375,11 +369,12 @@ def delete_snapshot(btrfs_command, snapshot_path):  # pragma: no cover
     )
 
 
-def remove_data_source_dumps(hook_config, config, borgmatic_runtime_directory, dry_run):
+def remove_data_source_dumps(hook_config, config, borgmatic_runtime_directory, patterns, dry_run):
     '''
-    Given a Btrfs configuration dict, a configuration dict, the borgmatic runtime directory, and
-    whether this is a dry run, delete any Btrfs snapshots created by borgmatic. If this is a dry run
-    or Btrfs isn't configured in borgmatic's configuration, then don't actually remove anything.
+    Given a Btrfs configuration dict, a configuration dict, the borgmatic runtime directory, the
+    configured patterns, and whether this is a dry run, delete any Btrfs snapshots created by
+    borgmatic. If this is a dry run or Btrfs isn't configured in borgmatic's configuration, then
+    don't actually remove anything.
     '''
     if hook_config is None:
         return
@@ -387,10 +382,9 @@ def remove_data_source_dumps(hook_config, config, borgmatic_runtime_directory, d
     dry_run_label = ' (dry run; not actually removing anything)' if dry_run else ''
 
     btrfs_command = hook_config.get('btrfs_command', 'btrfs')
-    findmnt_command = hook_config.get('findmnt_command', 'findmnt')
 
     try:
-        all_subvolumes = get_subvolumes(btrfs_command, findmnt_command)
+        all_subvolumes = get_subvolumes(btrfs_command, patterns)
     except FileNotFoundError as error:
         logger.debug(f'Could not find "{error.filename}" command')
         return

+ 1 - 1
borgmatic/hooks/data_source/lvm.py

@@ -353,7 +353,7 @@ def get_snapshots(lvs_command, snapshot_name=None):
         raise ValueError(f'Invalid {lvs_command} output: Missing key "{error}"')
 
 
-def remove_data_source_dumps(hook_config, config, borgmatic_runtime_directory, dry_run):  # noqa: PLR0912
+def remove_data_source_dumps(hook_config, config, borgmatic_runtime_directory, patterns, dry_run):  # noqa: PLR0912
     '''
     Given an LVM configuration dict, a configuration dict, the borgmatic runtime directory, and
     whether this is a dry run, unmount and delete any LVM snapshots created by borgmatic. If this is

+ 1 - 0
borgmatic/hooks/data_source/mariadb.py

@@ -369,6 +369,7 @@ def remove_data_source_dumps(
     databases,
     config,
     borgmatic_runtime_directory,
+    patterns,
     dry_run,
 ):  # pragma: no cover
     '''

+ 1 - 0
borgmatic/hooks/data_source/mongodb.py

@@ -185,6 +185,7 @@ def remove_data_source_dumps(
     databases,
     config,
     borgmatic_runtime_directory,
+    patterns,
     dry_run,
 ):  # pragma: no cover
     '''

+ 1 - 0
borgmatic/hooks/data_source/mysql.py

@@ -300,6 +300,7 @@ def remove_data_source_dumps(
     databases,
     config,
     borgmatic_runtime_directory,
+    patterns,
     dry_run,
 ):  # pragma: no cover
     '''

+ 1 - 0
borgmatic/hooks/data_source/postgresql.py

@@ -274,6 +274,7 @@ def remove_data_source_dumps(
     databases,
     config,
     borgmatic_runtime_directory,
+    patterns,
     dry_run,
 ):  # pragma: no cover
     '''

+ 1 - 0
borgmatic/hooks/data_source/sqlite.py

@@ -120,6 +120,7 @@ def remove_data_source_dumps(
     databases,
     config,
     borgmatic_runtime_directory,
+    patterns,
     dry_run,
 ):  # pragma: no cover
     '''

+ 1 - 1
borgmatic/hooks/data_source/zfs.py

@@ -363,7 +363,7 @@ def get_all_snapshots(zfs_command):
     return tuple(line.rstrip() for line in list_output.splitlines())
 
 
-def remove_data_source_dumps(hook_config, config, borgmatic_runtime_directory, dry_run):  # noqa: PLR0912
+def remove_data_source_dumps(hook_config, config, borgmatic_runtime_directory, patterns, dry_run):  # noqa: PLR0912
     '''
     Given a ZFS configuration dict, a configuration dict, the borgmatic runtime directory, and
     whether this is a dry run, unmount and destroy any ZFS snapshots created by borgmatic. If this