Browse Source

Port the parent directory discovery logic from LVM to ZFS (#80).

Dan Helfman 6 months ago
parent
commit
9b77de3d66

+ 4 - 1
borgmatic/hooks/data_source/lvm.py

@@ -50,6 +50,9 @@ def get_logical_volumes(lsblk_command, source_directories=None):
     except json.JSONDecodeError as error:
         raise ValueError('Invalid {lsblk_command} JSON output: {error}')
 
+
+    candidate_source_directories = set(source_directories or ())
+
     try:
         return tuple(
             (device['name'], device['path'], device['mountpoint'], contained_source_directories)
@@ -57,7 +60,7 @@ def get_logical_volumes(lsblk_command, source_directories=None):
             if device['mountpoint'] and device['type'] == 'lvm'
             for contained_source_directories in (
                 borgmatic.hooks.data_source.snapshot.get_contained_directories(
-                    device['mountpoint'], source_directories
+                    device['mountpoint'], candidate_source_directories
                 ),
             )
             if not source_directories or contained_source_directories

+ 13 - 6
borgmatic/hooks/data_source/snapshot.py

@@ -1,3 +1,4 @@
+import itertools
 import pathlib
 
 
@@ -6,18 +7,24 @@ IS_A_HOOK = False
 
 def get_contained_directories(parent_directory, candidate_contained_directories):
     '''
-    Given a parent directory and a sequence of candiate directories potentially inside it, get the
-    subset of contained directories for which the parent directory is actually the parent, a
-    grandparent, the very same directory, etc. The idea is if, say, /var/log and /var/lib are
-    candidate contained directories, but there's a parent directory (logical volume, dataset,
-    subvolume, etc.) at /var, then /var is what we want to snapshot.
+    Given a parent directory and a set of candidate directories potentially inside it, get the subset
+    of contained directories for which the parent directory is actually the parent, a grandparent,
+    the very same directory, etc. The idea is if, say, /var/log and /var/lib are candidate contained
+    directories, but there's a parent directory (logical volume, dataset, subvolume, etc.) at /var,
+    then /var is what we want to snapshot.
+
+    Also mutate the given set of candidate contained directories to remove any actually contained
+    directories from it.
     '''
     if not candidate_contained_directories:
         return ()
 
-    return tuple(
+    contained = tuple(
         candidate
         for candidate in candidate_contained_directories
         if parent_directory == candidate
         or pathlib.PurePosixPath(parent_directory) in pathlib.PurePath(candidate).parents
     )
+    candidate_contained_directories -= set(contained)
+
+    return contained

+ 69 - 31
borgmatic/hooks/data_source/zfs.py

@@ -1,3 +1,4 @@
+import collections
 import glob
 import logging
 import os
@@ -5,6 +6,7 @@ import shutil
 import subprocess
 
 import borgmatic.config.paths
+import borgmatic.hooks.data_source.snapshot
 import borgmatic.execute
 
 logger = logging.getLogger(__name__)
@@ -21,6 +23,9 @@ BORGMATIC_SNAPSHOT_PREFIX = 'borgmatic-'
 BORGMATIC_USER_PROPERTY = 'org.torsion.borgmatic:backup'
 
 
+Dataset = collections.namedtuple('Dataset', ('name', 'mount_point', 'user_property_value', 'contained_source_directories'))
+
+
 def get_datasets_to_backup(zfs_command, source_directories):
     '''
     Given a ZFS command to run and a sequence of configured source directories, find the
@@ -29,7 +34,7 @@ def get_datasets_to_backup(zfs_command, source_directories):
     datasets tagged with a borgmatic-specific user property, whether or not they appear in source
     directories.
 
-    Return the result as a sequence of (dataset name, mount point) pairs.
+    Return the result as a sequence of Dataset instances, sorted by mount point.
     '''
     list_output = borgmatic.execute.execute_command_and_capture_output(
         (
@@ -42,23 +47,44 @@ def get_datasets_to_backup(zfs_command, source_directories):
             f'name,mountpoint,{BORGMATIC_USER_PROPERTY}',
         )
     )
-    source_directories_set = set(source_directories)
 
     try:
-        return tuple(
-            (dataset_name, mount_point)
-            for line in list_output.splitlines()
-            for (dataset_name, mount_point, user_property_value) in (line.rstrip().split('\t'),)
-            if mount_point in source_directories_set or user_property_value == 'auto'
+        # Sort from longest to shortest mount points, so longer mount points get a whack at the
+        # candidate source directory piñata before their parents do. (Source directories are
+        # consumed during the second loop below, so no two datasets get the same contained source
+        # directories.)
+        datasets = sorted(
+            (
+                Dataset(dataset_name, mount_point, user_property_value, ())
+                for line in list_output.splitlines()
+                for (dataset_name, mount_point, user_property_value) in (line.rstrip().split('\t'),)
+            ),
+            key=lambda dataset: dataset.mount_point,
+            reverse=True,
         )
     except ValueError:
         raise ValueError('Invalid {zfs_command} list output')
 
+    candidate_source_directories = set(source_directories)
+
+    return sorted(
+        tuple(
+            Dataset(dataset.name, dataset.mount_point, dataset.user_property_value, contained_source_directories)
+            for dataset in datasets
+            for contained_source_directories in (
+                borgmatic.hooks.data_source.snapshot.get_contained_directories(
+                    dataset.mount_point, candidate_source_directories
+                ),
+            )
+            if contained_source_directories or dataset.user_property_value == 'auto'
+        ),
+        key=lambda dataset: dataset.mount_point,
+    )
+
 
-def get_all_datasets(zfs_command):
+def get_all_dataset_mount_points(zfs_command):
     '''
-    Given a ZFS command to run, return all ZFS datasets as a sequence of (dataset name, mount point)
-    pairs.
+    Given a ZFS command to run, return all ZFS datasets as a sequence of sorted mount points.
     '''
     list_output = borgmatic.execute.execute_command_and_capture_output(
         (
@@ -68,15 +94,13 @@ def get_all_datasets(zfs_command):
             '-t',
             'filesystem',
             '-o',
-            'name,mountpoint',
+            'mountpoint',
         )
     )
 
     try:
         return tuple(
-            (dataset_name, mount_point)
-            for line in list_output.splitlines()
-            for (dataset_name, mount_point) in (line.rstrip().split('\t'),)
+            sorted(line.rstrip() for line in list_output.splitlines())
         )
     except ValueError:
         raise ValueError('Invalid {zfs_command} list output')
@@ -147,39 +171,51 @@ def dump_data_sources(
 
     # Snapshot each dataset, rewriting source directories to use the snapshot paths.
     snapshot_name = f'{BORGMATIC_SNAPSHOT_PREFIX}{os.getpid()}'
+    normalized_runtime_directory = os.path.normpath(borgmatic_runtime_directory)
 
     if not requested_datasets:
         logger.warning(f'{log_prefix}: No ZFS datasets found to snapshot{dry_run_label}')
 
-    for dataset_name, mount_point in requested_datasets:
-        full_snapshot_name = f'{dataset_name}@{snapshot_name}'
-        logger.debug(f'{log_prefix}: Creating ZFS snapshot {full_snapshot_name}{dry_run_label}')
+    for dataset in requested_datasets:
+        full_snapshot_name = f'{dataset.name}@{snapshot_name}'
+        logger.debug(f'{log_prefix}: Creating ZFS snapshot {full_snapshot_name} of {dataset.mount_point}{dry_run_label}')
 
         if not dry_run:
             snapshot_dataset(zfs_command, full_snapshot_name)
 
         # Mount the snapshot into a particular named temporary directory so that the snapshot ends
         # up in the Borg archive at the "original" dataset mount point path.
-        snapshot_mount_path_for_borg = os.path.join(
-            os.path.normpath(borgmatic_runtime_directory),
+        snapshot_mount_path = os.path.join(
+            normalized_runtime_directory,
             'zfs_snapshots',
-            '.',  # Borg 1.4+ "slashdot" hack.
-            mount_point.lstrip(os.path.sep),
+            dataset.mount_point.lstrip(os.path.sep),
         )
-        snapshot_mount_path = os.path.normpath(snapshot_mount_path_for_borg)
+
         logger.debug(
             f'{log_prefix}: Mounting ZFS snapshot {full_snapshot_name} at {snapshot_mount_path}{dry_run_label}'
         )
 
-        if not dry_run:
-            mount_snapshot(
-                hook_config.get('mount_command', 'mount'), full_snapshot_name, snapshot_mount_path
-            )
+        if dry_run:
+            continue
 
-            if mount_point in source_directories:
-                source_directories.remove(mount_point)
+        mount_snapshot(
+            hook_config.get('mount_command', 'mount'), full_snapshot_name, snapshot_mount_path
+        )
 
-            source_directories.append(snapshot_mount_path_for_borg)
+        for source_directory in dataset.contained_source_directories:
+            try:
+                source_directories.remove(source_directory)
+            except ValueError:
+                pass
+
+            source_directories.append(
+                os.path.join(
+                    normalized_runtime_directory,
+                    'zfs_snapshots',
+                    '.',  # Borg 1.4+ "slashdot" hack.
+                    source_directory.lstrip(os.path.sep),
+                )
+            )
 
     return []
 
@@ -245,7 +281,7 @@ def remove_data_source_dumps(hook_config, config, log_prefix, borgmatic_runtime_
     zfs_command = hook_config.get('zfs_command', 'zfs')
 
     try:
-        datasets = get_all_datasets(zfs_command)
+        dataset_mount_points = get_all_dataset_mount_points(zfs_command)
     except FileNotFoundError:
         logger.debug(f'{log_prefix}: Could not find "{zfs_command}" command')
         return
@@ -275,7 +311,9 @@ def remove_data_source_dumps(hook_config, config, log_prefix, borgmatic_runtime_
         if not dry_run:
             shutil.rmtree(snapshots_directory, ignore_errors=True)
 
-        for _, mount_point in datasets:
+        # Reversing the sorted datasets ensures that we unmount the longer mount point paths of
+        # child datasets before the shorter mount point paths of parent datasets.
+        for mount_point in reversed(dataset_mount_points):
             snapshot_mount_path = os.path.join(snapshots_directory, mount_point.lstrip(os.path.sep))
             if not os.path.isdir(snapshot_mount_path):
                 continue

+ 8 - 0
docs/how-to/snapshot-your-filesystems.md

@@ -76,6 +76,14 @@ in an archive at `/var/dataset` as well—even if borgmatic has to mount the
 snapshot somewhere in `/run/user/1000/borgmatic/zfs_snapshots/` to perform the
 backup.
 
+<span class="minilink minilink-addedin">New in version 1.9.4</span> borgmatic
+is smart enough to look at the parent (and grandparent, etc.) directories of
+each of your `source_directories` to discover any datasets. For instance,
+let's say you add `/var/log` and `/var/lib` to your source directories, but
+`/var` is a dataset. borgmatic will discover that and snapshot `/var`
+accordingly. This even works with nested datasets; borgmatic selects
+the dataset that's the "closest" parent to your source directories.
+
 <span class="minilink minilink-addedin">With Borg version 1.2 and
 earlier</span>Snapshotted files are instead stored at a path dependent on the
 [runtime