Forráskód Böngészése

Fix path rewriting for non-root patterns in the ZFS, Btrfs, and LVM hooks (#1072).

Dan Helfman 1 hónapja
szülő
commit
3847f31939

+ 1 - 0
NEWS

@@ -1,4 +1,5 @@
 2.0.4.dev0
 2.0.4.dev0
+ * #1072: Fix path rewriting for non-root patterns in the ZFS, Btrfs, and LVM hooks.
  * #1073: Clarify the documentation about when an "after: error" command hook runs and how it
  * #1073: Clarify the documentation about when an "after: error" command hook runs and how it
    differs from other hooks:
    differs from other hooks:
    https://torsion.org/borgmatic/docs/how-to/add-preparation-and-cleanup-steps-to-backups/
    https://torsion.org/borgmatic/docs/how-to/add-preparation-and-cleanup-steps-to-backups/

+ 36 - 8
borgmatic/actions/pattern.py

@@ -165,6 +165,26 @@ def expand_patterns(patterns, working_directory=None, skip_paths=None):
     )
     )
 
 
 
 
+def get_existent_path_or_parent(path):
+    '''
+    Given a path, return it if it exists. Otherwise, return the longest parent directory of the path
+    that exists. Return None if none of these paths exist.
+
+    This is used below for finding an existent path prefix of pattern's path, which is necessary if
+    the path contain globs or other special characters that we don't want to try to interpret
+    (because we want to leave that responsibility to Borg).
+    '''
+    try:
+        return next(
+            candidate_path
+            for candidate_path in (path,)
+            + tuple(str(parent) for parent in pathlib.PurePath(path).parents)
+            if os.path.exists(candidate_path)
+        )
+    except StopIteration:
+        return None
+
+
 def device_map_patterns(patterns, working_directory=None):
 def device_map_patterns(patterns, working_directory=None):
     '''
     '''
     Given a sequence of borgmatic.borg.pattern.Pattern instances and an optional working directory,
     Given a sequence of borgmatic.borg.pattern.Pattern instances and an optional working directory,
@@ -174,23 +194,31 @@ def device_map_patterns(patterns, working_directory=None):
 
 
     This is handy for determining whether two different pattern paths are on the same filesystem
     This is handy for determining whether two different pattern paths are on the same filesystem
     (have the same device identifier).
     (have the same device identifier).
+
+    This function only considers the start of a pattern's path—from the start of the path up until
+    there's a path component with a glob or other non-literal character. If there are no such
+    characters, the whole path is considered. The rationale is that it's not feasible for borgmatic
+    to interpret Borg's patterns to see which actual files (and therefore devices) they map to. So
+    for instance, a pattern with a path of "/var/log/*/data" would end up with its device set to the
+    device of "/var/log"—ignoring the "/*/data" part due to that glob.
+
+    The one exception is that if a regular expression pattern path starts with "^", that will get
+    stripped off for purposes of determining its device.
     '''
     '''
     return tuple(
     return tuple(
         borgmatic.borg.pattern.Pattern(
         borgmatic.borg.pattern.Pattern(
             pattern.path,
             pattern.path,
             pattern.type,
             pattern.type,
             pattern.style,
             pattern.style,
-            device=pattern.device
-            or (
-                os.stat(full_path).st_dev
-                if pattern.type == borgmatic.borg.pattern.Pattern_type.ROOT
-                and os.path.exists(full_path)
-                else None
-            ),
+            device=pattern.device or (os.stat(full_path).st_dev if full_path else None),
             source=pattern.source,
             source=pattern.source,
         )
         )
         for pattern in patterns
         for pattern in patterns
-        for full_path in (os.path.join(working_directory or '', pattern.path),)
+        for full_path in (
+            get_existent_path_or_parent(
+                os.path.join(working_directory or '', pattern.path.lstrip('^'))
+            ),
+        )
     )
     )
 
 
 
 

+ 5 - 5
borgmatic/hooks/data_source/snapshot.py

@@ -8,9 +8,9 @@ def get_contained_patterns(parent_directory, candidate_patterns):
     '''
     '''
     Given a parent directory and a set of candidate patterns potentially inside it, get the subset
     Given a parent directory and a set of candidate patterns potentially inside it, get the subset
     of contained patterns for which the parent directory is actually the parent, a grandparent, the
     of contained patterns for which the parent directory is actually the parent, a grandparent, the
-    very same directory, etc. The idea is if, say, /var/log and /var/lib are candidate pattern
-    paths, but there's a parent directory (logical volume, dataset, subvolume, etc.) at /var, then
-    /var is what we want to snapshot.
+    very same directory, etc. The idea is if, say, "/var/log" and "/var/lib" are candidate pattern
+    paths, but there's a parent directory (logical volume, dataset, subvolume, etc.) at "/var", then
+    "/var" is what we want to snapshot.
 
 
     If a parent directory and a candidate pattern are on different devices, skip the pattern. That's
     If a parent directory and a candidate pattern are on different devices, skip the pattern. That's
     because any snapshot of a parent directory won't actually include "contained" directories if
     because any snapshot of a parent directory won't actually include "contained" directories if
@@ -18,8 +18,8 @@ def get_contained_patterns(parent_directory, candidate_patterns):
 
 
     For this function to work, a candidate pattern path can't have any globs or other non-literal
     For this function to work, a candidate pattern path can't have any globs or other non-literal
     characters in the initial portion of the path that matches the parent directory. For instance, a
     characters in the initial portion of the path that matches the parent directory. For instance, a
-    parent directory of /var would match a candidate pattern path of /var/log/*/data, but not a
-    pattern path like /v*/log/*/data.
+    parent directory of "/var" would match a candidate pattern path of "/var/log/*/data", but not a
+    pattern path like "/v*/log/*/data".
 
 
     The one exception is that if a regular expression pattern path starts with "^", that will get
     The one exception is that if a regular expression pattern path starts with "^", that will get
     stripped off for purposes of matching against a parent directory.
     stripped off for purposes of matching against a parent directory.

+ 43 - 16
tests/unit/actions/test_pattern.py

@@ -245,36 +245,63 @@ def test_expand_patterns_expands_only_tildes_in_non_root_patterns():
     assert paths == (Pattern('/root/bar/*', Pattern_type.INCLUDE),)
     assert paths == (Pattern('/root/bar/*', Pattern_type.INCLUDE),)
 
 
 
 
-def test_device_map_patterns_gives_device_id_per_path():
+def test_get_existent_path_or_parent_passes_through_existent_path():
     flexmock(module.os.path).should_receive('exists').and_return(True)
     flexmock(module.os.path).should_receive('exists').and_return(True)
-    flexmock(module.os).should_receive('stat').with_args('/foo').and_return(flexmock(st_dev=55))
-    flexmock(module.os).should_receive('stat').with_args('/bar').and_return(flexmock(st_dev=66))
 
 
-    device_map = module.device_map_patterns((Pattern('/foo'), Pattern('/bar')))
+    assert module.get_existent_path_or_parent('/foo/bar/baz') == '/foo/bar/baz'
 
 
-    assert device_map == (
-        Pattern('/foo', device=55),
-        Pattern('/bar', device=66),
-    )
 
 
+def test_get_existent_path_or_parent_with_non_existent_path_returns_none():
+    flexmock(module.os.path).should_receive('exists').and_return(False)
 
 
-def test_device_map_patterns_only_considers_root_patterns():
-    flexmock(module.os.path).should_receive('exists').and_return(True)
+    assert module.get_existent_path_or_parent('/foo/bar/baz') is None
+
+
+def test_get_existent_path_or_parent_with_non_existent_path_returns_existent_parent():
+    flexmock(module.os.path).should_receive('exists').with_args('/foo/bar/baz*').and_return(False)
+    flexmock(module.os.path).should_receive('exists').with_args('/foo/bar').and_return(True)
+    flexmock(module.os.path).should_receive('exists').with_args('/foo').never()
+    flexmock(module.os.path).should_receive('exists').with_args('/').never()
+
+    assert module.get_existent_path_or_parent('/foo/bar/baz*') == '/foo/bar'
+
+
+def test_get_existent_path_or_parent_with_non_existent_path_returns_existent_grandparent():
+    flexmock(module.os.path).should_receive('exists').with_args('/foo/bar/baz*').and_return(False)
+    flexmock(module.os.path).should_receive('exists').with_args('/foo/bar').and_return(False)
+    flexmock(module.os.path).should_receive('exists').with_args('/foo').and_return(True)
+    flexmock(module.os.path).should_receive('exists').with_args('/').never()
+
+    assert module.get_existent_path_or_parent('/foo/bar/baz*') == '/foo'
+
+
+def test_device_map_patterns_gives_device_id_per_path():
+    flexmock(module).should_receive('get_existent_path_or_parent').replace_with(lambda path: path)
     flexmock(module.os).should_receive('stat').with_args('/foo').and_return(flexmock(st_dev=55))
     flexmock(module.os).should_receive('stat').with_args('/foo').and_return(flexmock(st_dev=55))
-    flexmock(module.os).should_receive('stat').with_args('/bar*').never()
+    flexmock(module.os).should_receive('stat').with_args('/bar').and_return(flexmock(st_dev=66))
 
 
     device_map = module.device_map_patterns(
     device_map = module.device_map_patterns(
-        (Pattern('/foo'), Pattern('/bar*', Pattern_type.INCLUDE))
+        (
+            Pattern('/foo'),
+            Pattern('^/bar', type=Pattern_type.INCLUDE, style=Pattern_style.REGULAR_EXPRESSION),
+        )
     )
     )
 
 
     assert device_map == (
     assert device_map == (
         Pattern('/foo', device=55),
         Pattern('/foo', device=55),
-        Pattern('/bar*', Pattern_type.INCLUDE),
+        Pattern(
+            '^/bar', type=Pattern_type.INCLUDE, style=Pattern_style.REGULAR_EXPRESSION, device=66
+        ),
     )
     )
 
 
 
 
 def test_device_map_patterns_with_missing_path_does_not_error():
 def test_device_map_patterns_with_missing_path_does_not_error():
-    flexmock(module.os.path).should_receive('exists').and_return(True).and_return(False)
+    flexmock(module).should_receive('get_existent_path_or_parent').with_args('/foo').and_return(
+        '/foo'
+    )
+    flexmock(module).should_receive('get_existent_path_or_parent').with_args('/bar').and_return(
+        None
+    )
     flexmock(module.os).should_receive('stat').with_args('/foo').and_return(flexmock(st_dev=55))
     flexmock(module.os).should_receive('stat').with_args('/foo').and_return(flexmock(st_dev=55))
     flexmock(module.os).should_receive('stat').with_args('/bar').never()
     flexmock(module.os).should_receive('stat').with_args('/bar').never()
 
 
@@ -287,7 +314,7 @@ def test_device_map_patterns_with_missing_path_does_not_error():
 
 
 
 
 def test_device_map_patterns_uses_working_directory_to_construct_path():
 def test_device_map_patterns_uses_working_directory_to_construct_path():
-    flexmock(module.os.path).should_receive('exists').and_return(True)
+    flexmock(module).should_receive('get_existent_path_or_parent').replace_with(lambda path: path)
     flexmock(module.os).should_receive('stat').with_args('/foo').and_return(flexmock(st_dev=55))
     flexmock(module.os).should_receive('stat').with_args('/foo').and_return(flexmock(st_dev=55))
     flexmock(module.os).should_receive('stat').with_args('/working/dir/bar').and_return(
     flexmock(module.os).should_receive('stat').with_args('/working/dir/bar').and_return(
         flexmock(st_dev=66)
         flexmock(st_dev=66)
@@ -304,7 +331,7 @@ def test_device_map_patterns_uses_working_directory_to_construct_path():
 
 
 
 
 def test_device_map_patterns_with_existing_device_id_does_not_overwrite_it():
 def test_device_map_patterns_with_existing_device_id_does_not_overwrite_it():
-    flexmock(module.os.path).should_receive('exists').and_return(True)
+    flexmock(module).should_receive('get_existent_path_or_parent').replace_with(lambda path: path)
     flexmock(module.os).should_receive('stat').with_args('/foo').and_return(flexmock(st_dev=55))
     flexmock(module.os).should_receive('stat').with_args('/foo').and_return(flexmock(st_dev=55))
     flexmock(module.os).should_receive('stat').with_args('/bar').and_return(flexmock(st_dev=100))
     flexmock(module.os).should_receive('stat').with_args('/bar').and_return(flexmock(st_dev=100))