Просмотр исходного кода

Fix path rewriting for non-root patterns in the ZFS, Btrfs, and LVM hooks (#1072).

Dan Helfman 1 месяц назад
Родитель
Сommit
3847f31939
4 измененных файлов с 85 добавлено и 29 удалено
  1. 1 0
      NEWS
  2. 36 8
      borgmatic/actions/pattern.py
  3. 5 5
      borgmatic/hooks/data_source/snapshot.py
  4. 43 16
      tests/unit/actions/test_pattern.py

+ 1 - 0
NEWS

@@ -1,4 +1,5 @@
 2.0.4.dev0
+ * #1072: Fix path rewriting for non-root patterns in the ZFS, Btrfs, and LVM hooks.
  * #1073: Clarify the documentation about when an "after: error" command hook runs and how it
    differs from other hooks:
    https://torsion.org/borgmatic/docs/how-to/add-preparation-and-cleanup-steps-to-backups/

+ 36 - 8
borgmatic/actions/pattern.py

@@ -165,6 +165,26 @@ def expand_patterns(patterns, working_directory=None, skip_paths=None):
     )
 
 
+def get_existent_path_or_parent(path):
+    '''
+    Given a path, return it if it exists. Otherwise, return the longest parent directory of the path
+    that exists. Return None if none of these paths exist.
+
+    This is used below for finding an existent path prefix of pattern's path, which is necessary if
+    the path contain globs or other special characters that we don't want to try to interpret
+    (because we want to leave that responsibility to Borg).
+    '''
+    try:
+        return next(
+            candidate_path
+            for candidate_path in (path,)
+            + tuple(str(parent) for parent in pathlib.PurePath(path).parents)
+            if os.path.exists(candidate_path)
+        )
+    except StopIteration:
+        return None
+
+
 def device_map_patterns(patterns, working_directory=None):
     '''
     Given a sequence of borgmatic.borg.pattern.Pattern instances and an optional working directory,
@@ -174,23 +194,31 @@ def device_map_patterns(patterns, working_directory=None):
 
     This is handy for determining whether two different pattern paths are on the same filesystem
     (have the same device identifier).
+
+    This function only considers the start of a pattern's path—from the start of the path up until
+    there's a path component with a glob or other non-literal character. If there are no such
+    characters, the whole path is considered. The rationale is that it's not feasible for borgmatic
+    to interpret Borg's patterns to see which actual files (and therefore devices) they map to. So
+    for instance, a pattern with a path of "/var/log/*/data" would end up with its device set to the
+    device of "/var/log"—ignoring the "/*/data" part due to that glob.
+
+    The one exception is that if a regular expression pattern path starts with "^", that will get
+    stripped off for purposes of determining its device.
     '''
     return tuple(
         borgmatic.borg.pattern.Pattern(
             pattern.path,
             pattern.type,
             pattern.style,
-            device=pattern.device
-            or (
-                os.stat(full_path).st_dev
-                if pattern.type == borgmatic.borg.pattern.Pattern_type.ROOT
-                and os.path.exists(full_path)
-                else None
-            ),
+            device=pattern.device or (os.stat(full_path).st_dev if full_path else None),
             source=pattern.source,
         )
         for pattern in patterns
-        for full_path in (os.path.join(working_directory or '', pattern.path),)
+        for full_path in (
+            get_existent_path_or_parent(
+                os.path.join(working_directory or '', pattern.path.lstrip('^'))
+            ),
+        )
     )
 
 

+ 5 - 5
borgmatic/hooks/data_source/snapshot.py

@@ -8,9 +8,9 @@ def get_contained_patterns(parent_directory, candidate_patterns):
     '''
     Given a parent directory and a set of candidate patterns potentially inside it, get the subset
     of contained patterns for which the parent directory is actually the parent, a grandparent, the
-    very same directory, etc. The idea is if, say, /var/log and /var/lib are candidate pattern
-    paths, but there's a parent directory (logical volume, dataset, subvolume, etc.) at /var, then
-    /var is what we want to snapshot.
+    very same directory, etc. The idea is if, say, "/var/log" and "/var/lib" are candidate pattern
+    paths, but there's a parent directory (logical volume, dataset, subvolume, etc.) at "/var", then
+    "/var" is what we want to snapshot.
 
     If a parent directory and a candidate pattern are on different devices, skip the pattern. That's
     because any snapshot of a parent directory won't actually include "contained" directories if
@@ -18,8 +18,8 @@ def get_contained_patterns(parent_directory, candidate_patterns):
 
     For this function to work, a candidate pattern path can't have any globs or other non-literal
     characters in the initial portion of the path that matches the parent directory. For instance, a
-    parent directory of /var would match a candidate pattern path of /var/log/*/data, but not a
-    pattern path like /v*/log/*/data.
+    parent directory of "/var" would match a candidate pattern path of "/var/log/*/data", but not a
+    pattern path like "/v*/log/*/data".
 
     The one exception is that if a regular expression pattern path starts with "^", that will get
     stripped off for purposes of matching against a parent directory.

+ 43 - 16
tests/unit/actions/test_pattern.py

@@ -245,36 +245,63 @@ def test_expand_patterns_expands_only_tildes_in_non_root_patterns():
     assert paths == (Pattern('/root/bar/*', Pattern_type.INCLUDE),)
 
 
-def test_device_map_patterns_gives_device_id_per_path():
+def test_get_existent_path_or_parent_passes_through_existent_path():
     flexmock(module.os.path).should_receive('exists').and_return(True)
-    flexmock(module.os).should_receive('stat').with_args('/foo').and_return(flexmock(st_dev=55))
-    flexmock(module.os).should_receive('stat').with_args('/bar').and_return(flexmock(st_dev=66))
 
-    device_map = module.device_map_patterns((Pattern('/foo'), Pattern('/bar')))
+    assert module.get_existent_path_or_parent('/foo/bar/baz') == '/foo/bar/baz'
 
-    assert device_map == (
-        Pattern('/foo', device=55),
-        Pattern('/bar', device=66),
-    )
 
+def test_get_existent_path_or_parent_with_non_existent_path_returns_none():
+    flexmock(module.os.path).should_receive('exists').and_return(False)
 
-def test_device_map_patterns_only_considers_root_patterns():
-    flexmock(module.os.path).should_receive('exists').and_return(True)
+    assert module.get_existent_path_or_parent('/foo/bar/baz') is None
+
+
+def test_get_existent_path_or_parent_with_non_existent_path_returns_existent_parent():
+    flexmock(module.os.path).should_receive('exists').with_args('/foo/bar/baz*').and_return(False)
+    flexmock(module.os.path).should_receive('exists').with_args('/foo/bar').and_return(True)
+    flexmock(module.os.path).should_receive('exists').with_args('/foo').never()
+    flexmock(module.os.path).should_receive('exists').with_args('/').never()
+
+    assert module.get_existent_path_or_parent('/foo/bar/baz*') == '/foo/bar'
+
+
+def test_get_existent_path_or_parent_with_non_existent_path_returns_existent_grandparent():
+    flexmock(module.os.path).should_receive('exists').with_args('/foo/bar/baz*').and_return(False)
+    flexmock(module.os.path).should_receive('exists').with_args('/foo/bar').and_return(False)
+    flexmock(module.os.path).should_receive('exists').with_args('/foo').and_return(True)
+    flexmock(module.os.path).should_receive('exists').with_args('/').never()
+
+    assert module.get_existent_path_or_parent('/foo/bar/baz*') == '/foo'
+
+
+def test_device_map_patterns_gives_device_id_per_path():
+    flexmock(module).should_receive('get_existent_path_or_parent').replace_with(lambda path: path)
     flexmock(module.os).should_receive('stat').with_args('/foo').and_return(flexmock(st_dev=55))
-    flexmock(module.os).should_receive('stat').with_args('/bar*').never()
+    flexmock(module.os).should_receive('stat').with_args('/bar').and_return(flexmock(st_dev=66))
 
     device_map = module.device_map_patterns(
-        (Pattern('/foo'), Pattern('/bar*', Pattern_type.INCLUDE))
+        (
+            Pattern('/foo'),
+            Pattern('^/bar', type=Pattern_type.INCLUDE, style=Pattern_style.REGULAR_EXPRESSION),
+        )
     )
 
     assert device_map == (
         Pattern('/foo', device=55),
-        Pattern('/bar*', Pattern_type.INCLUDE),
+        Pattern(
+            '^/bar', type=Pattern_type.INCLUDE, style=Pattern_style.REGULAR_EXPRESSION, device=66
+        ),
     )
 
 
 def test_device_map_patterns_with_missing_path_does_not_error():
-    flexmock(module.os.path).should_receive('exists').and_return(True).and_return(False)
+    flexmock(module).should_receive('get_existent_path_or_parent').with_args('/foo').and_return(
+        '/foo'
+    )
+    flexmock(module).should_receive('get_existent_path_or_parent').with_args('/bar').and_return(
+        None
+    )
     flexmock(module.os).should_receive('stat').with_args('/foo').and_return(flexmock(st_dev=55))
     flexmock(module.os).should_receive('stat').with_args('/bar').never()
 
@@ -287,7 +314,7 @@ def test_device_map_patterns_with_missing_path_does_not_error():
 
 
 def test_device_map_patterns_uses_working_directory_to_construct_path():
-    flexmock(module.os.path).should_receive('exists').and_return(True)
+    flexmock(module).should_receive('get_existent_path_or_parent').replace_with(lambda path: path)
     flexmock(module.os).should_receive('stat').with_args('/foo').and_return(flexmock(st_dev=55))
     flexmock(module.os).should_receive('stat').with_args('/working/dir/bar').and_return(
         flexmock(st_dev=66)
@@ -304,7 +331,7 @@ def test_device_map_patterns_uses_working_directory_to_construct_path():
 
 
 def test_device_map_patterns_with_existing_device_id_does_not_overwrite_it():
-    flexmock(module.os.path).should_receive('exists').and_return(True)
+    flexmock(module).should_receive('get_existent_path_or_parent').replace_with(lambda path: path)
     flexmock(module.os).should_receive('stat').with_args('/foo').and_return(flexmock(st_dev=55))
     flexmock(module.os).should_receive('stat').with_args('/bar').and_return(flexmock(st_dev=100))