فهرست منبع

Merge pull request #2334 from ThomasWaldmann/precise-pattern

add PathFullPattern / optimization for it
TW 8 سال پیش
والد
کامیت
f2e9e862d8
3 فایلهای تغییر یافته به همراه 82 افزوده شده و 5 حذف شده
  1. 17 1
      src/borg/archiver.py
  2. 34 3
      src/borg/helpers.py
  3. 31 1
      src/borg/testsuite/helpers.py

+ 17 - 1
src/borg/archiver.py

@@ -1604,11 +1604,27 @@ class Archiver:
             regular expression syntax is described in the `Python documentation for
             the re module <https://docs.python.org/3/library/re.html>`_.
 
-        Prefix path, selector `pp:`
+        Path prefix, selector `pp:`
 
             This pattern style is useful to match whole sub-directories. The pattern
             `pp:/data/bar` matches `/data/bar` and everything therein.
 
+        Path full-match, selector `pf:`
+
+            This pattern style is useful to match whole paths.
+            This is kind of a pseudo pattern as it can not have any variable or
+            unspecified parts - the full, precise path must be given.
+            `pf:/data/foo.txt` matches `/data/foo.txt` only.
+
+            Implementation note: this is implemented via very time-efficient O(1)
+            hashtable lookups (this means you can have huge amounts of such patterns
+            without impacting performance much).
+            Due to that, this kind of pattern does not respect any context or order.
+            If you use such a pattern to include a file, it will always be included
+            (if the directory recursion encounters it).
+            Other include/exclude patterns that would normally match will be ignored.
+            Same logic applies for exclude.
+
         Exclusions can be passed via the command line option `--exclude`. When used
         from within a shell the patterns should be quoted to protect them from
         expansion.

+ 34 - 3
src/borg/helpers.py

@@ -451,23 +451,42 @@ class PatternMatcher:
         # Value to return from match function when none of the patterns match.
         self.fallback = fallback
 
+        # optimizations
+        self._path_full_patterns = {}  # full path -> return value
+
     def empty(self):
-        return not len(self._items)
+        return not len(self._items) and not len(self._path_full_patterns)
+
+    def _add(self, pattern, value):
+        if isinstance(pattern, PathFullPattern):
+            key = pattern.pattern  # full, normalized path
+            self._path_full_patterns[key] = value
+        else:
+            self._items.append((pattern, value))
 
     def add(self, patterns, value):
         """Add list of patterns to internal list. The given value is returned from the match function when one of the
         given patterns matches.
         """
-        self._items.extend((i, value) for i in patterns)
+        for pattern in patterns:
+            self._add(pattern, value)
 
     def add_inclexcl(self, patterns):
         """Add list of patterns (of type InclExclPattern) to internal list. The patterns ptype member is returned from
         the match function when one of the given patterns matches.
         """
-        self._items.extend(patterns)
+        for pattern, pattern_type in patterns:
+            self._add(pattern, pattern_type)
 
     def match(self, path):
         path = normalize_path(path)
+        # do a fast lookup for full path matches (note: we do not count such matches):
+        non_existent = object()
+        value = self._path_full_patterns.get(path, non_existent)
+        if value is not non_existent:
+            # we have a full path match!
+            return value
+        # this is the slow way, if we have many patterns in self._items:
         for (pattern, value) in self._items:
             if pattern.match(path, normalize=False):
                 return value
@@ -518,6 +537,17 @@ class PatternBase:
         raise NotImplementedError
 
 
+class PathFullPattern(PatternBase):
+    """Full match of a path."""
+    PREFIX = "pf"
+
+    def _prepare(self, pattern):
+        self.pattern = os.path.normpath(pattern)
+
+    def _match(self, path):
+        return path == self.pattern
+
+
 # For PathPrefixPattern, FnmatchPattern and ShellPattern, we require that the pattern either match the whole path
 # or an initial segment of the path up to but not including a path separator. To unify the two cases, we add a path
 # separator to the end of the path before matching.
@@ -600,6 +630,7 @@ class RegexPattern(PatternBase):
 
 _PATTERN_STYLES = set([
     FnmatchPattern,
+    PathFullPattern,
     PathPrefixPattern,
     RegexPattern,
     ShellPattern,

+ 31 - 1
src/borg/testsuite/helpers.py

@@ -25,7 +25,8 @@ from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams,
 from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
 from ..helpers import load_exclude_file, load_pattern_file
 from ..helpers import CompressionSpec, ComprSpec, CompressionDecider1, CompressionDecider2
-from ..helpers import parse_pattern, PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern
+from ..helpers import parse_pattern, PatternMatcher
+from ..helpers import PathFullPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, RegexPattern
 from ..helpers import swidth_slice
 from ..helpers import chunkit
 from ..helpers import safe_ns, safe_s
@@ -254,6 +255,35 @@ def check_patterns(files, pattern, expected):
     assert matched == (files if expected is None else expected)
 
 
+@pytest.mark.parametrize("pattern, expected", [
+    # "None" means all files, i.e. all match the given pattern
+    ("/", []),
+    ("/home", ["/home"]),
+    ("/home///", ["/home"]),
+    ("/./home", ["/home"]),
+    ("/home/user", ["/home/user"]),
+    ("/home/user2", ["/home/user2"]),
+    ("/home/user/.bashrc", ["/home/user/.bashrc"]),
+    ])
+def test_patterns_full(pattern, expected):
+    files = ["/home", "/home/user", "/home/user2", "/home/user/.bashrc", ]
+
+    check_patterns(files, PathFullPattern(pattern), expected)
+
+
+@pytest.mark.parametrize("pattern, expected", [
+    # "None" means all files, i.e. all match the given pattern
+    ("", []),
+    ("relative", []),
+    ("relative/path/", ["relative/path"]),
+    ("relative/path", ["relative/path"]),
+    ])
+def test_patterns_full_relative(pattern, expected):
+    files = ["relative/path", "relative/path2", ]
+
+    check_patterns(files, PathFullPattern(pattern), expected)
+
+
 @pytest.mark.parametrize("pattern, expected", [
     # "None" means all files, i.e. all match the given pattern
     ("/", None),