Преглед изворног кода

Merge pull request #2334 from ThomasWaldmann/precise-pattern

add PathFullPattern / optimization for it
TW пре 8 година
родитељ
комит
f2e9e862d8
3 измењених фајлова са 82 додато и 5 уклоњено
  1. 17 1
      src/borg/archiver.py
  2. 34 3
      src/borg/helpers.py
  3. 31 1
      src/borg/testsuite/helpers.py

+ 17 - 1
src/borg/archiver.py

@@ -1604,11 +1604,27 @@ class Archiver:
             regular expression syntax is described in the `Python documentation for
             regular expression syntax is described in the `Python documentation for
             the re module <https://docs.python.org/3/library/re.html>`_.
             the re module <https://docs.python.org/3/library/re.html>`_.
 
 
-        Prefix path, selector `pp:`
+        Path prefix, selector `pp:`
 
 
             This pattern style is useful to match whole sub-directories. The pattern
             This pattern style is useful to match whole sub-directories. The pattern
             `pp:/data/bar` matches `/data/bar` and everything therein.
             `pp:/data/bar` matches `/data/bar` and everything therein.
 
 
+        Path full-match, selector `pf:`
+
+            This pattern style is useful to match whole paths.
+            This is kind of a pseudo pattern as it cannot have any variable or
+            unspecified parts - the full, precise path must be given.
+            `pf:/data/foo.txt` matches `/data/foo.txt` only.
+
+            Implementation note: this is implemented via very time-efficient O(1)
+            hashtable lookups (this means you can have huge amounts of such patterns
+            without impacting performance much).
+            Due to that, this kind of pattern does not respect any context or order.
+            If you use such a pattern to include a file, it will always be included
+            (if the directory recursion encounters it).
+            Other include/exclude patterns that would normally match will be ignored.
+            The same logic applies to exclude patterns.
+
         Exclusions can be passed via the command line option `--exclude`. When used
         Exclusions can be passed via the command line option `--exclude`. When used
         from within a shell the patterns should be quoted to protect them from
         from within a shell the patterns should be quoted to protect them from
         expansion.
         expansion.

+ 34 - 3
src/borg/helpers.py

@@ -451,23 +451,42 @@ class PatternMatcher:
         # Value to return from match function when none of the patterns match.
         # Value to return from match function when none of the patterns match.
         self.fallback = fallback
         self.fallback = fallback
 
 
+        # optimizations
+        self._path_full_patterns = {}  # full path -> return value
+
     def empty(self):
     def empty(self):
-        return not len(self._items)
+        return not len(self._items) and not len(self._path_full_patterns)
+
+    def _add(self, pattern, value):
+        if isinstance(pattern, PathFullPattern):
+            key = pattern.pattern  # full, normalized path
+            self._path_full_patterns[key] = value
+        else:
+            self._items.append((pattern, value))
 
 
     def add(self, patterns, value):
     def add(self, patterns, value):
         """Add list of patterns to internal list. The given value is returned from the match function when one of the
         """Add list of patterns to internal list. The given value is returned from the match function when one of the
         given patterns matches.
         given patterns matches.
         """
         """
-        self._items.extend((i, value) for i in patterns)
+        for pattern in patterns:
+            self._add(pattern, value)
 
 
     def add_inclexcl(self, patterns):
     def add_inclexcl(self, patterns):
         """Add list of patterns (of type InclExclPattern) to internal list. The patterns ptype member is returned from
         """Add list of patterns (of type InclExclPattern) to internal list. The patterns ptype member is returned from
         the match function when one of the given patterns matches.
         the match function when one of the given patterns matches.
         """
         """
-        self._items.extend(patterns)
+        for pattern, pattern_type in patterns:
+            self._add(pattern, pattern_type)
 
 
     def match(self, path):
     def match(self, path):
         path = normalize_path(path)
         path = normalize_path(path)
+        # do a fast lookup for full path matches (note: we do not count such matches):
+        non_existent = object()
+        value = self._path_full_patterns.get(path, non_existent)
+        if value is not non_existent:
+            # we have a full path match!
+            return value
+        # this is the slow way, if we have many patterns in self._items:
         for (pattern, value) in self._items:
         for (pattern, value) in self._items:
             if pattern.match(path, normalize=False):
             if pattern.match(path, normalize=False):
                 return value
                 return value
@@ -518,6 +537,17 @@ class PatternBase:
         raise NotImplementedError
         raise NotImplementedError
 
 
 
 
+class PathFullPattern(PatternBase):
+    """Full match of a path."""
+    PREFIX = "pf"
+
+    def _prepare(self, pattern):
+        self.pattern = os.path.normpath(pattern)
+
+    def _match(self, path):
+        return path == self.pattern
+
+
 # For PathPrefixPattern, FnmatchPattern and ShellPattern, we require that the pattern either match the whole path
 # For PathPrefixPattern, FnmatchPattern and ShellPattern, we require that the pattern either match the whole path
 # or an initial segment of the path up to but not including a path separator. To unify the two cases, we add a path
 # or an initial segment of the path up to but not including a path separator. To unify the two cases, we add a path
 # separator to the end of the path before matching.
 # separator to the end of the path before matching.
@@ -600,6 +630,7 @@ class RegexPattern(PatternBase):
 
 
 _PATTERN_STYLES = set([
 _PATTERN_STYLES = set([
     FnmatchPattern,
     FnmatchPattern,
+    PathFullPattern,
     PathPrefixPattern,
     PathPrefixPattern,
     RegexPattern,
     RegexPattern,
     ShellPattern,
     ShellPattern,

+ 31 - 1
src/borg/testsuite/helpers.py

@@ -25,7 +25,8 @@ from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams,
 from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
 from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
 from ..helpers import load_exclude_file, load_pattern_file
 from ..helpers import load_exclude_file, load_pattern_file
 from ..helpers import CompressionSpec, ComprSpec, CompressionDecider1, CompressionDecider2
 from ..helpers import CompressionSpec, ComprSpec, CompressionDecider1, CompressionDecider2
-from ..helpers import parse_pattern, PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern
+from ..helpers import parse_pattern, PatternMatcher
+from ..helpers import PathFullPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, RegexPattern
 from ..helpers import swidth_slice
 from ..helpers import swidth_slice
 from ..helpers import chunkit
 from ..helpers import chunkit
 from ..helpers import safe_ns, safe_s
 from ..helpers import safe_ns, safe_s
@@ -254,6 +255,35 @@ def check_patterns(files, pattern, expected):
     assert matched == (files if expected is None else expected)
     assert matched == (files if expected is None else expected)
 
 
 
 
+@pytest.mark.parametrize("pattern, expected", [
+    # "None" means all files, i.e. all match the given pattern
+    ("/", []),
+    ("/home", ["/home"]),
+    ("/home///", ["/home"]),
+    ("/./home", ["/home"]),
+    ("/home/user", ["/home/user"]),
+    ("/home/user2", ["/home/user2"]),
+    ("/home/user/.bashrc", ["/home/user/.bashrc"]),
+    ])
+def test_patterns_full(pattern, expected):
+    files = ["/home", "/home/user", "/home/user2", "/home/user/.bashrc", ]
+
+    check_patterns(files, PathFullPattern(pattern), expected)
+
+
+@pytest.mark.parametrize("pattern, expected", [
+    # "None" means all files, i.e. all match the given pattern
+    ("", []),
+    ("relative", []),
+    ("relative/path/", ["relative/path"]),
+    ("relative/path", ["relative/path"]),
+    ])
+def test_patterns_full_relative(pattern, expected):
+    files = ["relative/path", "relative/path2", ]
+
+    check_patterns(files, PathFullPattern(pattern), expected)
+
+
 @pytest.mark.parametrize("pattern, expected", [
 @pytest.mark.parametrize("pattern, expected", [
     # "None" means all files, i.e. all match the given pattern
     # "None" means all files, i.e. all match the given pattern
     ("/", None),
     ("/", None),