Explorar o código

Merge pull request #2322 from edgimar/master

allow excluding parent and including child, fixes #2314
TW hai 8 anos
pai
achega
6f47b797f9
Modificáronse 5 ficheiros con 221 adicións e 102 borrados
  1. 6 6
      src/borg/archive.py
  2. 30 26
      src/borg/archiver.py
  3. 139 63
      src/borg/helpers.py
  4. 36 1
      src/borg/testsuite/archiver.py
  5. 10 6
      src/borg/testsuite/helpers.py

+ 6 - 6
src/borg/archive.py

@@ -36,7 +36,7 @@ from .helpers import StableDict
 from .helpers import bin_to_hex
 from .helpers import safe_ns
 from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi
-from .helpers import PathPrefixPattern, FnmatchPattern
+from .helpers import PathPrefixPattern, FnmatchPattern, IECommand
 from .item import Item, ArchiveItem
 from .key import key_factory
 from .platform import acl_get, acl_set, set_flags, get_flags, swidth
@@ -1721,10 +1721,10 @@ class ArchiveRecreater:
         """Add excludes to the matcher created by exclude_cache and exclude_if_present."""
         def exclude(dir, tag_item):
             if self.keep_exclude_tags:
-                tag_files.append(PathPrefixPattern(tag_item.path))
-                tagged_dirs.append(FnmatchPattern(dir + '/'))
+                tag_files.append(PathPrefixPattern(tag_item.path, recurse_dir=False))
+                tagged_dirs.append(FnmatchPattern(dir + '/', recurse_dir=False))
             else:
-                tagged_dirs.append(PathPrefixPattern(dir))
+                tagged_dirs.append(PathPrefixPattern(dir, recurse_dir=False))
 
         matcher = self.matcher
         tag_files = []
@@ -1747,8 +1747,8 @@ class ArchiveRecreater:
                         file = open_item(archive, cachedir_masters[item.source])
                     if file.read(len(CACHE_TAG_CONTENTS)).startswith(CACHE_TAG_CONTENTS):
                         exclude(dir, item)
-        matcher.add(tag_files, True)
-        matcher.add(tagged_dirs, False)
+        matcher.add(tag_files, IECommand.Include)
+        matcher.add(tagged_dirs, IECommand.ExcludeNoRecurse)
 
     def create_target(self, archive, target_name=None):
         """Create target archive."""

+ 30 - 26
src/borg/archiver.py

@@ -54,7 +54,7 @@ from .helpers import check_extension_modules
 from .helpers import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern
 from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo
 from .helpers import log_multi
-from .helpers import parse_pattern, PatternMatcher, PathPrefixPattern
+from .helpers import PatternMatcher
 from .helpers import signal_handler, raising_signal_handler, SigHup, SigTerm
 from .helpers import ErrorIgnoringTextIOWrapper
 from .helpers import ProgressIndicatorPercent
@@ -190,16 +190,11 @@ class Archiver:
             bi += slicelen
 
     @staticmethod
-    def build_matcher(inclexcl_patterns, paths):
+    def build_matcher(inclexcl_patterns, include_paths):
         matcher = PatternMatcher()
-        if inclexcl_patterns:
-            matcher.add_inclexcl(inclexcl_patterns)
-        include_patterns = []
-        if paths:
-            include_patterns.extend(parse_pattern(i, PathPrefixPattern) for i in paths)
-            matcher.add(include_patterns, True)
-        matcher.fallback = not include_patterns
-        return matcher, include_patterns
+        matcher.add_inclexcl(inclexcl_patterns)
+        matcher.add_includepaths(include_paths)
+        return matcher
 
     def do_serve(self, args):
         """Start in server mode. This command is usually not used manually."""
@@ -493,13 +488,20 @@ class Archiver:
 
         This should only raise on critical errors. Per-item errors must be handled within this method.
         """
+        if st is None:
+            with backup_io('stat'):
+                st = os.lstat(path)
+
+        recurse_excluded_dir = False
         if not matcher.match(path):
             self.print_file_status('x', path)
-            return
+
+            if stat.S_ISDIR(st.st_mode) and matcher.recurse_dir:
+                recurse_excluded_dir = True
+            else:
+                return
+
         try:
-            if st is None:
-                with backup_io('stat'):
-                    st = os.lstat(path)
             if (st.st_ino, st.st_dev) in skip_inodes:
                 return
             # if restrict_dev is given, we do not want to recurse into a new filesystem,
@@ -527,7 +529,8 @@ class Archiver:
                                               read_special=read_special, dry_run=dry_run)
                         return
                 if not dry_run:
-                    status = archive.process_dir(path, st)
+                    if not recurse_excluded_dir:
+                        status = archive.process_dir(path, st)
                 if recurse:
                     with backup_io('scandir'):
                         entries = helpers.scandir_inorder(path)
@@ -590,7 +593,9 @@ class Archiver:
                 status = '?'  # need to add a status code somewhere
             else:
                 status = '-'  # dry run, item was not backed up
-        self.print_file_status(status, path)
+
+        if not recurse_excluded_dir:
+            self.print_file_status(status, path)
 
     @staticmethod
     def build_filter(matcher, peek_and_store_hardlink_masters, strip_components):
@@ -616,7 +621,7 @@ class Archiver:
             if sys.platform.startswith(('linux', 'freebsd', 'netbsd', 'openbsd', 'darwin', )):
                 logger.warning('Hint: You likely need to fix your locale setup. E.g. install locales and use: LANG=en_US.UTF-8')
 
-        matcher, include_patterns = self.build_matcher(args.patterns, args.paths)
+        matcher = self.build_matcher(args.patterns, args.paths)
 
         progress = args.progress
         output_list = args.output_list
@@ -681,9 +686,8 @@ class Archiver:
                     archive.extract_item(dir_item)
                 except BackupOSError as e:
                     self.print_warning('%s: %s', remove_surrogates(dir_item.path), e)
-        for pattern in include_patterns:
-            if pattern.match_count == 0:
-                self.print_warning("Include pattern '%s' never matched.", pattern)
+        for pattern in matcher.get_unmatched_include_patterns():
+            self.print_warning("Include pattern '%s' never matched.", pattern)
         if pi:
             # clear progress output
             pi.finish()
@@ -893,13 +897,13 @@ class Archiver:
                                'If you know for certain that they are the same, pass --same-chunker-params '
                                'to override this check.')
 
-        matcher, include_patterns = self.build_matcher(args.patterns, args.paths)
+        matcher = self.build_matcher(args.patterns, args.paths)
 
         compare_archives(archive1, archive2, matcher)
 
-        for pattern in include_patterns:
-            if pattern.match_count == 0:
-                self.print_warning("Include pattern '%s' never matched.", pattern)
+        for pattern in matcher.get_unmatched_include_patterns():
+            self.print_warning("Include pattern '%s' never matched.", pattern)
+
         return self.exit_code
 
     @with_repository(exclusive=True, cache=True)
@@ -1048,7 +1052,7 @@ class Archiver:
             return self._list_repository(args, manifest, write)
 
     def _list_archive(self, args, repository, manifest, key, write):
-        matcher, _ = self.build_matcher(args.patterns, args.paths)
+        matcher = self.build_matcher(args.patterns, args.paths)
         if args.format is not None:
             format = args.format
         elif args.short:
@@ -1330,7 +1334,7 @@ class Archiver:
                    env_var_override='BORG_RECREATE_I_KNOW_WHAT_I_AM_DOING'):
             return EXIT_ERROR
 
-        matcher, include_patterns = self.build_matcher(args.patterns, args.paths)
+        matcher = self.build_matcher(args.patterns, args.paths)
         self.output_list = args.output_list
         self.output_filter = args.output_filter
         recompress = args.recompress != 'never'

+ 139 - 63
src/borg/helpers.py

@@ -23,6 +23,7 @@ import uuid
 from binascii import hexlify
 from collections import namedtuple, deque, abc, Counter
 from datetime import datetime, timezone, timedelta
+from enum import Enum
 from fnmatch import translate
 from functools import wraps, partial, lru_cache
 from itertools import islice
@@ -388,23 +389,24 @@ def parse_timestamp(timestamp):
         return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S').replace(tzinfo=timezone.utc)
 
 
-def parse_add_pattern(patternstr, roots, patterns, fallback):
-    """Parse a pattern string and add it to roots or patterns depending on the pattern type."""
-    pattern = parse_inclexcl_pattern(patternstr, fallback=fallback)
-    if pattern.ptype is RootPath:
-        roots.append(pattern.pattern)
-    elif pattern.ptype is PatternStyle:
-        fallback = pattern.pattern
+def parse_patternfile_line(line, roots, ie_commands, fallback):
+    """Parse a pattern-file line and act depending on which command it represents."""
+    ie_command = parse_inclexcl_command(line, fallback=fallback)
+    if ie_command.cmd is IECommand.RootPath:
+        roots.append(ie_command.val)
+    elif ie_command.cmd is IECommand.PatternStyle:
+        fallback = ie_command.val
     else:
-        patterns.append(pattern)
+        # it is some kind of include/exclude command
+        ie_commands.append(ie_command)
     return fallback
 
 
-def load_pattern_file(fileobj, roots, patterns, fallback=None):
+def load_pattern_file(fileobj, roots, ie_commands, fallback=None):
     if fallback is None:
         fallback = ShellPattern  # ShellPattern is defined later in this module
-    for patternstr in clean_lines(fileobj):
-        fallback = parse_add_pattern(patternstr, roots, patterns, fallback)
+    for line in clean_lines(fileobj):
+        fallback = parse_patternfile_line(line, roots, ie_commands, fallback)
 
 
 def load_exclude_file(fileobj, patterns):
@@ -417,7 +419,7 @@ class ArgparsePatternAction(argparse.Action):
         super().__init__(nargs=nargs, **kw)
 
     def __call__(self, parser, args, values, option_string=None):
-        parse_add_pattern(values[0], args.paths, args.patterns, ShellPattern)
+        parse_patternfile_line(values[0], args.paths, args.patterns, ShellPattern)
 
 
 class ArgparsePatternFileAction(argparse.Action):
@@ -442,6 +444,11 @@ class ArgparseExcludeFileAction(ArgparsePatternFileAction):
 
 
 class PatternMatcher:
+    """Represents a collection of pattern objects to match paths against.
+
+    *fallback* is a boolean value that *match()* returns if no matching patterns are found.
+
+    """
     def __init__(self, fallback=None):
         self._items = []
 
@@ -451,42 +458,88 @@ class PatternMatcher:
         # optimizations
         self._path_full_patterns = {}  # full path -> return value
 
+        # indicates whether the last match() call ended on a pattern for which
+        # we should recurse into any matching folder.  Will be set to True or
+        # False when calling match().
+        self.recurse_dir = None
+
+        # whether to recurse into directories when no match is found
+        # TODO: allow modification as a config option?
+        self.recurse_dir_default = True
+
+        self.include_patterns = []
+
+        # TODO: move this info to parse_inclexcl_command and store in PatternBase subclass?
+        self.is_include_cmd = {
+            IECommand.Exclude: False,
+            IECommand.ExcludeNoRecurse: False,
+            IECommand.Include: True
+        }
+
     def empty(self):
         return not len(self._items) and not len(self._path_full_patterns)
 
-    def _add(self, pattern, value):
+    def _add(self, pattern, cmd):
+        """*cmd* is an IECommand value.
+        """
         if isinstance(pattern, PathFullPattern):
             key = pattern.pattern  # full, normalized path
-            self._path_full_patterns[key] = value
+            self._path_full_patterns[key] = cmd
         else:
-            self._items.append((pattern, value))
+            self._items.append((pattern, cmd))
 
-    def add(self, patterns, value):
-        """Add list of patterns to internal list. The given value is returned from the match function when one of the
-        given patterns matches.
+    def add(self, patterns, cmd):
+        """Add list of patterns to internal list. *cmd* indicates whether the
+        pattern is an include/exclude pattern, and whether recursion should be
+        done on excluded folders.
         """
         for pattern in patterns:
-            self._add(pattern, value)
+            self._add(pattern, cmd)
+
+    def add_includepaths(self, include_paths):
+        """Used to add inclusion-paths from args.paths (from commandline).
+        """
+        include_patterns = [parse_pattern(p, PathPrefixPattern) for p in include_paths]
+        self.add(include_patterns, IECommand.Include)
+        self.fallback = not include_patterns
+        self.include_patterns = include_patterns
+
+    def get_unmatched_include_patterns(self):
+        "Note that this only returns patterns added via *add_includepaths*."
+        return [p for p in self.include_patterns if p.match_count == 0]
 
     def add_inclexcl(self, patterns):
-        """Add list of patterns (of type InclExclPattern) to internal list. The patterns ptype member is returned from
-        the match function when one of the given patterns matches.
+        """Add list of patterns (of type CmdTuple) to internal list.
         """
-        for pattern, pattern_type in patterns:
-            self._add(pattern, pattern_type)
+        for pattern, cmd in patterns:
+            self._add(pattern, cmd)
 
     def match(self, path):
+        """Return True or False depending on whether *path* is matched.
+
+        If no match is found among the patterns in this matcher, then the value
+        in self.fallback is returned (defaults to None).
+
+        """
         path = normalize_path(path)
         # do a fast lookup for full path matches (note: we do not count such matches):
         non_existent = object()
         value = self._path_full_patterns.get(path, non_existent)
+
         if value is not non_existent:
             # we have a full path match!
+            # TODO: get from pattern; don't hard-code
+            self.recurse_dir = True
             return value
+
         # this is the slow way, if we have many patterns in self._items:
-        for (pattern, value) in self._items:
+        for (pattern, cmd) in self._items:
             if pattern.match(path, normalize=False):
-                return value
+                self.recurse_dir = pattern.recurse_dir
+                return self.is_include_cmd[cmd]
+
+        # by default we will recurse if there is no match
+        self.recurse_dir = self.recurse_dir_default
         return self.fallback
 
 
@@ -502,14 +555,15 @@ class PatternBase:
     """
     PREFIX = NotImplemented
 
-    def __init__(self, pattern):
+    def __init__(self, pattern, recurse_dir=False):
         self.pattern_orig = pattern
         self.match_count = 0
         pattern = normalize_path(pattern)
         self._prepare(pattern)
+        self.recurse_dir = recurse_dir
 
     def match(self, path, normalize=True):
-        """match the given path against this pattern.
+        """Return a boolean indicating whether *path* is matched by this pattern.
 
         If normalize is True (default), the path will get normalized using normalize_path(),
         otherwise it is assumed that it already is normalized using that function.
@@ -528,6 +582,7 @@ class PatternBase:
         return self.pattern_orig
 
     def _prepare(self, pattern):
+        "Should set the value of self.pattern"
         raise NotImplementedError
 
     def _match(self, path):
@@ -625,7 +680,7 @@ class RegexPattern(PatternBase):
         return (self.regex.search(path) is not None)
 
 
-_PATTERN_STYLES = set([
+_PATTERN_CLASSES = set([
     FnmatchPattern,
     PathFullPattern,
     PathPrefixPattern,
@@ -633,65 +688,86 @@ _PATTERN_STYLES = set([
     ShellPattern,
 ])
 
-_PATTERN_STYLE_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_STYLES)
+_PATTERN_CLASS_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_CLASSES)
 
-InclExclPattern = namedtuple('InclExclPattern', 'pattern ptype')
-RootPath = object()
-PatternStyle = object()
+CmdTuple = namedtuple('CmdTuple', 'val cmd')
 
 
-def get_pattern_style(prefix):
+class IECommand(Enum):
+    """A command that an InclExcl file line can represent.
+    """
+    RootPath = 1
+    PatternStyle = 2
+    Include = 3
+    Exclude = 4
+    ExcludeNoRecurse = 5
+
+
+def get_pattern_class(prefix):
     try:
-        return _PATTERN_STYLE_BY_PREFIX[prefix]
+        return _PATTERN_CLASS_BY_PREFIX[prefix]
     except KeyError:
         raise ValueError("Unknown pattern style: {}".format(prefix)) from None
 
 
-def parse_pattern(pattern, fallback=FnmatchPattern):
+def parse_pattern(pattern, fallback=FnmatchPattern, recurse_dir=True):
     """Read pattern from string and return an instance of the appropriate implementation class.
+
     """
     if len(pattern) > 2 and pattern[2] == ":" and pattern[:2].isalnum():
         (style, pattern) = (pattern[:2], pattern[3:])
-        cls = get_pattern_style(style)
+        cls = get_pattern_class(style)
     else:
         cls = fallback
-    return cls(pattern)
+    return cls(pattern, recurse_dir)
 
 
-def parse_exclude_pattern(pattern, fallback=FnmatchPattern):
+def parse_exclude_pattern(pattern_str, fallback=FnmatchPattern):
     """Read pattern from string and return an instance of the appropriate implementation class.
     """
-    epattern = parse_pattern(pattern, fallback)
-    return InclExclPattern(epattern, False)
-
-
-def parse_inclexcl_pattern(pattern, fallback=ShellPattern):
-    """Read pattern from string and return a InclExclPattern object."""
-    type_prefix_map = {
-        '-': False,
-        '+': True,
-        'R': RootPath,
-        'r': RootPath,
-        'P': PatternStyle,
-        'p': PatternStyle,
+    epattern_obj = parse_pattern(pattern_str, fallback)
+    return CmdTuple(epattern_obj, IECommand.Exclude)
+
+
+def parse_inclexcl_command(cmd_line_str, fallback=ShellPattern):
+    """Read a --patterns-from command from string and return a CmdTuple object."""
+
+    cmd_prefix_map = {
+        '-': IECommand.Exclude,
+        '!': IECommand.ExcludeNoRecurse,
+        '+': IECommand.Include,
+        'R': IECommand.RootPath,
+        'r': IECommand.RootPath,
+        'P': IECommand.PatternStyle,
+        'p': IECommand.PatternStyle,
     }
+
     try:
-        ptype = type_prefix_map[pattern[0]]
-        pattern = pattern[1:].lstrip()
-        if not pattern:
-            raise ValueError("Missing pattern!")
+        cmd = cmd_prefix_map[cmd_line_str[0]]
+
+        # remaining text on command-line following the command character
+        remainder_str = cmd_line_str[1:].lstrip()
+
+        if not remainder_str:
+            raise ValueError("Missing pattern/information!")
     except (IndexError, KeyError, ValueError):
-        raise argparse.ArgumentTypeError("Unable to parse pattern: {}".format(pattern))
-    if ptype is RootPath:
-        pobj = pattern
-    elif ptype is PatternStyle:
+        raise argparse.ArgumentTypeError("Unable to parse pattern/command: {}".format(cmd_line_str))
+
+    if cmd is IECommand.RootPath:
+        # TODO: validate string?
+        val = remainder_str
+    elif cmd is IECommand.PatternStyle:
+        # then remainder_str is something like 're' or 'sh'
         try:
-            pobj = get_pattern_style(pattern)
+            val = get_pattern_class(remainder_str)
         except ValueError:
-            raise argparse.ArgumentTypeError("Unable to parse pattern: {}".format(pattern))
+            raise argparse.ArgumentTypeError("Invalid pattern style: {}".format(remainder_str))
     else:
-        pobj = parse_pattern(pattern, fallback)
-    return InclExclPattern(pobj, ptype)
+        # determine recurse_dir based on command type
+        recurse_dir = cmd not in [IECommand.ExcludeNoRecurse]
+        val = parse_pattern(remainder_str, fallback, recurse_dir)
+
+    return CmdTuple(val, cmd)
 
 
 def timestamp(s):

+ 36 - 1
src/borg/testsuite/archiver.py

@@ -37,6 +37,7 @@ from ..helpers import PatternMatcher, parse_pattern, Location, get_security_dir
 from ..helpers import Manifest
 from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
 from ..helpers import bin_to_hex
+from ..helpers import IECommand
 from ..item import Item
 from ..key import KeyfileKeyBase, RepoKey, KeyfileKey, Passphrase, TAMRequiredError
 from ..keymanager import RepoIdMismatch, NotABorgKeyFile
@@ -929,6 +930,40 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.assert_in('x input/file2', output)
         self.assert_in('x input/otherfile', output)
 
+    def test_create_pattern_exclude_folder_but_recurse(self):
+        """test when patterns exclude a parent folder, but include a child"""
+        self.patterns_file_path2 = os.path.join(self.tmpdir, 'patterns2')
+        with open(self.patterns_file_path2, 'wb') as fd:
+            fd.write(b'+ input/x/b\n- input/x*\n')
+
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        self.create_regular_file('x/a/foo_a', size=1024 * 80)
+        self.create_regular_file('x/b/foo_b', size=1024 * 80)
+        self.create_regular_file('y/foo_y', size=1024 * 80)
+        output = self.cmd('create', '-v', '--list',
+                          '--patterns-from=' + self.patterns_file_path2,
+                          self.repository_location + '::test', 'input')
+        self.assert_in('x input/x/a/foo_a', output)
+        self.assert_in("A input/x/b/foo_b", output)
+        self.assert_in('A input/y/foo_y', output)
+
+    def test_create_pattern_exclude_folder_no_recurse(self):
+        """test when patterns exclude a parent folder and, but include a child"""
+        self.patterns_file_path2 = os.path.join(self.tmpdir, 'patterns2')
+        with open(self.patterns_file_path2, 'wb') as fd:
+            fd.write(b'+ input/x/b\n! input/x*\n')
+
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        self.create_regular_file('x/a/foo_a', size=1024 * 80)
+        self.create_regular_file('x/b/foo_b', size=1024 * 80)
+        self.create_regular_file('y/foo_y', size=1024 * 80)
+        output = self.cmd('create', '-v', '--list',
+                          '--patterns-from=' + self.patterns_file_path2,
+                          self.repository_location + '::test', 'input')
+        self.assert_not_in('input/x/a/foo_a', output)
+        self.assert_not_in('input/x/a', output)
+        self.assert_in('A input/y/foo_y', output)
+
     def test_extract_pattern_opt(self):
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.create_regular_file('file1', size=1024 * 80)
@@ -2889,7 +2924,7 @@ class TestBuildFilter:
 
     def test_basic(self):
         matcher = PatternMatcher()
-        matcher.add([parse_pattern('included')], True)
+        matcher.add([parse_pattern('included')], IECommand.Include)
         filter = Archiver.build_filter(matcher, self.peek_and_store_hardlink_masters, 0)
         assert filter(Item(path='included'))
         assert filter(Item(path='included/file'))

+ 10 - 6
src/borg/testsuite/helpers.py

@@ -557,12 +557,12 @@ def test_switch_patterns_style():
     roots, patterns = [], []
     load_pattern_file(pattern_file, roots, patterns)
     assert len(patterns) == 6
-    assert isinstance(patterns[0].pattern, ShellPattern)
-    assert isinstance(patterns[1].pattern, FnmatchPattern)
-    assert isinstance(patterns[2].pattern, RegexPattern)
-    assert isinstance(patterns[3].pattern, RegexPattern)
-    assert isinstance(patterns[4].pattern, PathPrefixPattern)
-    assert isinstance(patterns[5].pattern, ShellPattern)
+    assert isinstance(patterns[0].val, ShellPattern)
+    assert isinstance(patterns[1].val, FnmatchPattern)
+    assert isinstance(patterns[2].val, RegexPattern)
+    assert isinstance(patterns[3].val, RegexPattern)
+    assert isinstance(patterns[4].val, PathPrefixPattern)
+    assert isinstance(patterns[5].val, ShellPattern)
 
 
 @pytest.mark.parametrize("lines", [
@@ -682,6 +682,10 @@ def test_pattern_matcher():
     for i in ["", "foo", "bar"]:
         assert pm.match(i) is None
 
+    # add extra entries to aid in testing
+    for target in ["A", "B", "Empty", "FileNotFound"]:
+        pm.is_include_cmd[target] = target
+
     pm.add([RegexPattern("^a")], "A")
     pm.add([RegexPattern("^b"), RegexPattern("^z")], "B")
     pm.add([RegexPattern("^$")], "Empty")