Explorar o código

add two new options --pattern and --patterns-from as discussed in #1406

Alexander 'Leo' Bergolth hai 9 anos
pai
achega
876b670d60
Modificáronse 3 ficheiros con 240 adicións e 42 borrados
  1. 84 30
      borg/archiver.py
  2. 65 9
      borg/helpers.py
  3. 91 3
      borg/testsuite/helpers.py

+ 84 - 30
borg/archiver.py

@@ -18,9 +18,9 @@ import collections
 
 from . import __version__
 from .helpers import Error, location_validator, archivename_validator, format_line, format_time, format_file_size, \
-    parse_pattern, PathPrefixPattern, to_localtime, timestamp, safe_timestamp, bin_to_hex, \
-    get_cache_dir, prune_within, prune_split, \
-    Manifest, NoManifestError, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
+    parse_pattern, parse_exclude_pattern, parse_inclexcl_pattern, PathPrefixPattern, to_localtime, timestamp, \
+    safe_timestamp, bin_to_hex, get_cache_dir, prune_within, prune_split, \
+    Manifest, NoManifestError, remove_surrogates, update_patterns, format_archive, check_extension_modules, Statistics, \
     dir_is_tagged, bigint_to_int, ChunkerParams, CompressionSpec, PrefixSpec, is_slow_msgpack, yes, sysinfo, \
     EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR, log_multi, PatternMatcher, ErrorIgnoringTextIOWrapper
 from .helpers import signal_handler, raising_signal_handler, SigHup, SigTerm
@@ -121,6 +121,18 @@ class Archiver:
         if self.output_list and (self.output_filter is None or status in self.output_filter):
             logger.info("%1s %s", status, remove_surrogates(path))
 
+    @staticmethod
+    def build_matcher(inclexcl_patterns, paths):
+        """Create a PatternMatcher from include/exclude patterns and path arguments.
+
+        The include/exclude patterns (if any) are registered first. Each entry of
+        `paths` is then parsed as a path-prefix pattern and registered with value True.
+        The matcher's fallback is True only when no path patterns were added.
+
+        Returns a tuple (matcher, include_patterns) where include_patterns is the list
+        of patterns built from `paths` (empty if `paths` is empty or None).
+        """
+        matcher = PatternMatcher()
+        if inclexcl_patterns:
+            matcher.add_inclexcl(inclexcl_patterns)
+        include_patterns = []
+        if paths:
+            include_patterns = [parse_pattern(path, PathPrefixPattern) for path in paths]
+            matcher.add(include_patterns, True)
+        matcher.fallback = not include_patterns
+        return matcher, include_patterns
+
     def do_serve(self, args):
         """Start in server mode. This command is usually not used manually.
         """
@@ -233,8 +245,7 @@ class Archiver:
     def do_create(self, args, repository, manifest=None, key=None):
         """Create new archive"""
         matcher = PatternMatcher(fallback=True)
-        if args.excludes:
-            matcher.add(args.excludes, False)
+        matcher.add_inclexcl(args.pattern)
 
         def create_inner(archive, cache):
             # Add cache dir to inode_skip list
@@ -424,17 +435,7 @@ class Archiver:
             if sys.platform.startswith(('linux', 'freebsd', 'netbsd', 'openbsd', 'darwin', )):
                 logger.warning('Hint: You likely need to fix your locale setup. E.g. install locales and use: LANG=en_US.UTF-8')
 
-        matcher = PatternMatcher()
-        if args.excludes:
-            matcher.add(args.excludes, False)
-
-        include_patterns = []
-
-        if args.paths:
-            include_patterns.extend(parse_pattern(i, PathPrefixPattern) for i in args.paths)
-            matcher.add(include_patterns, True)
-
-        matcher.fallback = not include_patterns
+        matcher, include_patterns = self.build_matcher(args.pattern, args.paths)
 
         output_list = args.output_list
         dry_run = args.dry_run
@@ -893,8 +894,9 @@ class Archiver:
 
     helptext = collections.OrderedDict()
     helptext['patterns'] = textwrap.dedent('''
-        Exclusion patterns support four separate styles, fnmatch, shell, regular
-        expressions and path prefixes. By default, fnmatch is used. If followed
+        File patterns support four separate styles, fnmatch, shell, regular
+        expressions and path prefixes. By default, fnmatch is used for 
+        `--exclude` patterns and shell-style is used for `--pattern`. If followed
         by a colon (':') the first two characters of a pattern are used as a
         style selector. Explicit style selection is necessary when a
         non-default style is desired or when the desired pattern starts with
@@ -902,12 +904,12 @@ class Archiver:
 
         `Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_, selector `fm:`
 
-            This is the default style.  These patterns use a variant of shell
-            pattern syntax, with '*' matching any number of characters, '?'
-            matching any single character, '[...]' matching any single
-            character specified, including ranges, and '[!...]' matching any
-            character not specified. For the purpose of these patterns, the
-            path separator ('\\' for Windows and '/' on other systems) is not
+            This is the default style for --exclude and --exclude-from.
+            These patterns use a variant of shell pattern syntax, with '*' matching
+            any number of characters, '?' matching any single character, '[...]'
+            matching any single character specified, including ranges, and '[!...]'
+            matching any character not specified. For the purpose of these patterns,
+            the path separator ('\\' for Windows and '/' on other systems) is not
             treated specially. Wrap meta-characters in brackets for a literal
             match (i.e. `[?]` to match the literal character `?`). For a path
             to match a pattern, it must completely match from start to end, or
@@ -918,6 +920,7 @@ class Archiver:
 
         Shell-style patterns, selector `sh:`
 
+            This is the default style for --pattern and --patterns-from.
             Like fnmatch patterns these are similar to shell patterns. The difference
             is that the pattern may include `**/` for matching zero or more directory
             levels, `*` for matching zero or more arbitrary characters with the
@@ -978,7 +981,44 @@ class Archiver:
             re:^/home/[^/]\.tmp/
             sh:/home/*/.thumbnails
             EOF
-            $ borg create --exclude-from exclude.txt backup /\n\n''')
+            $ borg create --exclude-from exclude.txt backup /
+
+            # exclude the contents of /data/docs/ but not /data/docs/pdf
+            $ borg create -e +/data/docs/pdf -e /data/docs/ backup /
+            # equivalent:
+            $ borg create -e +pm:/data/docs/pdf -e -pm:/data/docs/ backup /
+
+
+        A more general way to define filename matching patterns may be passed via
+        `--pattern` and `--patterns-from`. Using these options, you may specify the
+        backup roots (starting points) and patterns for inclusion/exclusion. A
+        root path starts with the prefix `R`, followed by a path (a plain path, not a
+        file pattern). An include rule is specified by `+` followed by a pattern.
+        Exclude rules start with a `-`.
+        Inclusion patterns are useful to e.g. exclude the contents of a directory
+        except for some important files in this directory. The first matching pattern
+        is used so if an include pattern matches before an exclude pattern, the file
+        is backed up.
+
+        Note that the default pattern style for `--pattern` and `--patterns-from` is
+        shell style (`sh:`), so those patterns behave like rsync include/exclude patterns.
+
+        An example `--patterns-from` file could look like that::
+
+            R /
+            # can be rebuild
+            - /home/*/.cache
+            # they're downloads for a reason
+            - /home/*/Downloads
+            # susan is a nice person
+            # include susans home
+            + /home/susan
+            # ... and its contents
+            + /home/susan/*
+            # don't backup the other home directories
+            - /home/*
+
+\n\n''')
     helptext['placeholders'] = textwrap.dedent('''
         Repository (or Archive) URLs, --prefix and --remote-path values support these
         placeholders:
@@ -1339,10 +1379,10 @@ class Archiver:
         subparser.add_argument('--filter', dest='output_filter', metavar='STATUSCHARS',
                                help='only display items with the given status characters')
         subparser.add_argument('-e', '--exclude', dest='excludes',
-                               type=parse_pattern, action='append',
+                               type=parse_exclude_pattern, action='append', dest='pattern',
                                metavar="PATTERN", help='exclude paths matching PATTERN')
         subparser.add_argument('--exclude-from', dest='exclude_files',
-                               type=argparse.FileType('r'), action='append',
+                               type=argparse.FileType('r'), action='append', default=[],
                                metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
         subparser.add_argument('--exclude-caches', dest='exclude_caches',
                                action='store_true', default=False,
@@ -1353,6 +1393,13 @@ class Archiver:
         subparser.add_argument('--keep-tag-files', dest='keep_tag_files',
                                action='store_true', default=False,
                                help='keep tag files of excluded caches/directories')
+        subparser.add_argument('--pattern', dest='pattern',
+                               type=parse_inclexcl_pattern, action='append',
+                               metavar="PATTERN", help='include/exclude paths matching PATTERN')
+        subparser.set_defaults(pattern=[])
+        subparser.add_argument('--patterns-from', dest='pattern_files',
+                               type=argparse.FileType('r'), action='append', default=[],
+                               metavar='PATTERNFILE', help='read include/exclude patterns from PATTERNFILE, one per line')
         subparser.add_argument('-c', '--checkpoint-interval', dest='checkpoint_interval',
                                type=int, default=300, metavar='SECONDS',
                                help='write checkpoint every SECONDS seconds (Default: 300)')
@@ -1423,11 +1470,18 @@ class Archiver:
                                default=False, action='store_true',
                                help='do not actually change any files')
         subparser.add_argument('-e', '--exclude', dest='excludes',
-                               type=parse_pattern, action='append',
+                               type=parse_exclude_pattern, action='append', dest='pattern',
                                metavar="PATTERN", help='exclude paths matching PATTERN')
         subparser.add_argument('--exclude-from', dest='exclude_files',
-                               type=argparse.FileType('r'), action='append',
+                               type=argparse.FileType('r'), action='append', default=[],
                                metavar='EXCLUDEFILE', help='read exclude patterns from EXCLUDEFILE, one per line')
+        subparser.add_argument('--pattern', dest='pattern',
+                               type=parse_inclexcl_pattern, action='append',
+                               metavar="PATTERN", help='include/exclude paths matching PATTERN')
+        subparser.set_defaults(pattern=[])
+        subparser.add_argument('--patterns-from', dest='pattern_files',
+                               type=argparse.FileType('r'), action='append', default=[],
+                               metavar='PATTERNFILE', help='read include/exclude patterns from PATTERNFILE, one per line')
         subparser.add_argument('--numeric-owner', dest='numeric_owner',
                                action='store_true', default=False,
                                help='only obey numeric user and group identifiers')
@@ -1982,7 +2036,7 @@ class Archiver:
             args = self.preprocess_args(args)
         parser = self.build_parser(args)
         args = parser.parse_args(args or ['-h'])
-        update_excludes(args)
+        update_patterns(args)
         return args
 
     def run(self, args):

+ 65 - 9
borg/helpers.py

@@ -312,17 +312,37 @@ def load_excludes(fh):
     both line ends are ignored.
     """
     patterns = (line for line in (i.strip() for i in fh) if not line.startswith('#'))
-    return [parse_pattern(pattern) for pattern in patterns if pattern]
+    return [parse_exclude_pattern(pattern)
+            for pattern in patterns if pattern]
 
 
-def update_excludes(args):
-    """Merge exclude patterns from files with those on command line."""
-    if hasattr(args, 'exclude_files') and args.exclude_files:
-        if not hasattr(args, 'excludes') or args.excludes is None:
-            args.excludes = []
-        for file in args.exclude_files:
-            args.excludes += load_excludes(file)
-            file.close()
+def load_patterns(fh):
+    """Load and parse include/exclude/root patterns from file object.
+
+    Lines that are empty or start with '#' after stripping whitespace on both line ends are ignored.
+
+    Returns a tuple (roots, inclexclpatterns): `roots` is the list of root path strings and
+    `inclexclpatterns` the list of InclExclPattern objects for all other lines, both in file order.
+    Any exception raised by parse_inclexcl_pattern for a malformed line propagates to the caller.
+    """
+    roots = []
+    inclexclpatterns = []
+    for patternline in (i.strip() for i in fh):
+        # Filter blank lines and comments *before* parsing: parse_inclexcl_pattern rejects an
+        # empty string, so a blank line would otherwise abort loading instead of being ignored.
+        if not patternline or patternline.startswith('#'):
+            continue
+        pattern = parse_inclexcl_pattern(patternline)
+        if pattern.ptype is RootPath:
+            roots.append(pattern.pattern)
+        else:
+            inclexclpatterns.append(pattern)
+    return roots, inclexclpatterns
+
+
+def update_patterns(args):
+    """Merge patterns from exclude- and pattern-files with those on command line.
+
+    Pattern files are processed first: their root paths are appended to args.paths and their
+    include/exclude patterns to args.pattern. Patterns from exclude files are appended to
+    args.pattern afterwards. Every file object is closed once it has been read, even if
+    parsing one of its lines raises.
+    """
+    # Not every subcommand defines --patterns-from / --exclude-from, so the attributes
+    # may be missing from the parsed namespace (or be None); treat that as "no files".
+    for file in getattr(args, 'pattern_files', None) or []:
+        with file:
+            roots, inclexclpatterns = load_patterns(file)
+        args.paths += roots
+        args.pattern += inclexclpatterns
+    for file in getattr(args, 'exclude_files', None) or []:
+        with file:
+            excludes = load_excludes(file)
+        args.pattern += excludes
 
 
 class PatternMatcher:
@@ -338,6 +358,12 @@ class PatternMatcher:
         """
         self._items.extend((i, value) for i in patterns)
 
+    def add_inclexcl(self, patterns):
+        """Add a list of patterns (of type InclExclPattern) to the internal list. The pattern's ptype member is
+        returned from the match function when one of the given patterns matches.
+
+        Each InclExclPattern is a (pattern, ptype) tuple and is stored unchanged, so match() can unpack it
+        the same way as the (pattern, value) pairs stored by add().
+        """
+        self._items.extend(patterns)
+
     def match(self, path):
         for (pattern, value) in self._items:
             if pattern.match(path):
@@ -489,6 +515,8 @@ _PATTERN_STYLES = set([
 
 _PATTERN_STYLE_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_STYLES)
 
+# Pairs a pattern with its rule type. ptype is True for include rules, False for exclude rules and
+# RootPath for root entries (in which case `pattern` holds the plain path string, not a pattern object).
+InclExclPattern = namedtuple('InclExclPattern', 'pattern ptype')
+# Unique sentinel used as the ptype of root path entries; it is compared by identity (`is RootPath`).
+RootPath = object()
 
 def parse_pattern(pattern, fallback=FnmatchPattern):
     """Read pattern from string and return an instance of the appropriate implementation class.
@@ -506,6 +534,34 @@ def parse_pattern(pattern, fallback=FnmatchPattern):
     return cls(pattern)
 
 
+def parse_exclude_pattern(pattern, fallback=FnmatchPattern):
+    """Read pattern from string and return it wrapped in an InclExclPattern marked as an exclude rule.
+
+    The string is parsed by parse_pattern (passing `fallback` through); the resulting pattern
+    object is paired with ptype False.
+    """
+    return InclExclPattern(parse_pattern(pattern, fallback), False)
+
+
+def parse_inclexcl_pattern(pattern, fallback=ShellPattern):
+    """Read pattern from string and return an InclExclPattern object.
+
+    The first character selects the rule type: '-' is an exclude rule (ptype False), '+' an
+    include rule (ptype True) and 'R' or 'r' a root path (ptype RootPath). Whitespace between
+    the type character and the remainder is dropped. For a root path the remainder is kept as
+    a plain string; otherwise it is parsed by parse_pattern, passing `fallback` through.
+
+    Raises argparse.ArgumentTypeError if the type character is missing or unknown, or if
+    nothing follows it.
+    """
+    type_prefix_map = {
+        '-': False,
+        '+': True,
+        'R': RootPath,
+        'r': RootPath,
+    }
+    # pattern[:1] is '' for an empty string, which is not a key, so ptype stays None.
+    ptype = type_prefix_map.get(pattern[:1])
+    rest = pattern[1:].lstrip()
+    # Compare against None explicitly: False is a valid ptype (exclude rule).
+    if ptype is None or not rest:
+        # Report the text exactly as given, not the prefix-stripped remainder, so the user
+        # can identify the offending line.
+        raise argparse.ArgumentTypeError("Unable to parse pattern: {}".format(pattern))
+    if ptype is RootPath:
+        return InclExclPattern(rest, ptype)
+    return InclExclPattern(parse_pattern(rest, fallback), ptype)
+
+
 def timestamp(s):
     """Convert a --timestamp=s argument to a datetime object"""
     try:

+ 91 - 3
borg/testsuite/helpers.py

@@ -9,12 +9,13 @@ import sys
 import msgpack
 import msgpack.fallback
 import time
+import argparse
 
 from ..helpers import Location, format_file_size, format_timedelta, format_line, PlaceholderError, make_path_safe, \
     prune_within, prune_split, get_cache_dir, get_keys_dir, get_security_dir, Statistics, is_slow_msgpack, \
     yes, TRUISH, FALSISH, DEFAULTISH, \
     StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, \
-    ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern, \
+    ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, load_patterns, parse_pattern, \
     PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, \
     Buffer
 from . import BaseTestCase, FakeInputs
@@ -424,7 +425,7 @@ def test_invalid_unicode_pattern(pattern):
     (["pp:aaabbb"], None),
     (["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]),
     ])
-def test_patterns_from_file(tmpdir, lines, expected):
+def test_exclude_patterns_from_file(tmpdir, lines, expected):
     files = [
         '/data/something00.txt', '/more/data', '/home',
         ' #/wsfoobar',
@@ -434,7 +435,7 @@ def test_patterns_from_file(tmpdir, lines, expected):
 
     def evaluate(filename):
         matcher = PatternMatcher(fallback=True)
-        matcher.add(load_excludes(open(filename, "rt")), False)
+        matcher.add_inclexcl(load_excludes(open(filename, "rt")))
         return [path for path in files if matcher.match(path)]
 
     exclfile = tmpdir.join("exclude.txt")
@@ -445,6 +446,93 @@ def test_patterns_from_file(tmpdir, lines, expected):
     assert evaluate(str(exclfile)) == (files if expected is None else expected)
 
 
+@pytest.mark.parametrize("lines, expected_roots, expected_numpatterns", [
+    # (lines of the pattern file, expected root paths, expected number of include/exclude patterns)
+    ([], [], 0),
+    (["# Comment only"], [], 0),
+    (["- *"], [], 1),
+    (["+fm:*/something00.txt",
+      "-/data"], [], 2),
+    (["R /"], ["/"], 0),
+    (["R /",
+      "# comment"], ["/"], 0),
+    (["# comment",
+      "- /data",
+      "R /home"], ["/home"], 1),
+])
+def test_load_patterns_from_file(tmpdir, lines, expected_roots, expected_numpatterns):
+    """load_patterns must separate root paths from include/exclude patterns and skip comments."""
+    patternfile = tmpdir.join("exclude.txt")
+
+    with patternfile.open("wt") as fh:
+        fh.write("\n".join(lines))
+
+    # Use a context manager so the file handle is closed even if load_patterns raises.
+    with open(str(patternfile), "rt") as fh:
+        roots, inclexclpatterns = load_patterns(fh)
+
+    assert roots == expected_roots
+    assert len(inclexclpatterns) == expected_numpatterns
+
+
+@pytest.mark.parametrize("lines", [
+    (["X /data"]),  # illegal pattern type prefix
+    (["/data"]),  # need a pattern type prefix
+])
+def test_load_invalid_patterns_from_file(tmpdir, lines):
+    """load_patterns must reject lines without a valid rule type prefix."""
+    patternfile = tmpdir.join("exclude.txt")
+    with patternfile.open("wt") as fh:
+        fh.write("\n".join(lines))
+    with open(str(patternfile), "rt") as fh:
+        # Only the call under test sits inside pytest.raises, so an unrelated failure
+        # (e.g. while opening the file) cannot satisfy the expectation by accident.
+        with pytest.raises(argparse.ArgumentTypeError):
+            load_patterns(fh)
+
+
+@pytest.mark.parametrize("lines, expected", [
+    # "None" means all files, i.e. none excluded
+    ([], None),
+    (["# Comment only"], None),
+    (["- *"], []),
+    # default match type is sh: for patterns -> * doesn't match a /
+    (["-*/something0?.txt"],
+     ['/data', '/data/something00.txt', '/data/subdir/something01.txt',
+      '/home', '/home/leo', '/home/leo/t', '/home/other']),
+    (["-fm:*/something00.txt"],
+     ['/data', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t', '/home/other']),
+    (["-fm:*/something0?.txt"],
+     ["/data", '/home', '/home/leo', '/home/leo/t', '/home/other']),
+    (["+/*/something0?.txt",
+      "-/data"],
+     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
+    (["+fm:*/something00.txt",
+      "-/data"],
+     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
+    (["+fm:/home/leo",
+      "-/home/"],
+     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
+])
+def test_inclexcl_patterns_from_file(tmpdir, lines, expected):
+    """Patterns loaded via load_patterns must select the expected subset of `files`."""
+    files = [
+        '/data', '/data/something00.txt', '/data/subdir/something01.txt',
+        '/home', '/home/leo', '/home/leo/t', '/home/other'
+    ]
+
+    def evaluate(filename):
+        matcher = PatternMatcher(fallback=True)
+        # Close the pattern file deterministically instead of leaking the handle.
+        with open(filename, "rt") as fh:
+            roots, inclexclpatterns = load_patterns(fh)
+        matcher.add_inclexcl(inclexclpatterns)
+        return [path for path in files if matcher.match(path)]
+
+    patternfile = tmpdir.join("exclude.txt")
+
+    with patternfile.open("wt") as fh:
+        fh.write("\n".join(lines))
+
+    assert evaluate(str(patternfile)) == (files if expected is None else expected)
+
+
+    
 @pytest.mark.parametrize("pattern, cls", [
     ("", FnmatchPattern),