Răsfoiți Sursa

Merge pull request #2471 from enkore/issue/2469

Move patterns to module
TW 8 ani în urmă
părinte
comite
7c9a57bee5

+ 1 - 1
src/borg/archive.py

@@ -36,7 +36,7 @@ from .helpers import StableDict
 from .helpers import bin_to_hex
 from .helpers import safe_ns
 from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi
-from .helpers import PathPrefixPattern, FnmatchPattern, IECommand
+from .patterns import PathPrefixPattern, FnmatchPattern, IECommand
 from .item import Item, ArchiveItem
 from .key import key_factory
 from .platform import acl_get, acl_set, set_flags, get_flags, swidth

+ 2 - 2
src/borg/archiver.py

@@ -51,15 +51,15 @@ from .helpers import Manifest
 from .helpers import hardlinkable
 from .helpers import StableDict
 from .helpers import check_extension_modules
-from .helpers import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern
 from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo
 from .helpers import log_multi
-from .helpers import PatternMatcher
 from .helpers import signal_handler, raising_signal_handler, SigHup, SigTerm
 from .helpers import ErrorIgnoringTextIOWrapper
 from .helpers import ProgressIndicatorPercent
 from .helpers import basic_json_data, json_print
 from .helpers import replace_placeholders
+from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern
+from .patterns import PatternMatcher
 from .item import Item
 from .key import key_creator, tam_required_file, tam_required, RepoKey, PassphraseKey
 from .keymanager import KeyManager

+ 1 - 386
src/borg/helpers.py

@@ -18,14 +18,11 @@ import sys
 import textwrap
 import threading
 import time
-import unicodedata
 import uuid
 from binascii import hexlify
 from collections import namedtuple, deque, abc, Counter
 from datetime import datetime, timezone, timedelta
-from enum import Enum
-from fnmatch import translate
-from functools import wraps, partial, lru_cache
+from functools import partial, lru_cache
 from itertools import islice
 from operator import attrgetter
 from string import Formatter
@@ -42,7 +39,6 @@ from . import __version_tuple__ as borg_version_tuple
 from . import chunker
 from . import crypto
 from . import hashindex
-from . import shellpattern
 from .constants import *  # NOQA
 
 
@@ -389,387 +385,6 @@ def parse_timestamp(timestamp):
         return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S').replace(tzinfo=timezone.utc)
 
 
-def parse_patternfile_line(line, roots, ie_commands, fallback):
-    """Parse a pattern-file line and act depending on which command it represents."""
-    ie_command = parse_inclexcl_command(line, fallback=fallback)
-    if ie_command.cmd is IECommand.RootPath:
-        roots.append(ie_command.val)
-    elif ie_command.cmd is IECommand.PatternStyle:
-        fallback = ie_command.val
-    else:
-        # it is some kind of include/exclude command
-        ie_commands.append(ie_command)
-    return fallback
-
-
-def load_pattern_file(fileobj, roots, ie_commands, fallback=None):
-    if fallback is None:
-        fallback = ShellPattern  # ShellPattern is defined later in this module
-    for line in clean_lines(fileobj):
-        fallback = parse_patternfile_line(line, roots, ie_commands, fallback)
-
-
-def load_exclude_file(fileobj, patterns):
-    for patternstr in clean_lines(fileobj):
-        patterns.append(parse_exclude_pattern(patternstr))
-
-
-class ArgparsePatternAction(argparse.Action):
-    def __init__(self, nargs=1, **kw):
-        super().__init__(nargs=nargs, **kw)
-
-    def __call__(self, parser, args, values, option_string=None):
-        parse_patternfile_line(values[0], args.paths, args.patterns, ShellPattern)
-
-
-class ArgparsePatternFileAction(argparse.Action):
-    def __init__(self, nargs=1, **kw):
-        super().__init__(nargs=nargs, **kw)
-
-    def __call__(self, parser, args, values, option_string=None):
-        """Load and parse patterns from a file.
-        Lines empty or starting with '#' after stripping whitespace on both line ends are ignored.
-        """
-        filename = values[0]
-        with open(filename) as f:
-            self.parse(f, args)
-
-    def parse(self, fobj, args):
-        load_pattern_file(fobj, args.paths, args.patterns)
-
-
-class ArgparseExcludeFileAction(ArgparsePatternFileAction):
-    def parse(self, fobj, args):
-        load_exclude_file(fobj, args.patterns)
-
-
-class PatternMatcher:
-    """Represents a collection of pattern objects to match paths against.
-
-    *fallback* is a boolean value that *match()* returns if no matching patterns are found.
-
-    """
-    def __init__(self, fallback=None):
-        self._items = []
-
-        # Value to return from match function when none of the patterns match.
-        self.fallback = fallback
-
-        # optimizations
-        self._path_full_patterns = {}  # full path -> return value
-
-        # indicates whether the last match() call ended on a pattern for which
-        # we should recurse into any matching folder.  Will be set to True or
-        # False when calling match().
-        self.recurse_dir = None
-
-        # whether to recurse into directories when no match is found
-        # TODO: allow modification as a config option?
-        self.recurse_dir_default = True
-
-        self.include_patterns = []
-
-        # TODO: move this info to parse_inclexcl_command and store in PatternBase subclass?
-        self.is_include_cmd = {
-            IECommand.Exclude: False,
-            IECommand.ExcludeNoRecurse: False,
-            IECommand.Include: True
-        }
-
-    def empty(self):
-        return not len(self._items) and not len(self._path_full_patterns)
-
-    def _add(self, pattern, cmd):
-        """*cmd* is an IECommand value.
-        """
-        if isinstance(pattern, PathFullPattern):
-            key = pattern.pattern  # full, normalized path
-            self._path_full_patterns[key] = cmd
-        else:
-            self._items.append((pattern, cmd))
-
-    def add(self, patterns, cmd):
-        """Add list of patterns to internal list. *cmd* indicates whether the
-        pattern is an include/exclude pattern, and whether recursion should be
-        done on excluded folders.
-        """
-        for pattern in patterns:
-            self._add(pattern, cmd)
-
-    def add_includepaths(self, include_paths):
-        """Used to add inclusion-paths from args.paths (from commandline).
-        """
-        include_patterns = [parse_pattern(p, PathPrefixPattern) for p in include_paths]
-        self.add(include_patterns, IECommand.Include)
-        self.fallback = not include_patterns
-        self.include_patterns = include_patterns
-
-    def get_unmatched_include_patterns(self):
-        "Note that this only returns patterns added via *add_includepaths*."
-        return [p for p in self.include_patterns if p.match_count == 0]
-
-    def add_inclexcl(self, patterns):
-        """Add list of patterns (of type CmdTuple) to internal list.
-        """
-        for pattern, cmd in patterns:
-            self._add(pattern, cmd)
-
-    def match(self, path):
-        """Return True or False depending on whether *path* is matched.
-
-        If no match is found among the patterns in this matcher, then the value
-        in self.fallback is returned (defaults to None).
-
-        """
-        path = normalize_path(path)
-        # do a fast lookup for full path matches (note: we do not count such matches):
-        non_existent = object()
-        value = self._path_full_patterns.get(path, non_existent)
-
-        if value is not non_existent:
-            # we have a full path match!
-            # TODO: get from pattern; don't hard-code
-            self.recurse_dir = True
-            return value
-
-        # this is the slow way, if we have many patterns in self._items:
-        for (pattern, cmd) in self._items:
-            if pattern.match(path, normalize=False):
-                self.recurse_dir = pattern.recurse_dir
-                return self.is_include_cmd[cmd]
-
-        # by default we will recurse if there is no match
-        self.recurse_dir = self.recurse_dir_default
-        return self.fallback
-
-
-def normalize_path(path):
-    """normalize paths for MacOS (but do nothing on other platforms)"""
-    # HFS+ converts paths to a canonical form, so users shouldn't be required to enter an exact match.
-    # Windows and Unix filesystems allow different forms, so users always have to enter an exact match.
-    return unicodedata.normalize('NFD', path) if sys.platform == 'darwin' else path
-
-
-class PatternBase:
-    """Shared logic for inclusion/exclusion patterns.
-    """
-    PREFIX = NotImplemented
-
-    def __init__(self, pattern, recurse_dir=False):
-        self.pattern_orig = pattern
-        self.match_count = 0
-        pattern = normalize_path(pattern)
-        self._prepare(pattern)
-        self.recurse_dir = recurse_dir
-
-    def match(self, path, normalize=True):
-        """Return a boolean indicating whether *path* is matched by this pattern.
-
-        If normalize is True (default), the path will get normalized using normalize_path(),
-        otherwise it is assumed that it already is normalized using that function.
-        """
-        if normalize:
-            path = normalize_path(path)
-        matches = self._match(path)
-        if matches:
-            self.match_count += 1
-        return matches
-
-    def __repr__(self):
-        return '%s(%s)' % (type(self), self.pattern)
-
-    def __str__(self):
-        return self.pattern_orig
-
-    def _prepare(self, pattern):
-        "Should set the value of self.pattern"
-        raise NotImplementedError
-
-    def _match(self, path):
-        raise NotImplementedError
-
-
-class PathFullPattern(PatternBase):
-    """Full match of a path."""
-    PREFIX = "pf"
-
-    def _prepare(self, pattern):
-        self.pattern = os.path.normpath(pattern)
-
-    def _match(self, path):
-        return path == self.pattern
-
-
-# For PathPrefixPattern, FnmatchPattern and ShellPattern, we require that the pattern either match the whole path
-# or an initial segment of the path up to but not including a path separator. To unify the two cases, we add a path
-# separator to the end of the path before matching.
-
-
-class PathPrefixPattern(PatternBase):
-    """Literal files or directories listed on the command line
-    for some operations (e.g. extract, but not create).
-    If a directory is specified, all paths that start with that
-    path match as well.  A trailing slash makes no difference.
-    """
-    PREFIX = "pp"
-
-    def _prepare(self, pattern):
-        self.pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep
-
-    def _match(self, path):
-        return (path + os.path.sep).startswith(self.pattern)
-
-
-class FnmatchPattern(PatternBase):
-    """Shell glob patterns to exclude.  A trailing slash means to
-    exclude the contents of a directory, but not the directory itself.
-    """
-    PREFIX = "fm"
-
-    def _prepare(self, pattern):
-        if pattern.endswith(os.path.sep):
-            pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep + '*' + os.path.sep
-        else:
-            pattern = os.path.normpath(pattern) + os.path.sep + '*'
-
-        self.pattern = pattern
-
-        # fnmatch and re.match both cache compiled regular expressions.
-        # Nevertheless, this is about 10 times faster.
-        self.regex = re.compile(translate(self.pattern))
-
-    def _match(self, path):
-        return (self.regex.match(path + os.path.sep) is not None)
-
-
-class ShellPattern(PatternBase):
-    """Shell glob patterns to exclude.  A trailing slash means to
-    exclude the contents of a directory, but not the directory itself.
-    """
-    PREFIX = "sh"
-
-    def _prepare(self, pattern):
-        sep = os.path.sep
-
-        if pattern.endswith(sep):
-            pattern = os.path.normpath(pattern).rstrip(sep) + sep + "**" + sep + "*" + sep
-        else:
-            pattern = os.path.normpath(pattern) + sep + "**" + sep + "*"
-
-        self.pattern = pattern
-        self.regex = re.compile(shellpattern.translate(self.pattern))
-
-    def _match(self, path):
-        return (self.regex.match(path + os.path.sep) is not None)
-
-
-class RegexPattern(PatternBase):
-    """Regular expression to exclude.
-    """
-    PREFIX = "re"
-
-    def _prepare(self, pattern):
-        self.pattern = pattern
-        self.regex = re.compile(pattern)
-
-    def _match(self, path):
-        # Normalize path separators
-        if os.path.sep != '/':
-            path = path.replace(os.path.sep, '/')
-
-        return (self.regex.search(path) is not None)
-
-
-_PATTERN_CLASSES = set([
-    FnmatchPattern,
-    PathFullPattern,
-    PathPrefixPattern,
-    RegexPattern,
-    ShellPattern,
-])
-
-_PATTERN_CLASS_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_CLASSES)
-
-CmdTuple = namedtuple('CmdTuple', 'val cmd')
-
-
-class IECommand(Enum):
-    """A command that an InclExcl file line can represent.
-    """
-    RootPath = 1
-    PatternStyle = 2
-    Include = 3
-    Exclude = 4
-    ExcludeNoRecurse = 5
-
-
-def get_pattern_class(prefix):
-    try:
-        return _PATTERN_CLASS_BY_PREFIX[prefix]
-    except KeyError:
-        raise ValueError("Unknown pattern style: {}".format(prefix)) from None
-
-
-def parse_pattern(pattern, fallback=FnmatchPattern, recurse_dir=True):
-    """Read pattern from string and return an instance of the appropriate implementation class.
-
-    """
-    if len(pattern) > 2 and pattern[2] == ":" and pattern[:2].isalnum():
-        (style, pattern) = (pattern[:2], pattern[3:])
-        cls = get_pattern_class(style)
-    else:
-        cls = fallback
-    return cls(pattern, recurse_dir)
-
-
-def parse_exclude_pattern(pattern_str, fallback=FnmatchPattern):
-    """Read pattern from string and return an instance of the appropriate implementation class.
-    """
-    epattern_obj = parse_pattern(pattern_str, fallback)
-    return CmdTuple(epattern_obj, IECommand.Exclude)
-
-
-def parse_inclexcl_command(cmd_line_str, fallback=ShellPattern):
-    """Read a --patterns-from command from string and return a CmdTuple object."""
-
-    cmd_prefix_map = {
-        '-': IECommand.Exclude,
-        '!': IECommand.ExcludeNoRecurse,
-        '+': IECommand.Include,
-        'R': IECommand.RootPath,
-        'r': IECommand.RootPath,
-        'P': IECommand.PatternStyle,
-        'p': IECommand.PatternStyle,
-    }
-
-    try:
-        cmd = cmd_prefix_map[cmd_line_str[0]]
-
-        # remaining text on command-line following the command character
-        remainder_str = cmd_line_str[1:].lstrip()
-
-        if not remainder_str:
-            raise ValueError("Missing pattern/information!")
-    except (IndexError, KeyError, ValueError):
-        raise argparse.ArgumentTypeError("Unable to parse pattern/command: {}".format(cmd_line_str))
-
-    if cmd is IECommand.RootPath:
-        # TODO: validate string?
-        val = remainder_str
-    elif cmd is IECommand.PatternStyle:
-        # then remainder_str is something like 're' or 'sh'
-        try:
-            val = get_pattern_class(remainder_str)
-        except ValueError:
-            raise argparse.ArgumentTypeError("Invalid pattern style: {}".format(remainder_str))
-    else:
-        # determine recurse_dir based on command type
-        recurse_dir = cmd not in [IECommand.ExcludeNoRecurse]
-        val = parse_pattern(remainder_str, fallback, recurse_dir)
-
-    return CmdTuple(val, cmd)
-
-
 def timestamp(s):
     """Convert a --timestamp=s argument to a datetime object"""
     try:

+ 392 - 0
src/borg/patterns.py

@@ -0,0 +1,392 @@
+import argparse
+import fnmatch
+import os.path
+import re
+import sys
+import unicodedata
+from collections import namedtuple
+from enum import Enum
+
+from . import shellpattern
+from .helpers import clean_lines
+
+
+def parse_patternfile_line(line, roots, ie_commands, fallback):
+    """Parse a pattern-file line and act depending on which command it represents."""
+    ie_command = parse_inclexcl_command(line, fallback=fallback)
+    if ie_command.cmd is IECommand.RootPath:
+        roots.append(ie_command.val)
+    elif ie_command.cmd is IECommand.PatternStyle:
+        fallback = ie_command.val
+    else:
+        # it is some kind of include/exclude command
+        ie_commands.append(ie_command)
+    return fallback
+
+
+def load_pattern_file(fileobj, roots, ie_commands, fallback=None):
+    if fallback is None:
+        fallback = ShellPattern  # ShellPattern is defined later in this module
+    for line in clean_lines(fileobj):
+        fallback = parse_patternfile_line(line, roots, ie_commands, fallback)
+
+
+def load_exclude_file(fileobj, patterns):
+    for patternstr in clean_lines(fileobj):
+        patterns.append(parse_exclude_pattern(patternstr))
+
+
+class ArgparsePatternAction(argparse.Action):
+    def __init__(self, nargs=1, **kw):
+        super().__init__(nargs=nargs, **kw)
+
+    def __call__(self, parser, args, values, option_string=None):
+        parse_patternfile_line(values[0], args.paths, args.patterns, ShellPattern)
+
+
+class ArgparsePatternFileAction(argparse.Action):
+    def __init__(self, nargs=1, **kw):
+        super().__init__(nargs=nargs, **kw)
+
+    def __call__(self, parser, args, values, option_string=None):
+        """Load and parse patterns from a file.
+        Lines empty or starting with '#' after stripping whitespace on both line ends are ignored.
+        """
+        filename = values[0]
+        with open(filename) as f:
+            self.parse(f, args)
+
+    def parse(self, fobj, args):
+        load_pattern_file(fobj, args.paths, args.patterns)
+
+
+class ArgparseExcludeFileAction(ArgparsePatternFileAction):
+    def parse(self, fobj, args):
+        load_exclude_file(fobj, args.patterns)
+
+
+class PatternMatcher:
+    """Represents a collection of pattern objects to match paths against.
+
+    *fallback* is a boolean value that *match()* returns if no matching patterns are found.
+
+    """
+    def __init__(self, fallback=None):
+        self._items = []
+
+        # Value to return from match function when none of the patterns match.
+        self.fallback = fallback
+
+        # optimizations
+        self._path_full_patterns = {}  # full, normalized path -> IECommand (as stored by _add)
+
+        # indicates whether the last match() call ended on a pattern for which
+        # we should recurse into any matching folder.  Will be set to True or
+        # False when calling match().
+        self.recurse_dir = None
+
+        # whether to recurse into directories when no match is found
+        # TODO: allow modification as a config option?
+        self.recurse_dir_default = True
+
+        self.include_patterns = []
+
+        # TODO: move this info to parse_inclexcl_command and store in PatternBase subclass?
+        self.is_include_cmd = {
+            IECommand.Exclude: False,
+            IECommand.ExcludeNoRecurse: False,
+            IECommand.Include: True
+        }
+
+    def empty(self):
+        return not len(self._items) and not len(self._path_full_patterns)
+
+    def _add(self, pattern, cmd):
+        """*cmd* is an IECommand value.
+        """
+        if isinstance(pattern, PathFullPattern):
+            key = pattern.pattern  # full, normalized path
+            self._path_full_patterns[key] = cmd
+        else:
+            self._items.append((pattern, cmd))
+
+    def add(self, patterns, cmd):
+        """Add list of patterns to internal list. *cmd* indicates whether the
+        pattern is an include/exclude pattern, and whether recursion should be
+        done on excluded folders.
+        """
+        for pattern in patterns:
+            self._add(pattern, cmd)
+
+    def add_includepaths(self, include_paths):
+        """Used to add inclusion-paths from args.paths (from commandline).
+        """
+        include_patterns = [parse_pattern(p, PathPrefixPattern) for p in include_paths]
+        self.add(include_patterns, IECommand.Include)
+        self.fallback = not include_patterns
+        self.include_patterns = include_patterns
+
+    def get_unmatched_include_patterns(self):
+        "Note that this only returns patterns added via *add_includepaths*."
+        return [p for p in self.include_patterns if p.match_count == 0]
+
+    def add_inclexcl(self, patterns):
+        """Add list of patterns (of type CmdTuple) to internal list.
+        """
+        for pattern, cmd in patterns:
+            self._add(pattern, cmd)
+
+    def match(self, path):
+        """Return True or False depending on whether *path* is matched.
+
+        If no match is found among the patterns in this matcher, then the value
+        in self.fallback is returned (defaults to None).
+        NOTE(review): a full-path match returns the stored IECommand as-is, not a bool.
+        """
+        path = normalize_path(path)
+        # do a fast lookup for full path matches (note: we do not count such matches):
+        non_existent = object()
+        value = self._path_full_patterns.get(path, non_existent)
+
+        if value is not non_existent:
+            # we have a full path match!
+            # TODO: get from pattern; don't hard-code
+            self.recurse_dir = True
+            return value
+
+        # this is the slow way, if we have many patterns in self._items:
+        for (pattern, cmd) in self._items:
+            if pattern.match(path, normalize=False):
+                self.recurse_dir = pattern.recurse_dir
+                return self.is_include_cmd[cmd]
+
+        # by default we will recurse if there is no match
+        self.recurse_dir = self.recurse_dir_default
+        return self.fallback
+
+
+def normalize_path(path):
+    """Normalize paths to Unicode NFD on macOS (but do nothing on other platforms)."""
+    # HFS+ converts paths to a canonical form, so users shouldn't be required to enter an exact match.
+    # Windows and Unix filesystems allow different forms, so users always have to enter an exact match.
+    return unicodedata.normalize('NFD', path) if sys.platform == 'darwin' else path
+
+
+class PatternBase:
+    """Shared logic for inclusion/exclusion patterns.
+    """
+    PREFIX = NotImplemented
+
+    def __init__(self, pattern, recurse_dir=False):
+        self.pattern_orig = pattern
+        self.match_count = 0
+        pattern = normalize_path(pattern)
+        self._prepare(pattern)
+        self.recurse_dir = recurse_dir
+
+    def match(self, path, normalize=True):
+        """Return a boolean indicating whether *path* is matched by this pattern.
+
+        If normalize is True (default), the path will get normalized using normalize_path(),
+        otherwise it is assumed that it already is normalized using that function.
+        """
+        if normalize:
+            path = normalize_path(path)
+        matches = self._match(path)
+        if matches:
+            self.match_count += 1
+        return matches
+
+    def __repr__(self):
+        return '%s(%s)' % (type(self), self.pattern)
+
+    def __str__(self):
+        return self.pattern_orig
+
+    def _prepare(self, pattern):
+        "Should set the value of self.pattern"
+        raise NotImplementedError
+
+    def _match(self, path):
+        raise NotImplementedError
+
+
+class PathFullPattern(PatternBase):
+    """Full match of a path."""
+    PREFIX = "pf"
+
+    def _prepare(self, pattern):
+        self.pattern = os.path.normpath(pattern)
+
+    def _match(self, path):
+        return path == self.pattern
+
+
+# For PathPrefixPattern, FnmatchPattern and ShellPattern, we require that the pattern either match the whole path
+# or an initial segment of the path up to but not including a path separator. To unify the two cases, we add a path
+# separator to the end of the path before matching.
+
+
+class PathPrefixPattern(PatternBase):
+    """Literal files or directories listed on the command line
+    for some operations (e.g. extract, but not create).
+    If a directory is specified, all paths that start with that
+    path match as well.  A trailing slash makes no difference.
+    """
+    PREFIX = "pp"
+
+    def _prepare(self, pattern):
+        self.pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep
+
+    def _match(self, path):
+        return (path + os.path.sep).startswith(self.pattern)
+
+
+class FnmatchPattern(PatternBase):
+    """Shell glob patterns to exclude.  A trailing slash means to
+    exclude the contents of a directory, but not the directory itself.
+    """
+    PREFIX = "fm"
+
+    def _prepare(self, pattern):
+        if pattern.endswith(os.path.sep):
+            pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep + '*' + os.path.sep
+        else:
+            pattern = os.path.normpath(pattern) + os.path.sep + '*'
+
+        self.pattern = pattern
+
+        # fnmatch and re.match both cache compiled regular expressions.
+        # Nevertheless, this is about 10 times faster.
+        self.regex = re.compile(fnmatch.translate(self.pattern))
+
+    def _match(self, path):
+        return (self.regex.match(path + os.path.sep) is not None)
+
+
+class ShellPattern(PatternBase):
+    """Shell glob patterns to exclude, compiled via shellpattern.translate (not fnmatch).
+    A trailing slash means to exclude the contents of a directory, but not the directory itself.
+    """
+    PREFIX = "sh"
+
+    def _prepare(self, pattern):
+        sep = os.path.sep
+
+        if pattern.endswith(sep):
+            pattern = os.path.normpath(pattern).rstrip(sep) + sep + "**" + sep + "*" + sep
+        else:
+            pattern = os.path.normpath(pattern) + sep + "**" + sep + "*"
+
+        self.pattern = pattern
+        self.regex = re.compile(shellpattern.translate(self.pattern))
+
+    def _match(self, path):
+        return (self.regex.match(path + os.path.sep) is not None)
+
+
+class RegexPattern(PatternBase):
+    """Regular expression to exclude.
+    """
+    PREFIX = "re"
+
+    def _prepare(self, pattern):
+        self.pattern = pattern
+        self.regex = re.compile(pattern)
+
+    def _match(self, path):
+        # Normalize path separators
+        if os.path.sep != '/':
+            path = path.replace(os.path.sep, '/')
+
+        return (self.regex.search(path) is not None)
+
+
+_PATTERN_CLASSES = {
+    FnmatchPattern,
+    PathFullPattern,
+    PathPrefixPattern,
+    RegexPattern,
+    ShellPattern,
+}
+
+_PATTERN_CLASS_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_CLASSES)
+
+CmdTuple = namedtuple('CmdTuple', 'val cmd')
+
+
+class IECommand(Enum):
+    """A command that an InclExcl file line can represent.
+    """
+    RootPath = 1
+    PatternStyle = 2
+    Include = 3
+    Exclude = 4
+    ExcludeNoRecurse = 5
+
+
+def get_pattern_class(prefix):
+    try:
+        return _PATTERN_CLASS_BY_PREFIX[prefix]
+    except KeyError:
+        raise ValueError("Unknown pattern style: {}".format(prefix)) from None
+
+
+def parse_pattern(pattern, fallback=FnmatchPattern, recurse_dir=True):
+    """Read pattern from string and return an instance of the appropriate implementation class.
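+    A leading two-character alphanumeric style prefix ("xx:") selects the class; otherwise *fallback* is used.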
+    """
+    if len(pattern) > 2 and pattern[2] == ":" and pattern[:2].isalnum():
+        (style, pattern) = (pattern[:2], pattern[3:])
+        cls = get_pattern_class(style)
+    else:
+        cls = fallback
+    return cls(pattern, recurse_dir)
+
+
+def parse_exclude_pattern(pattern_str, fallback=FnmatchPattern):
+    """Read an exclude pattern from string and return a CmdTuple of (pattern object, IECommand.Exclude).
+    """
+    epattern_obj = parse_pattern(pattern_str, fallback)
+    return CmdTuple(epattern_obj, IECommand.Exclude)
+
+
+def parse_inclexcl_command(cmd_line_str, fallback=ShellPattern):
+    """Read a --patterns-from command from string and return a CmdTuple object."""
+
+    cmd_prefix_map = {
+        '-': IECommand.Exclude,
+        '!': IECommand.ExcludeNoRecurse,
+        '+': IECommand.Include,
+        'R': IECommand.RootPath,
+        'r': IECommand.RootPath,
+        'P': IECommand.PatternStyle,
+        'p': IECommand.PatternStyle,
+    }
+
+    try:
+        cmd = cmd_prefix_map[cmd_line_str[0]]
+
+        # remaining text on command-line following the command character
+        remainder_str = cmd_line_str[1:].lstrip()
+
+        if not remainder_str:
+            raise ValueError("Missing pattern/information!")
+    except (IndexError, KeyError, ValueError):
+        raise argparse.ArgumentTypeError("Unable to parse pattern/command: {}".format(cmd_line_str))
+
+    if cmd is IECommand.RootPath:
+        # TODO: validate string?
+        val = remainder_str
+    elif cmd is IECommand.PatternStyle:
+        # then remainder_str is something like 're' or 'sh'
+        try:
+            val = get_pattern_class(remainder_str)
+        except ValueError:
+            raise argparse.ArgumentTypeError("Invalid pattern style: {}".format(remainder_str))
+    else:
+        # determine recurse_dir based on command type
+        recurse_dir = cmd not in [IECommand.ExcludeNoRecurse]
+        val = parse_pattern(remainder_str, fallback, recurse_dir)
+
+    return CmdTuple(val, cmd)

+ 2 - 2
src/borg/testsuite/archiver.py

@@ -33,12 +33,12 @@ from ..archiver import Archiver
 from ..cache import Cache
 from ..constants import *  # NOQA
 from ..crypto import bytes_to_long, num_aes_blocks
-from ..helpers import PatternMatcher, parse_pattern, Location, get_security_dir
+from ..helpers import Location, get_security_dir
 from ..helpers import Manifest
 from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
 from ..helpers import bin_to_hex
-from ..helpers import IECommand
 from ..helpers import MAX_S
+from ..patterns import IECommand, PatternMatcher, parse_pattern
 from ..item import Item
 from ..key import KeyfileKeyBase, RepoKey, KeyfileKey, Passphrase, TAMRequiredError
 from ..keymanager import RepoIdMismatch, NotABorgKeyFile

+ 0 - 460
src/borg/testsuite/helpers.py

@@ -23,9 +23,6 @@ from ..helpers import yes, TRUISH, FALSISH, DEFAULTISH
 from ..helpers import StableDict, int_to_bigint, bigint_to_int, bin_to_hex
 from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams
 from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
-from ..helpers import load_exclude_file, load_pattern_file
-from ..helpers import parse_pattern, PatternMatcher
-from ..helpers import PathFullPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, RegexPattern
 from ..helpers import swidth_slice
 from ..helpers import chunkit
 from ..helpers import safe_ns, safe_s, SUPPORT_32BIT_PLATFORMS
@@ -244,463 +241,6 @@ class FormatTimedeltaTestCase(BaseTestCase):
         )
 
 
-def check_patterns(files, pattern, expected):
-    """Utility for testing patterns.
-    """
-    assert all([f == os.path.normpath(f) for f in files]), "Pattern matchers expect normalized input paths"
-
-    matched = [f for f in files if pattern.match(f)]
-
-    assert matched == (files if expected is None else expected)
-
-
-@pytest.mark.parametrize("pattern, expected", [
-    # "None" means all files, i.e. all match the given pattern
-    ("/", []),
-    ("/home", ["/home"]),
-    ("/home///", ["/home"]),
-    ("/./home", ["/home"]),
-    ("/home/user", ["/home/user"]),
-    ("/home/user2", ["/home/user2"]),
-    ("/home/user/.bashrc", ["/home/user/.bashrc"]),
-    ])
-def test_patterns_full(pattern, expected):
-    files = ["/home", "/home/user", "/home/user2", "/home/user/.bashrc", ]
-
-    check_patterns(files, PathFullPattern(pattern), expected)
-
-
-@pytest.mark.parametrize("pattern, expected", [
-    # "None" means all files, i.e. all match the given pattern
-    ("", []),
-    ("relative", []),
-    ("relative/path/", ["relative/path"]),
-    ("relative/path", ["relative/path"]),
-    ])
-def test_patterns_full_relative(pattern, expected):
-    files = ["relative/path", "relative/path2", ]
-
-    check_patterns(files, PathFullPattern(pattern), expected)
-
-
-@pytest.mark.parametrize("pattern, expected", [
-    # "None" means all files, i.e. all match the given pattern
-    ("/", None),
-    ("/./", None),
-    ("", []),
-    ("/home/u", []),
-    ("/home/user", ["/home/user/.profile", "/home/user/.bashrc"]),
-    ("/etc", ["/etc/server/config", "/etc/server/hosts"]),
-    ("///etc//////", ["/etc/server/config", "/etc/server/hosts"]),
-    ("/./home//..//home/user2", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
-    ("/srv", ["/srv/messages", "/srv/dmesg"]),
-    ])
-def test_patterns_prefix(pattern, expected):
-    files = [
-        "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc",
-        "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv/messages", "/srv/dmesg",
-    ]
-
-    check_patterns(files, PathPrefixPattern(pattern), expected)
-
-
-@pytest.mark.parametrize("pattern, expected", [
-    # "None" means all files, i.e. all match the given pattern
-    ("", []),
-    ("foo", []),
-    ("relative", ["relative/path1", "relative/two"]),
-    ("more", ["more/relative"]),
-    ])
-def test_patterns_prefix_relative(pattern, expected):
-    files = ["relative/path1", "relative/two", "more/relative"]
-
-    check_patterns(files, PathPrefixPattern(pattern), expected)
-
-
-@pytest.mark.parametrize("pattern, expected", [
-    # "None" means all files, i.e. all match the given pattern
-    ("/*", None),
-    ("/./*", None),
-    ("*", None),
-    ("*/*", None),
-    ("*///*", None),
-    ("/home/u", []),
-    ("/home/*",
-     ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html",
-      "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
-    ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]),
-    ("/etc/*", ["/etc/server/config", "/etc/server/hosts"]),
-    ("*/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]),
-    ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]),
-    ("/./home//..//home/user2/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
-    ("/srv*", ["/srv/messages", "/srv/dmesg"]),
-    ("/home/*/.thumbnails", ["/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
-    ])
-def test_patterns_fnmatch(pattern, expected):
-    files = [
-        "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc",
-        "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv/messages", "/srv/dmesg",
-        "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails",
-    ]
-
-    check_patterns(files, FnmatchPattern(pattern), expected)
-
-
-@pytest.mark.parametrize("pattern, expected", [
-    # "None" means all files, i.e. all match the given pattern
-    ("*", None),
-    ("**/*", None),
-    ("/**/*", None),
-    ("/./*", None),
-    ("*/*", None),
-    ("*///*", None),
-    ("/home/u", []),
-    ("/home/*",
-     ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html",
-      "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
-    ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]),
-    ("/etc/*/*", ["/etc/server/config", "/etc/server/hosts"]),
-    ("/etc/**/*", ["/etc/server/config", "/etc/server/hosts"]),
-    ("/etc/**/*/*", ["/etc/server/config", "/etc/server/hosts"]),
-    ("*/.pr????e", []),
-    ("**/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]),
-    ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]),
-    ("/./home//..//home/user2/", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
-    ("/./home//..//home/user2/**/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
-    ("/srv*/", ["/srv/messages", "/srv/dmesg", "/srv2/blafasel"]),
-    ("/srv*", ["/srv", "/srv/messages", "/srv/dmesg", "/srv2", "/srv2/blafasel"]),
-    ("/srv/*", ["/srv/messages", "/srv/dmesg"]),
-    ("/srv2/**", ["/srv2", "/srv2/blafasel"]),
-    ("/srv2/**/", ["/srv2/blafasel"]),
-    ("/home/*/.thumbnails", ["/home/foo/.thumbnails"]),
-    ("/home/*/*/.thumbnails", ["/home/foo/bar/.thumbnails"]),
-    ])
-def test_patterns_shell(pattern, expected):
-    files = [
-        "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc",
-        "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv", "/srv/messages", "/srv/dmesg",
-        "/srv2", "/srv2/blafasel", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails",
-    ]
-
-    check_patterns(files, ShellPattern(pattern), expected)
-
-
-@pytest.mark.parametrize("pattern, expected", [
-    # "None" means all files, i.e. all match the given pattern
-    ("", None),
-    (".*", None),
-    ("^/", None),
-    ("^abc$", []),
-    ("^[^/]", []),
-    ("^(?!/srv|/foo|/opt)",
-     ["/home", "/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile",
-      "/home/user2/public_html/index.html", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", ]),
-    ])
-def test_patterns_regex(pattern, expected):
-    files = [
-        '/srv/data', '/foo/bar', '/home',
-        '/home/user/.profile', '/home/user/.bashrc',
-        '/home/user2/.profile', '/home/user2/public_html/index.html',
-        '/opt/log/messages.txt', '/opt/log/dmesg.txt',
-        "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails",
-    ]
-
-    obj = RegexPattern(pattern)
-    assert str(obj) == pattern
-    assert obj.pattern == pattern
-
-    check_patterns(files, obj, expected)
-
-
-def test_regex_pattern():
-    # The forward slash must match the platform-specific path separator
-    assert RegexPattern("^/$").match("/")
-    assert RegexPattern("^/$").match(os.path.sep)
-    assert not RegexPattern(r"^\\$").match("/")
-
-
-def use_normalized_unicode():
-    return sys.platform in ("darwin",)
-
-
-def _make_test_patterns(pattern):
-    return [PathPrefixPattern(pattern),
-            FnmatchPattern(pattern),
-            RegexPattern("^{}/foo$".format(pattern)),
-            ShellPattern(pattern),
-            ]
-
-
-@pytest.mark.parametrize("pattern", _make_test_patterns("b\N{LATIN SMALL LETTER A WITH ACUTE}"))
-def test_composed_unicode_pattern(pattern):
-    assert pattern.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
-    assert pattern.match("ba\N{COMBINING ACUTE ACCENT}/foo") == use_normalized_unicode()
-
-
-@pytest.mark.parametrize("pattern", _make_test_patterns("ba\N{COMBINING ACUTE ACCENT}"))
-def test_decomposed_unicode_pattern(pattern):
-    assert pattern.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") == use_normalized_unicode()
-    assert pattern.match("ba\N{COMBINING ACUTE ACCENT}/foo")
-
-
-@pytest.mark.parametrize("pattern", _make_test_patterns(str(b"ba\x80", "latin1")))
-def test_invalid_unicode_pattern(pattern):
-    assert not pattern.match("ba/foo")
-    assert pattern.match(str(b"ba\x80/foo", "latin1"))
-
-
-@pytest.mark.parametrize("lines, expected", [
-    # "None" means all files, i.e. none excluded
-    ([], None),
-    (["# Comment only"], None),
-    (["*"], []),
-    (["# Comment",
-      "*/something00.txt",
-      "  *whitespace*  ",
-      # Whitespace before comment
-      " #/ws*",
-      # Empty line
-      "",
-      "# EOF"],
-     ["/more/data", "/home", " #/wsfoobar"]),
-    (["re:.*"], []),
-    (["re:\s"], ["/data/something00.txt", "/more/data", "/home"]),
-    ([r"re:(.)(\1)"], ["/more/data", "/home", "\tstart/whitespace", "/whitespace/end\t"]),
-    (["", "", "",
-      "# This is a test with mixed pattern styles",
-      # Case-insensitive pattern
-      "re:(?i)BAR|ME$",
-      "",
-      "*whitespace*",
-      "fm:*/something00*"],
-     ["/more/data"]),
-    ([r"  re:^\s  "], ["/data/something00.txt", "/more/data", "/home", "/whitespace/end\t"]),
-    ([r"  re:\s$  "], ["/data/something00.txt", "/more/data", "/home", " #/wsfoobar", "\tstart/whitespace"]),
-    (["pp:./"], None),
-    (["pp:/"], [" #/wsfoobar", "\tstart/whitespace"]),
-    (["pp:aaabbb"], None),
-    (["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]),
-    (["/nomatch", "/more/*"],
-     ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']),
-    # the order of exclude patterns shouldn't matter
-    (["/more/*", "/nomatch"],
-     ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']),
-    ])
-def test_exclude_patterns_from_file(tmpdir, lines, expected):
-    files = [
-        '/data/something00.txt', '/more/data', '/home',
-        ' #/wsfoobar',
-        '\tstart/whitespace',
-        '/whitespace/end\t',
-    ]
-
-    def evaluate(filename):
-        patterns = []
-        load_exclude_file(open(filename, "rt"), patterns)
-        matcher = PatternMatcher(fallback=True)
-        matcher.add_inclexcl(patterns)
-        return [path for path in files if matcher.match(path)]
-
-    exclfile = tmpdir.join("exclude.txt")
-
-    with exclfile.open("wt") as fh:
-        fh.write("\n".join(lines))
-
-    assert evaluate(str(exclfile)) == (files if expected is None else expected)
-
-
-@pytest.mark.parametrize("lines, expected_roots, expected_numpatterns", [
-    # "None" means all files, i.e. none excluded
-    ([], [], 0),
-    (["# Comment only"], [], 0),
-    (["- *"], [], 1),
-    (["+fm:*/something00.txt",
-      "-/data"], [], 2),
-    (["R /"], ["/"], 0),
-    (["R /",
-      "# comment"], ["/"], 0),
-    (["# comment",
-      "- /data",
-      "R /home"], ["/home"], 1),
-])
-def test_load_patterns_from_file(tmpdir, lines, expected_roots, expected_numpatterns):
-    def evaluate(filename):
-        roots = []
-        inclexclpatterns = []
-        load_pattern_file(open(filename, "rt"), roots, inclexclpatterns)
-        return roots, len(inclexclpatterns)
-    patternfile = tmpdir.join("patterns.txt")
-
-    with patternfile.open("wt") as fh:
-        fh.write("\n".join(lines))
-
-    roots, numpatterns = evaluate(str(patternfile))
-    assert roots == expected_roots
-    assert numpatterns == expected_numpatterns
-
-
-def test_switch_patterns_style():
-    patterns = """\
-        +0_initial_default_is_shell
-        p fm
-        +1_fnmatch
-        P re
-        +2_regex
-        +3_more_regex
-        P pp
-        +4_pathprefix
-        p fm
-        p sh
-        +5_shell
-    """
-    pattern_file = io.StringIO(patterns)
-    roots, patterns = [], []
-    load_pattern_file(pattern_file, roots, patterns)
-    assert len(patterns) == 6
-    assert isinstance(patterns[0].val, ShellPattern)
-    assert isinstance(patterns[1].val, FnmatchPattern)
-    assert isinstance(patterns[2].val, RegexPattern)
-    assert isinstance(patterns[3].val, RegexPattern)
-    assert isinstance(patterns[4].val, PathPrefixPattern)
-    assert isinstance(patterns[5].val, ShellPattern)
-
-
-@pytest.mark.parametrize("lines", [
-    (["X /data"]),  # illegal pattern type prefix
-    (["/data"]),    # need a pattern type prefix
-])
-def test_load_invalid_patterns_from_file(tmpdir, lines):
-    patternfile = tmpdir.join("patterns.txt")
-    with patternfile.open("wt") as fh:
-        fh.write("\n".join(lines))
-    filename = str(patternfile)
-    with pytest.raises(argparse.ArgumentTypeError):
-        roots = []
-        inclexclpatterns = []
-        load_pattern_file(open(filename, "rt"), roots, inclexclpatterns)
-
-
-@pytest.mark.parametrize("lines, expected", [
-    # "None" means all files, i.e. none excluded
-    ([], None),
-    (["# Comment only"], None),
-    (["- *"], []),
-    # default match type is sh: for patterns -> * doesn't match a /
-    (["-*/something0?.txt"],
-     ['/data', '/data/something00.txt', '/data/subdir/something01.txt',
-      '/home', '/home/leo', '/home/leo/t', '/home/other']),
-    (["-fm:*/something00.txt"],
-     ['/data', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t', '/home/other']),
-    (["-fm:*/something0?.txt"],
-     ["/data", '/home', '/home/leo', '/home/leo/t', '/home/other']),
-    (["+/*/something0?.txt",
-      "-/data"],
-     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
-    (["+fm:*/something00.txt",
-      "-/data"],
-     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
-    # include /home/leo and exclude the rest of /home:
-    (["+/home/leo",
-      "-/home/*"],
-     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
-    # wrong order, /home/leo is already excluded by -/home/*:
-    (["-/home/*",
-      "+/home/leo"],
-     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home']),
-    (["+fm:/home/leo",
-      "-/home/"],
-     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
-])
-def test_inclexcl_patterns_from_file(tmpdir, lines, expected):
-    files = [
-        '/data', '/data/something00.txt', '/data/subdir/something01.txt',
-        '/home', '/home/leo', '/home/leo/t', '/home/other'
-    ]
-
-    def evaluate(filename):
-        matcher = PatternMatcher(fallback=True)
-        roots = []
-        inclexclpatterns = []
-        load_pattern_file(open(filename, "rt"), roots, inclexclpatterns)
-        matcher.add_inclexcl(inclexclpatterns)
-        return [path for path in files if matcher.match(path)]
-
-    patternfile = tmpdir.join("patterns.txt")
-
-    with patternfile.open("wt") as fh:
-        fh.write("\n".join(lines))
-
-    assert evaluate(str(patternfile)) == (files if expected is None else expected)
-
-
-@pytest.mark.parametrize("pattern, cls", [
-    ("", FnmatchPattern),
-
-    # Default style
-    ("*", FnmatchPattern),
-    ("/data/*", FnmatchPattern),
-
-    # fnmatch style
-    ("fm:", FnmatchPattern),
-    ("fm:*", FnmatchPattern),
-    ("fm:/data/*", FnmatchPattern),
-    ("fm:fm:/data/*", FnmatchPattern),
-
-    # Regular expression
-    ("re:", RegexPattern),
-    ("re:.*", RegexPattern),
-    ("re:^/something/", RegexPattern),
-    ("re:re:^/something/", RegexPattern),
-
-    # Path prefix
-    ("pp:", PathPrefixPattern),
-    ("pp:/", PathPrefixPattern),
-    ("pp:/data/", PathPrefixPattern),
-    ("pp:pp:/data/", PathPrefixPattern),
-
-    # Shell-pattern style
-    ("sh:", ShellPattern),
-    ("sh:*", ShellPattern),
-    ("sh:/data/*", ShellPattern),
-    ("sh:sh:/data/*", ShellPattern),
-    ])
-def test_parse_pattern(pattern, cls):
-    assert isinstance(parse_pattern(pattern), cls)
-
-
-@pytest.mark.parametrize("pattern", ["aa:", "fo:*", "00:", "x1:abc"])
-def test_parse_pattern_error(pattern):
-    with pytest.raises(ValueError):
-        parse_pattern(pattern)
-
-
-def test_pattern_matcher():
-    pm = PatternMatcher()
-
-    assert pm.fallback is None
-
-    for i in ["", "foo", "bar"]:
-        assert pm.match(i) is None
-
-    # add extra entries to aid in testing
-    for target in ["A", "B", "Empty", "FileNotFound"]:
-        pm.is_include_cmd[target] = target
-
-    pm.add([RegexPattern("^a")], "A")
-    pm.add([RegexPattern("^b"), RegexPattern("^z")], "B")
-    pm.add([RegexPattern("^$")], "Empty")
-    pm.fallback = "FileNotFound"
-
-    assert pm.match("") == "Empty"
-    assert pm.match("aaa") == "A"
-    assert pm.match("bbb") == "B"
-    assert pm.match("ccc") == "FileNotFound"
-    assert pm.match("xyz") == "FileNotFound"
-    assert pm.match("z") == "B"
-
-    assert PatternMatcher(fallback="hey!").fallback == "hey!"
-
-
 def test_chunkerparams():
     assert ChunkerParams('19,23,21,4095') == (19, 23, 21, 4095)
     assert ChunkerParams('10,23,16,4095') == (10, 23, 16, 4095)

+ 467 - 0
src/borg/testsuite/patterns.py

@@ -0,0 +1,467 @@
+import argparse
+import io
+import os.path
+import sys
+
+import pytest
+
+from ..patterns import PathFullPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, RegexPattern
+from ..patterns import load_exclude_file, load_pattern_file
+from ..patterns import parse_pattern, PatternMatcher
+
+
+def check_patterns(files, pattern, expected):
+    """Assert that *pattern* matches exactly the *expected* entries of *files*.
+
+    Passing ``None`` as *expected* means every entry of *files* must match.
+    The comparison is order-sensitive: matches are collected in *files* order.
+    """
+    assert all(path == os.path.normpath(path) for path in files), \
+        "Pattern matchers expect normalized input paths"
+
+    wanted = files if expected is None else expected
+    hits = [path for path in files if pattern.match(path)]
+
+    assert hits == wanted
+
+
+@pytest.mark.parametrize("pattern, expected", [
+    # "None" means all files, i.e. all match the given pattern
+    ("/", []),
+    ("/home", ["/home"]),
+    ("/home///", ["/home"]),
+    ("/./home", ["/home"]),
+    ("/home/user", ["/home/user"]),
+    ("/home/user2", ["/home/user2"]),
+    ("/home/user/.bashrc", ["/home/user/.bashrc"]),
+    ])
+def test_patterns_full(pattern, expected):
+    """Check PathFullPattern against a fixed set of absolute paths."""
+    candidates = ["/home", "/home/user", "/home/user2", "/home/user/.bashrc"]
+    matcher = PathFullPattern(pattern)
+
+    check_patterns(candidates, matcher, expected)
+
+
+@pytest.mark.parametrize("pattern, expected", [
+    # "None" means all files, i.e. all match the given pattern
+    ("", []),
+    ("relative", []),
+    ("relative/path/", ["relative/path"]),
+    ("relative/path", ["relative/path"]),
+    ])
+def test_patterns_full_relative(pattern, expected):
+    """Check PathFullPattern against a fixed set of relative paths."""
+    candidates = ["relative/path", "relative/path2"]
+    matcher = PathFullPattern(pattern)
+
+    check_patterns(candidates, matcher, expected)
+
+
+@pytest.mark.parametrize("pattern, expected", [
+    # "None" means all files, i.e. all match the given pattern
+    ("/", None),
+    ("/./", None),
+    ("", []),
+    ("/home/u", []),
+    ("/home/user", ["/home/user/.profile", "/home/user/.bashrc"]),
+    ("/etc", ["/etc/server/config", "/etc/server/hosts"]),
+    ("///etc//////", ["/etc/server/config", "/etc/server/hosts"]),
+    ("/./home//..//home/user2", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
+    ("/srv", ["/srv/messages", "/srv/dmesg"]),
+    ])
+def test_patterns_prefix(pattern, expected):
+    """Check PathPrefixPattern against a fixed set of absolute paths."""
+    candidates = [
+        "/etc/server/config",
+        "/etc/server/hosts",
+        "/home",
+        "/home/user/.profile",
+        "/home/user/.bashrc",
+        "/home/user2/.profile",
+        "/home/user2/public_html/index.html",
+        "/srv/messages",
+        "/srv/dmesg",
+    ]
+    matcher = PathPrefixPattern(pattern)
+
+    check_patterns(candidates, matcher, expected)
+
+
+@pytest.mark.parametrize("pattern, expected", [
+    # "None" means all files, i.e. all match the given pattern
+    ("", []),
+    ("foo", []),
+    ("relative", ["relative/path1", "relative/two"]),
+    ("more", ["more/relative"]),
+    ])
+def test_patterns_prefix_relative(pattern, expected):
+    """Check PathPrefixPattern against a fixed set of relative paths."""
+    candidates = [
+        "relative/path1",
+        "relative/two",
+        "more/relative",
+    ]
+    matcher = PathPrefixPattern(pattern)
+
+    check_patterns(candidates, matcher, expected)
+
+
+@pytest.mark.parametrize("pattern, expected", [
+    # "None" means all files, i.e. all match the given pattern
+    ("/*", None),
+    ("/./*", None),
+    ("*", None),
+    ("*/*", None),
+    ("*///*", None),
+    ("/home/u", []),
+    ("/home/*",
+     ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html",
+      "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
+    ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]),
+    ("/etc/*", ["/etc/server/config", "/etc/server/hosts"]),
+    ("*/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]),
+    ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]),
+    ("/./home//..//home/user2/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
+    ("/srv*", ["/srv/messages", "/srv/dmesg"]),
+    ("/home/*/.thumbnails", ["/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
+    ])
+def test_patterns_fnmatch(pattern, expected):
+    """Check FnmatchPattern against a fixed set of absolute paths."""
+    candidates = [
+        "/etc/server/config",
+        "/etc/server/hosts",
+        "/home",
+        "/home/user/.profile",
+        "/home/user/.bashrc",
+        "/home/user2/.profile",
+        "/home/user2/public_html/index.html",
+        "/srv/messages",
+        "/srv/dmesg",
+        "/home/foo/.thumbnails",
+        "/home/foo/bar/.thumbnails",
+    ]
+    matcher = FnmatchPattern(pattern)
+
+    check_patterns(candidates, matcher, expected)
+
+
+@pytest.mark.parametrize("pattern, expected", [
+    # "None" means all files, i.e. all match the given pattern
+    ("*", None),
+    ("**/*", None),
+    ("/**/*", None),
+    ("/./*", None),
+    ("*/*", None),
+    ("*///*", None),
+    ("/home/u", []),
+    ("/home/*",
+     ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html",
+      "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
+    ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]),
+    ("/etc/*/*", ["/etc/server/config", "/etc/server/hosts"]),
+    ("/etc/**/*", ["/etc/server/config", "/etc/server/hosts"]),
+    ("/etc/**/*/*", ["/etc/server/config", "/etc/server/hosts"]),
+    ("*/.pr????e", []),
+    ("**/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]),
+    ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]),
+    ("/./home//..//home/user2/", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
+    ("/./home//..//home/user2/**/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
+    ("/srv*/", ["/srv/messages", "/srv/dmesg", "/srv2/blafasel"]),
+    ("/srv*", ["/srv", "/srv/messages", "/srv/dmesg", "/srv2", "/srv2/blafasel"]),
+    ("/srv/*", ["/srv/messages", "/srv/dmesg"]),
+    ("/srv2/**", ["/srv2", "/srv2/blafasel"]),
+    ("/srv2/**/", ["/srv2/blafasel"]),
+    ("/home/*/.thumbnails", ["/home/foo/.thumbnails"]),
+    ("/home/*/*/.thumbnails", ["/home/foo/bar/.thumbnails"]),
+    ])
+def test_patterns_shell(pattern, expected):
+    """Check ShellPattern against a fixed set of absolute paths."""
+    candidates = [
+        "/etc/server/config",
+        "/etc/server/hosts",
+        "/home",
+        "/home/user/.profile",
+        "/home/user/.bashrc",
+        "/home/user2/.profile",
+        "/home/user2/public_html/index.html",
+        "/srv",
+        "/srv/messages",
+        "/srv/dmesg",
+        "/srv2",
+        "/srv2/blafasel",
+        "/home/foo/.thumbnails",
+        "/home/foo/bar/.thumbnails",
+    ]
+    matcher = ShellPattern(pattern)
+
+    check_patterns(candidates, matcher, expected)
+
+
+@pytest.mark.parametrize("pattern, expected", [
+    # "None" means all files, i.e. all match the given pattern
+    ("", None),
+    (".*", None),
+    ("^/", None),
+    ("^abc$", []),
+    ("^[^/]", []),
+    ("^(?!/srv|/foo|/opt)",
+     ["/home", "/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile",
+      "/home/user2/public_html/index.html", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", ]),
+    ])
+def test_patterns_regex(pattern, expected):
+    """Check RegexPattern matching and that the object exposes its source text.
+
+    Both ``str(obj)`` and ``obj.pattern`` must give back the original pattern.
+    """
+    candidates = [
+        "/srv/data",
+        "/foo/bar",
+        "/home",
+        "/home/user/.profile",
+        "/home/user/.bashrc",
+        "/home/user2/.profile",
+        "/home/user2/public_html/index.html",
+        "/opt/log/messages.txt",
+        "/opt/log/dmesg.txt",
+        "/home/foo/.thumbnails",
+        "/home/foo/bar/.thumbnails",
+    ]
+
+    regex = RegexPattern(pattern)
+    assert str(regex) == pattern
+    assert regex.pattern == pattern
+
+    check_patterns(candidates, regex, expected)
+
+
+def test_regex_pattern():
+    """Spot-check how RegexPattern treats "/" versus the platform path separator."""
+    # The forward slash must match the platform-specific path separator
+    assert RegexPattern("^/$").match("/")
+    assert RegexPattern("^/$").match(os.path.sep)
+    assert not RegexPattern(r"^\\$").match("/")
+
+
+def use_normalized_unicode():
+    """Return True when ``sys.platform`` is "darwin", False otherwise.
+
+    The unicode pattern tests use this as the expected outcome when a pattern
+    and a path differ only in composed vs. decomposed form.
+    """
+    return sys.platform == "darwin"
+
+
+def _make_test_patterns(pattern):
+    """Build a prefix, fnmatch, regex and shell pattern object from *pattern*.
+
+    The regex variant is anchored and expects a trailing "/foo" component.
+    """
+    anchored_regex = "^{}/foo$".format(pattern)
+    return [
+        PathPrefixPattern(pattern),
+        FnmatchPattern(pattern),
+        RegexPattern(anchored_regex),
+        ShellPattern(pattern),
+    ]
+
+
+@pytest.mark.parametrize("pattern", _make_test_patterns("b\N{LATIN SMALL LETTER A WITH ACUTE}"))
+def test_composed_unicode_pattern(pattern):
+    """A composed pattern always matches the composed path.
+
+    The decomposed path is expected to match exactly when
+    use_normalized_unicode() is true.
+    """
+    composed = "b\N{LATIN SMALL LETTER A WITH ACUTE}/foo"
+    decomposed = "ba\N{COMBINING ACUTE ACCENT}/foo"
+    assert pattern.match(composed)
+    assert pattern.match(decomposed) == use_normalized_unicode()
+
+
+@pytest.mark.parametrize("pattern", _make_test_patterns("ba\N{COMBINING ACUTE ACCENT}"))
+def test_decomposed_unicode_pattern(pattern):
+    """A decomposed pattern always matches the decomposed path.
+
+    The composed path is expected to match exactly when
+    use_normalized_unicode() is true.
+    """
+    composed = "b\N{LATIN SMALL LETTER A WITH ACUTE}/foo"
+    decomposed = "ba\N{COMBINING ACUTE ACCENT}/foo"
+    assert pattern.match(composed) == use_normalized_unicode()
+    assert pattern.match(decomposed)
+
+
+@pytest.mark.parametrize("pattern", _make_test_patterns(str(b"ba\x80", "latin1")))
+def test_invalid_unicode_pattern(pattern):
+    """A pattern holding the latin1-decoded byte 0x80 matches only paths that contain it."""
+    odd_path = str(b"ba\x80/foo", "latin1")
+    assert not pattern.match("ba/foo")
+    assert pattern.match(odd_path)
+
+
+@pytest.mark.parametrize("lines, expected", [
+    # "None" means all files, i.e. none excluded
+    ([], None),
+    (["# Comment only"], None),
+    (["*"], []),
+    (["# Comment",
+      "*/something00.txt",
+      "  *whitespace*  ",
+      # Whitespace before comment
+      " #/ws*",
+      # Empty line
+      "",
+      "# EOF"],
+     ["/more/data", "/home", " #/wsfoobar"]),
+    (["re:.*"], []),
+    # raw string: "\s" is not a valid string escape and triggers an invalid-escape warning otherwise
+    ([r"re:\s"], ["/data/something00.txt", "/more/data", "/home"]),
+    ([r"re:(.)(\1)"], ["/more/data", "/home", "\tstart/whitespace", "/whitespace/end\t"]),
+    (["", "", "",
+      "# This is a test with mixed pattern styles",
+      # Case-insensitive pattern
+      "re:(?i)BAR|ME$",
+      "",
+      "*whitespace*",
+      "fm:*/something00*"],
+     ["/more/data"]),
+    ([r"  re:^\s  "], ["/data/something00.txt", "/more/data", "/home", "/whitespace/end\t"]),
+    ([r"  re:\s$  "], ["/data/something00.txt", "/more/data", "/home", " #/wsfoobar", "\tstart/whitespace"]),
+    (["pp:./"], None),
+    (["pp:/"], [" #/wsfoobar", "\tstart/whitespace"]),
+    (["pp:aaabbb"], None),
+    (["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]),
+    (["/nomatch", "/more/*"],
+     ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']),
+    # the order of exclude patterns shouldn't matter
+    (["/more/*", "/nomatch"],
+     ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']),
+    ])
+def test_exclude_patterns_from_file(tmpdir, lines, expected):
+    """Write *lines* to an exclude file, load it and check which paths remain.
+
+    *expected* lists the paths for which a ``PatternMatcher(fallback=True)``
+    fed with the loaded patterns still returns a truthy result; ``None``
+    stands for all of *files*.
+    """
+    files = [
+        '/data/something00.txt', '/more/data', '/home',
+        ' #/wsfoobar',
+        '\tstart/whitespace',
+        '/whitespace/end\t',
+    ]
+
+    def evaluate(filename):
+        patterns = []
+        # close the exclude file deterministically instead of leaking the handle
+        with open(filename, "rt") as fd:
+            load_exclude_file(fd, patterns)
+        matcher = PatternMatcher(fallback=True)
+        matcher.add_inclexcl(patterns)
+        return [path for path in files if matcher.match(path)]
+
+    exclfile = tmpdir.join("exclude.txt")
+
+    with exclfile.open("wt") as fh:
+        fh.write("\n".join(lines))
+
+    assert evaluate(str(exclfile)) == (files if expected is None else expected)
+
+
+@pytest.mark.parametrize("lines, expected_roots, expected_numpatterns", [
+    # each case: (file lines, expected root paths, expected number of include/exclude patterns)
+    ([], [], 0),
+    (["# Comment only"], [], 0),
+    (["- *"], [], 1),
+    (["+fm:*/something00.txt",
+      "-/data"], [], 2),
+    (["R /"], ["/"], 0),
+    (["R /",
+      "# comment"], ["/"], 0),
+    (["# comment",
+      "- /data",
+      "R /home"], ["/home"], 1),
+])
+def test_load_patterns_from_file(tmpdir, lines, expected_roots, expected_numpatterns):
+    """Load a pattern file and check the collected roots and the pattern count."""
+    def evaluate(filename):
+        roots = []
+        inclexclpatterns = []
+        # close the pattern file deterministically instead of leaking the handle
+        with open(filename, "rt") as fd:
+            load_pattern_file(fd, roots, inclexclpatterns)
+        return roots, len(inclexclpatterns)
+
+    patternfile = tmpdir.join("patterns.txt")
+
+    with patternfile.open("wt") as fh:
+        fh.write("\n".join(lines))
+
+    roots, numpatterns = evaluate(str(patternfile))
+    assert roots == expected_roots
+    assert numpatterns == expected_numpatterns
+
+
+def test_switch_patterns_style():
+    """Style-switch lines in a pattern file change the class of later patterns.
+
+    The first pattern is loaded before any switch and must be a ShellPattern.
+    """
+    pattern_text = """\
+        +0_initial_default_is_shell
+        p fm
+        +1_fnmatch
+        P re
+        +2_regex
+        +3_more_regex
+        P pp
+        +4_pathprefix
+        p fm
+        p sh
+        +5_shell
+    """
+    roots, loaded = [], []
+    load_pattern_file(io.StringIO(pattern_text), roots, loaded)
+    # one expected class per "+" line above, in file order
+    expected_classes = [
+        ShellPattern,
+        FnmatchPattern,
+        RegexPattern,
+        RegexPattern,
+        PathPrefixPattern,
+        ShellPattern,
+    ]
+    assert len(loaded) == 6
+    for entry, cls in zip(loaded, expected_classes):
+        assert isinstance(entry.val, cls)
+
+
+@pytest.mark.parametrize("lines", [
+    (["X /data"]),  # illegal pattern type prefix
+    (["/data"]),    # need a pattern type prefix
+])
+def test_load_invalid_patterns_from_file(tmpdir, lines):
+    """Loading a pattern file with an invalid line must raise ArgumentTypeError."""
+    patternfile = tmpdir.join("patterns.txt")
+    with patternfile.open("wt") as fh:
+        fh.write("\n".join(lines))
+    filename = str(patternfile)
+    roots = []
+    inclexclpatterns = []
+    # the context manager closes the file even though the load is expected to raise
+    with open(filename, "rt") as fd:
+        # keep only the call under test inside raises()
+        with pytest.raises(argparse.ArgumentTypeError):
+            load_pattern_file(fd, roots, inclexclpatterns)
+
+
+@pytest.mark.parametrize("lines, expected", [
+    # "None" means all files, i.e. none excluded
+    ([], None),
+    (["# Comment only"], None),
+    (["- *"], []),
+    # default match type is sh: for patterns -> * doesn't match a /
+    (["-*/something0?.txt"],
+     ['/data', '/data/something00.txt', '/data/subdir/something01.txt',
+      '/home', '/home/leo', '/home/leo/t', '/home/other']),
+    (["-fm:*/something00.txt"],
+     ['/data', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t', '/home/other']),
+    (["-fm:*/something0?.txt"],
+     ["/data", '/home', '/home/leo', '/home/leo/t', '/home/other']),
+    (["+/*/something0?.txt",
+      "-/data"],
+     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
+    (["+fm:*/something00.txt",
+      "-/data"],
+     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
+    # include /home/leo and exclude the rest of /home:
+    (["+/home/leo",
+      "-/home/*"],
+     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
+    # wrong order, /home/leo is already excluded by -/home/*:
+    (["-/home/*",
+      "+/home/leo"],
+     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home']),
+    (["+fm:/home/leo",
+      "-/home/"],
+     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
+])
+def test_inclexcl_patterns_from_file(tmpdir, lines, expected):
+    """Load include/exclude patterns from a file and check which paths remain.
+
+    *expected* lists the paths for which a ``PatternMatcher(fallback=True)``
+    fed with the loaded patterns returns a truthy result; ``None`` stands for
+    all of *files*.
+    """
+    files = [
+        '/data', '/data/something00.txt', '/data/subdir/something01.txt',
+        '/home', '/home/leo', '/home/leo/t', '/home/other'
+    ]
+
+    def evaluate(filename):
+        matcher = PatternMatcher(fallback=True)
+        roots = []
+        inclexclpatterns = []
+        # close the pattern file deterministically instead of leaking the handle
+        with open(filename, "rt") as fd:
+            load_pattern_file(fd, roots, inclexclpatterns)
+        matcher.add_inclexcl(inclexclpatterns)
+        return [path for path in files if matcher.match(path)]
+
+    patternfile = tmpdir.join("patterns.txt")
+
+    with patternfile.open("wt") as fh:
+        fh.write("\n".join(lines))
+
+    assert evaluate(str(patternfile)) == (files if expected is None else expected)
+
+
+@pytest.mark.parametrize("pattern, cls", [
+    (text, expected_cls)
+    for expected_cls, texts in [
+        # Empty string, default style and explicit fnmatch style
+        (FnmatchPattern, ["", "*", "/data/*", "fm:", "fm:*", "fm:/data/*", "fm:fm:/data/*"]),
+        # Regular expression
+        (RegexPattern, ["re:", "re:.*", "re:^/something/", "re:re:^/something/"]),
+        # Path prefix
+        (PathPrefixPattern, ["pp:", "pp:/", "pp:/data/", "pp:pp:/data/"]),
+        # Shell-pattern style
+        (ShellPattern, ["sh:", "sh:*", "sh:/data/*", "sh:sh:/data/*"]),
+    ]
+    for text in texts
+])
+def test_parse_pattern(pattern, cls):
+    """parse_pattern(pattern) must return an instance of the class ``cls``."""
+    parsed = parse_pattern(pattern)
+    assert isinstance(parsed, cls)
+
+
+@pytest.mark.parametrize("pattern", [
+    "aa:",
+    "fo:*",
+    "00:",
+    "x1:abc",
+])
+def test_parse_pattern_error(pattern):
+    """parse_pattern() must raise ValueError for each of these inputs."""
+    with pytest.raises(ValueError):
+        parse_pattern(pattern)
+
+
+def test_pattern_matcher():
+    """Check PatternMatcher's default fallback, pattern matching and explicit fallback."""
+    matcher = PatternMatcher()
+
+    # A fresh matcher has no fallback and therefore matches nothing.
+    assert matcher.fallback is None
+    for path in ("", "foo", "bar"):
+        assert matcher.match(path) is None
+
+    # add extra entries to aid in testing
+    for marker in ("A", "B", "Empty", "FileNotFound"):
+        matcher.is_include_cmd[marker] = marker
+
+    matcher.add([RegexPattern("^a")], "A")
+    matcher.add([RegexPattern("^b"), RegexPattern("^z")], "B")
+    matcher.add([RegexPattern("^$")], "Empty")
+    matcher.fallback = "FileNotFound"
+
+    # (path, value match() is expected to return), checked in this order
+    expectations = [
+        ("", "Empty"),
+        ("aaa", "A"),
+        ("bbb", "B"),
+        ("ccc", "FileNotFound"),
+        ("xyz", "FileNotFound"),
+        ("z", "B"),
+    ]
+    for path, result in expectations:
+        assert matcher.match(path) == result
+
+    # The fallback can also be supplied through the constructor.
+    assert PatternMatcher(fallback="hey!").fallback == "hey!"