Browse Source

Merge pull request #2471 from enkore/issue/2469

Move patterns to module
TW 8 years ago
parent
commit
7c9a57bee5

+ 1 - 1
src/borg/archive.py

@@ -36,7 +36,7 @@ from .helpers import StableDict
 from .helpers import bin_to_hex
 from .helpers import bin_to_hex
 from .helpers import safe_ns
 from .helpers import safe_ns
 from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi
 from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi
-from .helpers import PathPrefixPattern, FnmatchPattern, IECommand
+from .patterns import PathPrefixPattern, FnmatchPattern, IECommand
 from .item import Item, ArchiveItem
 from .item import Item, ArchiveItem
 from .key import key_factory
 from .key import key_factory
 from .platform import acl_get, acl_set, set_flags, get_flags, swidth
 from .platform import acl_get, acl_set, set_flags, get_flags, swidth

+ 2 - 2
src/borg/archiver.py

@@ -51,15 +51,15 @@ from .helpers import Manifest
 from .helpers import hardlinkable
 from .helpers import hardlinkable
 from .helpers import StableDict
 from .helpers import StableDict
 from .helpers import check_extension_modules
 from .helpers import check_extension_modules
-from .helpers import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern
 from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo
 from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo
 from .helpers import log_multi
 from .helpers import log_multi
-from .helpers import PatternMatcher
 from .helpers import signal_handler, raising_signal_handler, SigHup, SigTerm
 from .helpers import signal_handler, raising_signal_handler, SigHup, SigTerm
 from .helpers import ErrorIgnoringTextIOWrapper
 from .helpers import ErrorIgnoringTextIOWrapper
 from .helpers import ProgressIndicatorPercent
 from .helpers import ProgressIndicatorPercent
 from .helpers import basic_json_data, json_print
 from .helpers import basic_json_data, json_print
 from .helpers import replace_placeholders
 from .helpers import replace_placeholders
+from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern
+from .patterns import PatternMatcher
 from .item import Item
 from .item import Item
 from .key import key_creator, tam_required_file, tam_required, RepoKey, PassphraseKey
 from .key import key_creator, tam_required_file, tam_required, RepoKey, PassphraseKey
 from .keymanager import KeyManager
 from .keymanager import KeyManager

+ 1 - 386
src/borg/helpers.py

@@ -18,14 +18,11 @@ import sys
 import textwrap
 import textwrap
 import threading
 import threading
 import time
 import time
-import unicodedata
 import uuid
 import uuid
 from binascii import hexlify
 from binascii import hexlify
 from collections import namedtuple, deque, abc, Counter
 from collections import namedtuple, deque, abc, Counter
 from datetime import datetime, timezone, timedelta
 from datetime import datetime, timezone, timedelta
-from enum import Enum
-from fnmatch import translate
-from functools import wraps, partial, lru_cache
+from functools import partial, lru_cache
 from itertools import islice
 from itertools import islice
 from operator import attrgetter
 from operator import attrgetter
 from string import Formatter
 from string import Formatter
@@ -42,7 +39,6 @@ from . import __version_tuple__ as borg_version_tuple
 from . import chunker
 from . import chunker
 from . import crypto
 from . import crypto
 from . import hashindex
 from . import hashindex
-from . import shellpattern
 from .constants import *  # NOQA
 from .constants import *  # NOQA
 
 
 
 
@@ -389,387 +385,6 @@ def parse_timestamp(timestamp):
         return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S').replace(tzinfo=timezone.utc)
         return datetime.strptime(timestamp, '%Y-%m-%dT%H:%M:%S').replace(tzinfo=timezone.utc)
 
 
 
 
-def parse_patternfile_line(line, roots, ie_commands, fallback):
-    """Parse a pattern-file line and act depending on which command it represents."""
-    ie_command = parse_inclexcl_command(line, fallback=fallback)
-    if ie_command.cmd is IECommand.RootPath:
-        roots.append(ie_command.val)
-    elif ie_command.cmd is IECommand.PatternStyle:
-        fallback = ie_command.val
-    else:
-        # it is some kind of include/exclude command
-        ie_commands.append(ie_command)
-    return fallback
-
-
-def load_pattern_file(fileobj, roots, ie_commands, fallback=None):
-    if fallback is None:
-        fallback = ShellPattern  # ShellPattern is defined later in this module
-    for line in clean_lines(fileobj):
-        fallback = parse_patternfile_line(line, roots, ie_commands, fallback)
-
-
-def load_exclude_file(fileobj, patterns):
-    for patternstr in clean_lines(fileobj):
-        patterns.append(parse_exclude_pattern(patternstr))
-
-
-class ArgparsePatternAction(argparse.Action):
-    def __init__(self, nargs=1, **kw):
-        super().__init__(nargs=nargs, **kw)
-
-    def __call__(self, parser, args, values, option_string=None):
-        parse_patternfile_line(values[0], args.paths, args.patterns, ShellPattern)
-
-
-class ArgparsePatternFileAction(argparse.Action):
-    def __init__(self, nargs=1, **kw):
-        super().__init__(nargs=nargs, **kw)
-
-    def __call__(self, parser, args, values, option_string=None):
-        """Load and parse patterns from a file.
-        Lines empty or starting with '#' after stripping whitespace on both line ends are ignored.
-        """
-        filename = values[0]
-        with open(filename) as f:
-            self.parse(f, args)
-
-    def parse(self, fobj, args):
-        load_pattern_file(fobj, args.paths, args.patterns)
-
-
-class ArgparseExcludeFileAction(ArgparsePatternFileAction):
-    def parse(self, fobj, args):
-        load_exclude_file(fobj, args.patterns)
-
-
-class PatternMatcher:
-    """Represents a collection of pattern objects to match paths against.
-
-    *fallback* is a boolean value that *match()* returns if no matching patterns are found.
-
-    """
-    def __init__(self, fallback=None):
-        self._items = []
-
-        # Value to return from match function when none of the patterns match.
-        self.fallback = fallback
-
-        # optimizations
-        self._path_full_patterns = {}  # full path -> return value
-
-        # indicates whether the last match() call ended on a pattern for which
-        # we should recurse into any matching folder.  Will be set to True or
-        # False when calling match().
-        self.recurse_dir = None
-
-        # whether to recurse into directories when no match is found
-        # TODO: allow modification as a config option?
-        self.recurse_dir_default = True
-
-        self.include_patterns = []
-
-        # TODO: move this info to parse_inclexcl_command and store in PatternBase subclass?
-        self.is_include_cmd = {
-            IECommand.Exclude: False,
-            IECommand.ExcludeNoRecurse: False,
-            IECommand.Include: True
-        }
-
-    def empty(self):
-        return not len(self._items) and not len(self._path_full_patterns)
-
-    def _add(self, pattern, cmd):
-        """*cmd* is an IECommand value.
-        """
-        if isinstance(pattern, PathFullPattern):
-            key = pattern.pattern  # full, normalized path
-            self._path_full_patterns[key] = cmd
-        else:
-            self._items.append((pattern, cmd))
-
-    def add(self, patterns, cmd):
-        """Add list of patterns to internal list. *cmd* indicates whether the
-        pattern is an include/exclude pattern, and whether recursion should be
-        done on excluded folders.
-        """
-        for pattern in patterns:
-            self._add(pattern, cmd)
-
-    def add_includepaths(self, include_paths):
-        """Used to add inclusion-paths from args.paths (from commandline).
-        """
-        include_patterns = [parse_pattern(p, PathPrefixPattern) for p in include_paths]
-        self.add(include_patterns, IECommand.Include)
-        self.fallback = not include_patterns
-        self.include_patterns = include_patterns
-
-    def get_unmatched_include_patterns(self):
-        "Note that this only returns patterns added via *add_includepaths*."
-        return [p for p in self.include_patterns if p.match_count == 0]
-
-    def add_inclexcl(self, patterns):
-        """Add list of patterns (of type CmdTuple) to internal list.
-        """
-        for pattern, cmd in patterns:
-            self._add(pattern, cmd)
-
-    def match(self, path):
-        """Return True or False depending on whether *path* is matched.
-
-        If no match is found among the patterns in this matcher, then the value
-        in self.fallback is returned (defaults to None).
-
-        """
-        path = normalize_path(path)
-        # do a fast lookup for full path matches (note: we do not count such matches):
-        non_existent = object()
-        value = self._path_full_patterns.get(path, non_existent)
-
-        if value is not non_existent:
-            # we have a full path match!
-            # TODO: get from pattern; don't hard-code
-            self.recurse_dir = True
-            return value
-
-        # this is the slow way, if we have many patterns in self._items:
-        for (pattern, cmd) in self._items:
-            if pattern.match(path, normalize=False):
-                self.recurse_dir = pattern.recurse_dir
-                return self.is_include_cmd[cmd]
-
-        # by default we will recurse if there is no match
-        self.recurse_dir = self.recurse_dir_default
-        return self.fallback
-
-
-def normalize_path(path):
-    """normalize paths for MacOS (but do nothing on other platforms)"""
-    # HFS+ converts paths to a canonical form, so users shouldn't be required to enter an exact match.
-    # Windows and Unix filesystems allow different forms, so users always have to enter an exact match.
-    return unicodedata.normalize('NFD', path) if sys.platform == 'darwin' else path
-
-
-class PatternBase:
-    """Shared logic for inclusion/exclusion patterns.
-    """
-    PREFIX = NotImplemented
-
-    def __init__(self, pattern, recurse_dir=False):
-        self.pattern_orig = pattern
-        self.match_count = 0
-        pattern = normalize_path(pattern)
-        self._prepare(pattern)
-        self.recurse_dir = recurse_dir
-
-    def match(self, path, normalize=True):
-        """Return a boolean indicating whether *path* is matched by this pattern.
-
-        If normalize is True (default), the path will get normalized using normalize_path(),
-        otherwise it is assumed that it already is normalized using that function.
-        """
-        if normalize:
-            path = normalize_path(path)
-        matches = self._match(path)
-        if matches:
-            self.match_count += 1
-        return matches
-
-    def __repr__(self):
-        return '%s(%s)' % (type(self), self.pattern)
-
-    def __str__(self):
-        return self.pattern_orig
-
-    def _prepare(self, pattern):
-        "Should set the value of self.pattern"
-        raise NotImplementedError
-
-    def _match(self, path):
-        raise NotImplementedError
-
-
-class PathFullPattern(PatternBase):
-    """Full match of a path."""
-    PREFIX = "pf"
-
-    def _prepare(self, pattern):
-        self.pattern = os.path.normpath(pattern)
-
-    def _match(self, path):
-        return path == self.pattern
-
-
-# For PathPrefixPattern, FnmatchPattern and ShellPattern, we require that the pattern either match the whole path
-# or an initial segment of the path up to but not including a path separator. To unify the two cases, we add a path
-# separator to the end of the path before matching.
-
-
-class PathPrefixPattern(PatternBase):
-    """Literal files or directories listed on the command line
-    for some operations (e.g. extract, but not create).
-    If a directory is specified, all paths that start with that
-    path match as well.  A trailing slash makes no difference.
-    """
-    PREFIX = "pp"
-
-    def _prepare(self, pattern):
-        self.pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep
-
-    def _match(self, path):
-        return (path + os.path.sep).startswith(self.pattern)
-
-
-class FnmatchPattern(PatternBase):
-    """Shell glob patterns to exclude.  A trailing slash means to
-    exclude the contents of a directory, but not the directory itself.
-    """
-    PREFIX = "fm"
-
-    def _prepare(self, pattern):
-        if pattern.endswith(os.path.sep):
-            pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep + '*' + os.path.sep
-        else:
-            pattern = os.path.normpath(pattern) + os.path.sep + '*'
-
-        self.pattern = pattern
-
-        # fnmatch and re.match both cache compiled regular expressions.
-        # Nevertheless, this is about 10 times faster.
-        self.regex = re.compile(translate(self.pattern))
-
-    def _match(self, path):
-        return (self.regex.match(path + os.path.sep) is not None)
-
-
-class ShellPattern(PatternBase):
-    """Shell glob patterns to exclude.  A trailing slash means to
-    exclude the contents of a directory, but not the directory itself.
-    """
-    PREFIX = "sh"
-
-    def _prepare(self, pattern):
-        sep = os.path.sep
-
-        if pattern.endswith(sep):
-            pattern = os.path.normpath(pattern).rstrip(sep) + sep + "**" + sep + "*" + sep
-        else:
-            pattern = os.path.normpath(pattern) + sep + "**" + sep + "*"
-
-        self.pattern = pattern
-        self.regex = re.compile(shellpattern.translate(self.pattern))
-
-    def _match(self, path):
-        return (self.regex.match(path + os.path.sep) is not None)
-
-
-class RegexPattern(PatternBase):
-    """Regular expression to exclude.
-    """
-    PREFIX = "re"
-
-    def _prepare(self, pattern):
-        self.pattern = pattern
-        self.regex = re.compile(pattern)
-
-    def _match(self, path):
-        # Normalize path separators
-        if os.path.sep != '/':
-            path = path.replace(os.path.sep, '/')
-
-        return (self.regex.search(path) is not None)
-
-
-_PATTERN_CLASSES = set([
-    FnmatchPattern,
-    PathFullPattern,
-    PathPrefixPattern,
-    RegexPattern,
-    ShellPattern,
-])
-
-_PATTERN_CLASS_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_CLASSES)
-
-CmdTuple = namedtuple('CmdTuple', 'val cmd')
-
-
-class IECommand(Enum):
-    """A command that an InclExcl file line can represent.
-    """
-    RootPath = 1
-    PatternStyle = 2
-    Include = 3
-    Exclude = 4
-    ExcludeNoRecurse = 5
-
-
-def get_pattern_class(prefix):
-    try:
-        return _PATTERN_CLASS_BY_PREFIX[prefix]
-    except KeyError:
-        raise ValueError("Unknown pattern style: {}".format(prefix)) from None
-
-
-def parse_pattern(pattern, fallback=FnmatchPattern, recurse_dir=True):
-    """Read pattern from string and return an instance of the appropriate implementation class.
-
-    """
-    if len(pattern) > 2 and pattern[2] == ":" and pattern[:2].isalnum():
-        (style, pattern) = (pattern[:2], pattern[3:])
-        cls = get_pattern_class(style)
-    else:
-        cls = fallback
-    return cls(pattern, recurse_dir)
-
-
-def parse_exclude_pattern(pattern_str, fallback=FnmatchPattern):
-    """Read pattern from string and return an instance of the appropriate implementation class.
-    """
-    epattern_obj = parse_pattern(pattern_str, fallback)
-    return CmdTuple(epattern_obj, IECommand.Exclude)
-
-
-def parse_inclexcl_command(cmd_line_str, fallback=ShellPattern):
-    """Read a --patterns-from command from string and return a CmdTuple object."""
-
-    cmd_prefix_map = {
-        '-': IECommand.Exclude,
-        '!': IECommand.ExcludeNoRecurse,
-        '+': IECommand.Include,
-        'R': IECommand.RootPath,
-        'r': IECommand.RootPath,
-        'P': IECommand.PatternStyle,
-        'p': IECommand.PatternStyle,
-    }
-
-    try:
-        cmd = cmd_prefix_map[cmd_line_str[0]]
-
-        # remaining text on command-line following the command character
-        remainder_str = cmd_line_str[1:].lstrip()
-
-        if not remainder_str:
-            raise ValueError("Missing pattern/information!")
-    except (IndexError, KeyError, ValueError):
-        raise argparse.ArgumentTypeError("Unable to parse pattern/command: {}".format(cmd_line_str))
-
-    if cmd is IECommand.RootPath:
-        # TODO: validate string?
-        val = remainder_str
-    elif cmd is IECommand.PatternStyle:
-        # then remainder_str is something like 're' or 'sh'
-        try:
-            val = get_pattern_class(remainder_str)
-        except ValueError:
-            raise argparse.ArgumentTypeError("Invalid pattern style: {}".format(remainder_str))
-    else:
-        # determine recurse_dir based on command type
-        recurse_dir = cmd not in [IECommand.ExcludeNoRecurse]
-        val = parse_pattern(remainder_str, fallback, recurse_dir)
-
-    return CmdTuple(val, cmd)
-
-
 def timestamp(s):
 def timestamp(s):
     """Convert a --timestamp=s argument to a datetime object"""
     """Convert a --timestamp=s argument to a datetime object"""
     try:
     try:

+ 392 - 0
src/borg/patterns.py

@@ -0,0 +1,392 @@
+import argparse
+import fnmatch
+import os.path
+import re
+import sys
+import unicodedata
+from collections import namedtuple
+from enum import Enum
+
+from . import shellpattern
+from .helpers import clean_lines
+
+
+def parse_patternfile_line(line, roots, ie_commands, fallback):
+    """Parse a pattern-file line and act depending on which command it represents."""
+    ie_command = parse_inclexcl_command(line, fallback=fallback)
+    if ie_command.cmd is IECommand.RootPath:
+        roots.append(ie_command.val)
+    elif ie_command.cmd is IECommand.PatternStyle:
+        fallback = ie_command.val
+    else:
+        # it is some kind of include/exclude command
+        ie_commands.append(ie_command)
+    return fallback
+
+
+def load_pattern_file(fileobj, roots, ie_commands, fallback=None):
+    if fallback is None:
+        fallback = ShellPattern  # ShellPattern is defined later in this module
+    for line in clean_lines(fileobj):
+        fallback = parse_patternfile_line(line, roots, ie_commands, fallback)
+
+
+def load_exclude_file(fileobj, patterns):
+    for patternstr in clean_lines(fileobj):
+        patterns.append(parse_exclude_pattern(patternstr))
+
+
+class ArgparsePatternAction(argparse.Action):
+    def __init__(self, nargs=1, **kw):
+        super().__init__(nargs=nargs, **kw)
+
+    def __call__(self, parser, args, values, option_string=None):
+        parse_patternfile_line(values[0], args.paths, args.patterns, ShellPattern)
+
+
+class ArgparsePatternFileAction(argparse.Action):
+    def __init__(self, nargs=1, **kw):
+        super().__init__(nargs=nargs, **kw)
+
+    def __call__(self, parser, args, values, option_string=None):
+        """Load and parse patterns from a file.
+        Lines empty or starting with '#' after stripping whitespace on both line ends are ignored.
+        """
+        filename = values[0]
+        with open(filename) as f:
+            self.parse(f, args)
+
+    def parse(self, fobj, args):
+        load_pattern_file(fobj, args.paths, args.patterns)
+
+
+class ArgparseExcludeFileAction(ArgparsePatternFileAction):
+    def parse(self, fobj, args):
+        load_exclude_file(fobj, args.patterns)
+
+
+class PatternMatcher:
+    """Represents a collection of pattern objects to match paths against.
+
+    *fallback* is a boolean value that *match()* returns if no matching patterns are found.
+
+    """
+    def __init__(self, fallback=None):
+        self._items = []
+
+        # Value to return from match function when none of the patterns match.
+        self.fallback = fallback
+
+        # optimizations
+        self._path_full_patterns = {}  # full path -> return value
+
+        # indicates whether the last match() call ended on a pattern for which
+        # we should recurse into any matching folder.  Will be set to True or
+        # False when calling match().
+        self.recurse_dir = None
+
+        # whether to recurse into directories when no match is found
+        # TODO: allow modification as a config option?
+        self.recurse_dir_default = True
+
+        self.include_patterns = []
+
+        # TODO: move this info to parse_inclexcl_command and store in PatternBase subclass?
+        self.is_include_cmd = {
+            IECommand.Exclude: False,
+            IECommand.ExcludeNoRecurse: False,
+            IECommand.Include: True
+        }
+
+    def empty(self):
+        return not len(self._items) and not len(self._path_full_patterns)
+
+    def _add(self, pattern, cmd):
+        """*cmd* is an IECommand value.
+        """
+        if isinstance(pattern, PathFullPattern):
+            key = pattern.pattern  # full, normalized path
+            self._path_full_patterns[key] = cmd
+        else:
+            self._items.append((pattern, cmd))
+
+    def add(self, patterns, cmd):
+        """Add list of patterns to internal list. *cmd* indicates whether the
+        pattern is an include/exclude pattern, and whether recursion should be
+        done on excluded folders.
+        """
+        for pattern in patterns:
+            self._add(pattern, cmd)
+
+    def add_includepaths(self, include_paths):
+        """Used to add inclusion-paths from args.paths (from commandline).
+        """
+        include_patterns = [parse_pattern(p, PathPrefixPattern) for p in include_paths]
+        self.add(include_patterns, IECommand.Include)
+        self.fallback = not include_patterns
+        self.include_patterns = include_patterns
+
+    def get_unmatched_include_patterns(self):
+        "Note that this only returns patterns added via *add_includepaths*."
+        return [p for p in self.include_patterns if p.match_count == 0]
+
+    def add_inclexcl(self, patterns):
+        """Add list of patterns (of type CmdTuple) to internal list.
+        """
+        for pattern, cmd in patterns:
+            self._add(pattern, cmd)
+
+    def match(self, path):
+        """Return True or False depending on whether *path* is matched.
+
+        If no match is found among the patterns in this matcher, then the value
+        in self.fallback is returned (defaults to None).
+
+        """
+        path = normalize_path(path)
+        # do a fast lookup for full path matches (note: we do not count such matches):
+        non_existent = object()
+        value = self._path_full_patterns.get(path, non_existent)
+
+        if value is not non_existent:
+            # we have a full path match!
+            # TODO: get from pattern; don't hard-code
+            self.recurse_dir = True
+            return value
+
+        # this is the slow way, if we have many patterns in self._items:
+        for (pattern, cmd) in self._items:
+            if pattern.match(path, normalize=False):
+                self.recurse_dir = pattern.recurse_dir
+                return self.is_include_cmd[cmd]
+
+        # by default we will recurse if there is no match
+        self.recurse_dir = self.recurse_dir_default
+        return self.fallback
+
+
+def normalize_path(path):
+    """normalize paths for MacOS (but do nothing on other platforms)"""
+    # HFS+ converts paths to a canonical form, so users shouldn't be required to enter an exact match.
+    # Windows and Unix filesystems allow different forms, so users always have to enter an exact match.
+    return unicodedata.normalize('NFD', path) if sys.platform == 'darwin' else path
+
+
+class PatternBase:
+    """Shared logic for inclusion/exclusion patterns.
+    """
+    PREFIX = NotImplemented
+
+    def __init__(self, pattern, recurse_dir=False):
+        self.pattern_orig = pattern
+        self.match_count = 0
+        pattern = normalize_path(pattern)
+        self._prepare(pattern)
+        self.recurse_dir = recurse_dir
+
+    def match(self, path, normalize=True):
+        """Return a boolean indicating whether *path* is matched by this pattern.
+
+        If normalize is True (default), the path will get normalized using normalize_path(),
+        otherwise it is assumed that it already is normalized using that function.
+        """
+        if normalize:
+            path = normalize_path(path)
+        matches = self._match(path)
+        if matches:
+            self.match_count += 1
+        return matches
+
+    def __repr__(self):
+        return '%s(%s)' % (type(self), self.pattern)
+
+    def __str__(self):
+        return self.pattern_orig
+
+    def _prepare(self, pattern):
+        "Should set the value of self.pattern"
+        raise NotImplementedError
+
+    def _match(self, path):
+        raise NotImplementedError
+
+
+class PathFullPattern(PatternBase):
+    """Full match of a path."""
+    PREFIX = "pf"
+
+    def _prepare(self, pattern):
+        self.pattern = os.path.normpath(pattern)
+
+    def _match(self, path):
+        return path == self.pattern
+
+
+# For PathPrefixPattern, FnmatchPattern and ShellPattern, we require that the pattern either match the whole path
+# or an initial segment of the path up to but not including a path separator. To unify the two cases, we add a path
+# separator to the end of the path before matching.
+
+
+class PathPrefixPattern(PatternBase):
+    """Literal files or directories listed on the command line
+    for some operations (e.g. extract, but not create).
+    If a directory is specified, all paths that start with that
+    path match as well.  A trailing slash makes no difference.
+    """
+    PREFIX = "pp"
+
+    def _prepare(self, pattern):
+        self.pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep
+
+    def _match(self, path):
+        return (path + os.path.sep).startswith(self.pattern)
+
+
+class FnmatchPattern(PatternBase):
+    """Shell glob patterns to exclude.  A trailing slash means to
+    exclude the contents of a directory, but not the directory itself.
+    """
+    PREFIX = "fm"
+
+    def _prepare(self, pattern):
+        if pattern.endswith(os.path.sep):
+            pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep + '*' + os.path.sep
+        else:
+            pattern = os.path.normpath(pattern) + os.path.sep + '*'
+
+        self.pattern = pattern
+
+        # fnmatch and re.match both cache compiled regular expressions.
+        # Nevertheless, this is about 10 times faster.
+        self.regex = re.compile(fnmatch.translate(self.pattern))
+
+    def _match(self, path):
+        return (self.regex.match(path + os.path.sep) is not None)
+
+
+class ShellPattern(PatternBase):
+    """Shell glob patterns to exclude.  A trailing slash means to
+    exclude the contents of a directory, but not the directory itself.
+    """
+    PREFIX = "sh"
+
+    def _prepare(self, pattern):
+        sep = os.path.sep
+
+        if pattern.endswith(sep):
+            pattern = os.path.normpath(pattern).rstrip(sep) + sep + "**" + sep + "*" + sep
+        else:
+            pattern = os.path.normpath(pattern) + sep + "**" + sep + "*"
+
+        self.pattern = pattern
+        self.regex = re.compile(shellpattern.translate(self.pattern))
+
+    def _match(self, path):
+        return (self.regex.match(path + os.path.sep) is not None)
+
+
+class RegexPattern(PatternBase):
+    """Regular expression to exclude.
+    """
+    PREFIX = "re"
+
+    def _prepare(self, pattern):
+        self.pattern = pattern
+        self.regex = re.compile(pattern)
+
+    def _match(self, path):
+        # Normalize path separators
+        if os.path.sep != '/':
+            path = path.replace(os.path.sep, '/')
+
+        return (self.regex.search(path) is not None)
+
+
+_PATTERN_CLASSES = {
+    FnmatchPattern,
+    PathFullPattern,
+    PathPrefixPattern,
+    RegexPattern,
+    ShellPattern,
+}
+
+_PATTERN_CLASS_BY_PREFIX = dict((i.PREFIX, i) for i in _PATTERN_CLASSES)
+
+CmdTuple = namedtuple('CmdTuple', 'val cmd')
+
+
+class IECommand(Enum):
+    """A command that an InclExcl file line can represent.
+    """
+    RootPath = 1
+    PatternStyle = 2
+    Include = 3
+    Exclude = 4
+    ExcludeNoRecurse = 5
+
+
+def get_pattern_class(prefix):
+    try:
+        return _PATTERN_CLASS_BY_PREFIX[prefix]
+    except KeyError:
+        raise ValueError("Unknown pattern style: {}".format(prefix)) from None
+
+
+def parse_pattern(pattern, fallback=FnmatchPattern, recurse_dir=True):
+    """Read pattern from string and return an instance of the appropriate implementation class.
+
+    """
+    if len(pattern) > 2 and pattern[2] == ":" and pattern[:2].isalnum():
+        (style, pattern) = (pattern[:2], pattern[3:])
+        cls = get_pattern_class(style)
+    else:
+        cls = fallback
+    return cls(pattern, recurse_dir)
+
+
+def parse_exclude_pattern(pattern_str, fallback=FnmatchPattern):
+    """Read pattern from string and return an instance of the appropriate implementation class.
+    """
+    epattern_obj = parse_pattern(pattern_str, fallback)
+    return CmdTuple(epattern_obj, IECommand.Exclude)
+
+
+def parse_inclexcl_command(cmd_line_str, fallback=ShellPattern):
+    """Read a --patterns-from command from string and return a CmdTuple object."""
+
+    cmd_prefix_map = {
+        '-': IECommand.Exclude,
+        '!': IECommand.ExcludeNoRecurse,
+        '+': IECommand.Include,
+        'R': IECommand.RootPath,
+        'r': IECommand.RootPath,
+        'P': IECommand.PatternStyle,
+        'p': IECommand.PatternStyle,
+    }
+
+    try:
+        cmd = cmd_prefix_map[cmd_line_str[0]]
+
+        # remaining text on command-line following the command character
+        remainder_str = cmd_line_str[1:].lstrip()
+
+        if not remainder_str:
+            raise ValueError("Missing pattern/information!")
+    except (IndexError, KeyError, ValueError):
+        raise argparse.ArgumentTypeError("Unable to parse pattern/command: {}".format(cmd_line_str))
+
+    if cmd is IECommand.RootPath:
+        # TODO: validate string?
+        val = remainder_str
+    elif cmd is IECommand.PatternStyle:
+        # then remainder_str is something like 're' or 'sh'
+        try:
+            val = get_pattern_class(remainder_str)
+        except ValueError:
+            raise argparse.ArgumentTypeError("Invalid pattern style: {}".format(remainder_str))
+    else:
+        # determine recurse_dir based on command type
+        recurse_dir = cmd not in [IECommand.ExcludeNoRecurse]
+        val = parse_pattern(remainder_str, fallback, recurse_dir)
+
+    return CmdTuple(val, cmd)

+ 2 - 2
src/borg/testsuite/archiver.py

@@ -33,12 +33,12 @@ from ..archiver import Archiver
 from ..cache import Cache
 from ..cache import Cache
 from ..constants import *  # NOQA
 from ..constants import *  # NOQA
 from ..crypto import bytes_to_long, num_aes_blocks
 from ..crypto import bytes_to_long, num_aes_blocks
-from ..helpers import PatternMatcher, parse_pattern, Location, get_security_dir
+from ..helpers import Location, get_security_dir
 from ..helpers import Manifest
 from ..helpers import Manifest
 from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
 from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
 from ..helpers import bin_to_hex
 from ..helpers import bin_to_hex
-from ..helpers import IECommand
 from ..helpers import MAX_S
 from ..helpers import MAX_S
+from ..patterns import IECommand, PatternMatcher, parse_pattern
 from ..item import Item
 from ..item import Item
 from ..key import KeyfileKeyBase, RepoKey, KeyfileKey, Passphrase, TAMRequiredError
 from ..key import KeyfileKeyBase, RepoKey, KeyfileKey, Passphrase, TAMRequiredError
 from ..keymanager import RepoIdMismatch, NotABorgKeyFile
 from ..keymanager import RepoIdMismatch, NotABorgKeyFile

+ 0 - 460
src/borg/testsuite/helpers.py

@@ -23,9 +23,6 @@ from ..helpers import yes, TRUISH, FALSISH, DEFAULTISH
 from ..helpers import StableDict, int_to_bigint, bigint_to_int, bin_to_hex
 from ..helpers import StableDict, int_to_bigint, bigint_to_int, bin_to_hex
 from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams
 from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams
 from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
 from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
-from ..helpers import load_exclude_file, load_pattern_file
-from ..helpers import parse_pattern, PatternMatcher
-from ..helpers import PathFullPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, RegexPattern
 from ..helpers import swidth_slice
 from ..helpers import swidth_slice
 from ..helpers import chunkit
 from ..helpers import chunkit
 from ..helpers import safe_ns, safe_s, SUPPORT_32BIT_PLATFORMS
 from ..helpers import safe_ns, safe_s, SUPPORT_32BIT_PLATFORMS
@@ -244,463 +241,6 @@ class FormatTimedeltaTestCase(BaseTestCase):
         )
         )
 
 
 
 
-def check_patterns(files, pattern, expected):
-    """Utility for testing patterns.
-    """
-    assert all([f == os.path.normpath(f) for f in files]), "Pattern matchers expect normalized input paths"
-
-    matched = [f for f in files if pattern.match(f)]
-
-    assert matched == (files if expected is None else expected)
-
-
-@pytest.mark.parametrize("pattern, expected", [
-    # "None" means all files, i.e. all match the given pattern
-    ("/", []),
-    ("/home", ["/home"]),
-    ("/home///", ["/home"]),
-    ("/./home", ["/home"]),
-    ("/home/user", ["/home/user"]),
-    ("/home/user2", ["/home/user2"]),
-    ("/home/user/.bashrc", ["/home/user/.bashrc"]),
-    ])
-def test_patterns_full(pattern, expected):
-    files = ["/home", "/home/user", "/home/user2", "/home/user/.bashrc", ]
-
-    check_patterns(files, PathFullPattern(pattern), expected)
-
-
-@pytest.mark.parametrize("pattern, expected", [
-    # "None" means all files, i.e. all match the given pattern
-    ("", []),
-    ("relative", []),
-    ("relative/path/", ["relative/path"]),
-    ("relative/path", ["relative/path"]),
-    ])
-def test_patterns_full_relative(pattern, expected):
-    files = ["relative/path", "relative/path2", ]
-
-    check_patterns(files, PathFullPattern(pattern), expected)
-
-
-@pytest.mark.parametrize("pattern, expected", [
-    # "None" means all files, i.e. all match the given pattern
-    ("/", None),
-    ("/./", None),
-    ("", []),
-    ("/home/u", []),
-    ("/home/user", ["/home/user/.profile", "/home/user/.bashrc"]),
-    ("/etc", ["/etc/server/config", "/etc/server/hosts"]),
-    ("///etc//////", ["/etc/server/config", "/etc/server/hosts"]),
-    ("/./home//..//home/user2", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
-    ("/srv", ["/srv/messages", "/srv/dmesg"]),
-    ])
-def test_patterns_prefix(pattern, expected):
-    files = [
-        "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc",
-        "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv/messages", "/srv/dmesg",
-    ]
-
-    check_patterns(files, PathPrefixPattern(pattern), expected)
-
-
-@pytest.mark.parametrize("pattern, expected", [
-    # "None" means all files, i.e. all match the given pattern
-    ("", []),
-    ("foo", []),
-    ("relative", ["relative/path1", "relative/two"]),
-    ("more", ["more/relative"]),
-    ])
-def test_patterns_prefix_relative(pattern, expected):
-    files = ["relative/path1", "relative/two", "more/relative"]
-
-    check_patterns(files, PathPrefixPattern(pattern), expected)
-
-
-@pytest.mark.parametrize("pattern, expected", [
-    # "None" means all files, i.e. all match the given pattern
-    ("/*", None),
-    ("/./*", None),
-    ("*", None),
-    ("*/*", None),
-    ("*///*", None),
-    ("/home/u", []),
-    ("/home/*",
-     ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html",
-      "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
-    ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]),
-    ("/etc/*", ["/etc/server/config", "/etc/server/hosts"]),
-    ("*/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]),
-    ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]),
-    ("/./home//..//home/user2/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
-    ("/srv*", ["/srv/messages", "/srv/dmesg"]),
-    ("/home/*/.thumbnails", ["/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
-    ])
-def test_patterns_fnmatch(pattern, expected):
-    files = [
-        "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc",
-        "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv/messages", "/srv/dmesg",
-        "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails",
-    ]
-
-    check_patterns(files, FnmatchPattern(pattern), expected)
-
-
-@pytest.mark.parametrize("pattern, expected", [
-    # "None" means all files, i.e. all match the given pattern
-    ("*", None),
-    ("**/*", None),
-    ("/**/*", None),
-    ("/./*", None),
-    ("*/*", None),
-    ("*///*", None),
-    ("/home/u", []),
-    ("/home/*",
-     ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html",
-      "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
-    ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]),
-    ("/etc/*/*", ["/etc/server/config", "/etc/server/hosts"]),
-    ("/etc/**/*", ["/etc/server/config", "/etc/server/hosts"]),
-    ("/etc/**/*/*", ["/etc/server/config", "/etc/server/hosts"]),
-    ("*/.pr????e", []),
-    ("**/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]),
-    ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]),
-    ("/./home//..//home/user2/", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
-    ("/./home//..//home/user2/**/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
-    ("/srv*/", ["/srv/messages", "/srv/dmesg", "/srv2/blafasel"]),
-    ("/srv*", ["/srv", "/srv/messages", "/srv/dmesg", "/srv2", "/srv2/blafasel"]),
-    ("/srv/*", ["/srv/messages", "/srv/dmesg"]),
-    ("/srv2/**", ["/srv2", "/srv2/blafasel"]),
-    ("/srv2/**/", ["/srv2/blafasel"]),
-    ("/home/*/.thumbnails", ["/home/foo/.thumbnails"]),
-    ("/home/*/*/.thumbnails", ["/home/foo/bar/.thumbnails"]),
-    ])
-def test_patterns_shell(pattern, expected):
-    files = [
-        "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc",
-        "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv", "/srv/messages", "/srv/dmesg",
-        "/srv2", "/srv2/blafasel", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails",
-    ]
-
-    check_patterns(files, ShellPattern(pattern), expected)
-
-
-@pytest.mark.parametrize("pattern, expected", [
-    # "None" means all files, i.e. all match the given pattern
-    ("", None),
-    (".*", None),
-    ("^/", None),
-    ("^abc$", []),
-    ("^[^/]", []),
-    ("^(?!/srv|/foo|/opt)",
-     ["/home", "/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile",
-      "/home/user2/public_html/index.html", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", ]),
-    ])
-def test_patterns_regex(pattern, expected):
-    files = [
-        '/srv/data', '/foo/bar', '/home',
-        '/home/user/.profile', '/home/user/.bashrc',
-        '/home/user2/.profile', '/home/user2/public_html/index.html',
-        '/opt/log/messages.txt', '/opt/log/dmesg.txt',
-        "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails",
-    ]
-
-    obj = RegexPattern(pattern)
-    assert str(obj) == pattern
-    assert obj.pattern == pattern
-
-    check_patterns(files, obj, expected)
-
-
-def test_regex_pattern():
-    # The forward slash must match the platform-specific path separator
-    assert RegexPattern("^/$").match("/")
-    assert RegexPattern("^/$").match(os.path.sep)
-    assert not RegexPattern(r"^\\$").match("/")
-
-
-def use_normalized_unicode():
-    return sys.platform in ("darwin",)
-
-
-def _make_test_patterns(pattern):
-    return [PathPrefixPattern(pattern),
-            FnmatchPattern(pattern),
-            RegexPattern("^{}/foo$".format(pattern)),
-            ShellPattern(pattern),
-            ]
-
-
-@pytest.mark.parametrize("pattern", _make_test_patterns("b\N{LATIN SMALL LETTER A WITH ACUTE}"))
-def test_composed_unicode_pattern(pattern):
-    assert pattern.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
-    assert pattern.match("ba\N{COMBINING ACUTE ACCENT}/foo") == use_normalized_unicode()
-
-
-@pytest.mark.parametrize("pattern", _make_test_patterns("ba\N{COMBINING ACUTE ACCENT}"))
-def test_decomposed_unicode_pattern(pattern):
-    assert pattern.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") == use_normalized_unicode()
-    assert pattern.match("ba\N{COMBINING ACUTE ACCENT}/foo")
-
-
-@pytest.mark.parametrize("pattern", _make_test_patterns(str(b"ba\x80", "latin1")))
-def test_invalid_unicode_pattern(pattern):
-    assert not pattern.match("ba/foo")
-    assert pattern.match(str(b"ba\x80/foo", "latin1"))
-
-
-@pytest.mark.parametrize("lines, expected", [
-    # "None" means all files, i.e. none excluded
-    ([], None),
-    (["# Comment only"], None),
-    (["*"], []),
-    (["# Comment",
-      "*/something00.txt",
-      "  *whitespace*  ",
-      # Whitespace before comment
-      " #/ws*",
-      # Empty line
-      "",
-      "# EOF"],
-     ["/more/data", "/home", " #/wsfoobar"]),
-    (["re:.*"], []),
-    (["re:\s"], ["/data/something00.txt", "/more/data", "/home"]),
-    ([r"re:(.)(\1)"], ["/more/data", "/home", "\tstart/whitespace", "/whitespace/end\t"]),
-    (["", "", "",
-      "# This is a test with mixed pattern styles",
-      # Case-insensitive pattern
-      "re:(?i)BAR|ME$",
-      "",
-      "*whitespace*",
-      "fm:*/something00*"],
-     ["/more/data"]),
-    ([r"  re:^\s  "], ["/data/something00.txt", "/more/data", "/home", "/whitespace/end\t"]),
-    ([r"  re:\s$  "], ["/data/something00.txt", "/more/data", "/home", " #/wsfoobar", "\tstart/whitespace"]),
-    (["pp:./"], None),
-    (["pp:/"], [" #/wsfoobar", "\tstart/whitespace"]),
-    (["pp:aaabbb"], None),
-    (["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]),
-    (["/nomatch", "/more/*"],
-     ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']),
-    # the order of exclude patterns shouldn't matter
-    (["/more/*", "/nomatch"],
-     ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']),
-    ])
-def test_exclude_patterns_from_file(tmpdir, lines, expected):
-    files = [
-        '/data/something00.txt', '/more/data', '/home',
-        ' #/wsfoobar',
-        '\tstart/whitespace',
-        '/whitespace/end\t',
-    ]
-
-    def evaluate(filename):
-        patterns = []
-        load_exclude_file(open(filename, "rt"), patterns)
-        matcher = PatternMatcher(fallback=True)
-        matcher.add_inclexcl(patterns)
-        return [path for path in files if matcher.match(path)]
-
-    exclfile = tmpdir.join("exclude.txt")
-
-    with exclfile.open("wt") as fh:
-        fh.write("\n".join(lines))
-
-    assert evaluate(str(exclfile)) == (files if expected is None else expected)
-
-
-@pytest.mark.parametrize("lines, expected_roots, expected_numpatterns", [
-    # "None" means all files, i.e. none excluded
-    ([], [], 0),
-    (["# Comment only"], [], 0),
-    (["- *"], [], 1),
-    (["+fm:*/something00.txt",
-      "-/data"], [], 2),
-    (["R /"], ["/"], 0),
-    (["R /",
-      "# comment"], ["/"], 0),
-    (["# comment",
-      "- /data",
-      "R /home"], ["/home"], 1),
-])
-def test_load_patterns_from_file(tmpdir, lines, expected_roots, expected_numpatterns):
-    def evaluate(filename):
-        roots = []
-        inclexclpatterns = []
-        load_pattern_file(open(filename, "rt"), roots, inclexclpatterns)
-        return roots, len(inclexclpatterns)
-    patternfile = tmpdir.join("patterns.txt")
-
-    with patternfile.open("wt") as fh:
-        fh.write("\n".join(lines))
-
-    roots, numpatterns = evaluate(str(patternfile))
-    assert roots == expected_roots
-    assert numpatterns == expected_numpatterns
-
-
-def test_switch_patterns_style():
-    patterns = """\
-        +0_initial_default_is_shell
-        p fm
-        +1_fnmatch
-        P re
-        +2_regex
-        +3_more_regex
-        P pp
-        +4_pathprefix
-        p fm
-        p sh
-        +5_shell
-    """
-    pattern_file = io.StringIO(patterns)
-    roots, patterns = [], []
-    load_pattern_file(pattern_file, roots, patterns)
-    assert len(patterns) == 6
-    assert isinstance(patterns[0].val, ShellPattern)
-    assert isinstance(patterns[1].val, FnmatchPattern)
-    assert isinstance(patterns[2].val, RegexPattern)
-    assert isinstance(patterns[3].val, RegexPattern)
-    assert isinstance(patterns[4].val, PathPrefixPattern)
-    assert isinstance(patterns[5].val, ShellPattern)
-
-
-@pytest.mark.parametrize("lines", [
-    (["X /data"]),  # illegal pattern type prefix
-    (["/data"]),    # need a pattern type prefix
-])
-def test_load_invalid_patterns_from_file(tmpdir, lines):
-    patternfile = tmpdir.join("patterns.txt")
-    with patternfile.open("wt") as fh:
-        fh.write("\n".join(lines))
-    filename = str(patternfile)
-    with pytest.raises(argparse.ArgumentTypeError):
-        roots = []
-        inclexclpatterns = []
-        load_pattern_file(open(filename, "rt"), roots, inclexclpatterns)
-
-
-@pytest.mark.parametrize("lines, expected", [
-    # "None" means all files, i.e. none excluded
-    ([], None),
-    (["# Comment only"], None),
-    (["- *"], []),
-    # default match type is sh: for patterns -> * doesn't match a /
-    (["-*/something0?.txt"],
-     ['/data', '/data/something00.txt', '/data/subdir/something01.txt',
-      '/home', '/home/leo', '/home/leo/t', '/home/other']),
-    (["-fm:*/something00.txt"],
-     ['/data', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t', '/home/other']),
-    (["-fm:*/something0?.txt"],
-     ["/data", '/home', '/home/leo', '/home/leo/t', '/home/other']),
-    (["+/*/something0?.txt",
-      "-/data"],
-     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
-    (["+fm:*/something00.txt",
-      "-/data"],
-     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
-    # include /home/leo and exclude the rest of /home:
-    (["+/home/leo",
-      "-/home/*"],
-     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
-    # wrong order, /home/leo is already excluded by -/home/*:
-    (["-/home/*",
-      "+/home/leo"],
-     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home']),
-    (["+fm:/home/leo",
-      "-/home/"],
-     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
-])
-def test_inclexcl_patterns_from_file(tmpdir, lines, expected):
-    files = [
-        '/data', '/data/something00.txt', '/data/subdir/something01.txt',
-        '/home', '/home/leo', '/home/leo/t', '/home/other'
-    ]
-
-    def evaluate(filename):
-        matcher = PatternMatcher(fallback=True)
-        roots = []
-        inclexclpatterns = []
-        load_pattern_file(open(filename, "rt"), roots, inclexclpatterns)
-        matcher.add_inclexcl(inclexclpatterns)
-        return [path for path in files if matcher.match(path)]
-
-    patternfile = tmpdir.join("patterns.txt")
-
-    with patternfile.open("wt") as fh:
-        fh.write("\n".join(lines))
-
-    assert evaluate(str(patternfile)) == (files if expected is None else expected)
-
-
-@pytest.mark.parametrize("pattern, cls", [
-    ("", FnmatchPattern),
-
-    # Default style
-    ("*", FnmatchPattern),
-    ("/data/*", FnmatchPattern),
-
-    # fnmatch style
-    ("fm:", FnmatchPattern),
-    ("fm:*", FnmatchPattern),
-    ("fm:/data/*", FnmatchPattern),
-    ("fm:fm:/data/*", FnmatchPattern),
-
-    # Regular expression
-    ("re:", RegexPattern),
-    ("re:.*", RegexPattern),
-    ("re:^/something/", RegexPattern),
-    ("re:re:^/something/", RegexPattern),
-
-    # Path prefix
-    ("pp:", PathPrefixPattern),
-    ("pp:/", PathPrefixPattern),
-    ("pp:/data/", PathPrefixPattern),
-    ("pp:pp:/data/", PathPrefixPattern),
-
-    # Shell-pattern style
-    ("sh:", ShellPattern),
-    ("sh:*", ShellPattern),
-    ("sh:/data/*", ShellPattern),
-    ("sh:sh:/data/*", ShellPattern),
-    ])
-def test_parse_pattern(pattern, cls):
-    assert isinstance(parse_pattern(pattern), cls)
-
-
-@pytest.mark.parametrize("pattern", ["aa:", "fo:*", "00:", "x1:abc"])
-def test_parse_pattern_error(pattern):
-    with pytest.raises(ValueError):
-        parse_pattern(pattern)
-
-
-def test_pattern_matcher():
-    pm = PatternMatcher()
-
-    assert pm.fallback is None
-
-    for i in ["", "foo", "bar"]:
-        assert pm.match(i) is None
-
-    # add extra entries to aid in testing
-    for target in ["A", "B", "Empty", "FileNotFound"]:
-        pm.is_include_cmd[target] = target
-
-    pm.add([RegexPattern("^a")], "A")
-    pm.add([RegexPattern("^b"), RegexPattern("^z")], "B")
-    pm.add([RegexPattern("^$")], "Empty")
-    pm.fallback = "FileNotFound"
-
-    assert pm.match("") == "Empty"
-    assert pm.match("aaa") == "A"
-    assert pm.match("bbb") == "B"
-    assert pm.match("ccc") == "FileNotFound"
-    assert pm.match("xyz") == "FileNotFound"
-    assert pm.match("z") == "B"
-
-    assert PatternMatcher(fallback="hey!").fallback == "hey!"
-
-
 def test_chunkerparams():
 def test_chunkerparams():
     assert ChunkerParams('19,23,21,4095') == (19, 23, 21, 4095)
     assert ChunkerParams('19,23,21,4095') == (19, 23, 21, 4095)
     assert ChunkerParams('10,23,16,4095') == (10, 23, 16, 4095)
     assert ChunkerParams('10,23,16,4095') == (10, 23, 16, 4095)

+ 467 - 0
src/borg/testsuite/patterns.py

@@ -0,0 +1,467 @@
+import argparse
+import io
+import os.path
+import sys
+
+import pytest
+
+from ..patterns import PathFullPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, RegexPattern
+from ..patterns import load_exclude_file, load_pattern_file
+from ..patterns import parse_pattern, PatternMatcher
+
+
+def check_patterns(files, pattern, expected):
+    """Utility for testing patterns.
+    """
+    assert all([f == os.path.normpath(f) for f in files]), "Pattern matchers expect normalized input paths"
+
+    matched = [f for f in files if pattern.match(f)]
+
+    assert matched == (files if expected is None else expected)
+
+
+@pytest.mark.parametrize("pattern, expected", [
+    # "None" means all files, i.e. all match the given pattern
+    ("/", []),
+    ("/home", ["/home"]),
+    ("/home///", ["/home"]),
+    ("/./home", ["/home"]),
+    ("/home/user", ["/home/user"]),
+    ("/home/user2", ["/home/user2"]),
+    ("/home/user/.bashrc", ["/home/user/.bashrc"]),
+    ])
+def test_patterns_full(pattern, expected):
+    files = ["/home", "/home/user", "/home/user2", "/home/user/.bashrc", ]
+
+    check_patterns(files, PathFullPattern(pattern), expected)
+
+
+@pytest.mark.parametrize("pattern, expected", [
+    # "None" means all files, i.e. all match the given pattern
+    ("", []),
+    ("relative", []),
+    ("relative/path/", ["relative/path"]),
+    ("relative/path", ["relative/path"]),
+    ])
+def test_patterns_full_relative(pattern, expected):
+    files = ["relative/path", "relative/path2", ]
+
+    check_patterns(files, PathFullPattern(pattern), expected)
+
+
+@pytest.mark.parametrize("pattern, expected", [
+    # "None" means all files, i.e. all match the given pattern
+    ("/", None),
+    ("/./", None),
+    ("", []),
+    ("/home/u", []),
+    ("/home/user", ["/home/user/.profile", "/home/user/.bashrc"]),
+    ("/etc", ["/etc/server/config", "/etc/server/hosts"]),
+    ("///etc//////", ["/etc/server/config", "/etc/server/hosts"]),
+    ("/./home//..//home/user2", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
+    ("/srv", ["/srv/messages", "/srv/dmesg"]),
+    ])
+def test_patterns_prefix(pattern, expected):
+    files = [
+        "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc",
+        "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv/messages", "/srv/dmesg",
+    ]
+
+    check_patterns(files, PathPrefixPattern(pattern), expected)
+
+
+@pytest.mark.parametrize("pattern, expected", [
+    # "None" means all files, i.e. all match the given pattern
+    ("", []),
+    ("foo", []),
+    ("relative", ["relative/path1", "relative/two"]),
+    ("more", ["more/relative"]),
+    ])
+def test_patterns_prefix_relative(pattern, expected):
+    files = ["relative/path1", "relative/two", "more/relative"]
+
+    check_patterns(files, PathPrefixPattern(pattern), expected)
+
+
+@pytest.mark.parametrize("pattern, expected", [
+    # "None" means all files, i.e. all match the given pattern
+    ("/*", None),
+    ("/./*", None),
+    ("*", None),
+    ("*/*", None),
+    ("*///*", None),
+    ("/home/u", []),
+    ("/home/*",
+     ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html",
+      "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
+    ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]),
+    ("/etc/*", ["/etc/server/config", "/etc/server/hosts"]),
+    ("*/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]),
+    ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]),
+    ("/./home//..//home/user2/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
+    ("/srv*", ["/srv/messages", "/srv/dmesg"]),
+    ("/home/*/.thumbnails", ["/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
+    ])
+def test_patterns_fnmatch(pattern, expected):
+    files = [
+        "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc",
+        "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv/messages", "/srv/dmesg",
+        "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails",
+    ]
+
+    check_patterns(files, FnmatchPattern(pattern), expected)
+
+
+@pytest.mark.parametrize("pattern, expected", [
+    # "None" means all files, i.e. all match the given pattern
+    ("*", None),
+    ("**/*", None),
+    ("/**/*", None),
+    ("/./*", None),
+    ("*/*", None),
+    ("*///*", None),
+    ("/home/u", []),
+    ("/home/*",
+     ["/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile", "/home/user2/public_html/index.html",
+      "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails"]),
+    ("/home/user/*", ["/home/user/.profile", "/home/user/.bashrc"]),
+    ("/etc/*/*", ["/etc/server/config", "/etc/server/hosts"]),
+    ("/etc/**/*", ["/etc/server/config", "/etc/server/hosts"]),
+    ("/etc/**/*/*", ["/etc/server/config", "/etc/server/hosts"]),
+    ("*/.pr????e", []),
+    ("**/.pr????e", ["/home/user/.profile", "/home/user2/.profile"]),
+    ("///etc//////*", ["/etc/server/config", "/etc/server/hosts"]),
+    ("/./home//..//home/user2/", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
+    ("/./home//..//home/user2/**/*", ["/home/user2/.profile", "/home/user2/public_html/index.html"]),
+    ("/srv*/", ["/srv/messages", "/srv/dmesg", "/srv2/blafasel"]),
+    ("/srv*", ["/srv", "/srv/messages", "/srv/dmesg", "/srv2", "/srv2/blafasel"]),
+    ("/srv/*", ["/srv/messages", "/srv/dmesg"]),
+    ("/srv2/**", ["/srv2", "/srv2/blafasel"]),
+    ("/srv2/**/", ["/srv2/blafasel"]),
+    ("/home/*/.thumbnails", ["/home/foo/.thumbnails"]),
+    ("/home/*/*/.thumbnails", ["/home/foo/bar/.thumbnails"]),
+    ])
+def test_patterns_shell(pattern, expected):
+    files = [
+        "/etc/server/config", "/etc/server/hosts", "/home", "/home/user/.profile", "/home/user/.bashrc",
+        "/home/user2/.profile", "/home/user2/public_html/index.html", "/srv", "/srv/messages", "/srv/dmesg",
+        "/srv2", "/srv2/blafasel", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails",
+    ]
+
+    check_patterns(files, ShellPattern(pattern), expected)
+
+
+@pytest.mark.parametrize("pattern, expected", [
+    # "None" means all files, i.e. all match the given pattern
+    ("", None),
+    (".*", None),
+    ("^/", None),
+    ("^abc$", []),
+    ("^[^/]", []),
+    ("^(?!/srv|/foo|/opt)",
+     ["/home", "/home/user/.profile", "/home/user/.bashrc", "/home/user2/.profile",
+      "/home/user2/public_html/index.html", "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails", ]),
+    ])
+def test_patterns_regex(pattern, expected):
+    files = [
+        '/srv/data', '/foo/bar', '/home',
+        '/home/user/.profile', '/home/user/.bashrc',
+        '/home/user2/.profile', '/home/user2/public_html/index.html',
+        '/opt/log/messages.txt', '/opt/log/dmesg.txt',
+        "/home/foo/.thumbnails", "/home/foo/bar/.thumbnails",
+    ]
+
+    obj = RegexPattern(pattern)
+    assert str(obj) == pattern
+    assert obj.pattern == pattern
+
+    check_patterns(files, obj, expected)
+
+
+def test_regex_pattern():
+    # The forward slash must match the platform-specific path separator
+    assert RegexPattern("^/$").match("/")
+    assert RegexPattern("^/$").match(os.path.sep)
+    assert not RegexPattern(r"^\\$").match("/")
+
+
+def use_normalized_unicode():
+    return sys.platform in ("darwin",)
+
+
+def _make_test_patterns(pattern):
+    return [PathPrefixPattern(pattern),
+            FnmatchPattern(pattern),
+            RegexPattern("^{}/foo$".format(pattern)),
+            ShellPattern(pattern),
+            ]
+
+
+@pytest.mark.parametrize("pattern", _make_test_patterns("b\N{LATIN SMALL LETTER A WITH ACUTE}"))
+def test_composed_unicode_pattern(pattern):
+    assert pattern.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
+    assert pattern.match("ba\N{COMBINING ACUTE ACCENT}/foo") == use_normalized_unicode()
+
+
+@pytest.mark.parametrize("pattern", _make_test_patterns("ba\N{COMBINING ACUTE ACCENT}"))
+def test_decomposed_unicode_pattern(pattern):
+    assert pattern.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo") == use_normalized_unicode()
+    assert pattern.match("ba\N{COMBINING ACUTE ACCENT}/foo")
+
+
+@pytest.mark.parametrize("pattern", _make_test_patterns(str(b"ba\x80", "latin1")))
+def test_invalid_unicode_pattern(pattern):
+    assert not pattern.match("ba/foo")
+    assert pattern.match(str(b"ba\x80/foo", "latin1"))
+
+
+@pytest.mark.parametrize("lines, expected", [
+    # "None" means all files, i.e. none excluded
+    ([], None),
+    (["# Comment only"], None),
+    (["*"], []),
+    (["# Comment",
+      "*/something00.txt",
+      "  *whitespace*  ",
+      # Whitespace before comment
+      " #/ws*",
+      # Empty line
+      "",
+      "# EOF"],
+     ["/more/data", "/home", " #/wsfoobar"]),
+    (["re:.*"], []),
+    (["re:\s"], ["/data/something00.txt", "/more/data", "/home"]),
+    ([r"re:(.)(\1)"], ["/more/data", "/home", "\tstart/whitespace", "/whitespace/end\t"]),
+    (["", "", "",
+      "# This is a test with mixed pattern styles",
+      # Case-insensitive pattern
+      "re:(?i)BAR|ME$",
+      "",
+      "*whitespace*",
+      "fm:*/something00*"],
+     ["/more/data"]),
+    ([r"  re:^\s  "], ["/data/something00.txt", "/more/data", "/home", "/whitespace/end\t"]),
+    ([r"  re:\s$  "], ["/data/something00.txt", "/more/data", "/home", " #/wsfoobar", "\tstart/whitespace"]),
+    (["pp:./"], None),
+    (["pp:/"], [" #/wsfoobar", "\tstart/whitespace"]),
+    (["pp:aaabbb"], None),
+    (["pp:/data", "pp: #/", "pp:\tstart", "pp:/whitespace"], ["/more/data", "/home"]),
+    (["/nomatch", "/more/*"],
+     ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']),
+    # the order of exclude patterns shouldn't matter
+    (["/more/*", "/nomatch"],
+     ['/data/something00.txt', '/home', ' #/wsfoobar', '\tstart/whitespace', '/whitespace/end\t']),
+    ])
+def test_exclude_patterns_from_file(tmpdir, lines, expected):
+    files = [
+        '/data/something00.txt', '/more/data', '/home',
+        ' #/wsfoobar',
+        '\tstart/whitespace',
+        '/whitespace/end\t',
+    ]
+
+    def evaluate(filename):
+        patterns = []
+        load_exclude_file(open(filename, "rt"), patterns)
+        matcher = PatternMatcher(fallback=True)
+        matcher.add_inclexcl(patterns)
+        return [path for path in files if matcher.match(path)]
+
+    exclfile = tmpdir.join("exclude.txt")
+
+    with exclfile.open("wt") as fh:
+        fh.write("\n".join(lines))
+
+    assert evaluate(str(exclfile)) == (files if expected is None else expected)
+
+
+@pytest.mark.parametrize("lines, expected_roots, expected_numpatterns", [
+    # "None" means all files, i.e. none excluded
+    ([], [], 0),
+    (["# Comment only"], [], 0),
+    (["- *"], [], 1),
+    (["+fm:*/something00.txt",
+      "-/data"], [], 2),
+    (["R /"], ["/"], 0),
+    (["R /",
+      "# comment"], ["/"], 0),
+    (["# comment",
+      "- /data",
+      "R /home"], ["/home"], 1),
+])
+def test_load_patterns_from_file(tmpdir, lines, expected_roots, expected_numpatterns):
+    def evaluate(filename):
+        roots = []
+        inclexclpatterns = []
+        load_pattern_file(open(filename, "rt"), roots, inclexclpatterns)
+        return roots, len(inclexclpatterns)
+    patternfile = tmpdir.join("patterns.txt")
+
+    with patternfile.open("wt") as fh:
+        fh.write("\n".join(lines))
+
+    roots, numpatterns = evaluate(str(patternfile))
+    assert roots == expected_roots
+    assert numpatterns == expected_numpatterns
+
+
+def test_switch_patterns_style():
+    patterns = """\
+        +0_initial_default_is_shell
+        p fm
+        +1_fnmatch
+        P re
+        +2_regex
+        +3_more_regex
+        P pp
+        +4_pathprefix
+        p fm
+        p sh
+        +5_shell
+    """
+    pattern_file = io.StringIO(patterns)
+    roots, patterns = [], []
+    load_pattern_file(pattern_file, roots, patterns)
+    assert len(patterns) == 6
+    assert isinstance(patterns[0].val, ShellPattern)
+    assert isinstance(patterns[1].val, FnmatchPattern)
+    assert isinstance(patterns[2].val, RegexPattern)
+    assert isinstance(patterns[3].val, RegexPattern)
+    assert isinstance(patterns[4].val, PathPrefixPattern)
+    assert isinstance(patterns[5].val, ShellPattern)
+
+
+@pytest.mark.parametrize("lines", [
+    (["X /data"]),  # illegal pattern type prefix
+    (["/data"]),    # need a pattern type prefix
+])
+def test_load_invalid_patterns_from_file(tmpdir, lines):
+    patternfile = tmpdir.join("patterns.txt")
+    with patternfile.open("wt") as fh:
+        fh.write("\n".join(lines))
+    filename = str(patternfile)
+    with pytest.raises(argparse.ArgumentTypeError):
+        roots = []
+        inclexclpatterns = []
+        load_pattern_file(open(filename, "rt"), roots, inclexclpatterns)
+
+
+@pytest.mark.parametrize("lines, expected", [
+    # "None" means all files, i.e. none excluded
+    ([], None),
+    (["# Comment only"], None),
+    (["- *"], []),
+    # default match type is sh: for patterns -> * doesn't match a /
+    (["-*/something0?.txt"],
+     ['/data', '/data/something00.txt', '/data/subdir/something01.txt',
+      '/home', '/home/leo', '/home/leo/t', '/home/other']),
+    (["-fm:*/something00.txt"],
+     ['/data', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t', '/home/other']),
+    (["-fm:*/something0?.txt"],
+     ["/data", '/home', '/home/leo', '/home/leo/t', '/home/other']),
+    (["+/*/something0?.txt",
+      "-/data"],
+     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
+    (["+fm:*/something00.txt",
+      "-/data"],
+     ["/data/something00.txt", '/home', '/home/leo', '/home/leo/t', '/home/other']),
+    # include /home/leo and exclude the rest of /home:
+    (["+/home/leo",
+      "-/home/*"],
+     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
+    # wrong order, /home/leo is already excluded by -/home/*:
+    (["-/home/*",
+      "+/home/leo"],
+     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home']),
+    (["+fm:/home/leo",
+      "-/home/"],
+     ['/data', '/data/something00.txt', '/data/subdir/something01.txt', '/home', '/home/leo', '/home/leo/t']),
+])
+def test_inclexcl_patterns_from_file(tmpdir, lines, expected):
+    """Check which paths survive the include/exclude rules read from a file.
+
+    ``lines`` is written to a temporary pattern file, loaded with
+    load_pattern_file and fed to a PatternMatcher whose fallback is True.
+    ``expected`` lists the paths from ``files`` for which the matcher must
+    return a truthy value; ``None`` stands for "every path in ``files``".
+    """
+    files = [
+        '/data', '/data/something00.txt', '/data/subdir/something01.txt',
+        '/home', '/home/leo', '/home/leo/t', '/home/other'
+    ]
+
+    def evaluate(filename):
+        """Load the pattern file and return the paths the matcher accepts."""
+        matcher = PatternMatcher(fallback=True)
+        roots = []
+        inclexclpatterns = []
+        # Close the pattern file deterministically instead of leaking the
+        # handle until garbage collection.
+        with open(filename, "rt") as fh:
+            load_pattern_file(fh, roots, inclexclpatterns)
+        matcher.add_inclexcl(inclexclpatterns)
+        return [path for path in files if matcher.match(path)]
+
+    patternfile = tmpdir.join("patterns.txt")
+
+    with patternfile.open("wt") as fh:
+        fh.write("\n".join(lines))
+
+    assert evaluate(str(patternfile)) == (files if expected is None else expected)
+
+
+@pytest.mark.parametrize("pattern, cls", [
+    # No style prefix at all: the default style applies
+    ("", FnmatchPattern),
+    ("*", FnmatchPattern),
+    ("/data/*", FnmatchPattern),
+
+    # Explicit "fm:" prefix (fnmatch)
+    ("fm:", FnmatchPattern),
+    ("fm:*", FnmatchPattern),
+    ("fm:/data/*", FnmatchPattern),
+    ("fm:fm:/data/*", FnmatchPattern),
+
+    # Explicit "re:" prefix (regular expression)
+    ("re:", RegexPattern),
+    ("re:.*", RegexPattern),
+    ("re:^/something/", RegexPattern),
+    ("re:re:^/something/", RegexPattern),
+
+    # Explicit "pp:" prefix (path prefix)
+    ("pp:", PathPrefixPattern),
+    ("pp:/", PathPrefixPattern),
+    ("pp:/data/", PathPrefixPattern),
+    ("pp:pp:/data/", PathPrefixPattern),
+
+    # Explicit "sh:" prefix (shell-style pattern)
+    ("sh:", ShellPattern),
+    ("sh:*", ShellPattern),
+    ("sh:/data/*", ShellPattern),
+    ("sh:sh:/data/*", ShellPattern),
+])
+def test_parse_pattern(pattern, cls):
+    """Check that parse_pattern returns an instance of the expected class.
+
+    Covers unprefixed patterns, each style prefix with an empty and a
+    non-empty remainder, and a doubled prefix for every style.
+    """
+    parsed = parse_pattern(pattern)
+    assert isinstance(parsed, cls)
+
+
+@pytest.mark.parametrize("pattern", [
+    "aa:",
+    "fo:*",
+    "00:",
+    "x1:abc",
+])
+def test_parse_pattern_error(pattern):
+    """Check that parse_pattern raises ValueError for each listed pattern."""
+    with pytest.raises(ValueError):
+        parse_pattern(pattern)
+
+
+def test_pattern_matcher():
+    """Exercise PatternMatcher defaults, pattern lookup and the fallback value."""
+    matcher = PatternMatcher()
+
+    # A fresh matcher has no fallback, and with nothing added match() gives None.
+    assert matcher.fallback is None
+    for path in ("", "foo", "bar"):
+        assert matcher.match(path) is None
+
+    # Register the ad-hoc command values used below; each maps to itself.
+    # NOTE(review): presumably add()/match() consult is_include_cmd - confirm
+    # against PatternMatcher.
+    for command in ("A", "B", "Empty", "FileNotFound"):
+        matcher.is_include_cmd[command] = command
+
+    matcher.add([RegexPattern("^a")], "A")
+    matcher.add([RegexPattern("^b"), RegexPattern("^z")], "B")
+    matcher.add([RegexPattern("^$")], "Empty")
+    matcher.fallback = "FileNotFound"
+
+    # (path, expected result) pairs, checked in this order.
+    expectations = [
+        ("", "Empty"),
+        ("aaa", "A"),
+        ("bbb", "B"),
+        ("ccc", "FileNotFound"),
+        ("xyz", "FileNotFound"),
+        ("z", "B"),
+    ]
+    for path, expected in expectations:
+        assert matcher.match(path) == expected
+
+    # The fallback can also be supplied through the constructor.
+    assert PatternMatcher(fallback="hey!").fallback == "hey!"