فهرست منبع

Merge pull request #497 from hansmi/master

Implement exclusions using regular expressions
TW 9 سال پیش
والد
کامیت
95fd2248f6
5 فایلهای تغییر یافته به همراه 342 افزوده شده و 79 حذف شده
  1. 56 14
      borg/archiver.py
  2. 80 37
      borg/helpers.py
  3. 73 0
      borg/testsuite/archiver.py
  4. 128 28
      borg/testsuite/helpers.py
  5. 5 0
      docs/usage.rst

+ 56 - 14
borg/archiver.py

@@ -17,7 +17,7 @@ import traceback
 
 from . import __version__
 from .helpers import Error, location_validator, format_time, format_file_size, \
-    format_file_mode, ExcludePattern, IncludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \
+    format_file_mode, parse_pattern, IncludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \
     get_cache_dir, get_keys_dir, prune_within, prune_split, unhexlify, \
     Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
     dir_is_tagged, bigint_to_int, ChunkerParams, CompressionSpec, is_slow_msgpack, yes, sysinfo, \
@@ -598,17 +598,45 @@ class Archiver:
 
     helptext = {}
     helptext['patterns'] = textwrap.dedent('''
-        Exclude patterns use a variant of shell pattern syntax, with '*' matching any
-        number of characters, '?' matching any single character, '[...]' matching any
-        single character specified, including ranges, and '[!...]' matching any
-        character not specified.  For the purpose of these patterns, the path
-        separator ('\\' for Windows and '/' on other systems) is not treated
-        specially.  For a path to match a pattern, it must completely match from
-        start to end, or must match from the start to just before a path separator.
-        Except for the root path, paths will never end in the path separator when
-        matching is attempted.  Thus, if a given pattern ends in a path separator, a
-        '*' is appended before matching is attempted.  Patterns with wildcards should
-        be quoted to protect them from shell expansion.
+        Exclusion patterns support two separate styles, fnmatch and regular
+        expressions. If followed by a colon (':') the first two characters of
+        a pattern are used as a style selector. Explicit style selection is necessary
+        when regular expressions are desired or when the desired fnmatch pattern
+        starts with two alphanumeric characters followed by a colon (e.g.
+        `aa:something/*`).
+
+        `Fnmatch <https://docs.python.org/3/library/fnmatch.html>`_ patterns use
+        a variant of shell pattern syntax, with '*' matching any number of
+        characters, '?' matching any single character, '[...]' matching any single
+        character specified, including ranges, and '[!...]' matching any character
+        not specified. The style selector is `fm`. For the purpose of these patterns,
+        the path separator ('\\' for Windows and '/' on other systems) is not treated
+        specially. For a path to match a pattern, it must completely match from start
+        to end, or must match from the start to just before a path separator. Except
+        for the root path, paths will never end in the path separator when matching
+        is attempted. Thus, if a given pattern ends in a path separator, a '*' is
+        appended before matching is attempted.
+
+        Regular expressions similar to those found in Perl are supported with the
+        selection prefix `re:`. Unlike shell patterns, regular expressions are not
+        required to match the complete path and any substring match is sufficient. It
+        is strongly recommended to anchor patterns to the start ('^'), to the end
+        ('$') or both. Path separators ('\\' for Windows and '/' on other systems) in
+        paths are always normalized to a forward slash ('/') before applying
+        a pattern. The regular expression syntax is described in the `Python
+        documentation for the re module
+        <https://docs.python.org/3/library/re.html>`_.
+
+        Exclusions can be passed via the command line option `--exclude`. When used
+        from within a shell the patterns should be quoted to protect them from
+        expansion.
+
+        The `--exclude-from` option permits loading exclusion patterns from a text
+        file with one pattern per line. Lines empty or starting with the number sign
+        ('#') after removing whitespace on both ends are ignored. The optional style
+        selector prefix is also supported for patterns loaded from a file. Due to
+        whitespace removal, paths with whitespace at the beginning or end can only be
+        excluded using regular expressions.
 
         Examples:
 
@@ -624,6 +652,20 @@ class Archiver:
 
         # The file '/home/user/cache/important' is *not* backed up:
         $ borg create -e /home/user/cache/ backup / /home/user/cache/important
+
+        # The contents of directories in '/home' are not backed up when their name
+        # ends in '.tmp'
+        $ borg create --exclude 're:^/home/[^/]+\.tmp/' backup /
+
+        # Load exclusions from file
+        $ cat >exclude.txt <<EOF
+        # Comment line
+        /home/*/junk
+        *.tmp
+        fm:aa:something/*
+        re:^/home/[^/]+\.tmp/
+        EOF
+        $ borg create --exclude-from exclude.txt backup /
         ''')
 
     def do_help(self, parser, commands, args):
@@ -812,7 +854,7 @@ class Archiver:
         subparser.add_argument('--filter', dest='output_filter', metavar='STATUSCHARS',
                                help='only display items with the given status characters')
         subparser.add_argument('-e', '--exclude', dest='excludes',
-                               type=ExcludePattern, action='append',
+                               type=parse_pattern, action='append',
                                metavar="PATTERN", help='exclude paths matching PATTERN')
         subparser.add_argument('--exclude-from', dest='exclude_files',
                                type=argparse.FileType('r'), action='append',
@@ -882,7 +924,7 @@ class Archiver:
                                default=False, action='store_true',
                                help='do not actually change any files')
         subparser.add_argument('-e', '--exclude', dest='excludes',
-                               type=ExcludePattern, action='append',
+                               type=parse_pattern, action='append',
                                metavar="PATTERN", help='exclude paths matching PATTERN')
         subparser.add_argument('--exclude-from', dest='exclude_files',
                                type=argparse.FileType('r'), action='append',

+ 80 - 37
borg/helpers.py

@@ -236,11 +236,11 @@ def parse_timestamp(timestamp):
 
 
 def load_excludes(fh):
-    """Load and parse exclude patterns from file object. Empty lines and lines starting with '#' are ignored, but
-    whitespace is not stripped.
+    """Load and parse exclude patterns from file object. Lines empty or starting with '#' after stripping whitespace on
+    both line ends are ignored.
     """
-    patterns = (line.rstrip('\r\n') for line in fh if not line.startswith('#'))
-    return [ExcludePattern(pattern) for pattern in patterns if pattern]
+    patterns = (line for line in (i.strip() for i in fh) if not line.startswith('#'))
+    return [parse_pattern(pattern) for pattern in patterns if pattern]
 
 
 def update_excludes(args):
@@ -266,16 +266,10 @@ def exclude_path(path, patterns):
     """
     for pattern in (patterns or []):
         if pattern.match(path):
-            return isinstance(pattern, ExcludePattern)
+            return isinstance(pattern, (ExcludePattern, ExcludeRegex))
     return False
 
 
-# For both IncludePattern and ExcludePattern, we require that
-# the pattern either match the whole path or an initial segment
-# of the path up to but not including a path separator.  To
-# unify the two cases, we add a path separator to the end of
-# the path before matching.
-
 def normalized(func):
     """ Decorator for the Pattern match methods, returning a wrapper that
     normalizes OSX paths to match the normalized pattern on OSX, and
@@ -294,11 +288,8 @@ def normalized(func):
         return func
 
 
-class IncludePattern:
-    """Literal files or directories listed on the command line
-    for some operations (e.g. extract, but not create).
-    If a directory is specified, all paths that start with that
-    path match as well.  A trailing slash makes no difference.
+class PatternBase:
+    """Shared logic for inclusion/exclusion patterns.
     """
     def __init__(self, pattern):
         self.pattern_orig = pattern
@@ -307,13 +298,15 @@ class IncludePattern:
         if sys.platform in ('darwin',):
             pattern = unicodedata.normalize("NFD", pattern)
 
-        self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep
+        self._prepare(pattern)
 
     @normalized
     def match(self, path):
-        matches = (path+os.path.sep).startswith(self.pattern)
+        matches = self._match(path)
+
         if matches:
             self.match_count += 1
+
         return matches
 
     def __repr__(self):
@@ -322,39 +315,89 @@ class IncludePattern:
     def __str__(self):
         return self.pattern_orig
 
+    def _prepare(self, pattern):
+        raise NotImplementedError
+
+    def _match(self, path):
+        raise NotImplementedError
+
+
+# For both IncludePattern and ExcludePattern, we require that
+# the pattern either match the whole path or an initial segment
+# of the path up to but not including a path separator.  To
+# unify the two cases, we add a path separator to the end of
+# the path before matching.
+
+
+class IncludePattern(PatternBase):
+    """Literal files or directories listed on the command line
+    for some operations (e.g. extract, but not create).
+    If a directory is specified, all paths that start with that
+    path match as well.  A trailing slash makes no difference.
+    """
+    def _prepare(self, pattern):
+        self.pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep
+
+    def _match(self, path):
+        return (path + os.path.sep).startswith(self.pattern)
 
-class ExcludePattern(IncludePattern):
+
+class ExcludePattern(PatternBase):
     """Shell glob patterns to exclude.  A trailing slash means to
     exclude the contents of a directory, but not the directory itself.
     """
-    def __init__(self, pattern):
-        self.pattern_orig = pattern
-        self.match_count = 0
-
+    def _prepare(self, pattern):
         if pattern.endswith(os.path.sep):
-            self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep+'*'+os.path.sep
+            pattern = os.path.normpath(pattern).rstrip(os.path.sep) + os.path.sep + '*' + os.path.sep
         else:
-            self.pattern = os.path.normpath(pattern)+os.path.sep+'*'
+            pattern = os.path.normpath(pattern) + os.path.sep+'*'
 
-        if sys.platform in ('darwin',):
-            self.pattern = unicodedata.normalize("NFD", self.pattern)
+        self.pattern = pattern
 
         # fnmatch and re.match both cache compiled regular expressions.
         # Nevertheless, this is about 10 times faster.
         self.regex = re.compile(translate(self.pattern))
 
-    @normalized
-    def match(self, path):
-        matches = self.regex.match(path+os.path.sep) is not None
-        if matches:
-            self.match_count += 1
-        return matches
+    def _match(self, path):
+        return (self.regex.match(path + os.path.sep) is not None)
 
-    def __repr__(self):
-        return '%s(%s)' % (type(self), self.pattern)
 
-    def __str__(self):
-        return self.pattern_orig
+class ExcludeRegex(PatternBase):
+    """Regular expression to exclude.
+    """
+    def _prepare(self, pattern):
+        self.pattern = pattern
+        self.regex = re.compile(pattern)
+
+    def _match(self, path):
+        # Normalize path separators
+        if os.path.sep != '/':
+            path = path.replace(os.path.sep, '/')
+
+        return (self.regex.search(path) is not None)
+
+
+_DEFAULT_PATTERN_STYLE = "fm"
+_PATTERN_STYLES = {
+        "fm": ExcludePattern,
+        "re": ExcludeRegex,
+        }
+
+
+def parse_pattern(pattern):
+    """Read pattern from string and return an instance of the appropriate implementation class.
+    """
+    if len(pattern) > 2 and pattern[2] == ":" and pattern[:2].isalnum():
+        (style, pattern) = (pattern[:2], pattern[3:])
+    else:
+        style = _DEFAULT_PATTERN_STYLE
+
+    cls = _PATTERN_STYLES.get(style, None)
+
+    if cls is None:
+        raise ValueError("Unknown pattern style: {}".format(style))
+
+    return cls(pattern)
 
 
 def timestamp(s):

+ 73 - 0
borg/testsuite/archiver.py

@@ -489,6 +489,79 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             self.cmd('extract', '--exclude-from=' + self.exclude_file_path, self.repository_location + '::test')
         self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'file3'])
 
+    def test_extract_include_exclude_regex(self):
+        self.cmd('init', self.repository_location)
+        self.create_regular_file('file1', size=1024 * 80)
+        self.create_regular_file('file2', size=1024 * 80)
+        self.create_regular_file('file3', size=1024 * 80)
+        self.create_regular_file('file4', size=1024 * 80)
+        self.create_regular_file('file333', size=1024 * 80)
+
+        # Create with regular expression exclusion for file4
+        self.cmd('create', '--exclude=re:input/file4$', self.repository_location + '::test', 'input')
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+        self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'file2', 'file3', 'file333'])
+        shutil.rmtree('output/input')
+
+        # Extract with regular expression exclusion
+        with changedir('output'):
+            self.cmd('extract', '--exclude=re:file3+', self.repository_location + '::test')
+        self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'file2'])
+        shutil.rmtree('output/input')
+
+        # Combine --exclude with fnmatch and regular expression
+        with changedir('output'):
+            self.cmd('extract', '--exclude=input/file2', '--exclude=re:file[01]', self.repository_location + '::test')
+        self.assert_equal(sorted(os.listdir('output/input')), ['file3', 'file333'])
+        shutil.rmtree('output/input')
+
+        # Combine --exclude-from and regular expression exclusion
+        with changedir('output'):
+            self.cmd('extract', '--exclude-from=' + self.exclude_file_path, '--exclude=re:file1',
+                     '--exclude=re:file(\\d)\\1\\1$', self.repository_location + '::test')
+        self.assert_equal(sorted(os.listdir('output/input')), ['file3'])
+
+    def test_extract_include_exclude_regex_from_file(self):
+        self.cmd('init', self.repository_location)
+        self.create_regular_file('file1', size=1024 * 80)
+        self.create_regular_file('file2', size=1024 * 80)
+        self.create_regular_file('file3', size=1024 * 80)
+        self.create_regular_file('file4', size=1024 * 80)
+        self.create_regular_file('file333', size=1024 * 80)
+        self.create_regular_file('aa:something', size=1024 * 80)
+
+        # Create while excluding using mixed pattern styles
+        with open(self.exclude_file_path, 'wb') as fd:
+            fd.write(b're:input/file4$\n')
+            fd.write(b'fm:*aa:*thing\n')
+
+        self.cmd('create', '--exclude-from=' + self.exclude_file_path, self.repository_location + '::test', 'input')
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+        self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'file2', 'file3', 'file333'])
+        shutil.rmtree('output/input')
+
+        # Exclude using regular expression
+        with open(self.exclude_file_path, 'wb') as fd:
+            fd.write(b're:file3+\n')
+
+        with changedir('output'):
+            self.cmd('extract', '--exclude-from=' + self.exclude_file_path, self.repository_location + '::test')
+        self.assert_equal(sorted(os.listdir('output/input')), ['file1', 'file2'])
+        shutil.rmtree('output/input')
+
+        # Mixed exclude pattern styles
+        with open(self.exclude_file_path, 'wb') as fd:
+            fd.write(b're:file(\\d)\\1\\1$\n')
+            fd.write(b'fm:nothingwillmatchthis\n')
+            fd.write(b'*/file1\n')
+            fd.write(b're:file2$\n')
+
+        with changedir('output'):
+            self.cmd('extract', '--exclude-from=' + self.exclude_file_path, self.repository_location + '::test')
+        self.assert_equal(sorted(os.listdir('output/input')), ['file3'])
+
     def test_exclude_caches(self):
         self.cmd('init', self.repository_location)
         self.create_regular_file('file1', size=1024 * 80)

+ 128 - 28
borg/testsuite/helpers.py

@@ -10,9 +10,9 @@ import msgpack
 import msgpack.fallback
 
 from ..helpers import adjust_patterns, exclude_path, Location, format_file_size, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, \
-    prune_within, prune_split, get_cache_dir, Statistics, is_slow_msgpack, yes, \
+    prune_within, prune_split, get_cache_dir, Statistics, is_slow_msgpack, yes, ExcludeRegex, \
     StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, \
-    ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes
+    ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern
 from . import BaseTestCase, environment_variable, FakeInputs
 
 
@@ -160,8 +160,32 @@ class FormatTimedeltaTestCase(BaseTestCase):
         )
 
 
-class PatternTestCase(BaseTestCase):
+def check_patterns(files, paths, excludes, expected):
+    """Utility for testing exclusion patterns.
+    """
+    patterns = adjust_patterns(paths, excludes)
+    included = [path for path in files if not exclude_path(path, patterns)]
 
+    assert included == (files if expected is None else expected)
+
+
+@pytest.mark.parametrize("paths, excludes, expected", [
+    # "None" means all files, i.e. none excluded
+    ([], [], None),
+    (['/'], [], None),
+    (['/'], ['/h'], None),
+    (['/'], ['/home'], ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg']),
+    (['/'], ['/home/'], ['/etc/passwd', '/etc/hosts', '/home', '/var/log/messages', '/var/log/dmesg']),
+    (['/home/u'], [], []),
+    (['/', '/home', '/etc/hosts'], ['/'], []),
+    (['/home/'], ['/home/user2'], ['/home', '/home/user/.profile', '/home/user/.bashrc']),
+    (['/'], ['*.profile', '/var/log'],
+     ['/etc/passwd', '/etc/hosts', '/home', '/home/user/.bashrc', '/home/user2/public_html/index.html']),
+    (['/'], ['/home/*/public_html', '*.profile', '*/log/*'],
+     ['/etc/passwd', '/etc/hosts', '/home', '/home/user/.bashrc']),
+    (['/etc/', '/var'], ['dmesg'], ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg']),
+    ])
+def test_patterns(paths, excludes, expected):
     files = [
         '/etc/passwd', '/etc/hosts', '/home',
         '/home/user/.profile', '/home/user/.bashrc',
@@ -169,28 +193,44 @@ class PatternTestCase(BaseTestCase):
         '/var/log/messages', '/var/log/dmesg',
     ]
 
-    def evaluate(self, paths, excludes):
-        patterns = adjust_patterns(paths, [ExcludePattern(p) for p in excludes])
-        return [path for path in self.files if not exclude_path(path, patterns)]
+    check_patterns(files, paths, [ExcludePattern(p) for p in excludes], expected)
 
-    def test(self):
-        self.assert_equal(self.evaluate(['/'], []), self.files)
-        self.assert_equal(self.evaluate([], []), self.files)
-        self.assert_equal(self.evaluate(['/'], ['/h']), self.files)
-        self.assert_equal(self.evaluate(['/'], ['/home']),
-                          ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg'])
-        self.assert_equal(self.evaluate(['/'], ['/home/']),
-                          ['/etc/passwd', '/etc/hosts', '/home', '/var/log/messages', '/var/log/dmesg'])
-        self.assert_equal(self.evaluate(['/home/u'], []), [])
-        self.assert_equal(self.evaluate(['/', '/home', '/etc/hosts'], ['/']), [])
-        self.assert_equal(self.evaluate(['/home/'], ['/home/user2']),
-                          ['/home', '/home/user/.profile', '/home/user/.bashrc'])
-        self.assert_equal(self.evaluate(['/'], ['*.profile', '/var/log']),
-                          ['/etc/passwd', '/etc/hosts', '/home', '/home/user/.bashrc', '/home/user2/public_html/index.html'])
-        self.assert_equal(self.evaluate(['/'], ['/home/*/public_html', '*.profile', '*/log/*']),
-                          ['/etc/passwd', '/etc/hosts', '/home', '/home/user/.bashrc'])
-        self.assert_equal(self.evaluate(['/etc/', '/var'], ['dmesg']),
-                          ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg'])
+
+@pytest.mark.parametrize("paths, excludes, expected", [
+    # "None" means all files, i.e. none excluded
+    ([], [], None),
+    (['/'], [], None),
+    (['/'], ['.*'], []),
+    (['/'], ['^/'], []),
+    (['/'], ['^abc$'], None),
+    (['/'], ['^(?!/home/)'],
+     ['/home/user/.profile', '/home/user/.bashrc', '/home/user2/.profile',
+      '/home/user2/public_html/index.html']),
+    ])
+def test_patterns_regex(paths, excludes, expected):
+    files = [
+        '/srv/data', '/foo/bar', '/home',
+        '/home/user/.profile', '/home/user/.bashrc',
+        '/home/user2/.profile', '/home/user2/public_html/index.html',
+        '/opt/log/messages.txt', '/opt/log/dmesg.txt',
+    ]
+
+    patterns = []
+
+    for i in excludes:
+        pat = ExcludeRegex(i)
+        assert str(pat) == i
+        assert pat.pattern == i
+        patterns.append(pat)
+
+    check_patterns(files, paths, patterns, expected)
+
+
+def test_regex_pattern():
+    # The forward slash must match the platform-specific path separator
+    assert ExcludeRegex("^/$").match("/")
+    assert ExcludeRegex("^/$").match(os.path.sep)
+    assert not ExcludeRegex(r"^\\$").match("/")
 
 
 @pytest.mark.skipif(sys.platform in ('darwin',), reason='all but OS X test')
@@ -199,31 +239,40 @@ class PatternNonAsciiTestCase(BaseTestCase):
         pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}'
         i = IncludePattern(pattern)
         e = ExcludePattern(pattern)
+        er = ExcludeRegex("^{}/foo$".format(pattern))
 
         assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
         assert not i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
         assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
         assert not e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
+        assert er.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
+        assert not er.match("ba\N{COMBINING ACUTE ACCENT}/foo")
 
     def testDecomposedUnicode(self):
         pattern = 'ba\N{COMBINING ACUTE ACCENT}'
         i = IncludePattern(pattern)
         e = ExcludePattern(pattern)
+        er = ExcludeRegex("^{}/foo$".format(pattern))
 
         assert not i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
         assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
         assert not e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
         assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
+        assert not er.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
+        assert er.match("ba\N{COMBINING ACUTE ACCENT}/foo")
 
     def testInvalidUnicode(self):
         pattern = str(b'ba\x80', 'latin1')
         i = IncludePattern(pattern)
         e = ExcludePattern(pattern)
+        er = ExcludeRegex("^{}/foo$".format(pattern))
 
         assert not i.match("ba/foo")
         assert i.match(str(b"ba\x80/foo", 'latin1'))
         assert not e.match("ba/foo")
         assert e.match(str(b"ba\x80/foo", 'latin1'))
+        assert not er.match("ba/foo")
+        assert er.match(str(b"ba\x80/foo", 'latin1'))
 
 
 @pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test')
@@ -232,31 +281,40 @@ class OSXPatternNormalizationTestCase(BaseTestCase):
         pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}'
         i = IncludePattern(pattern)
         e = ExcludePattern(pattern)
+        er = ExcludeRegex("^{}/foo$".format(pattern))
 
         assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
         assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
         assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
         assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
+        assert er.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
+        assert er.match("ba\N{COMBINING ACUTE ACCENT}/foo")
 
     def testDecomposedUnicode(self):
         pattern = 'ba\N{COMBINING ACUTE ACCENT}'
         i = IncludePattern(pattern)
         e = ExcludePattern(pattern)
+        er = ExcludeRegex("^{}/foo$".format(pattern))
 
         assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
         assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
         assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
         assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
+        assert er.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
+        assert er.match("ba\N{COMBINING ACUTE ACCENT}/foo")
 
     def testInvalidUnicode(self):
         pattern = str(b'ba\x80', 'latin1')
         i = IncludePattern(pattern)
         e = ExcludePattern(pattern)
+        er = ExcludeRegex("^{}/foo$".format(pattern))
 
         assert not i.match("ba/foo")
         assert i.match(str(b"ba\x80/foo", 'latin1'))
         assert not e.match("ba/foo")
         assert e.match(str(b"ba\x80/foo", 'latin1'))
+        assert not er.match("ba/foo")
+        assert er.match(str(b"ba\x80/foo", 'latin1'))
 
 
 @pytest.mark.parametrize("lines, expected", [
@@ -266,20 +324,33 @@ class OSXPatternNormalizationTestCase(BaseTestCase):
     (["*"], []),
     (["# Comment",
       "*/something00.txt",
-      "  whitespace\t",
-      "/whitespace/at/end of filename \t ",
+      "  *whitespace*  ",
       # Whitespace before comment
       " #/ws*",
       # Empty line
       "",
       "# EOF"],
-     ["/more/data", "/home"]),
+     ["/more/data", "/home", " #/wsfoobar"]),
+    (["re:.*"], []),
+    (["re:\s"], ["/data/something00.txt", "/more/data", "/home"]),
+    ([r"re:(.)(\1)"], ["/more/data", "/home", "\tstart/whitespace", "/whitespace/end\t"]),
+    (["", "", "",
+      "# This is a test with mixed pattern styles",
+      # Case-insensitive pattern
+      "re:(?i)BAR|ME$",
+      "",
+      "*whitespace*",
+      "fm:*/something00*"],
+     ["/more/data"]),
+    ([r"  re:^\s  "], ["/data/something00.txt", "/more/data", "/home", "/whitespace/end\t"]),
+    ([r"  re:\s$  "], ["/data/something00.txt", "/more/data", "/home", " #/wsfoobar", "\tstart/whitespace"]),
     ])
 def test_patterns_from_file(tmpdir, lines, expected):
     files = [
         '/data/something00.txt', '/more/data', '/home',
         ' #/wsfoobar',
-        '/whitespace/at/end of filename \t ',
+        '\tstart/whitespace',
+        '/whitespace/end\t',
     ]
 
     def evaluate(filename):
@@ -294,6 +365,35 @@ def test_patterns_from_file(tmpdir, lines, expected):
     assert evaluate(str(exclfile)) == (files if expected is None else expected)
 
 
+@pytest.mark.parametrize("pattern, cls", [
+    ("", ExcludePattern),
+
+    # Default style
+    ("*", ExcludePattern),
+    ("/data/*", ExcludePattern),
+
+    # fnmatch style
+    ("fm:", ExcludePattern),
+    ("fm:*", ExcludePattern),
+    ("fm:/data/*", ExcludePattern),
+    ("fm:fm:/data/*", ExcludePattern),
+
+    # Regular expression
+    ("re:", ExcludeRegex),
+    ("re:.*", ExcludeRegex),
+    ("re:^/something/", ExcludeRegex),
+    ("re:re:^/something/", ExcludeRegex),
+    ])
+def test_parse_pattern(pattern, cls):
+    assert isinstance(parse_pattern(pattern), cls)
+
+
+@pytest.mark.parametrize("pattern", ["aa:", "fo:*", "00:", "x1:abc"])
+def test_parse_pattern_error(pattern):
+    with pytest.raises(ValueError):
+        parse_pattern(pattern)
+
+
 def test_compression_specs():
     with pytest.raises(ValueError):
         CompressionSpec('')

+ 5 - 0
docs/usage.rst

@@ -231,6 +231,11 @@ Examples
         ~/src                             \
         --exclude '*.pyc'
 
+    # Backup home directories excluding image thumbnails (i.e. only
+    # /home/*/.thumbnails is excluded, not /home/*/*/.thumbnails)
+    $ borg create /mnt/backup::my-files /home \
+        --exclude 're:^/home/[^/]+/\.thumbnails/'
+
     # Backup the root filesystem into an archive named "root-YYYY-MM-DD"
     # use zlib compression (good, but slow) - default is no compression
     NAME="root-`date +%Y-%m-%d`"