10 سال پیش · 638204fd0e
--- a/borg/helpers.py
+++ b/borg/helpers.py
@@ -1,12 +1,15 @@
 
				 import argparse
			
 
				 import binascii
			
 
				 from collections import namedtuple
			
 
				+from functools import wraps
			
 
				 import grp
			
 
				 import os
			
 
				 import pwd
			
 
				 import re
			
 
				 import sys
			
 
				 import time
			
 
				+import unicodedata
			
 
				+
			
 
				 from datetime import datetime, timezone, timedelta
			
 
				 from fnmatch import translate
			
 
				 from operator import attrgetter
			
@@ -220,6 +223,23 @@ def exclude_path(path, patterns):
 
				 # unify the two cases, we add a path separator to the end of
			
 
				 # the path before matching.
			
 
				 
			
 
				+def normalized(func):
			
 
				+    """Decorator for the Pattern match methods. On OS X it returns a wrapper
			
 
				+    that NFD-normalizes the path so that it matches the NFD-normalized
			
 
				+    pattern; on all other platforms it returns the original method."""
			
 
				+    @wraps(func)
			
 
				+    def normalize_wrapper(self, path):
			
 
				+        return func(self, unicodedata.normalize("NFD", path))
			
 
				+
			
 
				+    if sys.platform in ('darwin',):
			
 
				+        # HFS+ converts paths to a canonical decomposed Unicode form, so
			
 
				+        # users shouldn't be required to enter an exact match.
			
 
				+        return normalize_wrapper
			
 
				+    else:
			
 
				+        # Windows and Unix filesystems allow different Unicode normalization
			
 
				+        # forms, so users always have to enter an exact match.
			
 
				+        return func
			
 
				+
			
 
				 class IncludePattern:
			
 
				     """Literal files or directories listed on the command line
			
 
				     for some operations (e.g. extract, but not create).
			
@@ -227,8 +247,12 @@ class IncludePattern:
 
				     path match as well.  A trailing slash makes no difference.
			
 
				     """
			
 
				     def __init__(self, pattern):
			
 
				+        if sys.platform in ('darwin',):
			
 
				+            pattern = unicodedata.normalize("NFD", pattern)
			
 
				+
			
 
				         self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep
			
 
				 
			
 
				+    @normalized
			
 
				     def match(self, path):
			
 
				         return (path+os.path.sep).startswith(self.pattern)
			
 
				 
			
@@ -245,10 +269,15 @@ class ExcludePattern(IncludePattern):
 
				             self.pattern = os.path.normpath(pattern).rstrip(os.path.sep)+os.path.sep+'*'+os.path.sep
			
 
				         else:
			
 
				             self.pattern = os.path.normpath(pattern)+os.path.sep+'*'
			
 
				+
			
 
				+        if sys.platform in ('darwin',):
			
 
				+            self.pattern = unicodedata.normalize("NFD", self.pattern)
			
 
				+
			
 
				         # fnmatch and re.match both cache compiled regular expressions.
			
 
				         # Nevertheless, this is about 10 times faster.
			
 
				         self.regex = re.compile(translate(self.pattern))
			
 
				 
			
 
				+    @normalized
			
 
				     def match(self, path):
			
 
				         return self.regex.match(path+os.path.sep) is not None
			
 
				 
			
--- a/borg/testsuite/helpers.py
+++ b/borg/testsuite/helpers.py
@@ -3,9 +3,10 @@ from time import mktime, strptime
 
				 from datetime import datetime, timezone, timedelta
			
 
				 
			
 
				 import pytest
			
 
				+import sys
			
 
				 import msgpack
			
 
				 
			
 
				-from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, ExcludePattern, make_path_safe, \
			
 
				+from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, \
			
 
				     prune_within, prune_split, \
			
 
				     StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams
			
 
				 from . import BaseTestCase
			
@@ -178,6 +179,72 @@ class PatternTestCase(BaseTestCase):
 
				                           ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg'])
			
 
				 
			
 
				 
			
 
				+@pytest.mark.skipif(sys.platform in ('darwin',), reason='all but OS X test')
			
 
				+class PatternNonAsciiTestCase(BaseTestCase):
			
 
				+    def testComposedUnicode(self):
			
 
				+        pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}'
			
 
				+        i = IncludePattern(pattern)
			
 
				+        e = ExcludePattern(pattern)
			
 
				+
			
 
				+        assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
			
 
				+        assert not i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
			
 
				+        assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
			
 
				+        assert not e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
			
 
				+
			
 
				+    def testDecomposedUnicode(self):
			
 
				+        pattern = 'ba\N{COMBINING ACUTE ACCENT}'
			
 
				+        i = IncludePattern(pattern)
			
 
				+        e = ExcludePattern(pattern)
			
 
				+
			
 
				+        assert not i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
			
 
				+        assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
			
 
				+        assert not e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
			
 
				+        assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
			
 
				+    
			
 
				+    def testInvalidUnicode(self):
			
 
				+        pattern = str(b'ba\x80', 'latin1')
			
 
				+        i = IncludePattern(pattern)
			
 
				+        e = ExcludePattern(pattern)
			
 
				+
			
 
				+        assert not i.match("ba/foo")
			
 
				+        assert i.match(str(b"ba\x80/foo", 'latin1'))
			
 
				+        assert not e.match("ba/foo")
			
 
				+        assert e.match(str(b"ba\x80/foo", 'latin1'))
			
 
				+
			
 
				+
			
 
				+@pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test')
			
 
				+class OSXPatternNormalizationTestCase(BaseTestCase):
			
 
				+    def testComposedUnicode(self):
			
 
				+        pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}'
			
 
				+        i = IncludePattern(pattern)
			
 
				+        e = ExcludePattern(pattern)
			
 
				+
			
 
				+        assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
			
 
				+        assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
			
 
				+        assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
			
 
				+        assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
			
 
				+    
			
 
				+    def testDecomposedUnicode(self):
			
 
				+        pattern = 'ba\N{COMBINING ACUTE ACCENT}'
			
 
				+        i = IncludePattern(pattern)
			
 
				+        e = ExcludePattern(pattern)
			
 
				+
			
 
				+        assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
			
 
				+        assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
			
 
				+        assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
			
 
				+        assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
			
 
				+    
			
 
				+    def testInvalidUnicode(self):
			
 
				+        pattern = str(b'ba\x80', 'latin1')
			
 
				+        i = IncludePattern(pattern)
			
 
				+        e = ExcludePattern(pattern)
			
 
				+
			
 
				+        assert not i.match("ba/foo")
			
 
				+        assert i.match(str(b"ba\x80/foo", 'latin1'))
			
 
				+        assert not e.match("ba/foo")
			
 
				+        assert e.match(str(b"ba\x80/foo", 'latin1'))
			
 
				+
			
 
				+
			
 
				 def test_compression_specs():
			
 
				     with pytest.raises(ValueError):
			
 
				         CompressionSpec('')