|
@@ -10,9 +10,9 @@ import msgpack
|
|
import msgpack.fallback
|
|
import msgpack.fallback
|
|
|
|
|
|
from ..helpers import adjust_patterns, exclude_path, Location, format_file_size, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, \
|
|
from ..helpers import adjust_patterns, exclude_path, Location, format_file_size, format_timedelta, IncludePattern, ExcludePattern, make_path_safe, \
|
|
- prune_within, prune_split, get_cache_dir, Statistics, is_slow_msgpack, yes, \
|
|
|
|
|
|
+ prune_within, prune_split, get_cache_dir, Statistics, is_slow_msgpack, yes, ExcludeRegex, \
|
|
StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, \
|
|
StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec, ChunkerParams, \
|
|
- ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes
|
|
|
|
|
|
+ ProgressIndicatorPercent, ProgressIndicatorEndless, load_excludes, parse_pattern
|
|
from . import BaseTestCase, environment_variable, FakeInputs
|
|
from . import BaseTestCase, environment_variable, FakeInputs
|
|
|
|
|
|
|
|
|
|
@@ -160,8 +160,32 @@ class FormatTimedeltaTestCase(BaseTestCase):
|
|
)
|
|
)
|
|
|
|
|
|
|
|
|
|
-class PatternTestCase(BaseTestCase):
|
|
|
|
|
|
+def check_patterns(files, paths, excludes, expected):
|
|
|
|
+ """Utility for testing exclusion patterns.
|
|
|
|
+ """
|
|
|
|
+ patterns = adjust_patterns(paths, excludes)
|
|
|
|
+ included = [path for path in files if not exclude_path(path, patterns)]
|
|
|
|
|
|
|
|
+ assert included == (files if expected is None else expected)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+@pytest.mark.parametrize("paths, excludes, expected", [
|
|
|
|
+ # "None" means all files, i.e. none excluded
|
|
|
|
+ ([], [], None),
|
|
|
|
+ (['/'], [], None),
|
|
|
|
+ (['/'], ['/h'], None),
|
|
|
|
+ (['/'], ['/home'], ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg']),
|
|
|
|
+ (['/'], ['/home/'], ['/etc/passwd', '/etc/hosts', '/home', '/var/log/messages', '/var/log/dmesg']),
|
|
|
|
+ (['/home/u'], [], []),
|
|
|
|
+ (['/', '/home', '/etc/hosts'], ['/'], []),
|
|
|
|
+ (['/home/'], ['/home/user2'], ['/home', '/home/user/.profile', '/home/user/.bashrc']),
|
|
|
|
+ (['/'], ['*.profile', '/var/log'],
|
|
|
|
+ ['/etc/passwd', '/etc/hosts', '/home', '/home/user/.bashrc', '/home/user2/public_html/index.html']),
|
|
|
|
+ (['/'], ['/home/*/public_html', '*.profile', '*/log/*'],
|
|
|
|
+ ['/etc/passwd', '/etc/hosts', '/home', '/home/user/.bashrc']),
|
|
|
|
+ (['/etc/', '/var'], ['dmesg'], ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg']),
|
|
|
|
+ ])
|
|
|
|
+def test_patterns(paths, excludes, expected):
|
|
files = [
|
|
files = [
|
|
'/etc/passwd', '/etc/hosts', '/home',
|
|
'/etc/passwd', '/etc/hosts', '/home',
|
|
'/home/user/.profile', '/home/user/.bashrc',
|
|
'/home/user/.profile', '/home/user/.bashrc',
|
|
@@ -169,28 +193,44 @@ class PatternTestCase(BaseTestCase):
|
|
'/var/log/messages', '/var/log/dmesg',
|
|
'/var/log/messages', '/var/log/dmesg',
|
|
]
|
|
]
|
|
|
|
|
|
- def evaluate(self, paths, excludes):
|
|
|
|
- patterns = adjust_patterns(paths, [ExcludePattern(p) for p in excludes])
|
|
|
|
- return [path for path in self.files if not exclude_path(path, patterns)]
|
|
|
|
|
|
+ check_patterns(files, paths, [ExcludePattern(p) for p in excludes], expected)
|
|
|
|
|
|
- def test(self):
|
|
|
|
- self.assert_equal(self.evaluate(['/'], []), self.files)
|
|
|
|
- self.assert_equal(self.evaluate([], []), self.files)
|
|
|
|
- self.assert_equal(self.evaluate(['/'], ['/h']), self.files)
|
|
|
|
- self.assert_equal(self.evaluate(['/'], ['/home']),
|
|
|
|
- ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg'])
|
|
|
|
- self.assert_equal(self.evaluate(['/'], ['/home/']),
|
|
|
|
- ['/etc/passwd', '/etc/hosts', '/home', '/var/log/messages', '/var/log/dmesg'])
|
|
|
|
- self.assert_equal(self.evaluate(['/home/u'], []), [])
|
|
|
|
- self.assert_equal(self.evaluate(['/', '/home', '/etc/hosts'], ['/']), [])
|
|
|
|
- self.assert_equal(self.evaluate(['/home/'], ['/home/user2']),
|
|
|
|
- ['/home', '/home/user/.profile', '/home/user/.bashrc'])
|
|
|
|
- self.assert_equal(self.evaluate(['/'], ['*.profile', '/var/log']),
|
|
|
|
- ['/etc/passwd', '/etc/hosts', '/home', '/home/user/.bashrc', '/home/user2/public_html/index.html'])
|
|
|
|
- self.assert_equal(self.evaluate(['/'], ['/home/*/public_html', '*.profile', '*/log/*']),
|
|
|
|
- ['/etc/passwd', '/etc/hosts', '/home', '/home/user/.bashrc'])
|
|
|
|
- self.assert_equal(self.evaluate(['/etc/', '/var'], ['dmesg']),
|
|
|
|
- ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg'])
|
|
|
|
|
|
+
|
|
|
|
+@pytest.mark.parametrize("paths, excludes, expected", [
|
|
|
|
+ # "None" means all files, i.e. none excluded
|
|
|
|
+ ([], [], None),
|
|
|
|
+ (['/'], [], None),
|
|
|
|
+ (['/'], ['.*'], []),
|
|
|
|
+ (['/'], ['^/'], []),
|
|
|
|
+ (['/'], ['^abc$'], None),
|
|
|
|
+ (['/'], ['^(?!/home/)'],
|
|
|
|
+ ['/home/user/.profile', '/home/user/.bashrc', '/home/user2/.profile',
|
|
|
|
+ '/home/user2/public_html/index.html']),
|
|
|
|
+ ])
|
|
|
|
+def test_patterns_regex(paths, excludes, expected):
|
|
|
|
+ files = [
|
|
|
|
+ '/srv/data', '/foo/bar', '/home',
|
|
|
|
+ '/home/user/.profile', '/home/user/.bashrc',
|
|
|
|
+ '/home/user2/.profile', '/home/user2/public_html/index.html',
|
|
|
|
+ '/opt/log/messages.txt', '/opt/log/dmesg.txt',
|
|
|
|
+ ]
|
|
|
|
+
|
|
|
|
+ patterns = []
|
|
|
|
+
|
|
|
|
+ for i in excludes:
|
|
|
|
+ pat = ExcludeRegex(i)
|
|
|
|
+ assert str(pat) == i
|
|
|
|
+ assert pat.pattern == i
|
|
|
|
+ patterns.append(pat)
|
|
|
|
+
|
|
|
|
+ check_patterns(files, paths, patterns, expected)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+def test_regex_pattern():
|
|
|
|
+ # The forward slash must match the platform-specific path separator
|
|
|
|
+ assert ExcludeRegex("^/$").match("/")
|
|
|
|
+ assert ExcludeRegex("^/$").match(os.path.sep)
|
|
|
|
+ assert not ExcludeRegex(r"^\\$").match("/")
|
|
|
|
|
|
|
|
|
|
@pytest.mark.skipif(sys.platform in ('darwin',), reason='all but OS X test')
|
|
@pytest.mark.skipif(sys.platform in ('darwin',), reason='all but OS X test')
|
|
@@ -199,31 +239,40 @@ class PatternNonAsciiTestCase(BaseTestCase):
|
|
pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}'
|
|
pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}'
|
|
i = IncludePattern(pattern)
|
|
i = IncludePattern(pattern)
|
|
e = ExcludePattern(pattern)
|
|
e = ExcludePattern(pattern)
|
|
|
|
+ er = ExcludeRegex("^{}/foo$".format(pattern))
|
|
|
|
|
|
assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
|
|
assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
|
|
assert not i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
|
|
assert not i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
|
|
assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
|
|
assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
|
|
assert not e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
|
|
assert not e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
|
|
|
|
+ assert er.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
|
|
|
|
+ assert not er.match("ba\N{COMBINING ACUTE ACCENT}/foo")
|
|
|
|
|
|
def testDecomposedUnicode(self):
|
|
def testDecomposedUnicode(self):
|
|
pattern = 'ba\N{COMBINING ACUTE ACCENT}'
|
|
pattern = 'ba\N{COMBINING ACUTE ACCENT}'
|
|
i = IncludePattern(pattern)
|
|
i = IncludePattern(pattern)
|
|
e = ExcludePattern(pattern)
|
|
e = ExcludePattern(pattern)
|
|
|
|
+ er = ExcludeRegex("^{}/foo$".format(pattern))
|
|
|
|
|
|
assert not i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
|
|
assert not i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
|
|
assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
|
|
assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
|
|
assert not e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
|
|
assert not e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
|
|
assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
|
|
assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
|
|
|
|
+ assert not er.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
|
|
|
|
+ assert er.match("ba\N{COMBINING ACUTE ACCENT}/foo")
|
|
|
|
|
|
def testInvalidUnicode(self):
|
|
def testInvalidUnicode(self):
|
|
pattern = str(b'ba\x80', 'latin1')
|
|
pattern = str(b'ba\x80', 'latin1')
|
|
i = IncludePattern(pattern)
|
|
i = IncludePattern(pattern)
|
|
e = ExcludePattern(pattern)
|
|
e = ExcludePattern(pattern)
|
|
|
|
+ er = ExcludeRegex("^{}/foo$".format(pattern))
|
|
|
|
|
|
assert not i.match("ba/foo")
|
|
assert not i.match("ba/foo")
|
|
assert i.match(str(b"ba\x80/foo", 'latin1'))
|
|
assert i.match(str(b"ba\x80/foo", 'latin1'))
|
|
assert not e.match("ba/foo")
|
|
assert not e.match("ba/foo")
|
|
assert e.match(str(b"ba\x80/foo", 'latin1'))
|
|
assert e.match(str(b"ba\x80/foo", 'latin1'))
|
|
|
|
+ assert not er.match("ba/foo")
|
|
|
|
+ assert er.match(str(b"ba\x80/foo", 'latin1'))
|
|
|
|
|
|
|
|
|
|
@pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test')
|
|
@pytest.mark.skipif(sys.platform not in ('darwin',), reason='OS X test')
|
|
@@ -232,31 +281,40 @@ class OSXPatternNormalizationTestCase(BaseTestCase):
|
|
pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}'
|
|
pattern = 'b\N{LATIN SMALL LETTER A WITH ACUTE}'
|
|
i = IncludePattern(pattern)
|
|
i = IncludePattern(pattern)
|
|
e = ExcludePattern(pattern)
|
|
e = ExcludePattern(pattern)
|
|
|
|
+ er = ExcludeRegex("^{}/foo$".format(pattern))
|
|
|
|
|
|
assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
|
|
assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
|
|
assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
|
|
assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
|
|
assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
|
|
assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
|
|
assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
|
|
assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
|
|
|
|
+ assert er.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
|
|
|
|
+ assert er.match("ba\N{COMBINING ACUTE ACCENT}/foo")
|
|
|
|
|
|
def testDecomposedUnicode(self):
|
|
def testDecomposedUnicode(self):
|
|
pattern = 'ba\N{COMBINING ACUTE ACCENT}'
|
|
pattern = 'ba\N{COMBINING ACUTE ACCENT}'
|
|
i = IncludePattern(pattern)
|
|
i = IncludePattern(pattern)
|
|
e = ExcludePattern(pattern)
|
|
e = ExcludePattern(pattern)
|
|
|
|
+ er = ExcludeRegex("^{}/foo$".format(pattern))
|
|
|
|
|
|
assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
|
|
assert i.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
|
|
assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
|
|
assert i.match("ba\N{COMBINING ACUTE ACCENT}/foo")
|
|
assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
|
|
assert e.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
|
|
assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
|
|
assert e.match("ba\N{COMBINING ACUTE ACCENT}/foo")
|
|
|
|
+ assert er.match("b\N{LATIN SMALL LETTER A WITH ACUTE}/foo")
|
|
|
|
+ assert er.match("ba\N{COMBINING ACUTE ACCENT}/foo")
|
|
|
|
|
|
def testInvalidUnicode(self):
|
|
def testInvalidUnicode(self):
|
|
pattern = str(b'ba\x80', 'latin1')
|
|
pattern = str(b'ba\x80', 'latin1')
|
|
i = IncludePattern(pattern)
|
|
i = IncludePattern(pattern)
|
|
e = ExcludePattern(pattern)
|
|
e = ExcludePattern(pattern)
|
|
|
|
+ er = ExcludeRegex("^{}/foo$".format(pattern))
|
|
|
|
|
|
assert not i.match("ba/foo")
|
|
assert not i.match("ba/foo")
|
|
assert i.match(str(b"ba\x80/foo", 'latin1'))
|
|
assert i.match(str(b"ba\x80/foo", 'latin1'))
|
|
assert not e.match("ba/foo")
|
|
assert not e.match("ba/foo")
|
|
assert e.match(str(b"ba\x80/foo", 'latin1'))
|
|
assert e.match(str(b"ba\x80/foo", 'latin1'))
|
|
|
|
+ assert not er.match("ba/foo")
|
|
|
|
+ assert er.match(str(b"ba\x80/foo", 'latin1'))
|
|
|
|
|
|
|
|
|
|
@pytest.mark.parametrize("lines, expected", [
|
|
@pytest.mark.parametrize("lines, expected", [
|
|
@@ -266,20 +324,33 @@ class OSXPatternNormalizationTestCase(BaseTestCase):
|
|
(["*"], []),
|
|
(["*"], []),
|
|
(["# Comment",
|
|
(["# Comment",
|
|
"*/something00.txt",
|
|
"*/something00.txt",
|
|
- " whitespace\t",
|
|
|
|
- "/whitespace/at/end of filename \t ",
|
|
|
|
|
|
+ " *whitespace* ",
|
|
# Whitespace before comment
|
|
# Whitespace before comment
|
|
" #/ws*",
|
|
" #/ws*",
|
|
# Empty line
|
|
# Empty line
|
|
"",
|
|
"",
|
|
"# EOF"],
|
|
"# EOF"],
|
|
- ["/more/data", "/home"]),
|
|
|
|
|
|
+ ["/more/data", "/home", " #/wsfoobar"]),
|
|
|
|
+ (["re:.*"], []),
|
|
|
|
+ (["re:\s"], ["/data/something00.txt", "/more/data", "/home"]),
|
|
|
|
+ ([r"re:(.)(\1)"], ["/more/data", "/home", "\tstart/whitespace", "/whitespace/end\t"]),
|
|
|
|
+ (["", "", "",
|
|
|
|
+ "# This is a test with mixed pattern styles",
|
|
|
|
+ # Case-insensitive pattern
|
|
|
|
+ "re:(?i)BAR|ME$",
|
|
|
|
+ "",
|
|
|
|
+ "*whitespace*",
|
|
|
|
+ "fm:*/something00*"],
|
|
|
|
+ ["/more/data"]),
|
|
|
|
+ ([r" re:^\s "], ["/data/something00.txt", "/more/data", "/home", "/whitespace/end\t"]),
|
|
|
|
+ ([r" re:\s$ "], ["/data/something00.txt", "/more/data", "/home", " #/wsfoobar", "\tstart/whitespace"]),
|
|
])
|
|
])
|
|
def test_patterns_from_file(tmpdir, lines, expected):
|
|
def test_patterns_from_file(tmpdir, lines, expected):
|
|
files = [
|
|
files = [
|
|
'/data/something00.txt', '/more/data', '/home',
|
|
'/data/something00.txt', '/more/data', '/home',
|
|
' #/wsfoobar',
|
|
' #/wsfoobar',
|
|
- '/whitespace/at/end of filename \t ',
|
|
|
|
|
|
+ '\tstart/whitespace',
|
|
|
|
+ '/whitespace/end\t',
|
|
]
|
|
]
|
|
|
|
|
|
def evaluate(filename):
|
|
def evaluate(filename):
|
|
@@ -294,6 +365,35 @@ def test_patterns_from_file(tmpdir, lines, expected):
|
|
assert evaluate(str(exclfile)) == (files if expected is None else expected)
|
|
assert evaluate(str(exclfile)) == (files if expected is None else expected)
|
|
|
|
|
|
|
|
|
|
|
|
+@pytest.mark.parametrize("pattern, cls", [
|
|
|
|
+ ("", ExcludePattern),
|
|
|
|
+
|
|
|
|
+ # Default style
|
|
|
|
+ ("*", ExcludePattern),
|
|
|
|
+ ("/data/*", ExcludePattern),
|
|
|
|
+
|
|
|
|
+ # fnmatch style
|
|
|
|
+ ("fm:", ExcludePattern),
|
|
|
|
+ ("fm:*", ExcludePattern),
|
|
|
|
+ ("fm:/data/*", ExcludePattern),
|
|
|
|
+ ("fm:fm:/data/*", ExcludePattern),
|
|
|
|
+
|
|
|
|
+ # Regular expression
|
|
|
|
+ ("re:", ExcludeRegex),
|
|
|
|
+ ("re:.*", ExcludeRegex),
|
|
|
|
+ ("re:^/something/", ExcludeRegex),
|
|
|
|
+ ("re:re:^/something/", ExcludeRegex),
|
|
|
|
+ ])
|
|
|
|
+def test_parse_pattern(pattern, cls):
|
|
|
|
+ assert isinstance(parse_pattern(pattern), cls)
|
|
|
|
+
|
|
|
|
+
|
|
|
|
+@pytest.mark.parametrize("pattern", ["aa:", "fo:*", "00:", "x1:abc"])
|
|
|
|
+def test_parse_pattern_error(pattern):
|
|
|
|
+ with pytest.raises(ValueError):
|
|
|
|
+ parse_pattern(pattern)
|
|
|
|
+
|
|
|
|
+
|
|
def test_compression_specs():
|
|
def test_compression_specs():
|
|
with pytest.raises(ValueError):
|
|
with pytest.raises(ValueError):
|
|
CompressionSpec('')
|
|
CompressionSpec('')
|