2
0
Эх сурвалжийг харах

Merge pull request #7619 from DavidRambo/master

Add support and tests for shell-style alternatives.
TW 2 жил өмнө
parent
commit
021c9b656c

+ 2 - 1
src/borg/archiver/help_cmd.py

@@ -59,7 +59,8 @@ class HelpMixIn:
             Like fnmatch patterns these are similar to shell patterns. The difference
             is that the pattern may include ``**/`` for matching zero or more directory
             levels, ``*`` for matching zero or more arbitrary characters with the
-            exception of any path separator. A leading path separator is always removed.
+            exception of any path separator, ``{}`` containing comma-separated
+            alternative patterns. A leading path separator is always removed.
 
         `Regular expressions <https://docs.python.org/3/library/re.html>`_, selector ``re:``
             Unlike shell patterns, regular expressions are not required to match the full

+ 78 - 3
src/borg/helpers/shellpattern.py

@@ -1,5 +1,6 @@
 import os
 import re
+from queue import LifoQueue
 
 
 def translate(pat, match_end=r"\Z"):
@@ -17,10 +18,9 @@ def translate(pat, match_end=r"\Z"):
 
     :copyright: 2001-2016 Python Software Foundation. All rights reserved.
     :license: PSFLv2
-
-    TODO: support {alt1,alt2} shell-style alternatives
-
     """
+    pat = _translate_alternatives(pat)
+
     sep = os.path.sep
     n = len(pat)
     i = 0
@@ -60,7 +60,82 @@ def translate(pat, match_end=r"\Z"):
                 elif stuff[0] == "^":
                     stuff = "\\" + stuff
                 res += "[%s]" % stuff
+        elif c in "(|)":
+            if i > 0 and pat[i - 1] != "\\":
+                res += c
         else:
             res += re.escape(c)
 
     return "(?ms)" + res + match_end
+
+
+def _parse_braces(pat):
+    """Returns the index values of paired braces in `pat` as a list of tuples.
+
+    The dict's keys are the indexes corresponding to opening braces. Initially,
+    they are set to a value of `None`. Once a corresponding closing brace is found,
+    the value is updated. All dict keys with a positive int value are valid pairs.
+
+    Cannot rely on re.match("[^\\(\\\\)*]?{.*[^\\(\\\\)*]}") because, while it
+    does handle unpaired braces and nested pairs of braces, it misses sequences
+    of paired braces. E.g.: "{foo,bar}{bar,baz}" would translate, incorrectly, to
+    "(foo|bar\\}\\{bar|baz)" instead of, correctly, to "(foo|bar)(bar|baz)"
+
+    So this function parses in a left-to-right fashion, tracking pairs with a LIFO
+    queue: pushing opening braces on and popping them off when finding a closing
+    brace.
+    """
+    curly_q = LifoQueue()
+    pairs: dict[int, int] = dict()
+
+    for idx, c in enumerate(pat):
+        if c == "{":
+            if idx == 0 or pat[idx - 1] != "\\":
+                # Opening brace is not escaped.
+                # Add to dict
+                pairs[idx] = None
+                # Add to queue
+                curly_q.put(idx)
+        if c == "}" and curly_q.qsize():
+            # If queue is empty, then cannot close pair.
+            if idx > 0 and pat[idx - 1] != "\\":
+                # Closing brace is not escaped.
+                # Pop off the index of the corresponding opening brace, which
+                # provides the key in the dict of pairs, and set its value.
+                pairs[curly_q.get()] = idx
+    return [(opening, closing) for opening, closing in pairs.items() if closing is not None]
+
+
def _translate_alternatives(pat):
    """Translate shell-style alternatives in `pat` into regular expression groups.

    For example: ``{alt1,alt2}`` -> ``(alt1|alt2)``

    A pair of braces is only turned into a group if it directly contains at least
    one unescaped comma. Commas that belong to a nested pair of braces do not count
    for the enclosing pair, which matches shell brace expansion: ``{a{b,c}}``
    becomes ``{a(b|c)}`` (the outer braces stay literal), while ``{a,b{c,d}}``
    becomes ``(a|b(c|d))``.

    Escaped commas are not separators: passing, e.g., "{a\\,b}.txt" to the shell
    expands to "{a,b}.txt", whereas "{a\\,,b}.txt" expands to "a,.txt" and "b.txt".
    Escaped commas, braces that do not form a group and everything outside of
    braces are returned unchanged.

    :param pat: shell-style pattern
    :return: `pat` with every alternative group rewritten as ``(...|...)``
    """
    brace_pairs = _parse_braces(pat)
    closing_of = dict(brace_pairs)  # opening index -> closing index of each pair

    chars = list(pat)  # Strings are immutable; edit a list of characters instead.

    for opening, closing in brace_pairs:
        # Collect the unescaped commas that sit directly inside this pair.
        separators = []
        i = opening + 1
        while i < closing:
            if i in closing_of:
                # Nested pair: its commas are handled when that pair is processed.
                i = closing_of[i] + 1
                continue
            if pat[i] == "," and pat[i - 1] != "\\":
                separators.append(i)
            i += 1

        # Without a separator the braces are literal characters, not a group.
        if separators:
            chars[opening] = "("
            chars[closing] = ")"
            for sep_idx in separators:
                chars[sep_idx] = "|"

    return "".join(chars)

+ 15 - 0
src/borg/testsuite/shellpattern.py

@@ -66,6 +66,16 @@ def check(path, pattern):
         ("foo3", ["foo[!12]"]),
         ("foo^", ["foo[^!]"]),
         ("foo!", ["foo[^!]"]),
+        # Group
+        ("foo1", ["{foo1,foo2}"]),
+        ("foo2", ["foo{1,2}"]),
+        ("foo", ["foo{,1,2}"]),
+        ("foo1", ["{foo{1,2},bar}"]),
+        ("bar", ["{foo{1,2},bar}"]),
+        ("{foo", ["{foo{,bar}"]),
+        ("{foobar", ["{foo{,bar}"]),
+        ("{foo},bar}", ["{foo},bar}"]),
+        ("bar/foobar", ["**/foo{ba[!z]*,[0-9]}"]),
     ],
 )
 def test_match(path, patterns):
@@ -99,6 +109,11 @@ def test_match(path, patterns):
         # Inverted set
         ("foo1", ["foo[!12]"]),
         ("foo2", ["foo[!12]"]),
+        # Group
+        ("foo", ["{foo1,foo2}"]),
+        ("foo", ["foo{1,2}"]),
+        ("foo{1,2}", ["foo{1,2}"]),
+        ("bar/foobaz", ["**/foo{ba[!z]*,[0-9]}"]),
     ],
 )
 def test_mismatch(path, patterns):