Sfoglia il codice sorgente

Merge pull request #7028 from ThomasWaldmann/match-archives

implement pattern support for --match-archives, fixes #6504
TW 2 anni fa
parent
commit
78b1301b98

+ 9 - 9
src/borg/archive.py

@@ -1654,18 +1654,18 @@ class ArchiveChecker:
         self.possibly_superseded = set()
 
     def check(
-        self, repository, repair=False, first=0, last=0, sort_by="", glob=None, verify_data=False, save_space=False
+        self, repository, repair=False, first=0, last=0, sort_by="", match=None, verify_data=False, save_space=False
     ):
         """Perform a set of checks on 'repository'
 
         :param repair: enable repair mode, write updated or corrected data into repository
         :param first/last/sort_by: only check this number of first/last archives ordered by sort_by
-        :param glob: only check archives matching this glob
+        :param match: only check archives matching this pattern
         :param verify_data: integrity verification of data referenced by archives
         :param save_space: Repository.commit(save_space)
         """
         logger.info("Starting archive consistency check...")
-        self.check_all = not any((first, last, glob))
+        self.check_all = not any((first, last, match))
         self.repair = repair
         self.repository = repository
         self.init_chunks()
@@ -1688,7 +1688,7 @@ class ArchiveChecker:
                 self.error_found = True
                 del self.chunks[Manifest.MANIFEST_ID]
                 self.manifest = self.rebuild_manifest()
-        self.rebuild_refcounts(glob=glob, first=first, last=last, sort_by=sort_by)
+        self.rebuild_refcounts(match=match, first=first, last=last, sort_by=sort_by)
         self.orphan_chunks_check()
         self.finish(save_space=save_space)
         if self.error_found:
@@ -1883,7 +1883,7 @@ class ArchiveChecker:
         logger.info("Manifest rebuild complete.")
         return manifest
 
-    def rebuild_refcounts(self, first=0, last=0, sort_by="", glob=None):
+    def rebuild_refcounts(self, first=0, last=0, sort_by="", match=None):
         """Rebuild object reference counts by walking the metadata
 
         Missing and/or incorrect data is repaired when detected
@@ -2077,10 +2077,10 @@ class ArchiveChecker:
                     i += 1
 
         sort_by = sort_by.split(",")
-        if any((first, last, glob)):
-            archive_infos = self.manifest.archives.list(sort_by=sort_by, glob=glob, first=first, last=last)
-            if glob and not archive_infos:
-                logger.warning("--glob-archives %s does not match any archives", glob)
+        if any((first, last, match)):
+            archive_infos = self.manifest.archives.list(sort_by=sort_by, match=match, first=first, last=last)
+            if match and not archive_infos:
+                logger.warning("--match-archives %s does not match any archives", match)
             if first and len(archive_infos) < first:
                 logger.warning("--first %d archives: only found %d archives", first, len(archive_infos))
             if last and len(archive_infos) < last:

+ 1 - 1
src/borg/archiver/__init__.py

@@ -410,7 +410,7 @@ class Archiver(
             replace_placeholders.override("now", DatetimeWrapper(args.timestamp))
             replace_placeholders.override("utcnow", DatetimeWrapper(args.timestamp.astimezone(timezone.utc)))
             args.location = args.location.with_timestamp(args.timestamp)
-        for name in "name", "other_name", "newname", "glob_archives", "comment":
+        for name in "name", "other_name", "newname", "match_archives", "comment":
             value = getattr(args, name, None)
             if value is not None:
                 setattr(args, name, replace_placeholders(value))

+ 4 - 4
src/borg/archiver/_common.py

@@ -360,11 +360,11 @@ def define_archive_filters_group(subparser, *, sort_by=True, first_last=True):
     group = filters_group.add_mutually_exclusive_group()
     group.add_argument(
         "-a",
-        "--glob-archives",
-        metavar="GLOB",
-        dest="glob_archives",
+        "--match-archives",
+        metavar="PATTERN",
+        dest="match_archives",
         action=Highlander,
-        help="only consider archive names matching the glob. " 'sh: rules apply, see "borg help patterns".',
+        help='only consider archive names matching the pattern. see "borg help match-archives".',
     )
 
     if sort_by:

+ 3 - 3
src/borg/archiver/check_cmd.py

@@ -31,9 +31,9 @@ class CheckMixIn:
                 env_var_override="BORG_CHECK_I_KNOW_WHAT_I_AM_DOING",
             ):
                 return EXIT_ERROR
-        if args.repo_only and any((args.verify_data, args.first, args.last, args.glob_archives)):
+        if args.repo_only and any((args.verify_data, args.first, args.last, args.match_archives)):
             self.print_error(
-                "--repository-only contradicts --first, --last, -a / --glob-archives " " and --verify-data arguments."
+                "--repository-only contradicts --first, --last, -a / --match-archives and --verify-data arguments."
             )
             return EXIT_ERROR
         if args.repair and args.max_duration:
@@ -55,7 +55,7 @@ class CheckMixIn:
             first=args.first,
             last=args.last,
             sort_by=args.sort_by or "ts",
-            glob=args.glob_archives,
+            match=args.match_archives,
             verify_data=args.verify_data,
             save_space=args.save_space,
         ):

+ 4 - 4
src/borg/archiver/delete_cmd.py

@@ -23,9 +23,9 @@ class DeleteMixIn:
         archive_names = tuple(x.name for x in manifest.archives.list_considering(args))
         if not archive_names:
             return self.exit_code
-        if args.glob_archives is None and args.first == 0 and args.last == 0:
+        if args.match_archives is None and args.first == 0 and args.last == 0:
             self.print_error(
-                "Aborting: if you really want to delete all archives, please use -a '*' "
+                "Aborting: if you really want to delete all archives, please use -a 'sh:*' "
                 "or just delete the whole repository (might be much faster)."
             )
             return EXIT_ERROR
@@ -114,8 +114,8 @@ class DeleteMixIn:
         that is how much your repository will shrink.
         Please note that the "All archives" stats refer to the state after deletion.
 
-        You can delete multiple archives by specifying a matching shell pattern,
-        using the ``--glob-archives GLOB`` option (for more info on these patterns,
+        You can delete multiple archives by specifying a matching pattern,
+        using the ``--match-archives PATTERN`` option (for more info on these patterns,
         see :ref:`borg_patterns`).
 
         Always first use ``--dry-run --list`` to see what would be deleted.

+ 31 - 2
src/borg/archiver/help_cmd.py

@@ -244,9 +244,38 @@ class HelpMixIn:
         This allows you to share the same patterns between multiple repositories
         without needing to specify them on the command line.\n\n"""
     )
+    helptext["match-archives"] = textwrap.dedent(
+        """
+        The ``--match-archives`` option matches a given pattern against the list of all archive
+        names in the repository.
+
+        It uses pattern styles similar to the ones described by ``borg help patterns``:
+
+        Identical match pattern, selector ``id:`` (default)
+            Simple string match, must fully match exactly as given.
+
+        Shell-style patterns, selector ``sh:``
+            Match like on the shell, wildcards like `*` and `?` work.
+
+        `Regular expressions <https://docs.python.org/3/library/re.html>`_, selector ``re:``
+            Full regular expression support.
+            This is very powerful, but can also get rather complicated.
+
+        Examples::
+            # id: style
+            borg delete --match-archives 'id:archive-with-crap'
+            borg delete -a 'id:archive-with-crap'  # same, using short option
+            borg delete -a 'archive-with-crap'  # same, because 'id:' is the default
+
+            # sh: style
+            borg delete -a 'sh:home-kenny-*'
+
+            # re: style
+            borg delete -a 're:pc[123]-home-(user1|user2)-2022-09-.*'\n\n"""
+    )
     helptext["placeholders"] = textwrap.dedent(
         """
-        Repository URLs, ``--name``, ``-a`` / ``--glob-archives``, ``--comment``
+        Repository URLs, ``--name``, ``-a`` / ``--match-archives``, ``--comment``
         and ``--remote-path`` values support these placeholders:
 
         {hostname}
@@ -292,7 +321,7 @@ class HelpMixIn:
 
             borg create /path/to/repo::{hostname}-{user}-{utcnow} ...
             borg create /path/to/repo::{hostname}-{now:%Y-%m-%d_%H:%M:%S%z} ...
-            borg prune -a '{hostname}-*' ...
+            borg prune -a 'sh:{hostname}-*' ...
 
         .. note::
             systemd uses a difficult, non-standard syntax for command lines in unit files (refer to

+ 3 - 3
src/borg/archiver/prune_cmd.py

@@ -84,7 +84,7 @@ class PruneMixIn:
             return self.exit_code
         checkpoint_re = r"\.checkpoint(\.\d+)?"
         archives_checkpoints = manifest.archives.list(
-            glob=args.glob_archives,
+            match=args.match_archives,
             consider_checkpoints=True,
             match_end=r"(%s)?\Z" % checkpoint_re,
             sort_by=["ts"],
@@ -191,7 +191,7 @@ class PruneMixIn:
         archive (and thus still needed). Checkpoint archives are not considered when
         comparing archive counts against the retention limits (``--keep-X``).
 
-        If you use --glob-archives (-a), then only archives that match the GLOB are
+        If you use --match-archives (-a), then only archives that match the pattern are
         considered for deletion and only those archives count towards the totals
         specified by the rules.
         Otherwise, *all* archives in the repository are candidates for deletion!
@@ -200,7 +200,7 @@ class PruneMixIn:
 
         If you have multiple sequences of archives with different data sets (e.g.
         from different machines) in one shared repository, use one prune call per
-        data set that matches only the respective archives using the --glob-archives
+        data set that matches only the respective archives using the --match-archives
         (-a) option.
 
         The ``--keep-within`` option takes an argument of the form "<int><char>",

+ 16 - 7
src/borg/manifest.py

@@ -11,12 +11,12 @@ from .logger import create_logger
 
 logger = create_logger()
 
-from .helpers import shellpattern
 from .constants import *  # NOQA
 from .helpers.datastruct import StableDict
 from .helpers.parseformat import bin_to_hex
 from .helpers.time import parse_timestamp
 from .helpers.errors import Error
+from .patterns import get_regex_from_pattern
 from .repoobj import RepoObj
 
 
@@ -74,12 +74,20 @@ class Archives(abc.MutableMapping):
         del self._archives[name]
 
     def list(
-        self, *, glob=None, match_end=r"\Z", sort_by=(), consider_checkpoints=True, first=None, last=None, reverse=False
+        self,
+        *,
+        match=None,
+        match_end=r"\Z",
+        sort_by=(),
+        consider_checkpoints=True,
+        first=None,
+        last=None,
+        reverse=False
     ):
         """
         Return list of ArchiveInfo instances according to the parameters.
 
-        First match *glob* (considering *match_end*), then *sort_by*.
+        First match *match* (considering *match_end*), then *sort_by*.
         Apply *first* and *last* filters, and then possibly *reverse* the list.
 
         *sort_by* is a list of sort keys applied in reverse order.
@@ -90,7 +98,8 @@ class Archives(abc.MutableMapping):
         """
         if isinstance(sort_by, (str, bytes)):
             raise TypeError("sort_by must be a sequence of str")
-        regex = re.compile(shellpattern.translate(glob or "*", match_end=match_end))
+        regex = get_regex_from_pattern(match or "re:.*")
+        regex = re.compile(regex + match_end)
         archives = [x for x in self.values() if regex.match(x.name) is not None]
         if not consider_checkpoints:
             archives = [x for x in archives if ".checkpoint" not in x.name]
@@ -106,18 +115,18 @@ class Archives(abc.MutableMapping):
 
     def list_considering(self, args):
         """
-        get a list of archives, considering --first/last/prefix/glob-archives/sort/consider-checkpoints cmdline args
+        get a list of archives, considering --first/last/prefix/match-archives/sort/consider-checkpoints cmdline args
         """
         name = getattr(args, "name", None)
         consider_checkpoints = getattr(args, "consider_checkpoints", None)
         if name is not None:
             raise Error(
-                "Giving a specific name is incompatible with options --first, --last, -a / --glob-archives, and --consider-checkpoints."
+                "Giving a specific name is incompatible with options --first, --last, -a / --match-archives, and --consider-checkpoints."
             )
         return self.list(
             sort_by=args.sort_by.split(","),
             consider_checkpoints=consider_checkpoints,
-            glob=args.glob_archives,
+            match=args.match_archives,
             first=args.first,
             last=args.last,
         )

+ 23 - 0
src/borg/patterns.py

@@ -388,3 +388,26 @@ def parse_inclexcl_command(cmd_line_str, fallback=ShellPattern):
         val = parse_pattern(remainder_str, fallback, recurse_dir)
 
     return CmdTuple(val, cmd)
+
+
+def get_regex_from_pattern(pattern: str) -> str:
+    """
+    Return an uncompiled, unanchored regex string for a "sh:", "re:" or "id:" style *pattern*.
+
+    A pattern without a recognised two-letter style prefix is treated as "id:" (literal match).
+    Similar to the PatternBase subclasses, but for generic strings, not filesystem paths.
+    """
+    if len(pattern) > 2 and pattern[2] == ":" and pattern[:2] in {"sh", "re", "id"}:  # explicit style selector
+        (style, pattern) = (pattern[:2], pattern[3:])  # split off the 3-character "xx:" prefix
+    else:
+        (style, pattern) = ("id", pattern)  # "identical" match is the default
+    if style == "sh":
+        # (?ms) (meaning re.MULTILINE and re.DOTALL) are not desired here.
+        regex = shellpattern.translate(pattern, match_end="").removeprefix("(?ms)")
+    elif style == "re":
+        regex = pattern  # used verbatim as the regular expression
+    elif style == "id":
+        regex = re.escape(pattern)  # literal text: regex metacharacters get escaped
+    else:
+        raise NotImplementedError  # not reachable with the three styles accepted above
+    return regex

+ 1 - 1
src/borg/testsuite/archiver/check_cmd.py

@@ -39,7 +39,7 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
             "check",
             "-v",
             "--archives-only",
-            "--glob-archives=archive2",
+            "--match-archives=archive2",
             exit_code=0,
         )
         self.assert_not_in("archive1", output)

+ 1 - 1
src/borg/testsuite/archiver/delete_cmd.py

@@ -19,7 +19,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd(f"--repo={self.repository_location}", "create", "another_test.2", "input")
         self.cmd(f"--repo={self.repository_location}", "extract", "test", "--dry-run")
         self.cmd(f"--repo={self.repository_location}", "extract", "test.2", "--dry-run")
-        self.cmd(f"--repo={self.repository_location}", "delete", "--glob-archives", "another_*")
+        self.cmd(f"--repo={self.repository_location}", "delete", "--match-archives", "sh:another_*")
         self.cmd(f"--repo={self.repository_location}", "delete", "--last", "1")
         self.cmd(f"--repo={self.repository_location}", "delete", "-a", "test")
         self.cmd(f"--repo={self.repository_location}", "extract", "test.2", "--dry-run")

+ 4 - 4
src/borg/testsuite/archiver/mount_cmds.py

@@ -235,13 +235,13 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             assert sorted(os.listdir(os.path.join(mountpoint))) == ["arch11", "arch12"]
         with self.fuse_mount(self.repository_location, mountpoint, "--last=2", "--sort=name"):
             assert sorted(os.listdir(os.path.join(mountpoint))) == ["arch21", "arch22"]
-        with self.fuse_mount(self.repository_location, mountpoint, "--glob-archives=arch1*"):
+        with self.fuse_mount(self.repository_location, mountpoint, "--match-archives=sh:arch1*"):
             assert sorted(os.listdir(os.path.join(mountpoint))) == ["arch11", "arch12"]
-        with self.fuse_mount(self.repository_location, mountpoint, "--glob-archives=arch2*"):
+        with self.fuse_mount(self.repository_location, mountpoint, "--match-archives=sh:arch2*"):
             assert sorted(os.listdir(os.path.join(mountpoint))) == ["arch21", "arch22"]
-        with self.fuse_mount(self.repository_location, mountpoint, "--glob-archives=arch*"):
+        with self.fuse_mount(self.repository_location, mountpoint, "--match-archives=sh:arch*"):
             assert sorted(os.listdir(os.path.join(mountpoint))) == ["arch11", "arch12", "arch21", "arch22"]
-        with self.fuse_mount(self.repository_location, mountpoint, "--glob-archives=nope"):
+        with self.fuse_mount(self.repository_location, mountpoint, "--match-archives=nope"):
             assert sorted(os.listdir(os.path.join(mountpoint))) == []
 
     @unittest.skipUnless(llfuse, "llfuse not installed")

+ 4 - 4
src/borg/testsuite/archiver/prune_cmd.py

@@ -188,7 +188,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             "--list",
             "--dry-run",
             "--keep-daily=1",
-            "--glob-archives=foo-*",
+            "--match-archives=sh:foo-*",
         )
         assert re.search(r"Keeping archive \(rule: daily #1\):\s+foo-2015-08-12-20:00", output)
         assert re.search(r"Would prune:\s+foo-2015-08-12-10:00", output)
@@ -197,7 +197,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.assert_in("foo-2015-08-12-20:00", output)
         self.assert_in("bar-2015-08-12-10:00", output)
         self.assert_in("bar-2015-08-12-20:00", output)
-        self.cmd(f"--repo={self.repository_location}", "prune", "--keep-daily=1", "--glob-archives=foo-*")
+        self.cmd(f"--repo={self.repository_location}", "prune", "--keep-daily=1", "--match-archives=sh:foo-*")
         output = self.cmd(f"--repo={self.repository_location}", "rlist")
         self.assert_not_in("foo-2015-08-12-10:00", output)
         self.assert_in("foo-2015-08-12-20:00", output)
@@ -216,7 +216,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             "--list",
             "--dry-run",
             "--keep-daily=1",
-            "--glob-archives=2015-*-foo",
+            "--match-archives=sh:2015-*-foo",
         )
         assert re.search(r"Keeping archive \(rule: daily #1\):\s+2015-08-12-20:00-foo", output)
         assert re.search(r"Would prune:\s+2015-08-12-10:00-foo", output)
@@ -225,7 +225,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.assert_in("2015-08-12-20:00-foo", output)
         self.assert_in("2015-08-12-10:00-bar", output)
         self.assert_in("2015-08-12-20:00-bar", output)
-        self.cmd(f"--repo={self.repository_location}", "prune", "--keep-daily=1", "--glob-archives=2015-*-foo")
+        self.cmd(f"--repo={self.repository_location}", "prune", "--keep-daily=1", "--match-archives=sh:2015-*-foo")
         output = self.cmd(f"--repo={self.repository_location}", "rlist")
         self.assert_not_in("2015-08-12-10:00-foo", output)
         self.assert_in("2015-08-12-20:00-foo", output)

+ 1 - 1
src/borg/testsuite/archiver/rlist_cmd.py

@@ -19,7 +19,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd(f"--repo={self.repository_location}", "create", "test-1", src_dir)
         self.cmd(f"--repo={self.repository_location}", "create", "something-else-than-test-1", src_dir)
         self.cmd(f"--repo={self.repository_location}", "create", "test-2", src_dir)
-        output = self.cmd(f"--repo={self.repository_location}", "rlist", "--glob-archives=test-*")
+        output = self.cmd(f"--repo={self.repository_location}", "rlist", "--match-archives=sh:test-*")
         self.assert_in("test-1", output)
         self.assert_in("test-2", output)
         self.assert_not_in("something-else", output)

+ 16 - 0
src/borg/testsuite/patterns.py

@@ -8,6 +8,7 @@ import pytest
 from ..patterns import PathFullPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, RegexPattern
 from ..patterns import load_exclude_file, load_pattern_file
 from ..patterns import parse_pattern, PatternMatcher
+from ..patterns import get_regex_from_pattern
 
 
 def check_patterns(files, pattern, expected):
@@ -617,3 +618,18 @@ def test_pattern_matcher():
     assert pm.match("z") == "B"
 
     assert PatternMatcher(fallback="hey!").fallback == "hey!"
+
+
+@pytest.mark.parametrize(
+    "pattern, regex",
+    [
+        ("foo.bar", r"foo\.bar"),  # default is id:
+        ("id:foo.bar", r"foo\.bar"),  # explicit id: gives the same escaped result
+        ("id:foo?", r"foo\?"),  # id: escapes regex metacharacters
+        ("re:foo.bar", r"foo.bar"),  # re: only strips the selector
+        ("re:.*(fooo?|bar|baz).*", r".*(fooo?|bar|baz).*"),  # re: leaves regex syntax untouched
+        ("sh:foo.*", r"foo\.[^\/]*"),  # sh: wildcard translated, no "(?ms)" prefix expected
+    ],
+)
+def test_regex_from_pattern(pattern, regex):  # each case: input pattern -> expected regex string
+    assert get_regex_from_pattern(pattern) == regex