瀏覽代碼

Merge pull request #8429 from ThomasWaldmann/improve-matching

AND-matching also on host, user, tags
TW 8 月之前
父節點
當前提交
156d33e69c

+ 5 - 1
src/borg/archiver/__init__.py

@@ -418,10 +418,14 @@ class Archiver(
             replace_placeholders.override("now", DatetimeWrapper(args.timestamp))
             replace_placeholders.override("utcnow", DatetimeWrapper(args.timestamp.astimezone(timezone.utc)))
             args.location = args.location.with_timestamp(args.timestamp)
-        for name in "name", "other_name", "newname", "match_archives", "comment":
+        for name in "name", "other_name", "newname", "comment":
             value = getattr(args, name, None)
             if value is not None:
                 setattr(args, name, replace_placeholders(value))
+        for name in ("match_archives",):  # lists
+            value = getattr(args, name, None)
+            if value:
+                setattr(args, name, [replace_placeholders(elem) for elem in value])
 
         return args
 

+ 3 - 3
src/borg/archiver/_common.py

@@ -256,7 +256,7 @@ def with_archive(method):
     def wrapper(self, args, repository, manifest, **kwargs):
         archive_name = getattr(args, "name", None)
         assert archive_name is not None
-        archive_info = manifest.archives.get_one(archive_name)
+        archive_info = manifest.archives.get_one([archive_name])
         archive = Archive(
             manifest,
             archive_info.id,
@@ -379,8 +379,8 @@ def define_archive_filters_group(subparser, *, sort_by=True, first_last=True, ol
         "--match-archives",
         metavar="PATTERN",
         dest="match_archives",
-        action=Highlander,
-        help='only consider archive names matching the pattern. see "borg help match-archives".',
+        action="append",
+        help='only consider archives matching all patterns. see "borg help match-archives".',
     )
 
     if sort_by:

+ 2 - 2
src/borg/archiver/debug_cmd.py

@@ -32,7 +32,7 @@ class DebugMixIn:
     def do_debug_dump_archive_items(self, args, repository, manifest):
         """dump (decrypted, decompressed) archive items metadata (not: data)"""
         repo_objs = manifest.repo_objs
-        archive_info = manifest.archives.get_one(args.name)
+        archive_info = manifest.archives.get_one([args.name])
         archive = Archive(manifest, archive_info.id)
         for i, item_id in enumerate(archive.metadata.items):
             _, data = repo_objs.parse(item_id, repository.get(item_id), ro_type=ROBJ_ARCHIVE_STREAM)
@@ -45,7 +45,7 @@ class DebugMixIn:
     @with_repository(compatibility=Manifest.NO_OPERATION_CHECK)
     def do_debug_dump_archive(self, args, repository, manifest):
         """dump decoded archive metadata (not: data)"""
-        archive_info = manifest.archives.get_one(args.name)
+        archive_info = manifest.archives.get_one([args.name])
         repo_objs = manifest.repo_objs
         try:
             archive_meta_orig = manifest.archives.get_by_id(archive_info.id, raw=True)

+ 2 - 2
src/borg/archiver/delete_cmd.py

@@ -19,13 +19,13 @@ class DeleteMixIn:
         dry_run = args.dry_run
         manifest = Manifest.load(repository, (Manifest.Operation.DELETE,))
         if args.name:
-            archive_infos = [manifest.archives.get_one(args.name)]
+            archive_infos = [manifest.archives.get_one([args.name])]
         else:
             archive_infos = manifest.archives.list_considering(args)
         count = len(archive_infos)
         if count == 0:
             return
-        if not args.name and args.match_archives is None and args.first == 0 and args.last == 0:
+        if not args.name and not args.match_archives and args.first == 0 and args.last == 0:
             raise CommandError(
                 "Aborting: if you really want to delete all archives, please use -a 'sh:*' "
                 "or just delete the whole repository (might be much faster)."

+ 2 - 2
src/borg/archiver/diff_cmd.py

@@ -25,8 +25,8 @@ class DiffMixIn:
         else:
             format = os.environ.get("BORG_DIFF_FORMAT", "{change} {path}{NL}")
 
-        archive1_info = manifest.archives.get_one(args.name)
-        archive2_info = manifest.archives.get_one(args.other_name)
+        archive1_info = manifest.archives.get_one([args.name])
+        archive2_info = manifest.archives.get_one([args.other_name])
         archive1 = Archive(manifest, archive1_info.id)
         archive2 = Archive(manifest, archive2_info.id)
 

+ 26 - 7
src/borg/archiver/help_cmd.py

@@ -264,10 +264,19 @@ class HelpMixIn:
     )
     helptext["match-archives"] = textwrap.dedent(
         """
-        The ``--match-archives`` option matches a given pattern against the list of all archive
-        names in the repository.
+        The ``--match-archives`` option matches a given pattern against the list of all archives
+        in the repository. It can be given multiple times.
 
-        It uses pattern styles similar to the ones described by ``borg help patterns``:
+        The patterns can have a prefix of:
+
+        - name: pattern match on the archive name (default)
+        - aid: prefix match on the archive id (only one result allowed)
+        - user: exact match on the username who created the archive
+        - host: exact match on the hostname where the archive was created
+        - tags: match on the archive tags
+
+        In case of a name pattern match,
+        it uses pattern styles similar to the ones described by ``borg help patterns``:
 
         Identical match pattern, selector ``id:`` (default)
             Simple string match, must fully match exactly as given.
@@ -281,16 +290,26 @@ class HelpMixIn:
 
         Examples::
 
-            # id: style
+            # name match, id: style
             borg delete --match-archives 'id:archive-with-crap'
             borg delete -a 'id:archive-with-crap'  # same, using short option
             borg delete -a 'archive-with-crap'  # same, because 'id:' is the default
 
-            # sh: style
+            # name match, sh: style
             borg delete -a 'sh:home-kenny-*'
 
-            # re: style
-            borg delete -a 're:pc[123]-home-(user1|user2)-2022-09-.*'\n\n"""
+            # name match, re: style
+            borg delete -a 're:pc[123]-home-(user1|user2)-2022-09-.*'
+
+            # archive id prefix match:
+            borg delete -a 'aid:d34db33f'
+
+            # host or user match
+            borg delete -a 'user:kenny'
+            borg delete -a 'host:kenny-pc'
+
+            # tags match
+            borg delete -a 'tags:TAG1' -a 'tags:TAG2'\n\n"""
     )
     helptext["placeholders"] = textwrap.dedent(
         """

+ 1 - 1
src/borg/archiver/info_cmd.py

@@ -19,7 +19,7 @@ class InfoMixIn:
         """Show archive details such as disk space used"""
 
         if args.name:
-            archive_infos = [manifest.archives.get_one(args.name)]
+            archive_infos = [manifest.archives.get_one([args.name])]
         else:
             archive_infos = manifest.archives.list_considering(args)
 

+ 1 - 1
src/borg/archiver/list_cmd.py

@@ -27,7 +27,7 @@ class ListMixIn:
         else:
             format = os.environ.get("BORG_LIST_FORMAT", "{mode} {user:6} {group:6} {size:8} {mtime} {path}{extra}{NL}")
 
-        archive_info = manifest.archives.get_one(args.name)
+        archive_info = manifest.archives.get_one([args.name])
 
         def _list_inner(cache):
             archive = Archive(manifest, archive_info.id, cache=cache)

+ 1 - 1
src/borg/cache.py

@@ -402,7 +402,7 @@ class FilesCacheMixin:
         from .archive import Archive
 
         # get the latest archive with the IDENTICAL name, supporting archive series:
-        archives = self.manifest.archives.list(match=self.archive_name, sort_by=["ts"], last=1)
+        archives = self.manifest.archives.list(match=[self.archive_name], sort_by=["ts"], last=1)
         if not archives:
             # nothing found
             return

+ 58 - 24
src/borg/manifest.py

@@ -33,7 +33,7 @@ class NoManifestError(Error):
     exit_mcode = 26
 
 
-ArchiveInfo = namedtuple("ArchiveInfo", "name id ts")
+ArchiveInfo = namedtuple("ArchiveInfo", "name id ts tags host user", defaults=[(), None, None])
 
 # timestamp is a replacement for ts, archive is an alias for name (see SortBySpec)
 AI_HUMAN_SORT_KEYS = ["timestamp", "archive"] + list(ArchiveInfo._fields)
@@ -129,6 +129,9 @@ class Archives:
                 time="1970-01-01T00:00:00.000000",
                 # new:
                 exists=False,  # we have the pointer, but the repo does not have an archive item
+                username="",
+                hostname="",
+                tags=(),
             )
         else:
             _, data = self.manifest.repo_objs.parse(id, cdata, ro_type=ROBJ_ARCHIVE_META)
@@ -149,6 +152,7 @@ class Archives:
                 size=archive_item.size,
                 nfiles=archive_item.nfiles,
                 comment=archive_item.comment,  # not always present?
+                tags=tuple(sorted(getattr(archive_item, "tags", []))),  # must be hashable
             )
         return metadata
 
@@ -159,7 +163,41 @@ class Archives:
 
     def _info_tuples(self):
         for info in self._infos():
-            yield ArchiveInfo(name=info["name"], id=info["id"], ts=parse_timestamp(info["time"]))
+            yield ArchiveInfo(
+                name=info["name"],
+                id=info["id"],
+                ts=parse_timestamp(info["time"]),
+                tags=info["tags"],
+                user=info["username"],
+                host=info["hostname"],
+            )
+
+    def _matching_info_tuples(self, match_patterns, match_end):
+        archive_infos = list(self._info_tuples())
+        if match_patterns:
+            assert isinstance(match_patterns, list), f"match_pattern is a {type(match_patterns)}"
+            for match in match_patterns:
+                if match.startswith("aid:"):  # do a match on the archive ID (prefix)
+                    wanted_id = match.removeprefix("aid:")
+                    archive_infos = [x for x in archive_infos if bin_to_hex(x.id).startswith(wanted_id)]
+                    if len(archive_infos) != 1:
+                        raise CommandError("archive ID based match needs to match precisely one archive ID")
+                elif match.startswith("tags:"):
+                    wanted_tags = match.removeprefix("tags:")
+                    wanted_tags = [tag for tag in wanted_tags.split(",") if tag]  # remove empty tags
+                    archive_infos = [x for x in archive_infos if set(x.tags) >= set(wanted_tags)]
+                elif match.startswith("user:"):
+                    wanted_user = match.removeprefix("user:")
+                    archive_infos = [x for x in archive_infos if x.user == wanted_user]
+                elif match.startswith("host:"):
+                    wanted_host = match.removeprefix("host:")
+                    archive_infos = [x for x in archive_infos if x.host == wanted_host]
+                else:  #  do a match on the name
+                    match = match.removeprefix("name:")  # accept optional name: prefix
+                    regex = get_regex_from_pattern(match)
+                    regex = re.compile(regex + match_end)
+                    archive_infos = [x for x in archive_infos if regex.match(x.name) is not None]
+        return archive_infos
 
     def count(self):
         # return the count of archives in the repo
@@ -211,7 +249,14 @@ class Archives:
             if archive_info["exists"] and archive_info["name"] == name:
                 if not raw:
                     ts = parse_timestamp(archive_info["time"])
-                    return ArchiveInfo(name=archive_info["name"], id=archive_info["id"], ts=ts)
+                    return ArchiveInfo(
+                        name=archive_info["name"],
+                        id=archive_info["id"],
+                        ts=ts,
+                        tags=archive_info["tags"],
+                        user=archive_info["username"],
+                        host=archive_info["hostname"],
+                    )
                 else:
                     return archive_info
         else:
@@ -243,7 +288,14 @@ class Archives:
                 if archive_info["exists"]:
                     if not raw:
                         ts = parse_timestamp(archive_info["time"])
-                        archive_info = ArchiveInfo(name=archive_info["name"], id=archive_info["id"], ts=ts)
+                        archive_info = ArchiveInfo(
+                            name=archive_info["name"],
+                            id=archive_info["id"],
+                            ts=ts,
+                            tags=archive_info["tags"],
+                            user=archive_info["username"],
+                            host=archive_info["hostname"],
+                        )
                     return archive_info
         else:
             for name, values in self._archives.items():
@@ -311,18 +363,7 @@ class Archives:
         if isinstance(sort_by, (str, bytes)):
             raise TypeError("sort_by must be a sequence of str")
 
-        archive_infos = self._info_tuples()
-        if match is None:
-            archive_infos = list(archive_infos)
-        elif match.startswith("aid:"):  # do a match on the archive ID (prefix)
-            wanted_id = match.removeprefix("aid:")
-            archive_infos = [x for x in archive_infos if bin_to_hex(x.id).startswith(wanted_id)]
-            if len(archive_infos) != 1:
-                raise CommandError("archive ID based match needs to match precisely one archive ID")
-        else:  #  do a match on the name
-            regex = get_regex_from_pattern(match)
-            regex = re.compile(regex + match_end)
-            archive_infos = [x for x in archive_infos if regex.match(x.name) is not None]
+        archive_infos = self._matching_info_tuples(match, match_end)
 
         if any([oldest, newest, older, newer]):
             archive_infos = filter_archives_by_date(
@@ -361,14 +402,7 @@ class Archives:
     def get_one(self, match, *, match_end=r"\Z"):
         """get exactly one archive matching <match>"""
         assert match is not None
-        archive_infos = self._info_tuples()
-        if match.startswith("aid:"):  # do a match on the archive ID (prefix)
-            wanted_id = match.removeprefix("aid:")
-            archive_infos = [i for i in archive_infos if bin_to_hex(i.id).startswith(wanted_id)]
-        else:  # do a match on the name
-            regex = get_regex_from_pattern(match)
-            regex = re.compile(regex + match_end)
-            archive_infos = [i for i in archive_infos if regex.match(i.name) is not None]
+        archive_infos = self._matching_info_tuples(match, match_end)
         if len(archive_infos) != 1:
             raise CommandError(f"{match} needed to match precisely one archive, but matched {len(archive_infos)}.")
         return archive_infos[0]

+ 1 - 1
src/borg/testsuite/archiver/__init__.py

@@ -169,7 +169,7 @@ def open_archive(repo_path, name):
     repository = Repository(repo_path, exclusive=True)
     with repository:
         manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
-        archive_info = manifest.archives.get_one(name)
+        archive_info = manifest.archives.get_one([name])
         archive = Archive(manifest, archive_info.id)
     return archive, repository