2
0
View source

NAME is the series name; the archive id is the hash.

aid:<archive-id-prefix> can be used for -a / --match-archives
to match on the archive id (prefix) instead of the name.

The NAME positional argument now also supports matching (including aid:),
but requires that there is exactly ONE matching archive.
Thomas Waldmann, 8 months ago
parent
commit
8237e6beca

+ 22 - 24
src/borg/archive.py

@@ -442,6 +442,7 @@ class Archive:
         self,
         manifest,
         name,
+        *,
         cache=None,
         create=False,
         numeric_ids=False,
@@ -458,6 +459,7 @@ class Archive:
         log_json=False,
         iec=False,
     ):
+        name_is_id = isinstance(name, bytes)
         self.cwd = os.getcwd()
         assert isinstance(manifest, Manifest)
         self.manifest = manifest
@@ -493,10 +495,12 @@ class Archive:
         self.create = create
         if self.create:
             self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats)
-            if manifest.archives.exists(name):
-                raise self.AlreadyExists(name)
         else:
-            info = self.manifest.archives.get(name)
+            if name_is_id:
+                # we also go over the manifest here to avoid quick&dirty deleted archives
+                info = self.manifest.archives.get_by_id(name)
+            else:
+                info = self.manifest.archives.get(name)
             if info is None:
                 raise self.DoesNotExist(name)
             self.load(info.id)
@@ -611,8 +615,6 @@ Duration: {0.duration}
 
     def save(self, name=None, comment=None, timestamp=None, stats=None, additional_metadata=None):
         name = name or self.name
-        if self.manifest.archives.exists(name):
-            raise self.AlreadyExists(name)
         self.items_buffer.flush(flush=True)
         item_ptrs = archive_put_items(
             self.items_buffer.chunks, repo_objs=self.repo_objs, cache=self.cache, stats=self.stats
@@ -956,18 +958,16 @@ Duration: {0.duration}
         self.id = new_id
 
     def rename(self, name):
-        if self.manifest.archives.exists(name):
-            raise self.AlreadyExists(name)
-        oldname = self.name
+        old_id = self.id
         self.name = name
         self.set_meta("name", name)
-        self.manifest.archives.delete(oldname)
+        self.manifest.archives.delete_by_id(old_id)
 
     def delete(self):
         # quick and dirty: we just nuke the archive from the archives list - that will
         # potentially orphan all chunks previously referenced by the archive, except the ones also
         # referenced by other archives. In the end, "borg compact" will clean up and free space.
-        self.manifest.archives.delete(self.name)
+        self.manifest.archives.delete_by_id(self.id)
 
     @staticmethod
     def compare_archives_iter(
@@ -2090,7 +2090,9 @@ class ArchiveChecker:
                     logger.debug(f"archive id new: {bin_to_hex(new_archive_id)}")
                     cdata = self.repo_objs.format(new_archive_id, {}, data, ro_type=ROBJ_ARCHIVE_META)
                     add_reference(new_archive_id, len(data), cdata)
-                    self.manifest.archives.create(info.name, new_archive_id, info.ts, overwrite=True)
+                    self.manifest.archives.create(info.name, new_archive_id, info.ts)
+                    if archive_id != new_archive_id:
+                        self.manifest.archives.delete_by_id(archive_id)
             pi.finish()
 
     def finish(self):
@@ -2148,18 +2150,16 @@ class ArchiveRecreater:
         self.progress = progress
         self.print_file_status = file_status_printer or (lambda *args: None)
 
-    def recreate(self, archive_name, comment=None, target_name=None):
-        assert not self.is_temporary_archive(archive_name)
-        archive = self.open_archive(archive_name)
+    def recreate(self, archive_id, target_name, delete_original, comment=None):
+        archive = self.open_archive(archive_id)
         target = self.create_target(archive, target_name)
         if self.exclude_if_present or self.exclude_caches:
             self.matcher_add_tagged_dirs(archive)
-        if self.matcher.empty() and not target.recreate_rechunkify and comment is None and target_name is None:
+        if self.matcher.empty() and not target.recreate_rechunkify and comment is None:
             # nothing to do
             return False
         self.process_items(archive, target)
-        replace_original = target_name is None
-        self.save(archive, target, comment, replace_original=replace_original)
+        self.save(archive, target, comment, delete_original=delete_original)
         return True
 
     def process_items(self, archive, target):
@@ -2216,7 +2216,7 @@ class ArchiveRecreater:
             for chunk in chunk_iterator:
                 yield Chunk(chunk, size=len(chunk), allocation=CH_DATA)
 
-    def save(self, archive, target, comment=None, replace_original=True):
+    def save(self, archive, target, comment=None, delete_original=True):
         if self.dry_run:
             return
         if comment is None:
@@ -2242,9 +2242,8 @@ class ArchiveRecreater:
             }
 
         target.save(comment=comment, timestamp=self.timestamp, additional_metadata=additional_metadata)
-        if replace_original:
+        if delete_original:
             archive.delete()
-            target.rename(archive.name)
         if self.stats:
             target.start = _start
             target.end = archive_ts_now()
@@ -2277,9 +2276,8 @@ class ArchiveRecreater:
         matcher.add(tag_files, IECommand.Include)
         matcher.add(tagged_dirs, IECommand.ExcludeNoRecurse)
 
-    def create_target(self, archive, target_name=None):
+    def create_target(self, archive, target_name):
         """Create target archive."""
-        target_name = target_name or archive.name + ".recreate"
         target = self.create_target_archive(target_name)
         # If the archives use the same chunker params, then don't rechunkify
         source_chunker_params = tuple(archive.metadata.get("chunker_params", []))
@@ -2308,5 +2306,5 @@ class ArchiveRecreater:
         )
         return target
 
-    def open_archive(self, name, **kwargs):
-        return Archive(self.manifest, name, cache=self.cache, **kwargs)
+    def open_archive(self, archive_id, **kwargs):
+        return Archive(self.manifest, archive_id, cache=self.cache, **kwargs)

+ 2 - 1
src/borg/archiver/_common.py

@@ -257,9 +257,10 @@ def with_archive(method):
     def wrapper(self, args, repository, manifest, **kwargs):
         archive_name = getattr(args, "name", None)
         assert archive_name is not None
+        archive_info = manifest.archives.get_one(archive_name)
         archive = Archive(
             manifest,
-            archive_name,
+            archive_info.id,
             numeric_ids=getattr(args, "numeric_ids", False),
             noflags=getattr(args, "noflags", False),
             noacls=getattr(args, "noacls", False),

+ 4 - 2
src/borg/archiver/debug_cmd.py

@@ -32,7 +32,8 @@ class DebugMixIn:
     def do_debug_dump_archive_items(self, args, repository, manifest):
         """dump (decrypted, decompressed) archive items metadata (not: data)"""
         repo_objs = manifest.repo_objs
-        archive = Archive(manifest, args.name)
+        archive_info = manifest.archives.get_one(args.name)
+        archive = Archive(manifest, archive_info.id)
         for i, item_id in enumerate(archive.metadata.items):
             _, data = repo_objs.parse(item_id, repository.get(item_id), ro_type=ROBJ_ARCHIVE_STREAM)
             filename = "%06d_%s.items" % (i, bin_to_hex(item_id))
@@ -44,9 +45,10 @@ class DebugMixIn:
     @with_repository(compatibility=Manifest.NO_OPERATION_CHECK)
     def do_debug_dump_archive(self, args, repository, manifest):
         """dump decoded archive metadata (not: data)"""
+        archive_info = manifest.archives.get_one(args.name)
         repo_objs = manifest.repo_objs
         try:
-            archive_meta_orig = manifest.archives.get(args.name, raw=True)
+            archive_meta_orig = manifest.archives.get_by_id(archive_info.id, raw=True)
         except KeyError:
             raise Archive.DoesNotExist(args.name)
 

+ 10 - 7
src/borg/archiver/delete_cmd.py

@@ -3,7 +3,7 @@ import logging
 
 from ._common import with_repository
 from ..constants import *  # NOQA
-from ..helpers import format_archive, CommandError
+from ..helpers import format_archive, CommandError, bin_to_hex
 from ..manifest import Manifest
 
 from ..logger import create_logger
@@ -18,8 +18,9 @@ class DeleteMixIn:
         self.output_list = args.output_list
         dry_run = args.dry_run
         manifest = Manifest.load(repository, (Manifest.Operation.DELETE,))
-        archive_names = tuple(x.name for x in manifest.archives.list_considering(args))
-        if not archive_names:
+        archive_infos = manifest.archives.list_considering(args)
+        count = len(archive_infos)
+        if count == 0:
             return
         if args.match_archives is None and args.first == 0 and args.last == 0:
             raise CommandError(
@@ -29,18 +30,20 @@ class DeleteMixIn:
 
         deleted = False
         logger_list = logging.getLogger("borg.output.list")
-        for i, archive_name in enumerate(archive_names, 1):
+        for i, archive_info in enumerate(archive_infos, 1):
+            name, id, hex_id = archive_info.name, archive_info.id, bin_to_hex(archive_info.id)
             try:
                 # this does NOT use Archive.delete, so this code hopefully even works in cases a corrupt archive
                 # would make the code in class Archive crash, so the user can at least get rid of such archives.
-                current_archive = manifest.archives.delete(archive_name)
+                if not dry_run:
+                    manifest.archives.delete_by_id(id)
             except KeyError:
-                self.print_warning(f"Archive {archive_name} not found ({i}/{len(archive_names)}).")
+                self.print_warning(f"Archive {name} {hex_id} not found ({i}/{count}).")
             else:
                 deleted = True
                 if self.output_list:
                     msg = "Would delete: {} ({}/{})" if dry_run else "Deleted archive: {} ({}/{})"
-                    logger_list.info(msg.format(format_archive(current_archive), i, len(archive_names)))
+                    logger_list.info(msg.format(format_archive(archive_info), i, count))
         if dry_run:
             logger.info("Finished dry-run.")
         elif deleted:

+ 4 - 4
src/borg/archiver/info_cmd.py

@@ -18,12 +18,12 @@ class InfoMixIn:
     def do_info(self, args, repository, manifest, cache):
         """Show archive details such as disk space used"""
 
-        archive_names = tuple(x.name for x in manifest.archives.list_considering(args))
+        archive_infos = manifest.archives.list_considering(args)
 
         output_data = []
 
-        for i, archive_name in enumerate(archive_names, 1):
-            archive = Archive(manifest, archive_name, cache=cache, iec=args.iec)
+        for i, archive_info in enumerate(archive_infos, 1):
+            archive = Archive(manifest, archive_info.id, cache=cache, iec=args.iec)
             info = archive.info()
             if args.json:
                 output_data.append(info)
@@ -48,7 +48,7 @@ class InfoMixIn:
                     .strip()
                     .format(**info)
                 )
-            if not args.json and len(archive_names) - i:
+            if not args.json and len(archive_infos) - i:
                 print()
 
         if args.json:

+ 3 - 1
src/borg/archiver/list_cmd.py

@@ -27,8 +27,10 @@ class ListMixIn:
         else:
             format = os.environ.get("BORG_LIST_FORMAT", "{mode} {user:6} {group:6} {size:8} {mtime} {path}{extra}{NL}")
 
+        archive_info = manifest.archives.get_one(args.name)
+
         def _list_inner(cache):
-            archive = Archive(manifest, args.name, cache=cache)
+            archive = Archive(manifest, archive_info.id, cache=cache)
             formatter = ItemFormatter(archive, format)
             for item in archive.iter_items(lambda item: matcher.match(item.path)):
                 sys.stdout.write(formatter.format_item(item, args.json_lines, sort=True))

+ 1 - 1
src/borg/archiver/prune_cmd.py

@@ -125,7 +125,7 @@ class PruneMixIn:
                     else:
                         archives_deleted += 1
                         log_message = "Pruning archive (%d/%d):" % (archives_deleted, to_delete_len)
-                        archive = Archive(manifest, archive.name, cache)
+                        archive = Archive(manifest, archive.id, cache=cache)
                         archive.delete()
                         uncommitted_deletes += 1
                 else:

+ 14 - 11
src/borg/archiver/recreate_cmd.py

@@ -5,7 +5,7 @@ from ._common import build_matcher
 from ..archive import ArchiveRecreater
 from ..constants import *  # NOQA
 from ..compress import CompressionSpec
-from ..helpers import archivename_validator, comment_validator, PathSpec, ChunkerParams, CommandError
+from ..helpers import archivename_validator, comment_validator, PathSpec, ChunkerParams, bin_to_hex
 from ..helpers import timestamp
 from ..manifest import Manifest
 
@@ -38,15 +38,19 @@ class RecreateMixIn:
             timestamp=args.timestamp,
         )
 
-        archive_names = tuple(archive.name for archive in manifest.archives.list_considering(args))
-        if args.target is not None and len(archive_names) != 1:
-            raise CommandError("--target: Need to specify single archive")
-        for name in archive_names:
-            if recreater.is_temporary_archive(name):
+        for archive_info in manifest.archives.list_considering(args):
+            if recreater.is_temporary_archive(archive_info.name):
                 continue
-            print("Processing", name)
-            if not recreater.recreate(name, args.comment, args.target):
-                logger.info("Skipped archive %s: Nothing to do. Archive was not processed.", name)
+            name, hex_id = archive_info.name, bin_to_hex(archive_info.id)
+            print(f"Processing {name} {hex_id}")
+            if args.target:
+                target = args.target
+                delete_original = False
+            else:
+                target = archive_info.name
+                delete_original = True
+            if not recreater.recreate(archive_info.id, target, delete_original, args.comment):
+                logger.info(f"Skipped archive {name} {hex_id}: Nothing to do.")
         if not args.dry_run:
             manifest.write()
 
@@ -135,8 +139,7 @@ class RecreateMixIn:
             default=None,
             type=archivename_validator,
             action=Highlander,
-            help="create a new archive with the name ARCHIVE, do not replace existing archive "
-            "(only applies for a single archive)",
+            help="create a new archive with the name ARCHIVE, do not replace existing archive",
         )
         archive_group.add_argument(
             "--comment",

+ 10 - 8
src/borg/archiver/transfer_cmd.py

@@ -33,14 +33,15 @@ class TransferMixIn:
             )
 
         dry_run = args.dry_run
-        archive_names = tuple(x.name for x in other_manifest.archives.list_considering(args))
-        if not archive_names:
+        archive_infos = other_manifest.archives.list_considering(args)
+        count = len(archive_infos)
+        if count == 0:
             return
 
         an_errors = []
-        for archive_name in archive_names:
+        for archive_info in archive_infos:
             try:
-                archivename_validator(archive_name)
+                archivename_validator(archive_info.name)
             except argparse.ArgumentTypeError as err:
                 an_errors.append(str(err))
         if an_errors:
@@ -48,12 +49,12 @@ class TransferMixIn:
             raise Error("\n".join(an_errors))
 
         ac_errors = []
-        for archive_name in archive_names:
-            archive = Archive(other_manifest, archive_name)
+        for archive_info in archive_infos:
+            archive = Archive(other_manifest, archive_info.id)
             try:
                 comment_validator(archive.metadata.get("comment", ""))
             except argparse.ArgumentTypeError as err:
-                ac_errors.append(f"{archive_name}: {err}")
+                ac_errors.append(f"{archive_info.name}: {err}")
         if ac_errors:
             ac_errors.insert(0, "Invalid archive comments detected, please fix them before transfer:")
             raise Error("\n".join(ac_errors))
@@ -75,7 +76,8 @@ class TransferMixIn:
 
         upgrader = UpgraderCls(cache=cache)
 
-        for name in archive_names:
+        for archive_info in archive_infos:
+            name = archive_info.name
             transfer_size = 0
             present_size = 0
             if manifest.archives.exists(name) and not dry_run:

+ 81 - 22
src/borg/manifest.py

@@ -15,7 +15,7 @@ from .constants import *  # NOQA
 from .helpers.datastruct import StableDict
 from .helpers.parseformat import bin_to_hex, hex_to_bin
 from .helpers.time import parse_timestamp, calculate_relative_offset, archive_ts_now
-from .helpers.errors import Error
+from .helpers.errors import Error, CommandError
 from .patterns import get_regex_from_pattern
 from .repoobj import RepoObj
 
@@ -152,6 +152,10 @@ class Archives:
                 archive_info = dict(name=name, id=self._archives[name]["id"], time=self._archives[name]["time"])
                 yield None, archive_info
 
+    def _info_tuples(self):
+        for _, info in self._infos():
+            yield ArchiveInfo(name=info["name"], id=info["id"], ts=parse_timestamp(info["time"]))
+
     def _lookup_name(self, name, raw=False):
         assert isinstance(name, str)
         assert not self.legacy
@@ -159,12 +163,25 @@ class Archives:
             if archive_info["name"] == name:
                 if not raw:
                     ts = parse_timestamp(archive_info["time"])
-                    return store_key, ArchiveInfo(name=name, id=archive_info["id"], ts=ts)
+                    return store_key, ArchiveInfo(name=archive_info["name"], id=archive_info["id"], ts=ts)
                 else:
                     return store_key, archive_info
         else:
             raise KeyError(name)
 
+    def _lookup_id(self, id, raw=False):
+        assert isinstance(id, bytes)
+        assert not self.legacy
+        for store_key, archive_info in self._infos():
+            if archive_info["id"] == id:
+                if not raw:
+                    ts = parse_timestamp(archive_info["time"])
+                    return store_key, ArchiveInfo(name=archive_info["name"], id=archive_info["id"], ts=ts)
+                else:
+                    return store_key, archive_info
+        else:
+            raise KeyError(bin_to_hex(id))
+
     def names(self):
         # yield the names of all archives
         if not self.legacy:
@@ -191,6 +208,26 @@ class Archives:
             else:
                 return dict(name=name, id=values["id"], time=values["time"])
 
+    def get_by_id(self, id, raw=False):
+        assert isinstance(id, bytes)
+        if not self.legacy:
+            try:
+                store_key, archive_info = self._lookup_id(id, raw=raw)
+                return archive_info
+            except KeyError:
+                return None
+        else:
+            for name, values in self._archives.items():
+                if id == values["id"]:
+                    break
+            else:
+                return None
+            if not raw:
+                ts = parse_timestamp(values["time"])
+                return ArchiveInfo(name=name, id=values["id"], ts=ts)
+            else:
+                return dict(name=name, id=values["id"], time=values["time"])
+
     def create(self, name, id, ts, *, overwrite=False):
         assert isinstance(name, str)
         assert isinstance(id, bytes)
@@ -198,16 +235,6 @@ class Archives:
             ts = ts.isoformat(timespec="microseconds")
         assert isinstance(ts, str)
         if not self.legacy:
-            try:
-                store_key, _ = self._lookup_name(name)
-            except KeyError:
-                pass
-            else:
-                # looks like we already have an archive list entry with that name
-                if not overwrite:
-                    raise KeyError("archive already exists")
-                else:
-                    self.repository.store_delete(f"archives/{store_key}")
             archive = dict(name=name, id=id, time=ts)
             value = self.manifest.key.pack_metadata(archive)
             id = self.manifest.repo_objs.id_hash(value)
@@ -228,6 +255,13 @@ class Archives:
         else:
             self._archives.pop(name)
 
+    def delete_by_id(self, id):
+        # delete an archive
+        assert isinstance(id, bytes)
+        assert not self.legacy
+        store_key, archive_info = self._lookup_id(id)
+        self.repository.store_delete(f"archives/{store_key}")
+
     def list(
         self,
         *,
@@ -262,22 +296,32 @@ class Archives:
         if isinstance(sort_by, (str, bytes)):
             raise TypeError("sort_by must be a sequence of str")
 
-        archives = [self.get(name) for name in self.names()]
-        regex = get_regex_from_pattern(match or "re:.*")
-        regex = re.compile(regex + match_end)
-        archives = [x for x in archives if regex.match(x.name) is not None]
+        archive_infos = self._info_tuples()
+        if match is None:
+            archive_infos = list(archive_infos)
+        elif match.startswith("aid:"):  # do a match on the archive ID (prefix)
+            wanted_id = match.removeprefix("aid:")
+            archive_infos = [x for x in archive_infos if bin_to_hex(x.id).startswith(wanted_id)]
+            if len(archive_infos) != 1:
+                raise CommandError("archive ID based match needs to match precisely one archive ID")
+        else:  #  do a match on the name
+            regex = get_regex_from_pattern(match)
+            regex = re.compile(regex + match_end)
+            archive_infos = [x for x in archive_infos if regex.match(x.name) is not None]
 
         if any([oldest, newest, older, newer]):
-            archives = filter_archives_by_date(archives, oldest=oldest, newest=newest, newer=newer, older=older)
+            archive_infos = filter_archives_by_date(
+                archive_infos, oldest=oldest, newest=newest, newer=newer, older=older
+            )
         for sortkey in reversed(sort_by):
-            archives.sort(key=attrgetter(sortkey))
+            archive_infos.sort(key=attrgetter(sortkey))
         if first:
-            archives = archives[:first]
+            archive_infos = archive_infos[:first]
         elif last:
-            archives = archives[max(len(archives) - last, 0) :]
+            archive_infos = archive_infos[max(len(archive_infos) - last, 0) :]
         if reverse:
-            archives.reverse()
-        return archives
+            archive_infos.reverse()
+        return archive_infos
 
     def list_considering(self, args):
         """
@@ -299,6 +343,21 @@ class Archives:
             newest=getattr(args, "newest", None),
         )
 
+    def get_one(self, match, *, match_end=r"\Z"):
+        """get exactly one archive matching <match>"""
+        assert match is not None
+        archive_infos = self._info_tuples()
+        if match.startswith("aid:"):  # do a match on the archive ID (prefix)
+            wanted_id = match.removeprefix("aid:")
+            archive_infos = [i for i in archive_infos if bin_to_hex(i.id).startswith(wanted_id)]
+        else:  # do a match on the name
+            regex = get_regex_from_pattern(match)
+            regex = re.compile(regex + match_end)
+            archive_infos = [i for i in archive_infos if regex.match(i.name) is not None]
+        if len(archive_infos) != 1:
+            raise CommandError(f"{match} needed to match precisely one archive, but matched {len(archive_infos)}.")
+        return archive_infos[0]
+
     def _set_raw_dict(self, d):
         """set the dict we get from the msgpack unpacker"""
         for k, v in d.items():

+ 0 - 13
src/borg/testsuite/archiver/recreate_cmd.py

@@ -5,7 +5,6 @@ from datetime import datetime
 import pytest
 
 from ...constants import *  # NOQA
-from ...helpers import CommandError
 from .. import changedir, are_hardlinks_supported
 from . import (
     _create_test_caches,
@@ -79,18 +78,6 @@ def test_recreate_hardlinked_tags(archivers, request):  # test for issue #4911
     # if issue #4911 is present, the recreate will crash with a KeyError for "input/file1"
 
 
-def test_recreate_target_rc(archivers, request):
-    archiver = request.getfixturevalue(archivers)
-    cmd(archiver, "repo-create", RK_ENCRYPTION)
-    if archiver.FORK_DEFAULT:
-        expected_ec = CommandError().exit_code
-        output = cmd(archiver, "recreate", "--target=asdf", exit_code=expected_ec)
-        assert "Need to specify single archive" in output
-    else:
-        with pytest.raises(CommandError):
-            cmd(archiver, "recreate", "--target=asdf")
-
-
 def test_recreate_target(archivers, request):
     archiver = request.getfixturevalue(archivers)
     create_test_files(archiver.input_path)

+ 0 - 3
src/borg/testsuite/archiver/return_codes.py

@@ -1,4 +1,3 @@
-from ...archive import Archive
 from ...constants import *  # NOQA
 from ...helpers import IncludePatternNeverMatchedWarning
 from . import cmd_fixture, changedir  # NOQA
@@ -18,5 +17,3 @@ def test_return_codes(cmd_fixture, tmpdir):
         assert rc == EXIT_SUCCESS
     rc, out = cmd_fixture("--repo=%s" % repo, "extract", "archive", "does/not/match")
     assert rc == IncludePatternNeverMatchedWarning().exit_code
-    rc, out = cmd_fixture("--repo=%s" % repo, "create", "archive", str(input))
-    assert rc == Archive.AlreadyExists().exit_code