소스 검색

manifest.archives: refactor api

Archives was built with a dictionary-like API,
but in the future we want to move away from
a read-modify-write archives list.
Thomas Waldmann 9 달 전
부모
커밋
b56c81bf62

+ 13 - 13
src/borg/archive.py

@@ -492,7 +492,7 @@ class Archive:
         self.create = create
         if self.create:
             self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats)
-            if name in manifest.archives:
+            if manifest.archives.exists(name):
                 raise self.AlreadyExists(name)
         else:
             info = self.manifest.archives.get(name)
@@ -610,7 +610,7 @@ Duration: {0.duration}
 
     def save(self, name=None, comment=None, timestamp=None, stats=None, additional_metadata=None):
         name = name or self.name
-        if name in self.manifest.archives:
+        if self.manifest.archives.exists(name):
             raise self.AlreadyExists(name)
         self.items_buffer.flush(flush=True)
         item_ptrs = archive_put_items(
@@ -657,7 +657,7 @@ Duration: {0.duration}
                 raise
         while self.repository.async_response(wait=True) is not None:
             pass
-        self.manifest.archives[name] = (self.id, metadata.time)
+        self.manifest.archives.create(name, self.id, metadata.time)
         self.manifest.write()
         return metadata
 
@@ -951,22 +951,22 @@ Duration: {0.duration}
         data = self.key.pack_metadata(metadata.as_dict())
         new_id = self.key.id_hash(data)
         self.cache.add_chunk(new_id, {}, data, stats=self.stats, ro_type=ROBJ_ARCHIVE_META)
-        self.manifest.archives[self.name] = (new_id, metadata.time)
+        self.manifest.archives.create(self.name, new_id, metadata.time, overwrite=True)
         self.id = new_id
 
     def rename(self, name):
-        if name in self.manifest.archives:
+        if self.manifest.archives.exists(name):
             raise self.AlreadyExists(name)
         oldname = self.name
         self.name = name
         self.set_meta("name", name)
-        del self.manifest.archives[oldname]
+        self.manifest.archives.delete(oldname)
 
     def delete(self):
         # quick and dirty: we just nuke the archive from the archives list - that will
         # potentially orphan all chunks previously referenced by the archive, except the ones also
         # referenced by other archives. In the end, "borg compact" will clean up and free space.
-        del self.manifest.archives[self.name]
+        self.manifest.archives.delete(self.name)
 
     @staticmethod
     def compare_archives_iter(
@@ -1798,16 +1798,16 @@ class ArchiveChecker:
                 archive = ArchiveItem(internal_dict=archive)
                 name = archive.name
                 logger.info("Found archive %s", name)
-                if name in manifest.archives:
+                if manifest.archives.exists(name):
                     i = 1
                     while True:
                         new_name = "%s.%d" % (name, i)
-                        if new_name not in manifest.archives:
+                        if not manifest.archives.exists(new_name):
                             break
                         i += 1
                     logger.warning("Duplicate archive name %s, storing as %s", name, new_name)
                     name = new_name
-                manifest.archives[name] = (chunk_id, archive.time)
+                manifest.archives.create(name, chunk_id, archive.time)
         pi.finish()
         logger.info("Manifest rebuild complete.")
         return manifest
@@ -2025,7 +2025,7 @@ class ArchiveChecker:
                 if archive_id not in self.chunks:
                     logger.error("Archive metadata block %s is missing!", bin_to_hex(archive_id))
                     self.error_found = True
-                    del self.manifest.archives[info.name]
+                    self.manifest.archives.delete(info.name)
                     continue
                 cdata = self.repository.get(archive_id)
                 try:
@@ -2033,7 +2033,7 @@ class ArchiveChecker:
                 except IntegrityError as integrity_error:
                     logger.error("Archive metadata block %s is corrupted: %s", bin_to_hex(archive_id), integrity_error)
                     self.error_found = True
-                    del self.manifest.archives[info.name]
+                    self.manifest.archives.delete(info.name)
                     continue
                 archive = self.key.unpack_archive(data)
                 archive = ArchiveItem(internal_dict=archive)
@@ -2053,7 +2053,7 @@ class ArchiveChecker:
                 new_archive_id = self.key.id_hash(data)
                 cdata = self.repo_objs.format(new_archive_id, {}, data, ro_type=ROBJ_ARCHIVE_META)
                 add_reference(new_archive_id, len(data), cdata)
-                self.manifest.archives[info.name] = (new_archive_id, info.ts)
+                self.manifest.archives.create(info.name, new_archive_id, info.ts, overwrite=True)
             pi.finish()
 
     def finish(self):

+ 1 - 1
src/borg/archiver/debug_cmd.py

@@ -46,7 +46,7 @@ class DebugMixIn:
         """dump decoded archive metadata (not: data)"""
         repo_objs = manifest.repo_objs
         try:
-            archive_meta_orig = manifest.archives.get_raw_dict()[args.name]
+            archive_meta_orig = manifest.archives.get(args.name, raw=True)
         except KeyError:
             raise Archive.DoesNotExist(args.name)
 

+ 1 - 1
src/borg/archiver/delete_cmd.py

@@ -33,7 +33,7 @@ class DeleteMixIn:
             try:
                 # this does NOT use Archive.delete, so this code hopefully even works in cases a corrupt archive
                 # would make the code in class Archive crash, so the user can at least get rid of such archives.
-                current_archive = manifest.archives.pop(archive_name)
+                current_archive = manifest.archives.delete(archive_name)
             except KeyError:
                 self.print_warning(f"Archive {archive_name} not found ({i}/{len(archive_names)}).")
             else:

+ 1 - 1
src/borg/archiver/rdelete_cmd.py

@@ -29,7 +29,7 @@ class RDeleteMixIn:
                 msg = []
                 try:
                     manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
-                    n_archives = len(manifest.archives)
+                    n_archives = manifest.archives.count()
                     msg.append(
                         f"You requested to DELETE the following repository completely "
                         f"*including* {n_archives} archives it contains:"

+ 1 - 1
src/borg/archiver/transfer_cmd.py

@@ -78,7 +78,7 @@ class TransferMixIn:
         for name in archive_names:
             transfer_size = 0
             present_size = 0
-            if name in manifest.archives and not dry_run:
+            if manifest.archives.exists(name) and not dry_run:
                 print(f"{name}: archive is already present in destination repo, skipping.")
             else:
                 if not dry_run:

+ 42 - 29
src/borg/manifest.py

@@ -1,6 +1,6 @@
 import enum
 import re
-from collections import abc, namedtuple
+from collections import namedtuple
 from datetime import datetime, timedelta, timezone
 from operator import attrgetter
 from collections.abc import Sequence
@@ -68,11 +68,13 @@ def filter_archives_by_date(archives, older=None, newer=None, oldest=None, newes
     return archives
 
 
-class Archives(abc.MutableMapping):
+class Archives:
     """
-    Nice wrapper around the archives dict, making sure only valid types/values get in
-    and we can deal with str keys (and it internally encodes to byte keys) and either
-    str timestamps or datetime timestamps.
+    Manage the list of archives.
+
+    We still need to support the borg 1.x manifest-with-list-of-archives,
+    so borg transfer can work.
+    borg2 has separate items archives/* in the borgstore.
     """
 
     def __init__(self, repository):
@@ -88,20 +90,20 @@ class Archives(abc.MutableMapping):
     def prepare(self, manifest, m):
         self.manifest = manifest
         if not self.legacy:
-            self.load()
+            self._load()
         else:
-            self.set_raw_dict(m.archives)
+            self._set_raw_dict(m.archives)
 
     def finish(self, manifest):
         self.manifest = manifest  # note: .prepare is not always called
         if not self.legacy:
-            self.save()
+            self._save()
             manifest_archives = {}
         else:
-            manifest_archives = StableDict(self.get_raw_dict())
+            manifest_archives = StableDict(self._get_raw_dict())
         return manifest_archives
 
-    def load(self):
+    def _load(self):
         # load archives list from store
         from .helpers import msgpack
 
@@ -116,12 +118,12 @@ class Archives(abc.MutableMapping):
             _, value = self.manifest.repo_objs.parse(hex_to_bin(info.name), value, ro_type=ROBJ_MANIFEST)
             archive = msgpack.unpackb(value)
             archives[archive["name"]] = dict(id=archive["id"], time=archive["time"])
-        self.set_raw_dict(archives)
+        self._set_raw_dict(archives)
 
-    def save(self):
+    def _save(self):
         # save archives list to store
         valid_keys = set()
-        for name, info in self.get_raw_dict().items():
+        for name, info in self._get_raw_dict().items():
             archive = dict(name=name, id=info["id"], time=info["time"])
             value = self.manifest.key.pack_metadata(archive)  #
             id = self.manifest.repo_objs.id_hash(value)
@@ -141,33 +143,44 @@ class Archives(abc.MutableMapping):
             if info.name not in valid_keys:
                 self.repository.store_delete(f"archives/{info.name}")
 
-    def __len__(self):
+    def count(self):
+        # return the count of archives in the repo
         return len(self._archives)
 
-    def __iter__(self):
-        return iter(self._archives)
+    def exists(self, name):
+        # check if an archive with this name exists
+        assert isinstance(name, str)
+        return name in self._archives
+
+    def names(self):
+        # yield the names of all archives
+        yield from self._archives
 
-    def __getitem__(self, name):
+    def get(self, name, raw=False):
         assert isinstance(name, str)
         values = self._archives.get(name)
         if values is None:
             raise KeyError
-        ts = parse_timestamp(values["time"])
-        return ArchiveInfo(name=name, id=values["id"], ts=ts)
+        if not raw:
+            ts = parse_timestamp(values["time"])
+            return ArchiveInfo(name=name, id=values["id"], ts=ts)
+        else:
+            return dict(name=name, id=values["id"], time=values["time"])
 
-    def __setitem__(self, name, info):
+    def create(self, name, id, ts, *, overwrite=False):
         assert isinstance(name, str)
-        assert isinstance(info, tuple)
-        id, ts = info
         assert isinstance(id, bytes)
         if isinstance(ts, datetime):
             ts = ts.isoformat(timespec="microseconds")
         assert isinstance(ts, str)
+        if name in self._archives and not overwrite:
+            raise KeyError("archive already exists")
         self._archives[name] = {"id": id, "time": ts}
 
-    def __delitem__(self, name):
+    def delete(self, name):
+        # delete an archive
         assert isinstance(name, str)
-        del self._archives[name]
+        self._archives.pop(name)
 
     def list(
         self,
@@ -203,7 +216,7 @@ class Archives(abc.MutableMapping):
         if isinstance(sort_by, (str, bytes)):
             raise TypeError("sort_by must be a sequence of str")
 
-        archives = self.values()
+        archives = [self.get(name) for name in self.names()]
         regex = get_regex_from_pattern(match or "re:.*")
         regex = re.compile(regex + match_end)
         archives = [x for x in archives if regex.match(x.name) is not None]
@@ -240,14 +253,14 @@ class Archives(abc.MutableMapping):
             newest=getattr(args, "newest", None),
         )
 
-    def set_raw_dict(self, d):
+    def _set_raw_dict(self, d):
         """set the dict we get from the msgpack unpacker"""
         for k, v in d.items():
             assert isinstance(k, str)
             assert isinstance(v, dict) and "id" in v and "time" in v
             self._archives[k] = v
 
-    def get_raw_dict(self):
+    def _get_raw_dict(self):
         """get the dict we can give to the msgpack packer"""
         return self._archives
 
@@ -362,8 +375,8 @@ class Manifest:
             max_ts = max(incremented_ts, now_ts)
             self.timestamp = max_ts.isoformat(timespec="microseconds")
         # include checks for limits as enforced by limited unpacker (used by load())
-        assert len(self.archives) <= MAX_ARCHIVES
-        assert all(len(name) <= 255 for name in self.archives)
+        assert self.archives.count() <= MAX_ARCHIVES
+        assert all(len(name) <= 255 for name in self.archives.names())
         assert len(self.item_keys) <= 100
         self.config["item_keys"] = tuple(sorted(self.item_keys))
         manifest_archives = self.archives.finish(self)

+ 1 - 1
src/borg/testsuite/archiver/create_cmd.py

@@ -646,7 +646,7 @@ def test_create_dry_run(archivers, request):
     # Make sure no archive has been created
     with Repository(archiver.repository_path) as repository:
         manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
-    assert len(manifest.archives) == 0
+    assert manifest.archives.count() == 0
 
 
 def test_progress_on(archivers, request):

+ 3 - 3
src/borg/testsuite/archiver/rename_cmd.py

@@ -23,6 +23,6 @@ def test_rename(archivers, request):
     # Make sure both archives have been renamed
     with Repository(archiver.repository_path) as repository:
         manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
-    assert len(manifest.archives) == 2
-    assert "test.3" in manifest.archives
-    assert "test.4" in manifest.archives
+    assert manifest.archives.count() == 2
+    assert manifest.archives.exists("test.3")
+    assert manifest.archives.exists("test.4")