
manifest.archives: refactor api

Archives was built with a dictionary-like API, but in the
future we want to move away from a read-modify-write
archives list.
Thomas Waldmann · 9 months ago
parent commit b56c81bf62
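
For orientation, here is a rough before/after sketch of how call sites change with this commit. It is only an illustration distilled from the hunks below; the helper name and the archive_id/timestamp parameters are placeholders, not identifiers from the commit.

def _call_site_migration_sketch(manifest, name, archive_id, timestamp):
    """Illustrative only: old dict-style access (in comments) vs. the new method API.
    Assumes no archive called `name` exists yet."""
    # before: manifest.archives[name] = (archive_id, timestamp)
    manifest.archives.create(name, archive_id, timestamp)      # raises KeyError if name exists
    # before: if name in manifest.archives:
    assert manifest.archives.exists(name)
    # before: info = manifest.archives[name]
    info = manifest.archives.get(name)                         # ArchiveInfo(name=..., id=..., ts=...)
    # before: len(manifest.archives); iterating the mapping yielded the names
    n = manifest.archives.count()
    names = list(manifest.archives.names())
    # before: del manifest.archives[name]
    manifest.archives.delete(name)
    return info, n, names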

src/borg/archive.py  +13 -13

@@ -492,7 +492,7 @@ class Archive:
         self.create = create
         if self.create:
             self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats)
-            if name in manifest.archives:
+            if manifest.archives.exists(name):
                 raise self.AlreadyExists(name)
         else:
             info = self.manifest.archives.get(name)
@@ -610,7 +610,7 @@ Duration: {0.duration}
 
     def save(self, name=None, comment=None, timestamp=None, stats=None, additional_metadata=None):
         name = name or self.name
-        if name in self.manifest.archives:
+        if self.manifest.archives.exists(name):
             raise self.AlreadyExists(name)
         self.items_buffer.flush(flush=True)
         item_ptrs = archive_put_items(
@@ -657,7 +657,7 @@ Duration: {0.duration}
                 raise
         while self.repository.async_response(wait=True) is not None:
             pass
-        self.manifest.archives[name] = (self.id, metadata.time)
+        self.manifest.archives.create(name, self.id, metadata.time)
         self.manifest.write()
         return metadata
 
@@ -951,22 +951,22 @@ Duration: {0.duration}
         data = self.key.pack_metadata(metadata.as_dict())
         new_id = self.key.id_hash(data)
         self.cache.add_chunk(new_id, {}, data, stats=self.stats, ro_type=ROBJ_ARCHIVE_META)
-        self.manifest.archives[self.name] = (new_id, metadata.time)
+        self.manifest.archives.create(self.name, new_id, metadata.time, overwrite=True)
         self.id = new_id
 
     def rename(self, name):
-        if name in self.manifest.archives:
+        if self.manifest.archives.exists(name):
             raise self.AlreadyExists(name)
         oldname = self.name
         self.name = name
         self.set_meta("name", name)
-        del self.manifest.archives[oldname]
+        self.manifest.archives.delete(oldname)
 
     def delete(self):
         # quick and dirty: we just nuke the archive from the archives list - that will
         # potentially orphan all chunks previously referenced by the archive, except the ones also
         # referenced by other archives. In the end, "borg compact" will clean up and free space.
-        del self.manifest.archives[self.name]
+        self.manifest.archives.delete(self.name)
 
     @staticmethod
     def compare_archives_iter(
@@ -1798,16 +1798,16 @@ class ArchiveChecker:
                 archive = ArchiveItem(internal_dict=archive)
                 name = archive.name
                 logger.info("Found archive %s", name)
-                if name in manifest.archives:
+                if manifest.archives.exists(name):
                     i = 1
                     while True:
                         new_name = "%s.%d" % (name, i)
-                        if new_name not in manifest.archives:
+                        if not manifest.archives.exists(new_name):
                             break
                         i += 1
                     logger.warning("Duplicate archive name %s, storing as %s", name, new_name)
                     name = new_name
-                manifest.archives[name] = (chunk_id, archive.time)
+                manifest.archives.create(name, chunk_id, archive.time)
         pi.finish()
         logger.info("Manifest rebuild complete.")
         return manifest
@@ -2025,7 +2025,7 @@ class ArchiveChecker:
                 if archive_id not in self.chunks:
                     logger.error("Archive metadata block %s is missing!", bin_to_hex(archive_id))
                     self.error_found = True
-                    del self.manifest.archives[info.name]
+                    self.manifest.archives.delete(info.name)
                     continue
                 cdata = self.repository.get(archive_id)
                 try:
@@ -2033,7 +2033,7 @@ class ArchiveChecker:
                 except IntegrityError as integrity_error:
                     logger.error("Archive metadata block %s is corrupted: %s", bin_to_hex(archive_id), integrity_error)
                     self.error_found = True
-                    del self.manifest.archives[info.name]
+                    self.manifest.archives.delete(info.name)
                     continue
                 archive = self.key.unpack_archive(data)
                 archive = ArchiveItem(internal_dict=archive)
@@ -2053,7 +2053,7 @@ class ArchiveChecker:
                 new_archive_id = self.key.id_hash(data)
                 cdata = self.repo_objs.format(new_archive_id, {}, data, ro_type=ROBJ_ARCHIVE_META)
                 add_reference(new_archive_id, len(data), cdata)
-                self.manifest.archives[info.name] = (new_archive_id, info.ts)
+                self.manifest.archives.create(info.name, new_archive_id, info.ts, overwrite=True)
             pi.finish()
 
     def finish(self):
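
The ArchiveChecker hunks above keep their duplicate-name handling during manifest rebuild, now expressed via exists(). As a standalone illustration only, the logic amounts to the helper below; the function name is hypothetical and `archives` is assumed to expose the new exists() method.

def _unique_archive_name(archives, name):
    """Return name unchanged, or name.N for the first free N >= 1 (sketch of the rebuild logic above)."""
    if not archives.exists(name):
        return name
    i = 1
    while True:
        new_name = "%s.%d" % (name, i)
        if not archives.exists(new_name):
            return new_name
        i += 1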

src/borg/archiver/debug_cmd.py  +1 -1

@@ -46,7 +46,7 @@ class DebugMixIn:
         """dump decoded archive metadata (not: data)"""
         repo_objs = manifest.repo_objs
         try:
-            archive_meta_orig = manifest.archives.get_raw_dict()[args.name]
+            archive_meta_orig = manifest.archives.get(args.name, raw=True)
         except KeyError:
             raise Archive.DoesNotExist(args.name)
 

src/borg/archiver/delete_cmd.py  +1 -1

@@ -33,7 +33,7 @@ class DeleteMixIn:
             try:
                 # this does NOT use Archive.delete, so this code hopefully even works in cases a corrupt archive
                 # would make the code in class Archive crash, so the user can at least get rid of such archives.
-                current_archive = manifest.archives.pop(archive_name)
+                current_archive = manifest.archives.delete(archive_name)
             except KeyError:
                 self.print_warning(f"Archive {archive_name} not found ({i}/{len(archive_names)}).")
             else:

src/borg/archiver/rdelete_cmd.py  +1 -1

@@ -29,7 +29,7 @@ class RDeleteMixIn:
                 msg = []
                 try:
                     manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
-                    n_archives = len(manifest.archives)
+                    n_archives = manifest.archives.count()
                     msg.append(
                         f"You requested to DELETE the following repository completely "
                         f"*including* {n_archives} archives it contains:"

src/borg/archiver/transfer_cmd.py  +1 -1

@@ -78,7 +78,7 @@ class TransferMixIn:
         for name in archive_names:
             transfer_size = 0
             present_size = 0
-            if name in manifest.archives and not dry_run:
+            if manifest.archives.exists(name) and not dry_run:
                 print(f"{name}: archive is already present in destination repo, skipping.")
             else:
                 if not dry_run:

src/borg/manifest.py  +42 -29

@@ -1,6 +1,6 @@
 import enum
 import re
-from collections import abc, namedtuple
+from collections import namedtuple
 from datetime import datetime, timedelta, timezone
 from operator import attrgetter
 from collections.abc import Sequence
@@ -68,11 +68,13 @@ def filter_archives_by_date(archives, older=None, newer=None, oldest=None, newes
     return archives
 
 
-class Archives(abc.MutableMapping):
+class Archives:
     """
-    Nice wrapper around the archives dict, making sure only valid types/values get in
-    and we can deal with str keys (and it internally encodes to byte keys) and either
-    str timestamps or datetime timestamps.
+    Manage the list of archives.
+
+    We still need to support the borg 1.x manifest-with-list-of-archives,
+    so borg transfer can work.
+    borg2 has separate items archives/* in the borgstore.
     """
 
     def __init__(self, repository):
@@ -88,20 +90,20 @@ class Archives(abc.MutableMapping):
     def prepare(self, manifest, m):
         self.manifest = manifest
         if not self.legacy:
-            self.load()
+            self._load()
         else:
-            self.set_raw_dict(m.archives)
+            self._set_raw_dict(m.archives)
 
     def finish(self, manifest):
         self.manifest = manifest  # note: .prepare is not always called
         if not self.legacy:
-            self.save()
+            self._save()
             manifest_archives = {}
         else:
-            manifest_archives = StableDict(self.get_raw_dict())
+            manifest_archives = StableDict(self._get_raw_dict())
         return manifest_archives
 
-    def load(self):
+    def _load(self):
         # load archives list from store
         from .helpers import msgpack
 
@@ -116,12 +118,12 @@ class Archives(abc.MutableMapping):
             _, value = self.manifest.repo_objs.parse(hex_to_bin(info.name), value, ro_type=ROBJ_MANIFEST)
             archive = msgpack.unpackb(value)
             archives[archive["name"]] = dict(id=archive["id"], time=archive["time"])
-        self.set_raw_dict(archives)
+        self._set_raw_dict(archives)
 
-    def save(self):
+    def _save(self):
         # save archives list to store
         valid_keys = set()
-        for name, info in self.get_raw_dict().items():
+        for name, info in self._get_raw_dict().items():
             archive = dict(name=name, id=info["id"], time=info["time"])
             value = self.manifest.key.pack_metadata(archive)  #
             id = self.manifest.repo_objs.id_hash(value)
@@ -141,33 +143,44 @@ class Archives(abc.MutableMapping):
             if info.name not in valid_keys:
                 self.repository.store_delete(f"archives/{info.name}")
 
-    def __len__(self):
+    def count(self):
+        # return the count of archives in the repo
         return len(self._archives)
 
-    def __iter__(self):
-        return iter(self._archives)
+    def exists(self, name):
+        # check if an archive with this name exists
+        assert isinstance(name, str)
+        return name in self._archives
+
+    def names(self):
+        # yield the names of all archives
+        yield from self._archives
 
-    def __getitem__(self, name):
+    def get(self, name, raw=False):
         assert isinstance(name, str)
         values = self._archives.get(name)
         if values is None:
             raise KeyError
-        ts = parse_timestamp(values["time"])
-        return ArchiveInfo(name=name, id=values["id"], ts=ts)
+        if not raw:
+            ts = parse_timestamp(values["time"])
+            return ArchiveInfo(name=name, id=values["id"], ts=ts)
+        else:
+            return dict(name=name, id=values["id"], time=values["time"])
 
-    def __setitem__(self, name, info):
+    def create(self, name, id, ts, *, overwrite=False):
         assert isinstance(name, str)
-        assert isinstance(info, tuple)
-        id, ts = info
         assert isinstance(id, bytes)
         if isinstance(ts, datetime):
            ts = ts.isoformat(timespec="microseconds")
         assert isinstance(ts, str)
+        if name in self._archives and not overwrite:
+            raise KeyError("archive already exists")
         self._archives[name] = {"id": id, "time": ts}
 
-    def __delitem__(self, name):
+    def delete(self, name):
+        # delete an archive
         assert isinstance(name, str)
-        del self._archives[name]
+        self._archives.pop(name)
 
     def list(
         self,
@@ -203,7 +216,7 @@ class Archives(abc.MutableMapping):
         if isinstance(sort_by, (str, bytes)):
             raise TypeError("sort_by must be a sequence of str")
 
-        archives = self.values()
+        archives = [self.get(name) for name in self.names()]
         regex = get_regex_from_pattern(match or "re:.*")
         regex = re.compile(regex + match_end)
         archives = [x for x in archives if regex.match(x.name) is not None]
@@ -240,14 +253,14 @@ class Archives(abc.MutableMapping):
             newest=getattr(args, "newest", None),
         )
 
-    def set_raw_dict(self, d):
+    def _set_raw_dict(self, d):
         """set the dict we get from the msgpack unpacker"""
         for k, v in d.items():
             assert isinstance(k, str)
             assert isinstance(v, dict) and "id" in v and "time" in v
             self._archives[k] = v
 
-    def get_raw_dict(self):
+    def _get_raw_dict(self):
         """get the dict we can give to the msgpack packer"""
         return self._archives
 
@@ -362,8 +375,8 @@ class Manifest:
             max_ts = max(incremented_ts, now_ts)
             self.timestamp = max_ts.isoformat(timespec="microseconds")
         # include checks for limits as enforced by limited unpacker (used by load())
-        assert len(self.archives) <= MAX_ARCHIVES
-        assert all(len(name) <= 255 for name in self.archives)
+        assert self.archives.count() <= MAX_ARCHIVES
+        assert all(len(name) <= 255 for name in self.archives.names())
         assert len(self.item_keys) <= 100
         self.config["item_keys"] = tuple(sorted(self.item_keys))
         manifest_archives = self.archives.finish(self)
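
Two behavioral details of the new methods above are worth spelling out: get() returns an ArchiveInfo namedtuple with a parsed timestamp unless raw=True is passed (then it returns the plain name/id/time dict), and create() raises KeyError for an existing name unless overwrite=True. A minimal sketch, assuming `archives` is an already loaded manifest.archives, `archive_id` is a valid id (bytes), and no archive named "demo" exists yet:

from datetime import datetime, timezone

def _archives_api_sketch(archives, archive_id: bytes):
    ts = datetime.now(timezone.utc)          # create() also accepts an isoformat string
    archives.create("demo", archive_id, ts)  # stored as ts.isoformat(timespec="microseconds")

    info = archives.get("demo")              # ArchiveInfo(name="demo", id=..., ts=datetime)
    raw = archives.get("demo", raw=True)     # {"name": "demo", "id": ..., "time": "<isoformat str>"}

    try:
        archives.create("demo", archive_id, ts)              # same name again ...
    except KeyError:
        pass                                                 # ... refused without overwrite=True
    archives.create("demo", archive_id, ts, overwrite=True)  # explicit replace, as set_meta() does

    archives.delete("demo")                  # raises KeyError if "demo" is unknown
    return info, raw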

src/borg/testsuite/archiver/create_cmd.py  +1 -1

@@ -646,7 +646,7 @@ def test_create_dry_run(archivers, request):
     # Make sure no archive has been created
     with Repository(archiver.repository_path) as repository:
         manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
-    assert len(manifest.archives) == 0
+    assert manifest.archives.count() == 0
 
 
 def test_progress_on(archivers, request):

src/borg/testsuite/archiver/rename_cmd.py  +3 -3

@@ -23,6 +23,6 @@ def test_rename(archivers, request):
     # Make sure both archives have been renamed
     with Repository(archiver.repository_path) as repository:
         manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
-    assert len(manifest.archives) == 2
-    assert "test.3" in manifest.archives
-    assert "test.4" in manifest.archives
+    assert manifest.archives.count() == 2
+    assert manifest.archives.exists("test.3")
+    assert manifest.archives.exists("test.4")