
Repository.list: return [(id, stored_size), ...]

Note: LegacyRepository still returns [id, ...], and so does RemoteRepository.list
if the remote repo is a LegacyRepository.

also: use LIST_SCAN_LIMIT instead of a hardcoded limit
Thomas Waldmann, 9 months ago
commit c67cf07522
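
For callers that may talk to either repository flavor, a small shim can normalize the two return shapes (a sketch; normalize_list_result is a hypothetical helper, not part of this commit):

    def normalize_list_result(result):
        # Repository.list returns [(id, stored_size), ...], while
        # LegacyRepository (and RemoteRepository wrapping one) still
        # returns bare ids; normalize to tuples, None = size unknown.
        return [entry if isinstance(entry, tuple) else (entry, None) for entry in result]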

+ 20 - 20
src/borg/archive.py

@@ -1696,10 +1696,10 @@ class ArchiveChecker:
             result = self.repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
             if not result:
                 break
-            marker = result[-1]
-            init_entry = ChunkIndexEntry(refcount=0, size=0)
-            for id_ in result:
-                self.chunks[id_] = init_entry
+            marker = result[-1][0]
+            init_entry = ChunkIndexEntry(refcount=0, size=0)  # unknown plaintext size (!= stored size!)
+            for id, stored_size in result:
+                self.chunks[id] = init_entry
 
     def make_key(self, repository):
         attempt = 0
@@ -1737,7 +1737,7 @@ class ArchiveChecker:
     def verify_data(self):
         logger.info("Starting cryptographic data integrity verification...")
         chunks_count_index = len(self.chunks)
-        chunks_count_segments = 0
+        chunks_count_repo = 0
         errors = 0
         defect_chunks = []
         pi = ProgressIndicatorPercent(
@@ -1745,16 +1745,16 @@ class ArchiveChecker:
         )
         marker = None
         while True:
-            chunk_ids = self.repository.list(limit=100, marker=marker)
-            if not chunk_ids:
+            result = self.repository.list(limit=100, marker=marker)
+            if not result:
                 break
-            marker = chunk_ids[-1]
-            chunks_count_segments += len(chunk_ids)
-            chunk_data_iter = self.repository.get_many(chunk_ids)
-            chunk_ids_revd = list(reversed(chunk_ids))
-            while chunk_ids_revd:
+            marker = result[-1][0]
+            chunks_count_repo += len(result)
+            chunk_data_iter = self.repository.get_many(id for id, _ in result)
+            result_revd = list(reversed(result))
+            while result_revd:
                 pi.show()
-                chunk_id = chunk_ids_revd.pop(-1)  # better efficiency
+                chunk_id, _ = result_revd.pop(-1)  # better efficiency
                 try:
                     encrypted_data = next(chunk_data_iter)
                 except (Repository.ObjectNotFound, IntegrityErrorBase) as err:
@@ -1764,9 +1764,9 @@ class ArchiveChecker:
                     if isinstance(err, IntegrityErrorBase):
                         defect_chunks.append(chunk_id)
                     # as the exception killed our generator, make a new one for remaining chunks:
-                    if chunk_ids_revd:
-                        chunk_ids = list(reversed(chunk_ids_revd))
-                        chunk_data_iter = self.repository.get_many(chunk_ids)
+                    if result_revd:
+                        result = list(reversed(result_revd))
+                        chunk_data_iter = self.repository.get_many(id for id, _ in result)
                 else:
                     try:
                         # we must decompress, so it'll call assert_id() in there:
@@ -1777,10 +1777,10 @@ class ArchiveChecker:
                         logger.error("chunk %s, integrity error: %s", bin_to_hex(chunk_id), integrity_error)
                         defect_chunks.append(chunk_id)
         pi.finish()
-        if chunks_count_index != chunks_count_segments:
-            logger.error("Repo/Chunks index object count vs. segment files object count mismatch.")
+        if chunks_count_index != chunks_count_repo:
+            logger.error("Chunks index object count vs. repository object count mismatch.")
             logger.error(
-                "Repo/Chunks index: %d objects != segment files: %d objects", chunks_count_index, chunks_count_segments
+                "Chunks index: %d objects != Chunks repository: %d objects", chunks_count_index, chunks_count_repo
             )
         if defect_chunks:
             if self.repair:
@@ -1820,7 +1820,7 @@ class ArchiveChecker:
         log = logger.error if errors else logger.info
         log(
             "Finished cryptographic data integrity verification, verified %d chunks with %d integrity errors.",
-            chunks_count_segments,
+            chunks_count_repo,
             errors,
         )
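
The verification loop relies on repository.get_many() being a generator: an exception propagating out of next() terminates it, so a fresh generator must be built for the ids not yet consumed. Stripped of the verification details, the pattern is roughly (a sketch; verify() is a placeholder for the real checks):

    ids_revd = [id for id, _ in reversed(result)]
    data_iter = repository.get_many(id for id, _ in result)
    while ids_revd:
        chunk_id = ids_revd.pop(-1)  # popping from the end is O(1)
        try:
            data = next(data_iter)
        except (Repository.ObjectNotFound, IntegrityErrorBase):
            # the exception killed the generator; rebuild it for the remaining ids
            if ids_revd:
                data_iter = repository.get_many(reversed(ids_revd))
        else:
            verify(chunk_id, data)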
 

+ 3 - 3
src/borg/archiver/compact_cmd.py

@@ -51,9 +51,9 @@ class ArchiveGarbageCollector:
             result = self.repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
             if not result:
                 break
-            marker = result[-1]
-            for chunk_id in result:
-                repository_chunks[chunk_id] = 0  # plaintext size unknown
+            marker = result[-1][0]
+            for id, stored_size in result:
+                repository_chunks[id] = 0  # plaintext size unknown
         return repository_chunks
 
     def analyze_archives(self) -> Tuple[Dict[bytes, int], Dict[bytes, int], int, int, int]:
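
The stored sizes returned by list() are unpacked but unused here: repository_chunks still maps each id to 0 because the plaintext size is only learned later from the archives. If the total of physically stored bytes were wanted as well, the same scan could accumulate it (a hypothetical extension, not part of this commit):

    total_stored = 0
    marker = None
    while True:
        result = self.repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
        if not result:
            break
        marker = result[-1][0]
        for id, stored_size in result:
            repository_chunks[id] = 0  # plaintext size unknown
            total_stored += stored_size  # size as stored (compressed/encrypted)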

+ 14 - 12
src/borg/archiver/debug_cmd.py

@@ -123,17 +123,18 @@ class DebugMixIn:
                 fd.write(data)
 
         # set up the key without depending on a manifest obj
-        ids = repository.list(limit=1, marker=None)
-        cdata = repository.get(ids[0])
+        result = repository.list(limit=1, marker=None)
+        id, _ = result[0]
+        cdata = repository.get(id)
         key = key_factory(repository, cdata)
         repo_objs = RepoObj(key)
         marker = None
         while True:
-            ids = repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
-            if not ids:
+            result = repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
+            if not result:
                 break
-            marker = ids[-1]
-            for id in ids:
+            marker = result[-1][0]
+            for id, stored_size in result:
                 cdata = repository.get(id)
                 decrypt_dump(id, cdata)
         print("Done.")
@@ -168,8 +169,9 @@ class DebugMixIn:
         from ..crypto.key import key_factory
 
         # set up the key without depending on a manifest obj
-        ids = repository.list(limit=1, marker=None)
-        cdata = repository.get(ids[0])
+        result = repository.list(limit=1, marker=None)
+        id, _ = result[0]
+        cdata = repository.get(id)
         key = key_factory(repository, cdata)
         repo_objs = RepoObj(key)
 
@@ -178,11 +180,11 @@ class DebugMixIn:
         last_id = None
         i = 0
         while True:
-            ids = repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
-            if not ids:
+            result = repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
+            if not result:
                 break
-            marker = ids[-1]
-            for id in ids:
+            marker = result[-1][0]
+            for id, stored_size in result:
                 cdata = repository.get(id)
                 _, data = repo_objs.parse(id, cdata, ro_type=ROBJ_DONTCARE)
 

+ 4 - 4
src/borg/archiver/rcompress_cmd.py

@@ -20,12 +20,12 @@ def find_chunks(repository, repo_objs, stats, ctype, clevel, olevel):
     compr_keys = stats["compr_keys"] = set()
     compr_wanted = ctype, clevel, olevel
     marker = None
-    chunks_limit = 1000
     while True:
-        chunk_ids = repository.list(limit=chunks_limit, marker=marker)
-        if not chunk_ids:
+        result = repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
+        if not result:
             break
-        marker = chunk_ids[-1]
+        marker = result[-1][0]
+        chunk_ids = [id for id, _ in result]
         for id, chunk_no_data in zip(chunk_ids, repository.get_many(chunk_ids, read_data=False)):
             meta = repo_objs.parse_meta(id, chunk_no_data, ro_type=ROBJ_DONTCARE)
             compr_found = meta["ctype"], meta["clevel"], meta.get("olevel", -1)
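
Since only compression metadata is needed here, the objects are fetched with read_data=False, and the ids are materialized into a list so they can be zipped with the results in order. Condensed (a sketch):

    chunk_ids = [id for id, _ in result]
    for id, chunk_no_data in zip(chunk_ids, repository.get_many(chunk_ids, read_data=False)):
        meta = repo_objs.parse_meta(id, chunk_no_data, ro_type=ROBJ_DONTCARE)
        # (meta["ctype"], meta["clevel"], meta.get("olevel", -1)) identifies the compression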

+ 4 - 4
src/borg/cache.py

@@ -639,14 +639,14 @@ class ChunksMixin:
             num_requests += 1
             if not result:
                 break
-            marker = result[-1]
+            marker = result[-1][0]
             # All chunks from the repository have a refcount of MAX_VALUE, which is sticky,
             # therefore we can't/won't delete them. Chunks we added ourselves in this transaction
             # are tracked correctly.
-            init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0)
-            for id_ in result:
+            init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0)  # plaintext size
+            for id, stored_size in result:
                 num_chunks += 1
-                chunks[id_] = init_entry
+                chunks[id] = init_entry
         # Cache does not contain the manifest.
         if not isinstance(self.repository, (Repository, RemoteRepository)):
             del chunks[self.manifest.MANIFEST_ID]
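
"Sticky" here means the refcount saturates: once an entry reaches ChunkIndex.MAX_VALUE, increments and decrements leave it pinned there, so chunks seeded from the repository can never be deleted through refcount bookkeeping. In toy form (a sketch of the documented behavior, not borg's actual index code):

    def incref(count, max_value):
        # saturating refcount: MAX_VALUE is sticky
        return count if count == max_value else count + 1

    def decref(count, max_value):
        return count if count == max_value else count - 1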

+ 6 - 5
src/borg/repository.py

@@ -288,11 +288,12 @@ class Repository:
 
     def list(self, limit=None, marker=None):
         """
-        list <limit> IDs starting from after id <marker>.
+        list <limit> infos starting from after id <marker>.
+        each info is a tuple (id, storage_size).
         """
         self._lock_refresh()
         collect = True if marker is None else False
-        ids = []
+        result = []
         infos = self.store.list("data")  # generator yielding ItemInfos
         while True:
             try:
@@ -304,13 +305,13 @@ class Repository:
             else:
                 id = hex_to_bin(info.name)
                 if collect:
-                    ids.append(id)
-                    if len(ids) == limit:
+                    result.append((id, info.size))
+                    if len(result) == limit:
                         break
                 elif id == marker:
                     collect = True
                     # note: do not collect the marker id
-        return ids
+        return result
 
     def get(self, id, read_data=True):
         self._lock_refresh()
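
With this return type, scanning a whole repository is a marker-driven loop; a small generator makes the pattern reusable (a sketch; iter_repo_objects is a hypothetical helper):

    def iter_repo_objects(repository, limit=1000):
        """Yield (id, stored_size) for every object in the repository."""
        marker = None
        while True:
            result = repository.list(limit=limit, marker=marker)
            if not result:
                return
            marker = result[-1][0]  # resume after the last id seen
            yield from result

Callers then simply write: for id, stored_size in iter_repo_objects(repository): ...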

+ 2 - 2
src/borg/testsuite/archiver/check_cmd.py

@@ -432,6 +432,6 @@ def test_empty_repository(archivers, request):
         pytest.skip("only works locally")
     check_cmd_setup(archiver)
     with Repository(archiver.repository_location, exclusive=True) as repository:
-        for id_ in repository.list():
-            repository.delete(id_)
+        for id, _ in repository.list():
+            repository.delete(id)
     cmd(archiver, "check", exit_code=1)

+ 4 - 4
src/borg/testsuite/archiver/rcompress_cmd.py

@@ -17,11 +17,11 @@ def test_rcompress(archiver):
             manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
             marker = None
             while True:
-                ids = repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
-                if not ids:
+                result = repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
+                if not result:
                     break
-                marker = ids[-1]
-                for id in ids:
+                marker = result[-1][0]
+                for id, _ in result:
                     chunk = repository.get(id, read_data=True)
                     meta, data = manifest.repo_objs.parse(
                         id, chunk, ro_type=ROBJ_DONTCARE

+ 1 - 1
src/borg/testsuite/repository.py

@@ -131,7 +131,7 @@ def test_list(repo_fixtures, request):
         first_half = repository.list(limit=50)
         assert len(first_half) == 50
         assert first_half == repo_list[:50]
-        second_half = repository.list(marker=first_half[-1])
+        second_half = repository.list(marker=first_half[-1][0])
         assert len(second_half) == 50
         assert second_half == repo_list[50:]
         assert len(repository.list(limit=50)) == 50