9 сар өмнө · e2aa9d56d0
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@@ -22,7 +22,7 @@ logger = create_logger()
 
				 
			
 
				 from . import xattr
			
 
				 from .chunker import get_chunker, Chunk
			
 
				-from .cache import ChunkListEntry
			
 
				+from .cache import ChunkListEntry, build_chunkindex_from_repo
			
 
				 from .crypto.key import key_factory, UnsupportedPayloadError
			
 
				 from .compress import CompressionSpec
			
 
				 from .constants import *  # NOQA
			
@@ -50,7 +50,7 @@ from .patterns import PathPrefixPattern, FnmatchPattern, IECommand
 
				 from .item import Item, ArchiveItem, ItemDiff
			
 
				 from .platform import acl_get, acl_set, set_flags, get_flags, swidth, hostname
			
 
				 from .remote import RemoteRepository, cache_if_remote
			
 
				-from .repository import Repository, LIST_SCAN_LIMIT, NoManifestError
			
 
				+from .repository import Repository, NoManifestError
			
 
				 from .repoobj import RepoObj
			
 
				 
			
 
				 has_link = hasattr(os, "link")
			
@@ -1626,7 +1626,7 @@ class ArchiveChecker:
 
				         self.check_all = not any((first, last, match, older, newer, oldest, newest))
			
 
				         self.repair = repair
			
 
				         self.repository = repository
			
 
				-        self.init_chunks()
			
 
				+        self.chunks = build_chunkindex_from_repo(self.repository)
			
 
				         self.key = self.make_key(repository)
			
 
				         self.repo_objs = RepoObj(self.key)
			
 
				         if verify_data:
			
@@ -1653,23 +1653,6 @@ class ArchiveChecker:
 
				             logger.info("Archive consistency check complete, no problems found.")
			
 
				         return self.repair or not self.error_found
			
 
				 
			
 
				-    def init_chunks(self):
			
 
				-        """Fetch a list of all object keys from repository and initialize self.chunks"""
			
 
				-        self.chunks = ChunkIndex()
			
 
				-        marker = None
			
 
				-        while True:
			
 
				-            result = self.repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
			
 
				-            if not result:
			
 
				-                break
			
 
				-            marker = result[-1][0]
			
 
				-            # the repo says it has these chunks, so we assume they are referenced chunks.
			
 
				-            # we do not care for refcounting or garbage collection here, so we just set refcount = MAX_VALUE.
			
 
				-            # borg compact will deal with any unused/orphan chunks.
			
 
				-            # we do not know the plaintext size (!= stored_size), thus we set size = 0.
			
 
				-            init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0)
			
 
				-            for id, stored_size in result:
			
 
				-                self.chunks[id] = init_entry
			
 
				-
			
 
				     def make_key(self, repository):
			
 
				         attempt = 0
			
 
				 
			
--- a/src/borg/cache.py
+++ b/src/borg/cache.py
@@ -565,6 +565,37 @@ class FilesCacheMixin:
 
				         )
			
 
				 
			
 
				 
			
 
				+def build_chunkindex_from_repo(repository):
			
 
				+    logger.debug("querying the chunk IDs list from the repo...")
			
 
				+    chunks = ChunkIndex()
			
 
				+    t0 = perf_counter()
			
 
				+    num_requests = 0
			
 
				+    num_chunks = 0
			
 
				+    marker = None
			
 
				+    while True:
			
 
				+        result = repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
			
 
				+        num_requests += 1
			
 
				+        if not result:
			
 
				+            break
			
 
				+        marker = result[-1][0]
			
 
				+        # The repo says it has these chunks, so we assume they are referenced chunks.
			
 
				+        # We do not care for refcounting anymore, so we just set refcount = MAX_VALUE.
			
 
				+        # We do not know the plaintext size (!= stored_size), thus we set size = 0.
			
 
				+        init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0)
			
 
				+        for id, stored_size in result:
			
 
				+            num_chunks += 1
			
 
				+            chunks[id] = init_entry
			
 
				+    # Cache does not contain the manifest.
			
 
				+    if not isinstance(repository, (Repository, RemoteRepository)):
			
 
				+        del chunks[Manifest.MANIFEST_ID]
			
 
				+    duration = perf_counter() - t0 or 0.001
			
 
				+    # Chunk IDs in a list are encoded in 34 bytes: 1 byte msgpack header, 1 byte length, 32 ID bytes.
			
 
				+    # Protocol overhead is neglected in this calculation.
			
 
				+    speed = format_file_size(num_chunks * 34 / duration)
			
 
				+    logger.debug(f"queried {num_chunks} chunk IDs in {duration} s ({num_requests} requests), ~{speed}/s")
			
 
				+    return chunks
			
 
				+
			
 
				+
			
 
				 class ChunksMixin:
			
 
				     """
			
 
				     Chunks index related code for misc. Cache implementations.
			
@@ -576,7 +607,7 @@ class ChunksMixin:
 
				     @property
			
 
				     def chunks(self):
			
 
				         if self._chunks is None:
			
 
				-            self._chunks = self._load_chunks_from_repo()
			
 
				+            self._chunks = build_chunkindex_from_repo(self.repository)
			
 
				         return self._chunks
			
 
				 
			
 
				     def seen_chunk(self, id, size=None):
			
@@ -625,39 +656,6 @@ class ChunksMixin:
 
				         stats.update(size, not exists)
			
 
				         return ChunkListEntry(id, size)
			
 
				 
			
 
				-    def _load_chunks_from_repo(self):
			
 
				-        logger.debug("Cache: querying the chunk IDs list from the repo...")
			
 
				-        chunks = ChunkIndex()
			
 
				-        t0 = perf_counter()
			
 
				-        num_requests = 0
			
 
				-        num_chunks = 0
			
 
				-        marker = None
			
 
				-        while True:
			
 
				-            result = self.repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
			
 
				-            num_requests += 1
			
 
				-            if not result:
			
 
				-                break
			
 
				-            marker = result[-1][0]
			
 
				-            # All chunks have a refcount of MAX_VALUE, which is sticky, therefore we can't/won't delete them.
			
 
				-            init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0)  # plaintext size
			
 
				-            for id, stored_size in result:
			
 
				-                num_chunks += 1
			
 
				-                chunks[id] = init_entry
			
 
				-        # Cache does not contain the manifest.
			
 
				-        if not isinstance(self.repository, (Repository, RemoteRepository)):
			
 
				-            del chunks[self.manifest.MANIFEST_ID]
			
 
				-        duration = perf_counter() - t0 or 0.01
			
 
				-        logger.debug(
			
 
				-            "Cache: queried %d chunk IDs in %.2f s (%d requests), ~%s/s",
			
 
				-            num_chunks,
			
 
				-            duration,
			
 
				-            num_requests,
			
 
				-            format_file_size(num_chunks * 34 / duration),
			
 
				-        )
			
 
				-        # Chunk IDs in a list are encoded in 34 bytes: 1 byte msgpack header, 1 byte length, 32 ID bytes.
			
 
				-        # Protocol overhead is neglected in this calculation.
			
 
				-        return chunks
			
 
				-
			
 
				 
			
 
				 class AdHocWithFilesCache(FilesCacheMixin, ChunksMixin):
			
 
				     """