2
0
View source

build_chunkindex_from_repo: reduce code duplication

Thomas Waldmann, 9 months ago
parent
commit
e2aa9d56d0
2 files changed, 35 additions, 54 deletions
  1. 3 20
      src/borg/archive.py
  2. 32 34
      src/borg/cache.py

+ 3 - 20
src/borg/archive.py

@@ -22,7 +22,7 @@ logger = create_logger()
 
 from . import xattr
 from .chunker import get_chunker, Chunk
-from .cache import ChunkListEntry
+from .cache import ChunkListEntry, build_chunkindex_from_repo
 from .crypto.key import key_factory, UnsupportedPayloadError
 from .compress import CompressionSpec
 from .constants import *  # NOQA
@@ -50,7 +50,7 @@ from .patterns import PathPrefixPattern, FnmatchPattern, IECommand
 from .item import Item, ArchiveItem, ItemDiff
 from .platform import acl_get, acl_set, set_flags, get_flags, swidth, hostname
 from .remote import RemoteRepository, cache_if_remote
-from .repository import Repository, LIST_SCAN_LIMIT, NoManifestError
+from .repository import Repository, NoManifestError
 from .repoobj import RepoObj
 
 has_link = hasattr(os, "link")
@@ -1626,7 +1626,7 @@ class ArchiveChecker:
         self.check_all = not any((first, last, match, older, newer, oldest, newest))
         self.repair = repair
         self.repository = repository
-        self.init_chunks()
+        self.chunks = build_chunkindex_from_repo(self.repository)
         self.key = self.make_key(repository)
         self.repo_objs = RepoObj(self.key)
         if verify_data:
@@ -1653,23 +1653,6 @@ class ArchiveChecker:
             logger.info("Archive consistency check complete, no problems found.")
         return self.repair or not self.error_found
 
-    def init_chunks(self):
-        """Fetch a list of all object keys from repository and initialize self.chunks"""
-        self.chunks = ChunkIndex()
-        marker = None
-        while True:
-            result = self.repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
-            if not result:
-                break
-            marker = result[-1][0]
-            # the repo says it has these chunks, so we assume they are referenced chunks.
-            # we do not care for refcounting or garbage collection here, so we just set refcount = MAX_VALUE.
-            # borg compact will deal with any unused/orphan chunks.
-            # we do not know the plaintext size (!= stored_size), thus we set size = 0.
-            init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0)
-            for id, stored_size in result:
-                self.chunks[id] = init_entry
-
     def make_key(self, repository):
         attempt = 0
 

+ 32 - 34
src/borg/cache.py

@@ -565,6 +565,37 @@ class FilesCacheMixin:
         )
 
 
+def build_chunkindex_from_repo(repository):
+    logger.debug("querying the chunk IDs list from the repo...")
+    chunks = ChunkIndex()
+    t0 = perf_counter()
+    num_requests = 0
+    num_chunks = 0
+    marker = None
+    while True:
+        result = repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
+        num_requests += 1
+        if not result:
+            break
+        marker = result[-1][0]
+        # The repo says it has these chunks, so we assume they are referenced chunks.
+        # We do not care for refcounting anymore, so we just set refcount = MAX_VALUE.
+        # We do not know the plaintext size (!= stored_size), thus we set size = 0.
+        init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0)
+        for id, stored_size in result:
+            num_chunks += 1
+            chunks[id] = init_entry
+    # Cache does not contain the manifest.
+    if not isinstance(repository, (Repository, RemoteRepository)):
+        del chunks[Manifest.MANIFEST_ID]
+    duration = perf_counter() - t0 or 0.001
+    # Chunk IDs in a list are encoded in 34 bytes: 1 byte msgpack header, 1 byte length, 32 ID bytes.
+    # Protocol overhead is neglected in this calculation.
+    speed = format_file_size(num_chunks * 34 / duration)
+    logger.debug(f"queried {num_chunks} chunk IDs in {duration} s ({num_requests} requests), ~{speed}/s")
+    return chunks
+
+
 class ChunksMixin:
     """
     Chunks index related code for misc. Cache implementations.
@@ -576,7 +607,7 @@ class ChunksMixin:
     @property
     def chunks(self):
         if self._chunks is None:
-            self._chunks = self._load_chunks_from_repo()
+            self._chunks = build_chunkindex_from_repo(self.repository)
         return self._chunks
 
     def seen_chunk(self, id, size=None):
@@ -625,39 +656,6 @@ class ChunksMixin:
         stats.update(size, not exists)
         return ChunkListEntry(id, size)
 
-    def _load_chunks_from_repo(self):
-        logger.debug("Cache: querying the chunk IDs list from the repo...")
-        chunks = ChunkIndex()
-        t0 = perf_counter()
-        num_requests = 0
-        num_chunks = 0
-        marker = None
-        while True:
-            result = self.repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
-            num_requests += 1
-            if not result:
-                break
-            marker = result[-1][0]
-            # All chunks have a refcount of MAX_VALUE, which is sticky, therefore we can't/won't delete them.
-            init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0)  # plaintext size
-            for id, stored_size in result:
-                num_chunks += 1
-                chunks[id] = init_entry
-        # Cache does not contain the manifest.
-        if not isinstance(self.repository, (Repository, RemoteRepository)):
-            del chunks[self.manifest.MANIFEST_ID]
-        duration = perf_counter() - t0 or 0.01
-        logger.debug(
-            "Cache: queried %d chunk IDs in %.2f s (%d requests), ~%s/s",
-            num_chunks,
-            duration,
-            num_requests,
-            format_file_size(num_chunks * 34 / duration),
-        )
-        # Chunk IDs in a list are encoded in 34 bytes: 1 byte msgpack header, 1 byte length, 32 ID bytes.
-        # Protocol overhead is neglected in this calculation.
-        return chunks
-
 
 class AdHocWithFilesCache(FilesCacheMixin, ChunksMixin):
     """