Bläddra i källkod

ArchiveChecker: don't do precise refcounting here

That's the job of borg compact and not needed inside borg check.
check only needs to know if a chunk is present in the repo.
Thomas Waldmann 9 månader sedan
förälder
incheckning
f9d2e6827b
1 ändrade filer med 15 tillägg och 24 borttagningar
  1. 15 24
      src/borg/archive.py

+ 15 - 24
src/borg/archive.py

@@ -1619,14 +1619,14 @@ class ArchiveChecker:
         :param oldest/newest: only check archives older/newer than timedelta from oldest/newest archive timestamp
         :param verify_data: integrity verification of data referenced by archives
         """
+        if not isinstance(repository, (Repository, RemoteRepository)):
+            logger.error("Checking legacy repositories is not supported.")
+            return False
         logger.info("Starting archive consistency check...")
         self.check_all = not any((first, last, match, older, newer, oldest, newest))
         self.repair = repair
         self.repository = repository
         self.init_chunks()
-        if not isinstance(repository, (Repository, RemoteRepository)) and not self.chunks:
-            logger.error("Repository contains no apparent data at all, cannot continue check/repair.")
-            return False
         self.key = self.make_key(repository)
         self.repo_objs = RepoObj(self.key)
         if verify_data:
@@ -1642,8 +1642,6 @@ class ArchiveChecker:
             except IntegrityErrorBase as exc:
                 logger.error("Repository manifest is corrupted: %s", exc)
                 self.error_found = True
-                if not isinstance(repository, (Repository, RemoteRepository)):
-                    del self.chunks[Manifest.MANIFEST_ID]
                 self.manifest = self.rebuild_manifest()
         self.rebuild_archives(
             match=match, first=first, last=last, sort_by=sort_by, older=older, oldest=oldest, newer=newer, newest=newest
@@ -1656,10 +1654,7 @@ class ArchiveChecker:
         return self.repair or not self.error_found
 
     def init_chunks(self):
-        """Fetch a list of all object keys from repository"""
-        # Explicitly set the initial usable hash table capacity to avoid performance issues
-        # due to hash table "resonance".
-        # Since reconstruction of archive items can add some new chunks, add 10 % headroom.
+        """Fetch a list of all object keys from repository and initialize self.chunks"""
         self.chunks = ChunkIndex()
         marker = None
         while True:
@@ -1667,7 +1662,11 @@ class ArchiveChecker:
             if not result:
                 break
             marker = result[-1][0]
-            init_entry = ChunkIndexEntry(refcount=0, size=0)  # unknown plaintext size (!= stored size!)
+            # the repo says it has these chunks, so we assume they are referenced chunks.
+            # we do not care for refcounting or garbage collection here, so we just set refcount = MAX_VALUE.
+            # borg compact will deal with any unused/orphan chunks.
+            # we do not know the plaintext size (!= stored_size), thus we set size = 0.
+            init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0)
             for id, stored_size in result:
                 self.chunks[id] = init_entry
 
@@ -1834,9 +1833,6 @@ class ArchiveChecker:
         self, first=0, last=0, sort_by="", match=None, older=None, newer=None, oldest=None, newest=None
     ):
         """Analyze and rebuild archives, expecting some damage and trying to make stuff consistent again."""
-        # Exclude the manifest from chunks (manifest entry might be already deleted from self.chunks)
-        if not isinstance(self.repository, (Repository, RemoteRepository)):
-            self.chunks.pop(Manifest.MANIFEST_ID, None)
 
         def add_callback(chunk):
             id_ = self.key.id_hash(chunk)
@@ -1844,12 +1840,11 @@ class ArchiveChecker:
             add_reference(id_, len(chunk), cdata)
             return id_
 
-        def add_reference(id_, size, cdata=None):
-            try:
-                self.chunks.incref(id_)
-            except KeyError:
+        def add_reference(id_, size, cdata):
+            # either we already have this chunk in repo and chunks index or we add it now
+            if id_ not in self.chunks:
                 assert cdata is not None
-                self.chunks[id_] = ChunkIndexEntry(refcount=1, size=size)
+                self.chunks[id_] = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=size)
                 if self.repair:
                     self.repository.put(id_, cdata)
 
@@ -1900,9 +1895,7 @@ class ArchiveChecker:
                             )
                         )
                         chunk_id, size = chunk_current
-                        if chunk_id in self.chunks:
-                            add_reference(chunk_id, size)
-                        else:
+                        if chunk_id not in self.chunks:
                             logger.warning(
                                 "{}: {}: Missing all-zero replacement chunk detected (Byte {}-{}, Chunk {}). "
                                 "Generating new replacement chunk.".format(
@@ -1914,15 +1907,13 @@ class ArchiveChecker:
                             add_reference(chunk_id, size, cdata)
                 else:
                     if chunk_current == chunk_healthy:
-                        # normal case, all fine.
-                        add_reference(chunk_id, size)
+                        pass  # normal case, all fine.
                     else:
                         logger.info(
                             "{}: {}: Healed previously missing file chunk! (Byte {}-{}, Chunk {}).".format(
                                 archive_name, item.path, offset, offset + size, bin_to_hex(chunk_id)
                             )
                         )
-                        add_reference(chunk_id, size)
                 chunk_list.append([chunk_id, size])  # list-typed element as chunks_healthy is list-of-lists
                 offset += size
             if chunks_replaced and not has_chunks_healthy: