
compact/check: fix bug not writing the complete index, fixes #8813

Add incremental flag to `write_chunkindex_to_repo_cache`.

borg create uses incremental cache index writes to save progress.
But other operations need to write a full index and delete all other cached indexes.

Also add debug logging for missing object IDs.
Thomas Waldmann committed 4 weeks ago
commit 38268127c9
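
For context, a minimal sketch of how the two kinds of call sites differ after this change (based on the diffs below; `repository` and `chunks` stand in for a Repository instance and the in-memory ChunkIndex, and `incremental=True` remains the default, so `borg create` call sites are unchanged):

    from borg.cache import write_chunkindex_to_repo_cache

    # during `borg create` (incremental=True is the default): serialize only
    # the entries flagged F_NEW since the last write, to save progress cheaply.
    write_chunkindex_to_repo_cache(repository, chunks)

    # after compact / after a full repository listing: write a *complete*
    # index and delete the older cached indexes, so readers never see a
    # partial index.
    write_chunkindex_to_repo_cache(
        repository, chunks, incremental=False, clear=True, force_write=True, delete_other=True
    )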

+ 5 - 1
src/borg/archiver/compact_cmd.py

@@ -65,7 +65,9 @@ class ArchiveGarbageCollector:
         # and also remove all older cached chunk indexes.
         # write_chunkindex_to_repo now removes all flags and size infos.
         # we need this, as we put the wrong size in there to support --stats computations.
-        write_chunkindex_to_repo_cache(self.repository, self.chunks, clear=True, force_write=True, delete_other=True)
+        write_chunkindex_to_repo_cache(
+            self.repository, self.chunks, incremental=False, clear=True, force_write=True, delete_other=True
+        )
         self.chunks = None  # nothing there (cleared!)
 
     def analyze_archives(self) -> Tuple[Set, int, int, int]:
@@ -113,6 +115,8 @@ class ArchiveGarbageCollector:
     def report_and_delete(self):
         if self.missing_chunks:
             logger.error(f"Repository has {len(self.missing_chunks)} missing objects!")
+            for id in sorted(self.missing_chunks):
+                logger.debug(f"Missing object {bin_to_hex(id)}")
             set_ec(EXIT_ERROR)
 
         logger.info("Cleaning archives directory from soft-deleted archives...")

+ 13 - 9
src/borg/cache.py

@@ -705,20 +705,24 @@ CHUNKINDEX_HASH_SEED = 3
 
 
 def write_chunkindex_to_repo_cache(
-    repository, chunks, *, clear=False, force_write=False, delete_other=False, delete_these=None
+    repository, chunks, *, incremental=True, clear=False, force_write=False, delete_other=False, delete_these=None
 ):
-    # the borghash code has no means to only serialize the F_NEW table entries,
-    # thus we copy only the new entries to a temporary table:
-    new_chunks = ChunkIndex()
     # for now, we don't want to serialize the flags or the size, just the keys (chunk IDs):
     cleaned_value = ChunkIndexEntry(flags=ChunkIndex.F_NONE, size=0)
-    for key, _ in chunks.iteritems(only_new=True):
-        new_chunks[key] = cleaned_value
+    chunks_to_write = ChunkIndex()
+    # incremental==True:
+    # the borghash code has no means to only serialize the F_NEW table entries,
+    # thus we copy only the new entries to a temporary table.
+    # incremental==False:
+    # maybe copying the stuff into a new ChunkIndex is not needed here,
+    # but for simplicity, we do it anyway.
+    for key, _ in chunks.iteritems(only_new=incremental):
+        chunks_to_write[key] = cleaned_value
     with io.BytesIO() as f:
-        new_chunks.write(f)
+        chunks_to_write.write(f)
         data = f.getvalue()
-    logger.debug(f"caching {len(new_chunks)} new chunks.")
-    new_chunks.clear()  # free memory of the temporary table
+    logger.debug(f"caching {len(chunks_to_write)} chunks (incremental={incremental}).")
+    chunks_to_write.clear()  # free memory of the temporary table
     if clear:
         # if we don't need the in-memory chunks index anymore:
         chunks.clear()  # free memory, immediately
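
Why this fixes #8813, as far as the diff shows: the old code hard-coded `only_new=True`, so even callers passing `delete_other=True` (compact, and the repository's full-listing path) serialized only the F_NEW entries while deleting the older, complete cached indexes. A rough sketch of the failure mode:

    # old behavior (before this commit), simplified:
    for key, _ in chunks.iteritems(only_new=True):  # always only the new entries
        new_chunks[key] = cleaned_value
    # caller then used delete_other=True, which removed the complete older
    # indexes and left only this partial one behind -> "missing objects"
    # reported on later runs.

With the `incremental` flag threaded through to `iteritems(only_new=...)`, full-write callers now serialize every key.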

+ 3 - 1
src/borg/repository.py

@@ -362,7 +362,9 @@ class Repository:
                     # if we did a full pass in one go, we built a complete, uptodate ChunkIndex, cache it!
                     from .cache import write_chunkindex_to_repo_cache
 
-                    write_chunkindex_to_repo_cache(self, chunks, clear=True, force_write=True, delete_other=True)
+                    write_chunkindex_to_repo_cache(
+                        self, chunks, incremental=False, clear=True, force_write=True, delete_other=True
+                    )
         except StoreObjectNotFound:
             # it can be that there is no "data/" at all, then it crashes when iterating infos.
             pass

+ 20 - 0
src/borg/testsuite/archiver/compact_cmd_test.py

@@ -59,3 +59,23 @@ def test_compact_after_deleting_some_archives(archivers, request, stats):
     else:
         assert "Repository has data stored in 0 objects." not in output
     assert "Finished compaction" in output
+
+
+def test_compact_index_corruption(archivers, request):
+    # see issue #8813 (borg did not write a complete index)
+    archiver = request.getfixturevalue(archivers)
+
+    cmd(archiver, "repo-create", RK_ENCRYPTION)
+    create_src_archive(archiver, "archive1")
+
+    output = cmd(archiver, "compact", "-v", "--stats", exit_code=0)
+    assert "missing objects" not in output
+
+    output = cmd(archiver, "compact", "-v", exit_code=0)
+    assert "missing objects" not in output
+
+    output = cmd(archiver, "compact", "-v", exit_code=0)
+    assert "missing objects" not in output
+
+    output = cmd(archiver, "compact", "-v", "--stats", exit_code=0)
+    assert "missing objects" not in output