
extract, diff, ...: use raise_missing=False

preloading: always use raise_missing=False, because the raise_missing
behaviour is defined at preloading time, not when the preloaded chunks
are fetched later.

fetch_many: use get_many with raise_missing=False.
if get_many yields None instead of the expected chunk
cdata bytes, create an all-zero replacement chunk of
the correct size on the fly (if the size is known) and
emit an error message with the missing chunk's id and size.
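
a minimal consumer-side sketch of the effect described above (write_file,
chunks and out below are illustrative stand-ins, not borg's actual extract
code):

    # illustrative only: with raise_missing=False handled inside fetch_many(),
    # a consumer such as extract just keeps writing; a missing chunk arrives
    # as `size` zero bytes instead of raising an exception and aborting.
    def write_file(out, chunks, fetch_many):
        ids = [c.id for c in chunks]
        sizes = [c.size for c in chunks]
        for data in fetch_many(ids, sizes):
            out.write(data)  # zero-filled for missing chunks, real payload otherwise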

note: for borg recreate with re-chunking this is a bit
ugly, because it will transform a missing chunk into
a range of zero bytes in the target file of the recreated
archive. it will emit an error message at recreate time,
but afterwards the recreated archive will not "know"
about the problem any more and will just have that
zero-patched file.
thus, borg recreate with re-chunking should only be
used on repos that are not missing any chunks.
Thomas Waldmann, 3 months ago
commit 84fe9d2c67
3 changed files with 34 additions and 4 deletions
  1. src/borg/archive.py (+8 -2)
  2. src/borg/remote.py (+3 -1)
  3. src/borg/testsuite/archiver/extract_cmd_test.py (+23 -1)

+ 8 - 2
src/borg/archive.py

@@ -325,8 +325,14 @@ class DownloadPipeline:
             sizes = [None] * len(ids)
         else:
             raise TypeError(f"unsupported or mixed element types: {chunks}")
-        for id, size, cdata in zip(ids, sizes, self.repository.get_many(ids, is_preloaded=is_preloaded)):
-            _, data = self.repo_objs.parse(id, cdata, ro_type=ro_type)
+        for id, size, cdata in zip(
+            ids, sizes, self.repository.get_many(ids, is_preloaded=is_preloaded, raise_missing=False)
+        ):
+            if cdata is None:
+                logger.error(f"repository object {bin_to_hex(id)} missing, returning {size} zero bytes.")
+                data = zeros[:size] if size is not None else None
+            else:
+                _, data = self.repo_objs.parse(id, cdata, ro_type=ro_type)
             assert size is None or len(data) == size
             yield data
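
A note on the zeros[:size] slice above: it relies on a preallocated all-zero
buffer defined at module level. A rough sketch of that pattern (the buffer
size is an assumption for illustration, not borg's actual constant):

    # sketch of the shared zero-buffer pattern: slice the preallocated buffer
    # to get exactly `size` zero bytes for the replacement chunk, which also
    # keeps the `assert size is None or len(data) == size` invariant intact.
    zeros = bytes(8 * 1024 * 1024)  # assumed upper bound on chunk size

    def replacement_chunk(size):
        """Return `size` zero bytes by slicing the shared buffer."""
        return zeros[:size]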
 

+ 3 - 1
src/borg/remote.py

@@ -943,7 +943,9 @@ class RemoteRepository:
                             self.to_send.push_back(msgpack.packb({MSGID: self.msgid, MSG: cmd, ARGS: args}))
                     if not self.to_send and self.preload_ids:
                         chunk_id = self.preload_ids.pop(0)
-                        args = {"id": chunk_id, "raise_missing": True}
+                        # for preloading chunks, the raise_missing behaviour is defined HERE,
+                        # not in the get_many / fetch_many call that later fetches the preloaded chunks.
+                        args = {"id": chunk_id, "raise_missing": False}
                         self.msgid += 1
                         self.chunkid_to_msgids.setdefault(chunk_id, []).append(self.msgid)
                         self.to_send.push_back(msgpack.packb({MSGID: self.msgid, MSG: "get", ARGS: args}))
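
Why the flag must be fixed at preload time: a simplified sketch (class and
method names are hypothetical, not borg's actual RPC layer). The "get"
request for a preloaded chunk is issued with its args already decided, so a
later fetch only collects the stored response:

    # hypothetical illustration: raise_missing is baked into the request at
    # preload time; the raise_missing argument of a later get_many() call has
    # no effect for ids that were already preloaded.
    class PreloadingClient:
        def __init__(self, backend):
            self.backend = backend    # assumed to offer get(id, raise_missing=...)
            self.responses = {}       # chunk id -> response (None if missing)

        def preload(self, ids):
            for id in ids:
                self.responses[id] = self.backend.get(id, raise_missing=False)

        def get_many(self, ids, is_preloaded=False, raise_missing=False):
            for id in ids:
                yield self.responses[id]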

+ 23 - 1
src/borg/testsuite/archiver/extract_cmd_test.py

@@ -9,7 +9,7 @@ import pytest
 from ... import xattr
 from ...chunker import has_seek_hole
 from ...constants import *  # NOQA
-from ...helpers import EXIT_WARNING, BackupPermissionError
+from ...helpers import EXIT_WARNING, BackupPermissionError, bin_to_hex
 from ...helpers import flags_noatime, flags_normal
 from .. import changedir, same_ts_ns
 from .. import are_symlinks_supported, are_hardlinks_supported, is_utime_fully_supported, is_birthtime_fully_supported
@@ -24,6 +24,9 @@ from . import (
     _extract_hardlinks_setup,
     assert_creates_file,
     generate_archiver_tests,
+    create_src_archive,
+    open_archive,
+    src_file,
 )
 
 pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary")  # NOQA
@@ -737,3 +740,22 @@ def test_dry_run_extraction_flags(archivers, request):
         print(output)
 
     assert not os.listdir("output"), "Output directory should be empty after dry-run"
+
+
+def test_extract_file_with_missing_chunk(archivers, request):
+    archiver = request.getfixturevalue(archivers)
+    cmd(archiver, "repo-create", RK_ENCRYPTION)
+    create_src_archive(archiver, "archive")
+    # Get rid of a chunk
+    archive, repository = open_archive(archiver.repository_path, "archive")
+    with repository:
+        for item in archive.iter_items():
+            if item.path.endswith(src_file):
+                chunk = item.chunks[-1]
+                repository.delete(chunk.id)
+                break
+        else:
+            assert False  # missed the file
+    output = cmd(archiver, "extract", "archive")
+    # TODO: this is a bit dirty still: no warning/error rc, no filename output for the damaged file.
+    assert f"repository object {bin_to_hex(chunk.id)} missing, returning {chunk.size} zero bytes." in output