Browse Source

tests: borg check must not add a spoofed archive to manifest

also: do a small optimisation in borg check:

if the type of the repo object is not ROBJ_ARCHIVE_META, we
can skip the object, it can not contain valid archive meta data.

if the type is correct, this is already a sufficient check, so
we can be quite sure that there will be valid archive metadata
in the object.
Thomas Waldmann 2 years ago
parent
commit
d1fde11645
2 changed files with 43 additions and 5 deletions
  1. 2 5
      src/borg/archive.py
  2. 41 0
      src/borg/testsuite/archiver/check_cmd.py

+ 2 - 5
src/borg/archive.py

@@ -1975,7 +1975,6 @@ class ArchiveChecker:
         # lost manifest on a older borg version than the most recent one that was ever used
         # within this repository (assuming that newer borg versions support more item keys).
         manifest = Manifest(self.key, self.repository)
-        archive_keys_serialized = [msgpack.packb(name) for name in ARCHIVE_KEYS]
         pi = ProgressIndicatorPercent(
             total=len(self.chunks), msg="Rebuilding manifest %6.2f%%", step=0.01, msgid="check.rebuild_manifest"
         )
@@ -1983,14 +1982,12 @@ class ArchiveChecker:
             pi.show()
             cdata = self.repository.get(chunk_id)
             try:
-                _, data = self.repo_objs.parse(chunk_id, cdata, ro_type=ROBJ_DONTCARE)
+                meta, data = self.repo_objs.parse(chunk_id, cdata, ro_type=ROBJ_DONTCARE)
             except IntegrityErrorBase as exc:
                 logger.error("Skipping corrupted chunk: %s", exc)
                 self.error_found = True
                 continue
-            if not valid_msgpacked_dict(data, archive_keys_serialized):
-                continue
-            if b"command_line" not in data or b"\xa7version\x02" not in data:
+            if meta["type"] != ROBJ_ARCHIVE_META:
                 continue
             try:
                 archive = msgpack.unpackb(data)

+ 41 - 0
src/borg/testsuite/archiver/check_cmd.py

@@ -288,6 +288,47 @@ def test_manifest_rebuild_duplicate_archive(archivers, request):
     assert "archive2" in output
 
 
+def test_spoofed_archive(archivers, request):
+    archiver = request.getfixturevalue(archivers)
+    check_cmd_setup(archiver)
+    archive, repository = open_archive(archiver.repository_path, "archive1")
+    repo_objs = archive.repo_objs
+    with repository:
+        # attacker would corrupt or delete the manifest to trigger a rebuild of it:
+        manifest = repository.get(Manifest.MANIFEST_ID)
+        corrupted_manifest = manifest + b"corrupted!"
+        repository.put(Manifest.MANIFEST_ID, corrupted_manifest)
+        archive_dict = {
+            "command_line": "",
+            "item_ptrs": [],
+            "hostname": "foo",
+            "username": "bar",
+            "name": "archive_spoofed",
+            "time": "2016-12-15T18:49:51.849711",
+            "version": 2,
+        }
+        archive = repo_objs.key.pack_metadata(archive_dict)
+        archive_id = repo_objs.id_hash(archive)
+        repository.put(
+            archive_id,
+            repo_objs.format(
+                archive_id,
+                {},
+                archive,
+                # we assume that an attacker can put a file into backup src files that contains a fake archive.
+                # but, the attacker can not influence the ro_type borg will use to store user file data:
+                ro_type=ROBJ_FILE_STREAM,  # a real archive is stored with ROBJ_ARCHIVE_META
+            ),
+        )
+        repository.commit(compact=False)
+    cmd(archiver, "check", exit_code=1)
+    cmd(archiver, "check", "--repair", "--debug", exit_code=0)
+    output = cmd(archiver, "rlist")
+    assert "archive1" in output
+    assert "archive2" in output
+    assert "archive_spoofed" not in output
+
+
 def test_extra_chunks(archivers, request):
     archiver = request.getfixturevalue(archivers)
     if archiver.get_kind() == "remote":