瀏覽代碼

Merge pull request #7367 from ThomasWaldmann/new-crypto-assert-id

assert_id: better be paranoid, fixes #7362
TW 2 年之前
父節點
當前提交
6da5b7d1ba
共有 2 個文件被更改,包括 19 次插入11 次删除
  1. 4 8
      src/borg/archive.py
  2. 15 3
      src/borg/crypto/key.py

+ 4 - 8
src/borg/archive.py

@@ -1837,12 +1837,6 @@ class ArchiveChecker:
         chunks_count_index = len(self.chunks)
         chunks_count_segments = 0
         errors = 0
-        # for the new crypto, derived from AEADKeyBase, we know that it checks authenticity on
-        # the crypto.low_level level - invalid chunks will fail to AEAD authenticate.
-        # for these key types, we know that there is no need to decompress the data afterwards.
-        # for all other modes, we assume that we must decompress, so we can verify authenticity
-        # based on the plaintext MAC (via calling ._assert_id(id, plaintext)).
-        decompress = not isinstance(self.key, AEADKeyBase)
         defect_chunks = []
         pi = ProgressIndicatorPercent(
             total=chunks_count_index, msg="Verifying data %6.2f%%", step=0.01, msgid="check.verify_data"
@@ -1872,7 +1866,8 @@ class ArchiveChecker:
                         chunk_data_iter = self.repository.get_many(chunk_ids)
                 else:
                     try:
-                        self.repo_objs.parse(chunk_id, encrypted_data, decompress=decompress)
+                        # we must decompress, so it'll call assert_id() in there:
+                        self.repo_objs.parse(chunk_id, encrypted_data, decompress=True)
                     except IntegrityErrorBase as integrity_error:
                         self.error_found = True
                         errors += 1
@@ -1903,7 +1898,8 @@ class ArchiveChecker:
                     # from the underlying media.
                     try:
                         encrypted_data = self.repository.get(defect_chunk)
-                        self.repo_objs.parse(defect_chunk, encrypted_data, decompress=decompress)
+                        # we must decompress, so it'll call assert_id() in there:
+                        self.repo_objs.parse(defect_chunk, encrypted_data, decompress=True)
                     except IntegrityErrorBase:
                         # failed twice -> get rid of this chunk
                         del self.chunks[defect_chunk]

+ 15 - 3
src/borg/crypto/key.py

@@ -840,11 +840,23 @@ class AEADKeyBase(KeyBase):
     MAX_IV = 2**48 - 1
 
     def assert_id(self, id, data):
-        # note: assert_id(id, data) is not needed any more for the new AEAD crypto.
-        # we put the id into AAD when storing the chunk, so it gets into the authentication tag computation.
+        # Comparing the id hash here would not be needed any more for the new AEAD crypto **IF** we
+        # could be sure that chunks were created by normal (not tampered, not evil) borg code:
+        # We put the id into AAD when storing the chunk, so it gets into the authentication tag computation.
         # when decrypting, we provide the id we **want** as AAD for the auth tag verification, so
         # decrypting only succeeds if we got the ciphertext we wrote **for that chunk id**.
-        pass
+        # So, basically the **repository** can not cheat on us by giving us a different chunk.
+        #
+        # **BUT**, if chunks are created by tampered, evil borg code, the borg client code could put
+        # a wrong chunkid into AAD and then AEAD-encrypt-and-auth this and store it into the
+        # repository using this bad chunkid as key (violating the usual chunkid == id_hash(data)).
+        # Later, when reading such a bad chunk, AEAD-auth-and-decrypt would not notice any
+        # issue and decrypt successfully.
+        # Thus, to notice such evil borg activity, we must check for such violations here:
+        if id and id != Manifest.MANIFEST_ID:
+            id_computed = self.id_hash(data)
+            if not hmac.compare_digest(id_computed, id):
+                raise IntegrityError("Chunk %s: id verification failed" % bin_to_hex(id))
 
     def encrypt(self, id, data):
         # to encrypt new data in this session we use always self.cipher and self.sessionid