
implement Repository3.check

It uses xxh64 hashes of the meta and data parts to verify their validity.
On a server with borg, this can be done server-side without the borg key.

The new RepoObj header has meta_size, data_size, meta_hash and data_hash.
Thomas Waldmann, 10 months ago
commit d95cacd624
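
For reference, a minimal standalone sketch of the new object header and the keyless check it enables, assuming the xxhash PyPI package as a stand-in for borg's internal checksums.xxh64 (the digest byte order may differ from borg's own implementation):

# Sketch only: mirrors the RepoObj header layout introduced by this commit.
# Assumption: the `xxhash` PyPI package stands in for borg's checksums.xxh64.
from struct import Struct

import xxhash

obj_header = Struct("<II8s8s")  # meta_size, data_size, meta_hash, data_hash

def xxh64(data: bytes) -> bytes:
    return xxhash.xxh64(data).digest()  # 8-byte digest

def make_obj(meta_encrypted: bytes, data_encrypted: bytes) -> bytes:
    # object layout: header | encrypted metadata | encrypted data
    hdr = obj_header.pack(
        len(meta_encrypted), len(data_encrypted),
        xxh64(meta_encrypted), xxh64(data_encrypted),
    )
    return hdr + meta_encrypted + data_encrypted

def check_obj(obj: bytes) -> bool:
    # keyless consistency check: needs only the sizes and hashes in the header
    hdr_size = obj_header.size
    if len(obj) < hdr_size:
        return False
    meta_size, data_size, meta_hash, data_hash = obj_header.unpack(obj[:hdr_size])
    meta = obj[hdr_size:hdr_size + meta_size]
    data = obj[hdr_size + meta_size:hdr_size + meta_size + data_size]
    return (len(meta) == meta_size and xxh64(meta) == meta_hash
            and len(data) == data_size and xxh64(data) == data_hash)

Because only sizes and hashes are read, a borg-aware server can run this over every stored object without any key material, which is what the new Repository3.check below does.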

+ 22 - 18
src/borg/repoobj.py

@@ -1,6 +1,8 @@
+from collections import namedtuple
 from struct import Struct
 
 from .constants import *  # NOQA
+from .checksums import xxh64
 from .helpers import msgpack, workarounds
 from .helpers.errors import IntegrityError
 from .compress import Compressor, LZ4_COMPRESSOR, get_compressor
@@ -10,14 +12,17 @@ AUTHENTICATED_NO_KEY = "authenticated_no_key" in workarounds
 
 
 class RepoObj:
-    meta_len_hdr = Struct("<H")  # 16bit unsigned int
+    # Object header format includes size infos for parsing the object into meta and data,
+    # as well as hashes to enable checking consistency without having the borg key.
+    obj_header = Struct("<II8s8s")  # meta size (32b), data size (32b), meta hash (64b), data hash (64b)
+    ObjHeader = namedtuple("ObjHeader", "meta_size data_size meta_hash data_hash")
 
     @classmethod
     def extract_crypted_data(cls, data: bytes) -> bytes:
         # used for crypto type detection
-        offs = cls.meta_len_hdr.size
-        meta_len = cls.meta_len_hdr.unpack(data[:offs])[0]
-        return data[offs + meta_len :]
+        hdr_size = cls.obj_header.size
+        hdr = cls.ObjHeader(*cls.obj_header.unpack(data[:hdr_size]))
+        return data[hdr_size + hdr.meta_size:]
 
     def __init__(self, key):
         self.key = key
@@ -61,8 +66,9 @@ class RepoObj:
         data_encrypted = self.key.encrypt(id, data_compressed)
         meta_packed = msgpack.packb(meta)
         meta_encrypted = self.key.encrypt(id, meta_packed)
-        hdr = self.meta_len_hdr.pack(len(meta_encrypted))
-        return hdr + meta_encrypted + data_encrypted
+        hdr = self.ObjHeader(len(meta_encrypted), len(data_encrypted), xxh64(meta_encrypted), xxh64(data_encrypted))
+        hdr_packed = self.obj_header.pack(*hdr)
+        return hdr_packed + meta_encrypted + data_encrypted
 
     def parse_meta(self, id: bytes, cdata: bytes, ro_type: str) -> dict:
         # when calling parse_meta, enough cdata needs to be supplied to contain completely the
@@ -71,11 +77,10 @@ class RepoObj:
         assert isinstance(cdata, bytes)
         assert isinstance(ro_type, str)
         obj = memoryview(cdata)
-        offs = self.meta_len_hdr.size
-        hdr = obj[:offs]
-        len_meta_encrypted = self.meta_len_hdr.unpack(hdr)[0]
-        assert offs + len_meta_encrypted <= len(obj)
-        meta_encrypted = obj[offs : offs + len_meta_encrypted]
+        hdr_size = self.obj_header.size
+        hdr = self.ObjHeader(*self.obj_header.unpack(obj[:hdr_size]))
+        assert hdr_size + hdr.meta_size <= len(obj)
+        meta_encrypted = obj[hdr_size:hdr_size + hdr.meta_size]
         meta_packed = self.key.decrypt(id, meta_encrypted)
         meta = msgpack.unpackb(meta_packed)
         if ro_type != ROBJ_DONTCARE and meta["type"] != ro_type:
@@ -100,17 +105,16 @@ class RepoObj:
         assert isinstance(id, bytes)
         assert isinstance(cdata, bytes)
         obj = memoryview(cdata)
-        offs = self.meta_len_hdr.size
-        hdr = obj[:offs]
-        len_meta_encrypted = self.meta_len_hdr.unpack(hdr)[0]
-        assert offs + len_meta_encrypted <= len(obj)
-        meta_encrypted = obj[offs : offs + len_meta_encrypted]
-        offs += len_meta_encrypted
+        hdr_size = self.obj_header.size
+        hdr = self.ObjHeader(*self.obj_header.unpack(obj[:hdr_size]))
+        assert hdr_size + hdr.meta_size <= len(obj)
+        meta_encrypted = obj[hdr_size : hdr_size + hdr.meta_size]
         meta_packed = self.key.decrypt(id, meta_encrypted)
         meta_compressed = msgpack.unpackb(meta_packed)  # means: before adding more metadata in decompress block
         if ro_type != ROBJ_DONTCARE and meta_compressed["type"] != ro_type:
             raise IntegrityError(f"ro_type expected: {ro_type} got: {meta_compressed['type']}")
-        data_encrypted = obj[offs:]
+        assert hdr_size + hdr.meta_size + hdr.data_size <= len(obj)
+        data_encrypted = obj[hdr_size + hdr.meta_size:hdr_size + hdr.meta_size + hdr.data_size]
         data_compressed = self.key.decrypt(id, data_encrypted)  # does not include the type/level bytes
         if decompress:
             ctype = meta_compressed["ctype"]

+ 12 - 13
src/borg/repository.py

@@ -1837,25 +1837,24 @@ class LoggedIO:
                         # supporting separately encrypted metadata and data.
                         # In this case, we return enough bytes so the client can decrypt the metadata
                         # and seek over the rest (over the encrypted data).
-                        meta_len_size = RepoObj.meta_len_hdr.size
-                        meta_len = fd.read(meta_len_size)
-                        length -= meta_len_size
-                        if len(meta_len) != meta_len_size:
+                        hdr_size = RepoObj.obj_header.size
+                        hdr = fd.read(hdr_size)
+                        length -= hdr_size
+                        if len(hdr) != hdr_size:
                             raise IntegrityError(
                                 f"Segment entry meta length short read [segment {segment}, offset {offset}]: "
-                                f"expected {meta_len_size}, got {len(meta_len)} bytes"
+                                f"expected {hdr_size}, got {len(hdr)} bytes"
                             )
-                        ml = RepoObj.meta_len_hdr.unpack(meta_len)[0]
-                        meta = fd.read(ml)
-                        length -= ml
-                        if len(meta) != ml:
+                        meta_size = RepoObj.obj_header.unpack(hdr)[0]
+                        meta = fd.read(meta_size)
+                        length -= meta_size
+                        if len(meta) != meta_size:
                             raise IntegrityError(
                                 f"Segment entry meta short read [segment {segment}, offset {offset}]: "
-                                f"expected {ml}, got {len(meta)} bytes"
+                                f"expected {meta_size}, got {len(meta)} bytes"
                             )
-                        data = meta_len + meta  # shortened chunk - enough so the client can decrypt the metadata
-                        # we do not have a checksum for this data, but the client's AEAD crypto will check it.
-                    # in any case, we see over the remainder of the chunk
+                        data = hdr + meta  # shortened chunk - enough so the client can decrypt the metadata
+                    # in any case, we seek over the remainder of the chunk
                     oldpos = fd.tell()
                     seeked = fd.seek(length, os.SEEK_CUR) - oldpos
                     if seeked != length:

+ 51 - 20
src/borg/repository3.py

@@ -3,6 +3,7 @@ import os
 from borgstore.store import Store
 from borgstore.store import ObjectNotFound as StoreObjectNotFound
 
+from .checksums import xxh64
 from .constants import *  # NOQA
 from .helpers import Error, ErrorWithTraceback, IntegrityError
 from .helpers import Location
@@ -184,16 +185,46 @@ class Repository3:
         pass
 
     def check(self, repair=False, max_duration=0):
-        """Check repository consistency
+        """Check repository consistency"""
+        def log_error(msg):
+            nonlocal obj_corrupted
+            obj_corrupted = True
+            logger.error(f"Repo object {info.name} is corrupted: {msg}")
 
-        This method verifies all segment checksums and makes sure
-        the index is consistent with the data stored in the segments.
-        """
+        # TODO: implement repair, progress indicator, partial checks, ...
         mode = "full"
         logger.info("Starting repository check")
-        # XXX TODO
-        logger.info("Finished %s repository check, no problems found.", mode)
-        return True
+        objs_checked = objs_errors = 0
+        infos = self.store.list("data")
+        for info in infos:
+            obj_corrupted = False
+            key = "data/%s" % info.name
+            obj = self.store.load(key)
+            hdr_size = RepoObj.obj_header.size
+            obj_size = len(obj)
+            if obj_size >= hdr_size:
+                hdr = RepoObj.ObjHeader(*RepoObj.obj_header.unpack(obj[:hdr_size]))
+                meta = obj[hdr_size:hdr_size+hdr.meta_size]
+                if hdr.meta_size != len(meta):
+                    log_error("metadata size incorrect.")
+                elif hdr.meta_hash != xxh64(meta):
+                    log_error("metadata does not match checksum.")
+                data = obj[hdr_size+hdr.meta_size:hdr_size+hdr.meta_size+hdr.data_size]
+                if hdr.data_size != len(data):
+                    log_error("data size incorrect.")
+                elif hdr.data_hash != xxh64(data):
+                    log_error("data does not match checksum.")
+            else:
+                log_error("too small.")
+            objs_checked += 1
+            if obj_corrupted:
+                objs_errors += 1
+        logger.info(f"Checked {objs_checked} repository objects, {objs_errors} errors.")
+        if objs_errors == 0:
+            logger.info("Finished %s repository check, no problems found.", mode)
+        else:
+            logger.error("Finished %s repository check, errors found.", mode)
+        return objs_errors == 0 or repair
 
     def scan_low_level(self, segment=None, offset=None):
         raise NotImplementedError
@@ -244,25 +275,25 @@ class Repository3:
             else:
                 # RepoObj layout supports separately encrypted metadata and data.
                 # We return enough bytes so the client can decrypt the metadata.
-                meta_len_size = RepoObj.meta_len_hdr.size
-                extra_len = 1024 - meta_len_size  # load a bit more, 1024b, reduces round trips
-                obj = self.store.load(key, size=meta_len_size + extra_len)
-                meta_len = obj[0:meta_len_size]
-                if len(meta_len) != meta_len_size:
+                hdr_size = RepoObj.obj_header.size
+                extra_size = 1024 - hdr_size  # load a bit more, 1024b, reduces round trips
+                obj = self.store.load(key, size=hdr_size + extra_size)
+                hdr = obj[0:hdr_size]
+                if len(hdr) != hdr_size:
                     raise IntegrityError(
-                        f"Object too small [id {id_hex}]: expected {meta_len_size}, got {len(meta_len)} bytes"
+                        f"Object too small [id {id_hex}]: expected {hdr_size}, got {len(hdr)} bytes"
                     )
-                ml = RepoObj.meta_len_hdr.unpack(meta_len)[0]
-                if ml > extra_len:
+                meta_size = RepoObj.obj_header.unpack(hdr)[0]
+                if meta_size > extra_size:
                     # we did not get enough, need to load more, but not all.
                     # this should be rare, as chunk metadata is rather small usually.
-                    obj = self.store.load(key, size=meta_len_size + ml)
-                meta = obj[meta_len_size:meta_len_size + ml]
-                if len(meta) != ml:
+                    obj = self.store.load(key, size=hdr_size + meta_size)
+                meta = obj[hdr_size:hdr_size + meta_size]
+                if len(meta) != meta_size:
                     raise IntegrityError(
-                        f"Object too small [id {id_hex}]: expected {ml}, got {len(meta)} bytes"
+                        f"Object too small [id {id_hex}]: expected {meta_size}, got {len(meta)} bytes"
                     )
-                return meta_len + meta
+                return hdr + meta
         except StoreObjectNotFound:
             raise self.ObjectNotFound(id, self.path) from None
 

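Since the check needs only the store and the per-object headers, it runs entirely on the repository side. A hypothetical usage sketch (the constructor call mirrors the tests below; repository_location is a placeholder, and this is not a stable API):

# Hypothetical sketch: repository_location is a placeholder path/URL.
with Repository3(repository_location, exclusive=True) as repository:
    # logs per-object errors; returns True if no errors were found (or if repair was requested)
    ok = repository.check(repair=False)
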
+ 8 - 6
src/borg/testsuite/archiver/check_cmd.py

@@ -9,6 +9,7 @@ from ...constants import *  # NOQA
 from ...helpers import bin_to_hex, msgpack
 from ...manifest import Manifest
 from ...repository3 import Repository3
+from ..repository3 import fchunk
 from . import cmd, src_file, create_src_archive, open_archive, generate_archiver_tests, RK_ENCRYPTION
 
 pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary")  # NOQA
@@ -207,7 +208,7 @@ def test_corrupted_manifest(archivers, request):
     archive, repository = open_archive(archiver.repository_path, "archive1")
     with repository:
         manifest = repository.get(Manifest.MANIFEST_ID)
-        corrupted_manifest = manifest + b"corrupted!"
+        corrupted_manifest = manifest[:123] + b"corrupted!" + manifest[123:]
         repository.put(Manifest.MANIFEST_ID, corrupted_manifest)
         repository.commit(compact=False)
     cmd(archiver, "check", exit_code=1)
@@ -257,7 +258,7 @@ def test_manifest_rebuild_corrupted_chunk(archivers, request):
     archive, repository = open_archive(archiver.repository_path, "archive1")
     with repository:
         manifest = repository.get(Manifest.MANIFEST_ID)
-        corrupted_manifest = manifest + b"corrupted!"
+        corrupted_manifest = manifest[:123] + b"corrupted!" + manifest[123:]
         repository.put(Manifest.MANIFEST_ID, corrupted_manifest)
         chunk = repository.get(archive.id)
         corrupted_chunk = chunk + b"corrupted!"
@@ -276,7 +277,7 @@ def test_manifest_rebuild_duplicate_archive(archivers, request):
     repo_objs = archive.repo_objs
     with repository:
         manifest = repository.get(Manifest.MANIFEST_ID)
-        corrupted_manifest = manifest + b"corrupted!"
+        corrupted_manifest = manifest[:123] + b"corrupted!" + manifest[123:]
         repository.put(Manifest.MANIFEST_ID, corrupted_manifest)
         archive_dict = {
             "command_line": "",
@@ -307,7 +308,7 @@ def test_spoofed_archive(archivers, request):
     with repository:
         # attacker would corrupt or delete the manifest to trigger a rebuild of it:
         manifest = repository.get(Manifest.MANIFEST_ID)
-        corrupted_manifest = manifest + b"corrupted!"
+        corrupted_manifest = manifest[:123] + b"corrupted!" + manifest[123:]
         repository.put(Manifest.MANIFEST_ID, corrupted_manifest)
         archive_dict = {
             "command_line": "",
@@ -347,7 +348,8 @@ def test_extra_chunks(archivers, request):
     check_cmd_setup(archiver)
     cmd(archiver, "check", exit_code=0)
     with Repository3(archiver.repository_location, exclusive=True) as repository:
-        repository.put(b"01234567890123456789012345678901", b"xxxx")
+        chunk = fchunk(b"xxxx")
+        repository.put(b"01234567890123456789012345678901", chunk)
         repository.commit(compact=False)
     output = cmd(archiver, "check", "-v", exit_code=0)  # orphans are not considered warnings anymore
     assert "1 orphaned (unused) objects found." in output
@@ -374,7 +376,7 @@ def test_verify_data(archivers, request, init_args):
                 repository.put(chunk.id, data)
                 break
         repository.commit(compact=False)
-    cmd(archiver, "check", exit_code=0)
+    cmd(archiver, "check", exit_code=1)
     output = cmd(archiver, "check", "--verify-data", exit_code=1)
     assert bin_to_hex(chunk.id) + ", integrity error" in output
 

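A note on why the manifest tests above switched from appending corruption to splicing it in (an inference from this diff, not stated in the commit message): the header now records exact meta and data sizes, so bytes appended past hdr_size + meta_size + data_size are sliced off by parse() and never hashed, while bytes spliced into the object shift the hashed meta/data region and trip the xxh64 comparison:

# appended garbage lands outside the sizes recorded in the header: not detected
corrupted_manifest = manifest + b"corrupted!"
# spliced garbage corrupts the hashed meta/data region: detected
corrupted_manifest = manifest[:123] + b"corrupted!" + manifest[123:]
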
+ 13 - 12
src/borg/testsuite/repository.py

@@ -6,6 +6,7 @@ from unittest.mock import patch
 
 import pytest
 
+from ..checksums import xxh64
 from ..hashindex import NSIndex
 from ..helpers import Location
 from ..helpers import IntegrityError
@@ -73,19 +74,19 @@ def get_path(repository):
 
 def fchunk(data, meta=b""):
     # create a raw chunk that has valid RepoObj layout, but does not use encryption or compression.
-    meta_len = RepoObj.meta_len_hdr.pack(len(meta))
+    hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta), xxh64(data))
     assert isinstance(data, bytes)
-    chunk = meta_len + meta + data
+    chunk = hdr + meta + data
     return chunk
 
 
 def pchunk(chunk):
     # parse data and meta from a raw chunk made by fchunk
-    meta_len_size = RepoObj.meta_len_hdr.size
-    meta_len = chunk[:meta_len_size]
-    meta_len = RepoObj.meta_len_hdr.unpack(meta_len)[0]
-    meta = chunk[meta_len_size : meta_len_size + meta_len]
-    data = chunk[meta_len_size + meta_len :]
+    hdr_size = RepoObj.obj_header.size
+    hdr = chunk[:hdr_size]
+    meta_size, data_size = RepoObj.obj_header.unpack(hdr)[0:2]
+    meta = chunk[hdr_size : hdr_size + meta_size]
+    data = chunk[hdr_size + meta_size : hdr_size + meta_size + data_size]
     return data, meta
 
 
@@ -148,9 +149,9 @@ def test_multiple_transactions(repo_fixtures, request):
 def test_read_data(repo_fixtures, request):
     with get_repository_from_fixture(repo_fixtures, request) as repository:
         meta, data = b"meta", b"data"
-        meta_len = RepoObj.meta_len_hdr.pack(len(meta))
-        chunk_complete = meta_len + meta + data
-        chunk_short = meta_len + meta
+        hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta), xxh64(data))
+        chunk_complete = hdr + meta + data
+        chunk_short = hdr + meta
         repository.put(H(0), chunk_complete)
         repository.commit(compact=False)
         assert repository.get(H(0)) == chunk_complete
@@ -273,7 +274,7 @@ def test_scan_modify(repo_fixtures, request):
 
 def test_max_data_size(repo_fixtures, request):
     with get_repository_from_fixture(repo_fixtures, request) as repository:
-        max_data = b"x" * (MAX_DATA_SIZE - RepoObj.meta_len_hdr.size)
+        max_data = b"x" * (MAX_DATA_SIZE - RepoObj.obj_header.size)
         repository.put(H(0), fchunk(max_data))
         assert pdchunk(repository.get(H(0))) == max_data
         with pytest.raises(IntegrityError):
@@ -706,7 +707,7 @@ def test_exceed_quota(repository):
             repository.commit(compact=False)
         assert repository.storage_quota_use == len(ch1) + len(ch2) + (41 + 8) * 2  # check ch2!?
     with reopen(repository) as repository:
-        repository.storage_quota = 150
+        repository.storage_quota = 161
         # open new transaction; hints and thus quota data is not loaded unless needed.
         repository.put(H(1), ch1)
         # we have 2 puts for H(1) here and not yet compacted.

+ 12 - 11
src/borg/testsuite/repository3.py

@@ -5,6 +5,7 @@ from typing import Optional
 
 import pytest
 
+from ..checksums import xxh64
 from ..helpers import Location
 from ..helpers import IntegrityError
 from ..platformflags import is_win32
@@ -57,19 +58,19 @@ def reopen(repository, exclusive: Optional[bool] = True, create=False):
 
 def fchunk(data, meta=b""):
     # format chunk: create a raw chunk that has valid RepoObj layout, but does not use encryption or compression.
-    meta_len = RepoObj.meta_len_hdr.pack(len(meta))
+    hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta), xxh64(data))
     assert isinstance(data, bytes)
-    chunk = meta_len + meta + data
+    chunk = hdr + meta + data
     return chunk
 
 
 def pchunk(chunk):
     # parse chunk: parse data and meta from a raw chunk made by fchunk
-    meta_len_size = RepoObj.meta_len_hdr.size
-    meta_len = chunk[:meta_len_size]
-    meta_len = RepoObj.meta_len_hdr.unpack(meta_len)[0]
-    meta = chunk[meta_len_size : meta_len_size + meta_len]
-    data = chunk[meta_len_size + meta_len :]
+    hdr_size = RepoObj.obj_header.size
+    hdr = chunk[:hdr_size]
+    meta_size, data_size = RepoObj.obj_header.unpack(hdr)[0:2]
+    meta = chunk[hdr_size : hdr_size + meta_size]
+    data = chunk[hdr_size + meta_size : hdr_size + meta_size + data_size]
     return data, meta
 
 
@@ -99,9 +100,9 @@ def test_basic_operations(repo_fixtures, request):
 def test_read_data(repo_fixtures, request):
     with get_repository_from_fixture(repo_fixtures, request) as repository:
         meta, data = b"meta", b"data"
-        meta_len = RepoObj.meta_len_hdr.pack(len(meta))
-        chunk_complete = meta_len + meta + data
-        chunk_short = meta_len + meta
+        hdr = RepoObj.obj_header.pack(len(meta), len(data), xxh64(meta), xxh64(data))
+        chunk_complete = hdr + meta + data
+        chunk_short = hdr + meta
         repository.put(H(0), chunk_complete)
         assert repository.get(H(0)) == chunk_complete
         assert repository.get(H(0), read_data=True) == chunk_complete
@@ -152,7 +153,7 @@ def test_scan(repo_fixtures, request):
 
 def test_max_data_size(repo_fixtures, request):
     with get_repository_from_fixture(repo_fixtures, request) as repository:
-        max_data = b"x" * (MAX_DATA_SIZE - RepoObj.meta_len_hdr.size)
+        max_data = b"x" * (MAX_DATA_SIZE - RepoObj.obj_header.size)
         repository.put(H(0), fchunk(max_data))
         assert pdchunk(repository.get(H(0))) == max_data
         with pytest.raises(IntegrityError):