
Merge pull request #6514 from ThomasWaldmann/repo-v2

repository v2
TW 3 years ago
parent commit 1b95950613

+ 19 - 10
docs/internals/data-structures.rst

@@ -59,7 +59,7 @@ Each repository has a ``config`` file which is a ``INI``-style file
 and looks like this::
 
     [repository]
-    version = 1
+    version = 2
     segments_per_dir = 1000
     max_segment_size = 524288000
     id = 57d6c1d52ce76a836b532b0e42e677dec6af9fca3673db511279358828a21ed6
@@ -94,18 +94,27 @@ this value in a non-empty repository, you may also need to relocate the segment
 files manually.
 
 A segment starts with a magic number (``BORG_SEG`` as an eight byte ASCII string),
-followed by a number of log entries. Each log entry consists of: (in this order)
-
-* First, unsigned 32-bit number, the CRC32 of the entire entry (for a PUT including the DATA) excluding the CRC32 field
-* Second, unsigned 32-bit size of the entry (including the whole header)
-* Third, unsigned 8-bit entry tag: PUT(1), DELETE(2) or COMMIT(3)
-* Fourth, on PUT or DELETE, 32 byte key
-* Fifth, PUT only, (size - 41) bytes of data (length = size - sizeof(CRC32) - sizeof(size) - sizeof(entry tag) - sizeof(key))
+followed by a number of log entries. Each log entry consists of (in this order):
+
+* crc32 checksum (uint32):
+  - for PUT2: CRC32(size + tag + key + digest)
+  - for PUT: CRC32(size + tag + key + data)
+  - for DELETE: CRC32(size + tag + key)
+  - for COMMIT: CRC32(size + tag)
+* size (uint32) of the entry (including the whole header)
+* tag (uint8): PUT(0), DELETE(1), COMMIT(2) or PUT2(3)
+* key (256 bit) - only for PUT/PUT2/DELETE
+* data (size - 41 bytes) - only for PUT
+* xxh64 digest (64 bit) = XXH64(size + tag + key + data) - only for PUT2
+* data (size - 41 - 8 bytes) - only for PUT2
+
+PUT2 is new since repository version 2 and is used for all new log entries.
+PUT is still supported for reading version 1 repositories, but is no longer
+generated. When we speak of ``PUT`` in general, it usually means PUT2 for
+repository version 2+.
 
 Those files are strictly append-only and modified only once.
 
-Tag is either ``PUT``, ``DELETE``, or ``COMMIT``.
-
 When an object is written to the repository a ``PUT`` entry is written
 to the file containing the object id and data. If an object is deleted
 a ``DELETE`` entry is appended with the object id.
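
To make the entry layout concrete, here is a minimal sketch of parsing and
crc-checking a single log entry. It is illustrative only, not borg's code; the
``<IIB`` little-endian header layout (crc32, size, tag) and the tag values are
assumed to match ``header_fmt`` in ``src/borg/repository.py``, and the xxh64
digest of PUT2 entries is not verified here::

    import struct
    import zlib

    HEADER_FMT = struct.Struct('<IIB')  # crc32 (uint32), size (uint32), tag (uint8)
    TAG_PUT, TAG_DELETE, TAG_COMMIT, TAG_PUT2 = 0, 1, 2, 3

    def parse_entry(fd):
        """Parse one log entry; returns (tag, key, data), key/data may be None."""
        header = fd.read(HEADER_FMT.size)
        crc, size, tag = HEADER_FMT.unpack(header)
        remaining = size - HEADER_FMT.size
        key = digest = data = None
        if tag in (TAG_PUT, TAG_PUT2, TAG_DELETE):
            key = fd.read(32)
            remaining -= 32
        if tag == TAG_PUT2:
            digest = fd.read(8)  # XXH64(size + tag + key + data), checked separately
            remaining -= 8
        if tag in (TAG_PUT, TAG_PUT2):
            data = fd.read(remaining)
        # the crc32 covers everything after the crc field itself:
        # PUT2: size+tag+key+digest, PUT: size+tag+key+data,
        # DELETE: size+tag+key, COMMIT: size+tag
        covered = header[4:] + (key or b'') + (digest if tag == TAG_PUT2 else (data or b''))
        if zlib.crc32(covered) & 0xffffffff != crc:
            raise ValueError('crc32 mismatch')
        return tag, key, data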

+ 7 - 6
src/borg/constants.py

@@ -33,14 +33,15 @@ CACHE_TAG_CONTENTS = b'Signature: 8a477f597d28d172789f06886806bc55'
 # bytes. That's why it's 500 MiB instead of 512 MiB.
 DEFAULT_MAX_SEGMENT_SIZE = 500 * 1024 * 1024
 
-# 20 MiB minus 41 bytes for a Repository header (because the "size" field in the Repository includes
-# the header, and the total size was set to 20 MiB).
+# in borg < 1.3, this has been defined like this:
+# 20 MiB minus 41 bytes for a PUT header (because the "size" field in the Repository includes
+# the header, and the total size was set to precisely 20 MiB for borg < 1.3).
 MAX_DATA_SIZE = 20971479
 
-# MAX_OBJECT_SIZE = <20 MiB (MAX_DATA_SIZE) + 41 bytes for a Repository PUT header, which consists of
-# a 1 byte tag ID, 4 byte CRC, 4 byte size and 32 bytes for the ID.
-MAX_OBJECT_SIZE = MAX_DATA_SIZE + 41  # see LoggedIO.put_header_fmt.size assertion in repository module
-assert MAX_OBJECT_SIZE == 20 * 1024 * 1024
+# MAX_OBJECT_SIZE = MAX_DATA_SIZE + len(PUT2 header)
+# note: for borg >= 1.3, this makes the MAX_OBJECT_SIZE grow slightly over the precise 20MiB used by
+# borg < 1.3, but this is not expected to cause any issues.
+MAX_OBJECT_SIZE = MAX_DATA_SIZE + 41 + 8  # see assertion at end of repository module
 
 # repo config max_segment_size value must be below this limit to stay within uint32 offsets:
 MAX_SEGMENT_SIZE_LIMIT = 2 ** 32 - MAX_OBJECT_SIZE
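
The arithmetic behind these constants is easy to verify; a quick worked
example (illustrative, not borg code)::

    MiB = 1024 * 1024
    MAX_DATA_SIZE = 20 * MiB - 41            # 20971479, kept from borg < 1.3
    PUT2_OVERHEAD = 41 + 8                   # 9 byte header + 32 byte id + 8 byte xxh64
    MAX_OBJECT_SIZE = MAX_DATA_SIZE + PUT2_OVERHEAD
    assert MAX_OBJECT_SIZE == 20 * MiB + 8   # 8 bytes over the old exact 20 MiB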

+ 126 - 55
src/borg/repository.py

@@ -25,7 +25,7 @@ from .locking import Lock, LockError, LockErrorT
 from .logger import create_logger
 from .lrucache import LRUCache
 from .platform import SaveFile, SyncFile, sync_dir, safe_fadvise
-from .checksums import crc32
+from .checksums import crc32, StreamingXXH64
 from .crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError
 
 logger = create_logger(__name__)
@@ -34,9 +34,11 @@ MAGIC = b'BORG_SEG'
 MAGIC_LEN = len(MAGIC)
 ATTIC_MAGIC = b'ATTICSEG'
 assert len(ATTIC_MAGIC) == MAGIC_LEN
+
 TAG_PUT = 0
 TAG_DELETE = 1
 TAG_COMMIT = 2
+TAG_PUT2 = 3
 
 # Highest ID usable as TAG_* value
 #
@@ -163,6 +165,7 @@ class Repository:
                  make_parent_dirs=False):
         self.path = os.path.abspath(path)
         self._location = Location('file://%s' % self.path)
+        self.version = None
         self.io = None  # type: LoggedIO
         self.lock = None
         self.index = None
@@ -284,7 +287,8 @@ class Repository:
         os.mkdir(os.path.join(path, 'data'))
         config = ConfigParser(interpolation=None)
         config.add_section('repository')
-        config.set('repository', 'version', '1')
+        self.version = 2
+        config.set('repository', 'version', str(self.version))
         config.set('repository', 'segments_per_dir', str(DEFAULT_SEGMENTS_PER_DIR))
         config.set('repository', 'max_segment_size', str(DEFAULT_MAX_SEGMENT_SIZE))
         config.set('repository', 'append_only', str(int(self.append_only)))
@@ -440,7 +444,11 @@ class Repository:
         except FileNotFoundError:
             self.close()
             raise self.InvalidRepository(self.path)
-        if 'repository' not in self.config.sections() or self.config.getint('repository', 'version') != 1:
+        if 'repository' not in self.config.sections():
+            self.close()
+            raise self.InvalidRepository(path)
+        self.version = self.config.getint('repository', 'version')
+        if self.version not in (2, ):  # for now, only work on new repos
             self.close()
             raise self.InvalidRepository(path)
         self.max_segment_size = parse_file_size(self.config.get('repository', 'max_segment_size'))
@@ -788,7 +796,7 @@ class Repository:
                     continue
                 in_index = self.index.get(key)
                 is_index_object = in_index == (segment, offset)
-                if tag == TAG_PUT and is_index_object:
+                if tag in (TAG_PUT2, TAG_PUT) and is_index_object:
                     try:
                         new_segment, offset = self.io.write_put(key, data, raise_full=True)
                     except LoggedIO.SegmentFull:
@@ -798,7 +806,10 @@ class Repository:
                     segments.setdefault(new_segment, 0)
                     segments[new_segment] += 1
                     segments[segment] -= 1
-                elif tag == TAG_PUT and not is_index_object:
+                    if tag == TAG_PUT:
+                        # old tag is PUT, but new will be PUT2 and use a bit more storage
+                        self.storage_quota_use += self.io.ENTRY_HASH_SIZE
+                elif tag in (TAG_PUT2, TAG_PUT) and not is_index_object:
                     # If this is a PUT shadowed by a later tag, then it will be gone when this segment is deleted after
                     # this loop. Therefore it is removed from the shadow index.
                     try:
@@ -807,7 +818,10 @@ class Repository:
                         # do not remove entry with empty shadowed_segments list here,
                         # it is needed for shadowed_put_exists code (see below)!
                         pass
-                    self.storage_quota_use -= len(data) + self.io.HEADER_ID_SIZE
+                    if tag == TAG_PUT2:
+                        self.storage_quota_use -= len(data) + self.io.HEADER_ID_SIZE + self.io.ENTRY_HASH_SIZE
+                    elif tag == TAG_PUT:
+                        self.storage_quota_use -= len(data) + self.io.HEADER_ID_SIZE
                 elif tag == TAG_DELETE and not in_index:
                     # If the shadow index doesn't contain this key, then we can't say if there's a shadowed older tag,
                     # therefore we do not drop the delete, but write it to a current segment.
@@ -830,7 +844,7 @@ class Repository:
                         # Consider the following series of operations if we would not do this, ie. this entire if:
                         # would be removed.
                         # Columns are segments, lines are different keys (line 1 = some key, line 2 = some other key)
-                        # Legend: P=TAG_PUT, D=TAG_DELETE, c=commit, i=index is written for latest commit
+                        # Legend: P=TAG_PUT/TAG_PUT2, D=TAG_DELETE, c=commit, i=index is written for latest commit
                         #
                         # Segment | 1     | 2   | 3
                         # --------+-------+-----+------
@@ -899,7 +913,7 @@ class Repository:
         """some code shared between replay_segments and check"""
         self.segments[segment] = 0
         for tag, key, offset, size in objects:
-            if tag == TAG_PUT:
+            if tag in (TAG_PUT2, TAG_PUT):
                 try:
                     # If this PUT supersedes an older PUT, mark the old segment for compaction and count the free space
                     s, _ = self.index[key]
@@ -950,7 +964,7 @@ class Repository:
 
         self.compact[segment] = 0
         for tag, key, offset, size in self.io.iter_objects(segment, read_data=False):
-            if tag == TAG_PUT:
+            if tag in (TAG_PUT2, TAG_PUT):
                 if self.index.get(key, (-1, -1)) != (segment, offset):
                     # This PUT is superseded later
                     self.compact[segment] += size
@@ -1169,7 +1183,7 @@ class Repository:
                     # also, for the next segment, we need to start at offset 0.
                     start_offset = 0
                     continue
-                if tag == TAG_PUT and (segment, offset) == self.index.get(id):
+                if tag in (TAG_PUT2, TAG_PUT) and (segment, offset) == self.index.get(id):
                     # we have found an existing and current object
                     result.append(id)
                     if len(result) == limit:
@@ -1208,7 +1222,7 @@ class Repository:
             # be in the repo index (and we won't need it in the shadow_index).
             self._delete(id, segment, offset, update_shadow_index=False)
         segment, offset = self.io.write_put(id, data)
-        self.storage_quota_use += len(data) + self.io.HEADER_ID_SIZE
+        self.storage_quota_use += len(data) + self.io.HEADER_ID_SIZE + self.io.ENTRY_HASH_SIZE
         self.segments.setdefault(segment, 0)
         self.segments[segment] += 1
         self.index[id] = segment, offset
@@ -1278,6 +1292,7 @@ class LoggedIO:
     COMMIT = crc_fmt.pack(crc32(_commit)) + _commit
 
     HEADER_ID_SIZE = header_fmt.size + 32
+    ENTRY_HASH_SIZE = 8
 
     def __init__(self, path, limit, segments_per_dir, capacity=90):
         self.path = path
@@ -1475,7 +1490,8 @@ class LoggedIO:
         Return object iterator for *segment*.
 
         If read_data is False then include_data must be False as well.
-        Integrity checks are skipped: all data obtained from the iterator must be considered informational.
+
+        See the _read() docstring about confidence in the returned data.
 
         The iterator returns four-tuples of (tag, key, offset, data|size).
         """
@@ -1491,7 +1507,7 @@ class LoggedIO:
         header = fd.read(self.header_fmt.size)
         while header:
             size, tag, key, data = self._read(fd, header, segment, offset,
-                                              (TAG_PUT, TAG_DELETE, TAG_COMMIT),
+                                              (TAG_PUT2, TAG_DELETE, TAG_COMMIT, TAG_PUT),
                                               read_data=read_data)
             if include_data:
                 yield tag, key, offset, data
@@ -1528,8 +1544,25 @@ class LoggedIO:
                         dst_fd.write(MAGIC)
                         while len(d) >= self.header_fmt.size:
                             crc, size, tag = self.header_fmt.unpack(d[:self.header_fmt.size])
-                            if size > MAX_OBJECT_SIZE or tag > MAX_TAG_ID or size < self.header_fmt.size \
-                               or size > len(d) or crc32(d[4:size]) & 0xffffffff != crc:
+                            size_invalid = size > MAX_OBJECT_SIZE or size < self.header_fmt.size or size > len(d)
+                            if size_invalid or tag > MAX_TAG_ID:
+                                d = d[1:]
+                                continue
+                            if tag == TAG_PUT2:
+                                c_offset = self.HEADER_ID_SIZE + self.ENTRY_HASH_SIZE
+                                # skip if header is invalid
+                                if crc32(d[4:c_offset]) & 0xffffffff != crc:
+                                    d = d[1:]
+                                    continue
+                                # skip if content is invalid
+                                if self.entry_hash(d[4:self.HEADER_ID_SIZE], d[c_offset:size]) != d[self.HEADER_ID_SIZE:c_offset]:
+                                    d = d[1:]
+                                    continue
+                            elif tag in (TAG_DELETE, TAG_COMMIT, TAG_PUT):
+                                if crc32(d[4:size]) & 0xffffffff != crc:
+                                    d = d[1:]
+                                    continue
+                            else:  # tag unknown
                                 d = d[1:]
                                 continue
                             dst_fd.write(d[:size])
@@ -1538,72 +1571,108 @@ class LoggedIO:
                         del d
                         data.release()
 
+    def entry_hash(self, *data):
+        h = StreamingXXH64()
+        for d in data:
+            h.update(d)
+        return h.digest()
+
     def read(self, segment, offset, id, read_data=True):
         """
         Read entry from *segment* at *offset* with *id*.
+        If read_data is False the size of the entry is returned instead.
 
-        If read_data is False the size of the entry is returned instead and integrity checks are skipped.
-        The return value should thus be considered informational.
+        See the _read() docstring about confidence in the returned data.
         """
         if segment == self.segment and self._write_fd:
             self._write_fd.sync()
         fd = self.get_fd(segment)
         fd.seek(offset)
         header = fd.read(self.header_fmt.size)
-        size, tag, key, data = self._read(fd, header, segment, offset, (TAG_PUT,), read_data)
+        size, tag, key, data = self._read(fd, header, segment, offset, (TAG_PUT2, TAG_PUT), read_data)
         if id != key:
             raise IntegrityError('Invalid segment entry header, is not for wanted id [segment {}, offset {}]'.format(
                 segment, offset))
         return data if read_data else size
 
     def _read(self, fd, header, segment, offset, acceptable_tags, read_data=True):
-        # some code shared by read() and iter_objects()
+        """
+        Code shared by read() and iter_objects().
+
+        Confidence in returned data:
+        PUT2 tags, read_data == True: crc32 check (header) plus digest check (header+data)
+        PUT2 tags, read_data == False: crc32 check (header)
+        PUT tags, read_data == True: crc32 check (header+data)
+        PUT tags, read_data == False: crc32 check can not be done, all data obtained must be considered informational
+        """
+        def check_crc32(wanted, header, *data):
+            result = crc32(memoryview(header)[4:])  # skip first 32 bits of the header, they contain the crc.
+            for d in data:
+                result = crc32(d, result)
+            if result & 0xffffffff != wanted:
+                raise IntegrityError(f'Segment entry header checksum mismatch [segment {segment}, offset {offset}]')
+
         # See comment on MAX_TAG_ID for details
         assert max(acceptable_tags) <= MAX_TAG_ID, 'Exceeding MAX_TAG_ID will break backwards compatibility'
+        key = data = None
         fmt = self.header_fmt
         try:
             hdr_tuple = fmt.unpack(header)
         except struct.error as err:
-            raise IntegrityError('Invalid segment entry header [segment {}, offset {}]: {}'.format(
-                segment, offset, err)) from None
+            raise IntegrityError(f'Invalid segment entry header [segment {segment}, offset {offset}]: {err}') from None
         crc, size, tag = hdr_tuple
         length = size - fmt.size  # we already read the header
         if size > MAX_OBJECT_SIZE:
             # if you get this on an archive made with borg < 1.0.7 and millions of files and
             # you need to restore it, you can disable this check by using "if False:" above.
-            raise IntegrityError('Invalid segment entry size {} - too big [segment {}, offset {}]'.format(
-                size, segment, offset))
+            raise IntegrityError(f'Invalid segment entry size {size} - too big [segment {segment}, offset {offset}]')
         if size < fmt.size:
-            raise IntegrityError('Invalid segment entry size {} - too small [segment {}, offset {}]'.format(
-                size, segment, offset))
-        if tag in (TAG_PUT, TAG_DELETE):
+            raise IntegrityError(f'Invalid segment entry size {size} - too small [segment {segment}, offset {offset}]')
+        if tag not in (TAG_PUT2, TAG_DELETE, TAG_COMMIT, TAG_PUT):
+            raise IntegrityError(f'Invalid segment entry header, did not get a known tag '
+                                 f'[segment {segment}, offset {offset}]')
+        if tag not in acceptable_tags:
+            raise IntegrityError(f'Invalid segment entry header, did not get acceptable tag '
+                                 f'[segment {segment}, offset {offset}]')
+        if tag == TAG_COMMIT:
+            check_crc32(crc, header)
+            # that's all for COMMITs.
+        else:
+            # all other tags (TAG_PUT2, TAG_DELETE, TAG_PUT) have a key
             key = fd.read(32)
             length -= 32
             if len(key) != 32:
-                raise IntegrityError(
-                    'Segment entry key short read [segment {}, offset {}]: expected {}, got {} bytes'.format(
-                        segment, offset, 32, len(key)))
-        else:
-            key = None
-        if read_data and tag == TAG_PUT:
-            data = fd.read(length)
-            if len(data) != length:
-                raise IntegrityError('Segment entry data short read [segment {}, offset {}]: expected {}, got {} bytes'.format(
-                    segment, offset, length, len(data)))
-            if crc32(data, crc32(key, crc32(memoryview(header)[4:]))) & 0xffffffff != crc:
-                raise IntegrityError('Segment entry checksum mismatch [segment {}, offset {}]'.format(
-                    segment, offset))
-        else:
-            data = None
-            if length > 0:
-                oldpos = fd.tell()
-                seeked = fd.seek(length, os.SEEK_CUR) - oldpos
-                if seeked != length:
-                    raise IntegrityError('Segment entry data short seek [segment {}, offset {}]: expected {}, got {} bytes'.format(
-                            segment, offset, length, seeked))
-        if tag not in acceptable_tags:
-            raise IntegrityError('Invalid segment entry header, did not get acceptable tag [segment {}, offset {}]'.format(
-                segment, offset))
+                raise IntegrityError(f'Segment entry key short read [segment {segment}, offset {offset}]: '
+                                     f'expected {32}, got {len(key)} bytes')
+            if tag == TAG_DELETE:
+                check_crc32(crc, header, key)
+                # that's all for DELETEs.
+            else:
+                # TAG_PUT: we can not do a crc32 header check here, because the crc32 is computed over header+data!
+                #          for the check, see code below when read_data is True.
+                if tag == TAG_PUT2:
+                    entry_hash = fd.read(self.ENTRY_HASH_SIZE)
+                    length -= self.ENTRY_HASH_SIZE
+                    if len(entry_hash) != self.ENTRY_HASH_SIZE:
+                        raise IntegrityError(f'Segment entry hash short read [segment {segment}, offset {offset}]: '
+                                             f'expected {self.ENTRY_HASH_SIZE}, got {len(entry_hash)} bytes')
+                    check_crc32(crc, header, key, entry_hash)
+                if not read_data:  # seek over data
+                    oldpos = fd.tell()
+                    seeked = fd.seek(length, os.SEEK_CUR) - oldpos
+                    if seeked != length:
+                        raise IntegrityError(f'Segment entry data short seek [segment {segment}, offset {offset}]: '
+                                             f'expected {length}, got {seeked} bytes')
+                else:  # read data!
+                    data = fd.read(length)
+                    if len(data) != length:
+                        raise IntegrityError(f'Segment entry data short read [segment {segment}, offset {offset}]: '
+                                             f'expected {length}, got {len(data)} bytes')
+                    if tag == TAG_PUT2:
+                        if self.entry_hash(memoryview(header)[4:], key, data) != entry_hash:
+                            raise IntegrityError(f'Segment entry hash mismatch [segment {segment}, offset {offset}]')
+                    elif tag == TAG_PUT:
+                        check_crc32(crc, header, key, data)
         return size, tag, key, data
 
     def write_put(self, id, data, raise_full=False):
@@ -1612,11 +1681,13 @@ class LoggedIO:
             # this would push the segment entry size beyond MAX_OBJECT_SIZE.
             raise IntegrityError(f'More than allowed put data [{data_size} > {MAX_DATA_SIZE}]')
         fd = self.get_write_fd(want_new=(id == Manifest.MANIFEST_ID), raise_full=raise_full)
-        size = data_size + self.HEADER_ID_SIZE
+        size = data_size + self.HEADER_ID_SIZE + self.ENTRY_HASH_SIZE
         offset = self.offset
-        header = self.header_no_crc_fmt.pack(size, TAG_PUT)
-        crc = self.crc_fmt.pack(crc32(data, crc32(id, crc32(header))) & 0xffffffff)
-        fd.write(b''.join((crc, header, id, data)))
+        header = self.header_no_crc_fmt.pack(size, TAG_PUT2)
+        entry_hash = self.entry_hash(header, id, data)
+        crc = self.crc_fmt.pack(crc32(entry_hash, crc32(id, crc32(header))) & 0xffffffff)
+        fd.write(b''.join((crc, header, id, entry_hash)))
+        fd.write(data)
         self.offset += size
         return self.segment, offset
 
@@ -1641,4 +1712,4 @@ class LoggedIO:
         return self.segment - 1  # close_segment() increments it
 
 
-assert LoggedIO.HEADER_ID_SIZE == 41  # see constants.MAX_OBJECT_SIZE
+assert LoggedIO.HEADER_ID_SIZE + LoggedIO.ENTRY_HASH_SIZE == 41 + 8  # see constants.MAX_OBJECT_SIZE
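
For clarity, the checksum chain that ``write_put()`` builds and ``_read()``
verifies can be reproduced standalone. A minimal sketch, assuming the PyPI
``xxhash`` package produces the same 8-byte digest as borg's
``StreamingXXH64`` (borg ships its own binding in ``borg.checksums``, so the
byte-order match is an assumption)::

    import struct
    import zlib
    import xxhash  # assumed stand-in for borg.checksums.StreamingXXH64

    HEADER_NO_CRC_FMT = struct.Struct('<IB')  # size (uint32), tag (uint8)
    CRC_FMT = struct.Struct('<I')
    TAG_PUT2 = 3

    def build_put2(key, data):
        """Serialize a PUT2 entry: crc32 + size + tag + key + xxh64 + data."""
        assert len(key) == 32
        size = 4 + HEADER_NO_CRC_FMT.size + 32 + 8 + len(data)  # 41 + 8 + len(data)
        header = HEADER_NO_CRC_FMT.pack(size, TAG_PUT2)
        digest = xxhash.xxh64(header + key + data).digest()     # covers the payload
        crc = CRC_FMT.pack(zlib.crc32(header + key + digest) & 0xffffffff)
        return crc + header + key + digest + data

Note that the crc32 of a PUT2 entry covers only the fixed-size header, key and
digest, which is why ``_read(..., read_data=False)`` can validate the header
without reading the data, something that is impossible for old PUT entries
where the crc32 covers the data itself.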

+ 19 - 19
src/borg/testsuite/repository.py

@@ -14,7 +14,7 @@ from ..helpers import IntegrityError
 from ..helpers import msgpack
 from ..locking import Lock, LockFailed
 from ..remote import RemoteRepository, InvalidRPCMethod, PathNotAllowed, ConnectionClosedWithHint, handle_remote_line
-from ..repository import Repository, LoggedIO, MAGIC, MAX_DATA_SIZE, TAG_DELETE, TAG_PUT, TAG_COMMIT
+from ..repository import Repository, LoggedIO, MAGIC, MAX_DATA_SIZE, TAG_DELETE, TAG_PUT2, TAG_PUT, TAG_COMMIT
 from . import BaseTestCase
 from .hashindex import H
 
@@ -58,7 +58,7 @@ class RepositoryTestCaseBase(BaseTestCase):
         label = label + ': ' if label is not None else ''
         H_trans = {H(i): i for i in range(10)}
         H_trans[None] = -1  # key == None appears in commits
-        tag_trans = {TAG_PUT: 'put', TAG_DELETE: 'del', TAG_COMMIT: 'comm'}
+        tag_trans = {TAG_PUT2: 'put2', TAG_PUT: 'put', TAG_DELETE: 'del', TAG_COMMIT: 'comm'}
         for segment, fn in self.repository.io.segment_iterator():
             for tag, key, offset, size in self.repository.io.iter_objects(segment):
                 print("%s%s H(%d) -> %s[%d..+%d]" % (label, tag_trans[tag], H_trans[key], fn, offset, size))
@@ -185,13 +185,13 @@ class LocalRepositoryTestCase(RepositoryTestCaseBase):
 
     def _assert_sparse(self):
         # The superseded 123456... PUT
-        assert self.repository.compact[0] == 41 + 9
+        assert self.repository.compact[0] == 41 + 8 + 9
         # a COMMIT
         assert self.repository.compact[1] == 9
         # The DELETE issued by the superseding PUT (or issued directly)
         assert self.repository.compact[2] == 41
         self.repository._rebuild_sparse(0)
-        assert self.repository.compact[0] == 41 + 9
+        assert self.repository.compact[0] == 41 + 8 + 9
 
     def test_sparse1(self):
         self.repository.put(H(0), b'foo')
@@ -213,10 +213,10 @@ class LocalRepositoryTestCase(RepositoryTestCaseBase):
         self.repository.io._write_fd.sync()
 
         # The on-line tracking works on a per-object basis...
-        assert self.repository.compact[0] == 41 + 41 + 4
+        assert self.repository.compact[0] == 41 + 8 + 41 + 4
         self.repository._rebuild_sparse(0)
         # ...while _rebuild_sparse can mark whole segments as completely sparse (which then includes the segment magic)
-        assert self.repository.compact[0] == 41 + 41 + 4 + len(MAGIC)
+        assert self.repository.compact[0] == 41 + 8 + 41 + 4 + len(MAGIC)
 
         self.repository.commit(compact=True)
         assert 0 not in [segment for segment, _ in self.repository.io.segment_iterator()]
@@ -459,42 +459,42 @@ class QuotaTestCase(RepositoryTestCaseBase):
     def test_tracking(self):
         assert self.repository.storage_quota_use == 0
         self.repository.put(H(1), bytes(1234))
-        assert self.repository.storage_quota_use == 1234 + 41
+        assert self.repository.storage_quota_use == 1234 + 41 + 8
         self.repository.put(H(2), bytes(5678))
-        assert self.repository.storage_quota_use == 1234 + 5678 + 2 * 41
+        assert self.repository.storage_quota_use == 1234 + 5678 + 2 * (41 + 8)
         self.repository.delete(H(1))
-        assert self.repository.storage_quota_use == 1234 + 5678 + 2 * 41  # we have not compacted yet
+        assert self.repository.storage_quota_use == 1234 + 5678 + 2 * (41 + 8)  # we have not compacted yet
         self.repository.commit(compact=False)
-        assert self.repository.storage_quota_use == 1234 + 5678 + 2 * 41  # we have not compacted yet
+        assert self.repository.storage_quota_use == 1234 + 5678 + 2 * (41 + 8)  # we have not compacted yet
         self.reopen()
         with self.repository:
             # Open new transaction; hints and thus quota data is not loaded unless needed.
             self.repository.put(H(3), b'')
             self.repository.delete(H(3))
-            assert self.repository.storage_quota_use == 1234 + 5678 + 3 * 41  # we have not compacted yet
+            assert self.repository.storage_quota_use == 1234 + 5678 + 3 * (41 + 8)  # we have not compacted yet
             self.repository.commit(compact=True)
-            assert self.repository.storage_quota_use == 5678 + 41
+            assert self.repository.storage_quota_use == 5678 + 41 + 8
 
     def test_exceed_quota(self):
         assert self.repository.storage_quota_use == 0
-        self.repository.storage_quota = 50
+        self.repository.storage_quota = 80
         self.repository.put(H(1), b'')
-        assert self.repository.storage_quota_use == 41
+        assert self.repository.storage_quota_use == 41 + 8
         self.repository.commit(compact=False)
         with pytest.raises(Repository.StorageQuotaExceeded):
             self.repository.put(H(2), b'')
-        assert self.repository.storage_quota_use == 82
+        assert self.repository.storage_quota_use == (41 + 8) * 2
         with pytest.raises(Repository.StorageQuotaExceeded):
             self.repository.commit(compact=False)
-        assert self.repository.storage_quota_use == 82
+        assert self.repository.storage_quota_use == (41 + 8) * 2
         self.reopen()
         with self.repository:
-            self.repository.storage_quota = 100
+            self.repository.storage_quota = 150
             # Open new transaction; hints and thus quota data is not loaded unless needed.
             self.repository.put(H(1), b'')
-            assert self.repository.storage_quota_use == 82  # we have 2 puts for H(1) here and not yet compacted.
+            assert self.repository.storage_quota_use == (41 + 8) * 2  # we have 2 puts for H(1) here and not yet compacted.
             self.repository.commit(compact=True)
-            assert self.repository.storage_quota_use == 41  # now we have compacted.
+            assert self.repository.storage_quota_use == 41 + 8  # now we have compacted.
 
 
 class NonceReservation(RepositoryTestCaseBase):
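
The quota numbers used throughout the quota tests above follow directly from
the PUT2 entry layout: each stored object is accounted as its data size plus
49 bytes of entry overhead (an illustrative sketch, not borg code)::

    PUT2_OVERHEAD = 41 + 8  # 9 byte header + 32 byte key + 8 byte xxh64 digest

    def expected_quota_use(*data_sizes):
        return sum(size + PUT2_OVERHEAD for size in data_sizes)

    assert expected_quota_use(1234, 5678) == 1234 + 5678 + 2 * (41 + 8)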