| 
					
				 | 
			
			
				@@ -25,7 +25,7 @@ from .locking import Lock, LockError, LockErrorT 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from .logger import create_logger 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from .lrucache import LRUCache 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from .platform import SaveFile, SyncFile, sync_dir, safe_fadvise 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-from .checksums import crc32 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+from .checksums import crc32, StreamingXXH64 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 from .crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 logger = create_logger(__name__) 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -34,9 +34,11 @@ MAGIC = b'BORG_SEG' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 MAGIC_LEN = len(MAGIC) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 ATTIC_MAGIC = b'ATTICSEG' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 assert len(ATTIC_MAGIC) == MAGIC_LEN 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 TAG_PUT = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 TAG_DELETE = 1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 TAG_COMMIT = 2 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+TAG_PUT2 = 3 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 # Highest ID usable as TAG_* value 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				 # 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -788,7 +790,7 @@ class Repository: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 in_index = self.index.get(key) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 is_index_object = in_index == (segment, offset) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                if tag == TAG_PUT and is_index_object: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if tag in (TAG_PUT2, TAG_PUT) and is_index_object: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         new_segment, offset = self.io.write_put(key, data, raise_full=True) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     except LoggedIO.SegmentFull: 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -798,7 +800,10 @@ class Repository: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     segments.setdefault(new_segment, 0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     segments[new_segment] += 1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     segments[segment] -= 1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                elif tag == TAG_PUT and not is_index_object: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    if tag == TAG_PUT: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        # old tag is PUT, but new will be PUT2 and use a bit more storage 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        self.storage_quota_use += self.io.ENTRY_HASH_SIZE 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                elif tag in (TAG_PUT2, TAG_PUT) and not is_index_object: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # If this is a PUT shadowed by a later tag, then it will be gone when this segment is deleted after 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # this loop. Therefore it is removed from the shadow index. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     try: 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -807,7 +812,10 @@ class Repository: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         # do not remove entry with empty shadowed_segments list here, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         # it is needed for shadowed_put_exists code (see below)! 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         pass 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    self.storage_quota_use -= len(data) + self.io.HEADER_ID_SIZE 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    if tag == TAG_PUT2: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        self.storage_quota_use -= len(data) + self.io.HEADER_ID_SIZE + self.io.ENTRY_HASH_SIZE 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    elif tag == TAG_PUT: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        self.storage_quota_use -= len(data) + self.io.HEADER_ID_SIZE 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 elif tag == TAG_DELETE and not in_index: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # If the shadow index doesn't contain this key, then we can't say if there's a shadowed older tag, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # therefore we do not drop the delete, but write it to a current segment. 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -830,7 +838,7 @@ class Repository: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         # Consider the following series of operations if we would not do this, ie. this entire if: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         # would be removed. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         # Columns are segments, lines are different keys (line 1 = some key, line 2 = some other key) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        # Legend: P=TAG_PUT, D=TAG_DELETE, c=commit, i=index is written for latest commit 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        # Legend: P=TAG_PUT/TAG_PUT2, D=TAG_DELETE, c=commit, i=index is written for latest commit 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         # 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         # Segment | 1     | 2   | 3 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         # --------+-------+-----+------ 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -899,7 +907,7 @@ class Repository: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         """some code shared between replay_segments and check""" 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         self.segments[segment] = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         for tag, key, offset, size in objects: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if tag == TAG_PUT: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if tag in (TAG_PUT2, TAG_PUT): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # If this PUT supersedes an older PUT, mark the old segment for compaction and count the free space 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     s, _ = self.index[key] 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -950,7 +958,7 @@ class Repository: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         self.compact[segment] = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         for tag, key, offset, size in self.io.iter_objects(segment, read_data=False): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if tag == TAG_PUT: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if tag in (TAG_PUT2, TAG_PUT): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 if self.index.get(key, (-1, -1)) != (segment, offset): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # This PUT is superseded later 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     self.compact[segment] += size 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -1169,7 +1177,7 @@ class Repository: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # also, for the next segment, we need to start at offset 0. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     start_offset = 0 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                if tag == TAG_PUT and (segment, offset) == self.index.get(id): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if tag in (TAG_PUT2, TAG_PUT) and (segment, offset) == self.index.get(id): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     # we have found an existing and current object 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     result.append(id) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                     if len(result) == limit: 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -1208,7 +1216,7 @@ class Repository: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # be in the repo index (and we won't need it in the shadow_index). 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             self._delete(id, segment, offset, update_shadow_index=False) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         segment, offset = self.io.write_put(id, data) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        self.storage_quota_use += len(data) + self.io.HEADER_ID_SIZE 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        self.storage_quota_use += len(data) + self.io.HEADER_ID_SIZE + self.io.ENTRY_HASH_SIZE 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         self.segments.setdefault(segment, 0) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         self.segments[segment] += 1 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         self.index[id] = segment, offset 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -1278,6 +1286,7 @@ class LoggedIO: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     COMMIT = crc_fmt.pack(crc32(_commit)) + _commit 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     HEADER_ID_SIZE = header_fmt.size + 32 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    ENTRY_HASH_SIZE = 8 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     def __init__(self, path, limit, segments_per_dir, capacity=90): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         self.path = path 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -1475,7 +1484,8 @@ class LoggedIO: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         Return object iterator for *segment*. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         If read_data is False then include_data must be False as well. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        Integrity checks are skipped: all data obtained from the iterator must be considered informational. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        See the _read() docstring about confidence in the returned data. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         The iterator returns four-tuples of (tag, key, offset, data|size). 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         """ 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -1491,7 +1501,7 @@ class LoggedIO: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         header = fd.read(self.header_fmt.size) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         while header: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             size, tag, key, data = self._read(fd, header, segment, offset, 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                                              (TAG_PUT, TAG_DELETE, TAG_COMMIT), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                              (TAG_PUT2, TAG_DELETE, TAG_COMMIT, TAG_PUT), 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                                               read_data=read_data) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             if include_data: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 yield tag, key, offset, data 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -1528,8 +1538,25 @@ class LoggedIO: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         dst_fd.write(MAGIC) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         while len(d) >= self.header_fmt.size: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                             crc, size, tag = self.header_fmt.unpack(d[:self.header_fmt.size]) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            if size > MAX_OBJECT_SIZE or tag > MAX_TAG_ID or size < self.header_fmt.size \ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                               or size > len(d) or crc32(d[4:size]) & 0xffffffff != crc: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            size_invalid = size > MAX_OBJECT_SIZE or size < self.header_fmt.size or size > len(d) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            if size_invalid or tag > MAX_TAG_ID: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                d = d[1:] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            if tag == TAG_PUT2: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                c_offset = self.HEADER_ID_SIZE + self.ENTRY_HASH_SIZE 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                # skip if header is invalid 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                if crc32(d[4:c_offset]) & 0xffffffff != crc: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                    d = d[1:] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                    continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                # skip if content is invalid 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                if self.entry_hash(d[4:self.HEADER_ID_SIZE], d[c_offset:size]) != d[self.HEADER_ID_SIZE:c_offset]: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                    d = d[1:] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                    continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            elif tag in (TAG_DELETE, TAG_COMMIT, TAG_PUT): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                if crc32(d[4:size]) & 0xffffffff != crc: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                    d = d[1:] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                    continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            else:  # tag unknown 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                                 d = d[1:] 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                                 continue 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                             dst_fd.write(d[:size]) 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -1538,72 +1565,108 @@ class LoggedIO: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         del d 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                         data.release() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+    def entry_hash(self, *data): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        h = StreamingXXH64() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        for d in data: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            h.update(d) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        return h.digest() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     def read(self, segment, offset, id, read_data=True): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         Read entry from *segment* at *offset* with *id*. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        If read_data is False the size of the entry is returned instead. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        If read_data is False the size of the entry is returned instead and integrity checks are skipped. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        The return value should thus be considered informational. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        See the _read() docstring about confidence in the returned data. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if segment == self.segment and self._write_fd: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             self._write_fd.sync() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         fd = self.get_fd(segment) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         fd.seek(offset) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         header = fd.read(self.header_fmt.size) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        size, tag, key, data = self._read(fd, header, segment, offset, (TAG_PUT,), read_data) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        size, tag, key, data = self._read(fd, header, segment, offset, (TAG_PUT2, TAG_PUT), read_data) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if id != key: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             raise IntegrityError('Invalid segment entry header, is not for wanted id [segment {}, offset {}]'.format( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				                 segment, offset)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         return data if read_data else size 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     def _read(self, fd, header, segment, offset, acceptable_tags, read_data=True): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        # some code shared by read() and iter_objects() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        Code shared by read() and iter_objects(). 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        Confidence in returned data: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        PUT2 tags, read_data == True: crc32 check (header) plus digest check (header+data) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        PUT2 tags, read_data == False: crc32 check (header) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        PUT tags, read_data == True: crc32 check (header+data) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        PUT tags, read_data == False: crc32 check can not be done, all data obtained must be considered informational 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        """ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        def check_crc32(wanted, header, *data): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            result = crc32(memoryview(header)[4:])  # skip first 32 bits of the header, they contain the crc. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            for d in data: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                result = crc32(d, result) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if result & 0xffffffff != wanted: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                raise IntegrityError(f'Segment entry header checksum mismatch [segment {segment}, offset {offset}]') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+ 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         # See comment on MAX_TAG_ID for details 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         assert max(acceptable_tags) <= MAX_TAG_ID, 'Exceeding MAX_TAG_ID will break backwards compatibility' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        key = data = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         fmt = self.header_fmt 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         try: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             hdr_tuple = fmt.unpack(header) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         except struct.error as err: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            raise IntegrityError('Invalid segment entry header [segment {}, offset {}]: {}'.format( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                segment, offset, err)) from None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            raise IntegrityError(f'Invalid segment entry header [segment {segment}, offset {offset}]: {err}') from None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         crc, size, tag = hdr_tuple 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         length = size - fmt.size  # we already read the header 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if size > MAX_OBJECT_SIZE: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # if you get this on an archive made with borg < 1.0.7 and millions of files and 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # you need to restore it, you can disable this check by using "if False:" above. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            raise IntegrityError('Invalid segment entry size {} - too big [segment {}, offset {}]'.format( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                size, segment, offset)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            raise IntegrityError(f'Invalid segment entry size {size} - too big [segment {segment}, offset {offset}]') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         if size < fmt.size: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            raise IntegrityError('Invalid segment entry size {} - too small [segment {}, offset {}]'.format( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                size, segment, offset)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if tag in (TAG_PUT, TAG_DELETE): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            raise IntegrityError(f'Invalid segment entry size {size} - too small [segment {segment}, offset {offset}]') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if tag not in (TAG_PUT2, TAG_DELETE, TAG_COMMIT, TAG_PUT): 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            raise IntegrityError(f'Invalid segment entry header, did not get a known tag ' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                 f'[segment {segment}, offset {offset}]') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if tag not in acceptable_tags: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            raise IntegrityError(f'Invalid segment entry header, did not get acceptable tag ' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                 f'[segment {segment}, offset {offset}]') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        if tag == TAG_COMMIT: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            check_crc32(crc, header) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            # that's all for COMMITs. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            # all other tags (TAG_PUT2, TAG_DELETE, TAG_PUT) have a key 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             key = fd.read(32) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             length -= 32 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             if len(key) != 32: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                raise IntegrityError( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    'Segment entry key short read [segment {}, offset {}]: expected {}, got {} bytes'.format( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                        segment, offset, 32, len(key))) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            key = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if read_data and tag == TAG_PUT: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            data = fd.read(length) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if len(data) != length: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                raise IntegrityError('Segment entry data short read [segment {}, offset {}]: expected {}, got {} bytes'.format( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    segment, offset, length, len(data))) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if crc32(data, crc32(key, crc32(memoryview(header)[4:]))) & 0xffffffff != crc: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                raise IntegrityError('Segment entry checksum mismatch [segment {}, offset {}]'.format( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    segment, offset)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            data = None 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            if length > 0: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                oldpos = fd.tell() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                seeked = fd.seek(length, os.SEEK_CUR) - oldpos 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                if seeked != length: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                    raise IntegrityError('Segment entry data short seek [segment {}, offset {}]: expected {}, got {} bytes'.format( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                            segment, offset, length, seeked)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        if tag not in acceptable_tags: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-            raise IntegrityError('Invalid segment entry header, did not get acceptable tag [segment {}, offset {}]'.format( 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-                segment, offset)) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                raise IntegrityError(f'Segment entry key short read [segment {segment}, offset {offset}]: ' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                     f'expected {32}, got {len(key)} bytes') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            if tag == TAG_DELETE: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                check_crc32(crc, header, key) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                # that's all for DELETEs. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+            else: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                # TAG_PUT: we can not do a crc32 header check here, because the crc32 is computed over header+data! 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                #          for the check, see code below when read_data is True. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if tag == TAG_PUT2: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    entry_hash = fd.read(self.ENTRY_HASH_SIZE) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    length -= self.ENTRY_HASH_SIZE 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    if len(entry_hash) != self.ENTRY_HASH_SIZE: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        raise IntegrityError(f'Segment entry hash short read [segment {segment}, offset {offset}]: ' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                             f'expected {self.ENTRY_HASH_SIZE}, got {len(entry_hash)} bytes') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    check_crc32(crc, header, key, entry_hash) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                if not read_data:  # seek over data 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    oldpos = fd.tell() 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    seeked = fd.seek(length, os.SEEK_CUR) - oldpos 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    if seeked != length: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        raise IntegrityError(f'Segment entry data short seek [segment {segment}, offset {offset}]: ' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                             f'expected {length}, got {seeked} bytes') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                else:  # read data! 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    data = fd.read(length) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    if len(data) != length: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        raise IntegrityError(f'Segment entry data short read [segment {segment}, offset {offset}]: ' 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                                             f'expected {length}, got {len(data)} bytes') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    if tag == TAG_PUT2: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        if self.entry_hash(memoryview(header)[4:], key, data) != entry_hash: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                            raise IntegrityError(f'Segment entry hash mismatch [segment {segment}, offset {offset}]') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                    elif tag == TAG_PUT: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+                        check_crc32(crc, header, key, data) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         return size, tag, key, data 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				     def write_put(self, id, data, raise_full=False): 
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -1612,11 +1675,13 @@ class LoggedIO: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             # this would push the segment entry size beyond MAX_OBJECT_SIZE. 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				             raise IntegrityError(f'More than allowed put data [{data_size} > {MAX_DATA_SIZE}]') 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         fd = self.get_write_fd(want_new=(id == Manifest.MANIFEST_ID), raise_full=raise_full) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        size = data_size + self.HEADER_ID_SIZE 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        size = data_size + self.HEADER_ID_SIZE + self.ENTRY_HASH_SIZE 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         offset = self.offset 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        header = self.header_no_crc_fmt.pack(size, TAG_PUT) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        crc = self.crc_fmt.pack(crc32(data, crc32(id, crc32(header))) & 0xffffffff) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-        fd.write(b''.join((crc, header, id, data))) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        header = self.header_no_crc_fmt.pack(size, TAG_PUT2) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        entry_hash = self.entry_hash(header, id, data) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        crc = self.crc_fmt.pack(crc32(entry_hash, crc32(id, crc32(header))) & 0xffffffff) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        fd.write(b''.join((crc, header, id, entry_hash))) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+        fd.write(data) 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         self.offset += size 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         return self.segment, offset 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
	
		
			
				| 
					
				 | 
			
			
				@@ -1641,4 +1706,4 @@ class LoggedIO: 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				         return self.segment - 1  # close_segment() increments it 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				  
			 | 
		
	
		
			
				 | 
				 | 
			
			
				-assert LoggedIO.HEADER_ID_SIZE == 41  # see constants.MAX_OBJECT_SIZE 
			 | 
		
	
		
			
				 | 
				 | 
			
			
				+assert LoggedIO.HEADER_ID_SIZE + LoggedIO.ENTRY_HASH_SIZE == 41 + 8  # see constants.MAX_OBJECT_SIZE 
			 |