Browse Source

fix reading borg 1.x repo index, fixes #9022

2 fixes:
- add code to update/verify the HashHeader part of the integrity hash. This
  code was missing, which caused a FileIntegrityError when reading the
  borg 1.x repo index.
- when reading a non-compact borg 1.x hash table from disk (like the borg
  repo index), only add the "used" buckets to the in-memory hashtable,
  but not the unused/tombstone buckets.

The corruption described in #9022 happened like this:
- borg failed to read the repo index, because the integrity check failed
- because of open_index(..., auto_recover=True), borg then tried to "fix"
  the index by writing an empty hash table to disk. borg 1.x usually rebuilt
  the index afterwards, but for some reason this did not happen for the user
  in #9022.
Thomas Waldmann 3 weeks ago
parent
commit
d955f8ce75
1 changed file with 12 additions and 0 deletions
  1. 12 0
      src/borg/hashindex.pyx

+ 12 - 0
src/borg/hashindex.pyx

@@ -201,6 +201,10 @@ class NSIndex1(HTProxyMixin, MutableMapping):
         used = len(self.ht)
         header_bytes = struct.pack(self.HEADER_FMT, self.MAGIC, used, used, self.KEY_SIZE, self.VALUE_SIZE)
         fd.write(header_bytes)
+        # record the header as a separate integrity-hash part if supported
+        hash_part = getattr(fd, "hash_part", None)
+        if hash_part:
+            hash_part("HashHeader")
         count = 0
         for key, _ in self.ht.items():
             value = self.ht._get_raw(key)
@@ -214,6 +218,10 @@ class NSIndex1(HTProxyMixin, MutableMapping):
         header_bytes = fd.read(header_size)
         if len(header_bytes) < header_size:
             raise ValueError(f"Invalid file, file is too short (header).")
+        # verify the header as a separate integrity-hash part if supported
+        hash_part = getattr(fd, "hash_part", None)
+        if hash_part:
+            hash_part("HashHeader")
         magic, entries, buckets, ksize, vsize = struct.unpack(self.HEADER_FMT, header_bytes)
         if magic != self.MAGIC:
             raise ValueError(f"Invalid file, magic {self.MAGIC.decode()} not found.")
@@ -228,6 +236,10 @@ class NSIndex1(HTProxyMixin, MutableMapping):
         for i in range(buckets):
             key = fd.read(ksize)
             value = fd.read(vsize)
+            if value.startswith(b'\xFF\xFF\xFF\xFF'):  # LE for 0xffffffff (empty/unused bucket)
+                continue
+            if value.startswith(b'\xFE\xFF\xFF\xFF'):  # LE for 0xfffffffe (deleted/tombstone bucket)
+                continue
             self.ht._set_raw(key, value)
         pos = fd.tell()
         assert pos == end_of_file