浏览代码

hashindex: add NSIndex1 write support, fix iteritems

to get the unit tests working, we better have write support for the
borg 1.x format.

iteritems grew support for the marker kwarg.
Thomas Waldmann 7 月之前
父节点
当前提交
7fffb0dbb1
共有 1 个文件被更改,包括 38 次插入23 次删除
  1. 38 23
      src/borg/hashindex.pyx

+ 38 - 23
src/borg/hashindex.pyx

@@ -116,11 +116,16 @@ NSIndex1Entry = namedtuple('NSIndex1bEntry', 'segment offset')
 class NSIndex1:  # legacy borg 1.x
 
     MAX_VALUE = 2**32 - 1  # borghash has the full uint32_t range
+    MAGIC = b"BORG_IDX"  # borg 1.x
+    HEADER_FMT = "<8sIIBB"  # magic, entries, buckets, ksize, vsize
+    VALUE_FMT = "<II"  # borg 1.x on-disk: little-endian segment, offset
+    KEY_SIZE = 32
+    VALUE_SIZE = 8
 
     def __init__(self, capacity=1000, path=None, permit_compact=False, usable=None):
         if usable is not None:
             capacity = usable * 2  # load factor 0.5
-        self.ht = _borghash.HashTableNT(key_size=32, value_format="<II", namedtuple_type=NSIndex1Entry, capacity=capacity)
+        self.ht = _borghash.HashTableNT(key_size=self.KEY_SIZE, value_format=self.VALUE_FMT, namedtuple_type=NSIndex1Entry, capacity=capacity)
         if path:
             self._read(path)
 
@@ -155,8 +160,13 @@ class NSIndex1:  # legacy borg 1.x
                 return default
             raise
 
-    def iteritems(self):
-        yield from self.ht.iteritems()
+    def iteritems(self, marker=None):
+        do_yield = marker is None
+        for key, value in self.ht.iteritems():
+            if do_yield:
+                yield key, value
+            else:
+                do_yield = key == marker
 
     def compact(self):
         return 0
@@ -168,12 +178,16 @@ class NSIndex1:  # legacy borg 1.x
     def read(cls, path, permit_compact=False):
         return cls(path=path)
 
-    def write(self, path):
-        self.ht.write(path)  # only for unit tests
-
     def size(self):
         return self.ht.size()  # not quite correct as this is not the on-disk read-only format.
 
+    def write(self, path):
+        if isinstance(path, str):
+            with open(path, 'wb') as fd:
+                self._write_fd(fd)
+        else:
+            self._write_fd(path)
+
     def _read(self, path):
         if isinstance(path, str):
             with open(path, 'rb') as fd:
@@ -181,27 +195,28 @@ class NSIndex1:  # legacy borg 1.x
         else:
             self._read_fd(path)
 
+    def _write_fd(self, fd):
+        used = len(self.ht)
+        header_bytes = struct.pack(self.HEADER_FMT, self.MAGIC, used, used, self.KEY_SIZE, self.VALUE_SIZE)
+        fd.write(header_bytes)
+        count = 0
+        for key, _ in self.ht.iteritems():
+            value = self.ht._get_raw(key)
+            fd.write(key)
+            fd.write(value)
+            count += 1
+        assert count == used
+
     def _read_fd(self, fd):
-        magic = fd.read(8)
-        fd.seek(0)
-        if magic == b"BORG_IDX":  # used for borg transfer borg 1.x -> borg 2
-            self._read_fd_borg1(fd)
-        if magic == b"BORGHASH":  # only for unit tests
-            self.ht = _borghash.HashTableNT.read(fd)
-
-    def _read_fd_borg1(self, fd):
-        MAGIC = b"BORG_IDX"  # borg 1.x
-        HEADER_FMT = "<8sIIBB"  # magic, entries, buckets, ksize, vsize
-        header_size = struct.calcsize(HEADER_FMT)
+        header_size = struct.calcsize(self.HEADER_FMT)
         header_bytes = fd.read(header_size)
         if len(header_bytes) < header_size:
             raise ValueError(f"Invalid file, file is too short (header).")
-        magic, entries, buckets, ksize, vsize = struct.unpack(HEADER_FMT, header_bytes)
-        if magic != MAGIC:
-            raise ValueError(f"Invalid file, magic {MAGIC.decode()} not found.")
-        VALUE_FMT = "<II"  # borg 1.x on-disk: little-endian segment, offset
-        assert ksize == 32, "invalid key size"
-        assert vsize == struct.calcsize(VALUE_FMT), "invalid value size"
+        magic, entries, buckets, ksize, vsize = struct.unpack(self.HEADER_FMT, header_bytes)
+        if magic != self.MAGIC:
+            raise ValueError(f"Invalid file, magic {self.MAGIC.decode()} not found.")
+        assert ksize == self.KEY_SIZE, "invalid key size"
+        assert vsize == self.VALUE_SIZE, "invalid value size"
         buckets_size = buckets * (ksize + vsize)
         current_pos = fd.tell()
         end_of_file = fd.seek(0, os.SEEK_END)