瀏覽代碼

generalize hashindex code for any key length

Currently, we only use SHA-256 hashes as keys, so the key length is always 32 bytes.
But instead of hardcoding 32 everywhere, using a key_size attribute is more
readable and also more flexible for the future.
Thomas Waldmann 10 年之前
父節點
當前提交
b180158876
共有 1 個文件被更改,包括 24 次插入19 次删除
  1. 24 19
      borg/hashindex.pyx

+ 24 - 19
borg/hashindex.pyx

@@ -32,9 +32,10 @@ cimport cython
 @cython.internal
 cdef class IndexBase:
     cdef HashIndex *index
-    key_size = 32
+    cdef int key_size
 
-    def __cinit__(self, capacity=0, path=None):
+    def __cinit__(self, capacity=0, path=None, key_size=32):
+        self.key_size = key_size
         if path:
             self.index = hashindex_read(os.fsencode(path))
             if not self.index:
@@ -67,7 +68,7 @@ cdef class IndexBase:
             self[key] = value
 
     def __delitem__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         if not hashindex_delete(self.index, <char *>key):
             raise Exception('hashindex_delete failed')
 
@@ -96,14 +97,14 @@ cdef class NSIndex(IndexBase):
     value_size = 8
 
     def __getitem__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         if not data:
             raise KeyError
         return _le32toh(data[0]), _le32toh(data[1])
 
     def __setitem__(self, key, value):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         cdef int[2] data
         data[0] = _htole32(value[0])
         data[1] = _htole32(value[1])
@@ -111,20 +112,20 @@ cdef class NSIndex(IndexBase):
             raise Exception('hashindex_set failed')
 
     def __contains__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         return data != NULL
 
     def iteritems(self, marker=None):
         cdef const void *key
-        iter = NSKeyIterator()
+        iter = NSKeyIterator(self.key_size)
         iter.idx = self
         iter.index = self.index
         if marker:
             key = hashindex_get(self.index, <char *>marker)
             if marker is None:
                 raise IndexError
-            iter.key = key - 32
+            iter.key = key - self.key_size
         return iter
 
 
@@ -132,9 +133,11 @@ cdef class NSKeyIterator:
     cdef NSIndex idx
     cdef HashIndex *index
     cdef const void *key
+    cdef int key_size
 
-    def __cinit__(self):
+    def __cinit__(self, key_size):
         self.key = NULL
+        self.key_size = key_size
 
     def __iter__(self):
         return self
@@ -143,8 +146,8 @@ cdef class NSKeyIterator:
         self.key = hashindex_next_key(self.index, <char *>self.key)
         if not self.key:
             raise StopIteration
-        cdef int *value = <int *>(self.key + 32)
-        return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]))
+        cdef int *value = <int *>(self.key + self.key_size)
+        return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]))
 
 
 cdef class ChunkIndex(IndexBase):
@@ -152,14 +155,14 @@ cdef class ChunkIndex(IndexBase):
     value_size = 12
 
     def __getitem__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         if not data:
             raise KeyError
         return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2])
 
     def __setitem__(self, key, value):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         cdef int[3] data
         data[0] = _htole32(value[0])
         data[1] = _htole32(value[1])
@@ -168,20 +171,20 @@ cdef class ChunkIndex(IndexBase):
             raise Exception('hashindex_set failed')
 
     def __contains__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         return data != NULL
 
     def iteritems(self, marker=None):
         cdef const void *key
-        iter = ChunkKeyIterator()
+        iter = ChunkKeyIterator(self.key_size)
         iter.idx = self
         iter.index = self.index
         if marker:
             key = hashindex_get(self.index, <char *>marker)
             if marker is None:
                 raise IndexError
-            iter.key = key - 32
+            iter.key = key - self.key_size
         return iter
 
     def summarize(self):
@@ -199,9 +202,11 @@ cdef class ChunkKeyIterator:
     cdef ChunkIndex idx
     cdef HashIndex *index
     cdef const void *key
+    cdef int key_size
 
-    def __cinit__(self):
+    def __cinit__(self, key_size):
         self.key = NULL
+        self.key_size = key_size
 
     def __iter__(self):
         return self
@@ -210,5 +215,5 @@ cdef class ChunkKeyIterator:
         self.key = hashindex_next_key(self.index, <char *>self.key)
         if not self.key:
             raise StopIteration
-        cdef int *value = <int *>(self.key + 32)
-        return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))
+        cdef int *value = <int *>(self.key + self.key_size)
+        return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))