Browse Source

generalize hashindex code for any key length

Currently, we only use sha256 hashes as keys, so the key length is always 32.
But instead of hardcoding 32 everywhere, using key_size is more readable
and also more flexible for the future.
Thomas Waldmann 10 years ago
parent
commit
b180158876
1 changed file with 24 additions and 19 deletions
  1. 24 19
      borg/hashindex.pyx

+ 24 - 19
borg/hashindex.pyx

@@ -32,9 +32,10 @@ cimport cython
 @cython.internal
 cdef class IndexBase:
     cdef HashIndex *index
-    key_size = 32
+    cdef int key_size
 
-    def __cinit__(self, capacity=0, path=None):
+    def __cinit__(self, capacity=0, path=None, key_size=32):
+        self.key_size = key_size
         if path:
             self.index = hashindex_read(os.fsencode(path))
             if not self.index:
@@ -67,7 +68,7 @@ cdef class IndexBase:
             self[key] = value
 
     def __delitem__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         if not hashindex_delete(self.index, <char *>key):
             raise Exception('hashindex_delete failed')
 
@@ -96,14 +97,14 @@ cdef class NSIndex(IndexBase):
     value_size = 8
 
     def __getitem__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         if not data:
             raise KeyError
         return _le32toh(data[0]), _le32toh(data[1])
 
     def __setitem__(self, key, value):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         cdef int[2] data
         data[0] = _htole32(value[0])
         data[1] = _htole32(value[1])
@@ -111,20 +112,20 @@ cdef class NSIndex(IndexBase):
             raise Exception('hashindex_set failed')
 
     def __contains__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         return data != NULL
 
     def iteritems(self, marker=None):
         cdef const void *key
-        iter = NSKeyIterator()
+        iter = NSKeyIterator(self.key_size)
         iter.idx = self
         iter.index = self.index
         if marker:
             key = hashindex_get(self.index, <char *>marker)
             if marker is None:
                 raise IndexError
-            iter.key = key - 32
+            iter.key = key - self.key_size
         return iter
 
 
@@ -132,9 +133,11 @@ cdef class NSKeyIterator:
     cdef NSIndex idx
     cdef HashIndex *index
     cdef const void *key
+    cdef int key_size
 
-    def __cinit__(self):
+    def __cinit__(self, key_size):
         self.key = NULL
+        self.key_size = key_size
 
     def __iter__(self):
         return self
@@ -143,8 +146,8 @@ cdef class NSKeyIterator:
         self.key = hashindex_next_key(self.index, <char *>self.key)
         if not self.key:
             raise StopIteration
-        cdef int *value = <int *>(self.key + 32)
-        return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]))
+        cdef int *value = <int *>(self.key + self.key_size)
+        return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]))
 
 
 cdef class ChunkIndex(IndexBase):
@@ -152,14 +155,14 @@ cdef class ChunkIndex(IndexBase):
     value_size = 12
 
     def __getitem__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         if not data:
             raise KeyError
         return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2])
 
     def __setitem__(self, key, value):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         cdef int[3] data
         data[0] = _htole32(value[0])
         data[1] = _htole32(value[1])
@@ -168,20 +171,20 @@ cdef class ChunkIndex(IndexBase):
             raise Exception('hashindex_set failed')
 
     def __contains__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         return data != NULL
 
     def iteritems(self, marker=None):
         cdef const void *key
-        iter = ChunkKeyIterator()
+        iter = ChunkKeyIterator(self.key_size)
         iter.idx = self
         iter.index = self.index
         if marker:
             key = hashindex_get(self.index, <char *>marker)
             if marker is None:
                 raise IndexError
-            iter.key = key - 32
+            iter.key = key - self.key_size
         return iter
 
     def summarize(self):
@@ -199,9 +202,11 @@ cdef class ChunkKeyIterator:
     cdef ChunkIndex idx
     cdef HashIndex *index
     cdef const void *key
+    cdef int key_size
 
-    def __cinit__(self):
+    def __cinit__(self, key_size):
         self.key = NULL
+        self.key_size = key_size
 
     def __iter__(self):
         return self
@@ -210,5 +215,5 @@ cdef class ChunkKeyIterator:
         self.key = hashindex_next_key(self.index, <char *>self.key)
         if not self.key:
             raise StopIteration
-        cdef int *value = <int *>(self.key + 32)
-        return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))
+        cdef int *value = <int *>(self.key + self.key_size)
+        return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))