Browse Source

generalize hashindex code for any key length

currently, we only use sha256 hashes as keys, so the key length is always 32.
but instead of hardcoding 32 everywhere, using key_size is more
readable and also more flexible for the future.
Thomas Waldmann 10 years ago
parent
commit
b180158876
1 changed files with 24 additions and 19 deletions
  1. 24 19
      borg/hashindex.pyx

+ 24 - 19
borg/hashindex.pyx

@@ -32,9 +32,10 @@ cimport cython
 @cython.internal
 @cython.internal
 cdef class IndexBase:
 cdef class IndexBase:
     cdef HashIndex *index
     cdef HashIndex *index
-    key_size = 32
+    cdef int key_size
 
 
-    def __cinit__(self, capacity=0, path=None):
+    def __cinit__(self, capacity=0, path=None, key_size=32):
+        self.key_size = key_size
         if path:
         if path:
             self.index = hashindex_read(os.fsencode(path))
             self.index = hashindex_read(os.fsencode(path))
             if not self.index:
             if not self.index:
@@ -67,7 +68,7 @@ cdef class IndexBase:
             self[key] = value
             self[key] = value
 
 
     def __delitem__(self, key):
     def __delitem__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         if not hashindex_delete(self.index, <char *>key):
         if not hashindex_delete(self.index, <char *>key):
             raise Exception('hashindex_delete failed')
             raise Exception('hashindex_delete failed')
 
 
@@ -96,14 +97,14 @@ cdef class NSIndex(IndexBase):
     value_size = 8
     value_size = 8
 
 
     def __getitem__(self, key):
     def __getitem__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         data = <int *>hashindex_get(self.index, <char *>key)
         if not data:
         if not data:
             raise KeyError
             raise KeyError
         return _le32toh(data[0]), _le32toh(data[1])
         return _le32toh(data[0]), _le32toh(data[1])
 
 
     def __setitem__(self, key, value):
     def __setitem__(self, key, value):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         cdef int[2] data
         cdef int[2] data
         data[0] = _htole32(value[0])
         data[0] = _htole32(value[0])
         data[1] = _htole32(value[1])
         data[1] = _htole32(value[1])
@@ -111,20 +112,20 @@ cdef class NSIndex(IndexBase):
             raise Exception('hashindex_set failed')
             raise Exception('hashindex_set failed')
 
 
     def __contains__(self, key):
     def __contains__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         data = <int *>hashindex_get(self.index, <char *>key)
         return data != NULL
         return data != NULL
 
 
     def iteritems(self, marker=None):
     def iteritems(self, marker=None):
         cdef const void *key
         cdef const void *key
-        iter = NSKeyIterator()
+        iter = NSKeyIterator(self.key_size)
         iter.idx = self
         iter.idx = self
         iter.index = self.index
         iter.index = self.index
         if marker:
         if marker:
             key = hashindex_get(self.index, <char *>marker)
             key = hashindex_get(self.index, <char *>marker)
             if marker is None:
             if marker is None:
                 raise IndexError
                 raise IndexError
-            iter.key = key - 32
+            iter.key = key - self.key_size
         return iter
         return iter
 
 
 
 
@@ -132,9 +133,11 @@ cdef class NSKeyIterator:
     cdef NSIndex idx
     cdef NSIndex idx
     cdef HashIndex *index
     cdef HashIndex *index
     cdef const void *key
     cdef const void *key
+    cdef int key_size
 
 
-    def __cinit__(self):
+    def __cinit__(self, key_size):
         self.key = NULL
         self.key = NULL
+        self.key_size = key_size
 
 
     def __iter__(self):
     def __iter__(self):
         return self
         return self
@@ -143,8 +146,8 @@ cdef class NSKeyIterator:
         self.key = hashindex_next_key(self.index, <char *>self.key)
         self.key = hashindex_next_key(self.index, <char *>self.key)
         if not self.key:
         if not self.key:
             raise StopIteration
             raise StopIteration
-        cdef int *value = <int *>(self.key + 32)
-        return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]))
+        cdef int *value = <int *>(self.key + self.key_size)
+        return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]))
 
 
 
 
 cdef class ChunkIndex(IndexBase):
 cdef class ChunkIndex(IndexBase):
@@ -152,14 +155,14 @@ cdef class ChunkIndex(IndexBase):
     value_size = 12
     value_size = 12
 
 
     def __getitem__(self, key):
     def __getitem__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         data = <int *>hashindex_get(self.index, <char *>key)
         if not data:
         if not data:
             raise KeyError
             raise KeyError
         return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2])
         return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2])
 
 
     def __setitem__(self, key, value):
     def __setitem__(self, key, value):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         cdef int[3] data
         cdef int[3] data
         data[0] = _htole32(value[0])
         data[0] = _htole32(value[0])
         data[1] = _htole32(value[1])
         data[1] = _htole32(value[1])
@@ -168,20 +171,20 @@ cdef class ChunkIndex(IndexBase):
             raise Exception('hashindex_set failed')
             raise Exception('hashindex_set failed')
 
 
     def __contains__(self, key):
     def __contains__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         data = <int *>hashindex_get(self.index, <char *>key)
         return data != NULL
         return data != NULL
 
 
     def iteritems(self, marker=None):
     def iteritems(self, marker=None):
         cdef const void *key
         cdef const void *key
-        iter = ChunkKeyIterator()
+        iter = ChunkKeyIterator(self.key_size)
         iter.idx = self
         iter.idx = self
         iter.index = self.index
         iter.index = self.index
         if marker:
         if marker:
             key = hashindex_get(self.index, <char *>marker)
             key = hashindex_get(self.index, <char *>marker)
             if marker is None:
             if marker is None:
                 raise IndexError
                 raise IndexError
-            iter.key = key - 32
+            iter.key = key - self.key_size
         return iter
         return iter
 
 
     def summarize(self):
     def summarize(self):
@@ -199,9 +202,11 @@ cdef class ChunkKeyIterator:
     cdef ChunkIndex idx
     cdef ChunkIndex idx
     cdef HashIndex *index
     cdef HashIndex *index
     cdef const void *key
     cdef const void *key
+    cdef int key_size
 
 
-    def __cinit__(self):
+    def __cinit__(self, key_size):
         self.key = NULL
         self.key = NULL
+        self.key_size = key_size
 
 
     def __iter__(self):
     def __iter__(self):
         return self
         return self
@@ -210,5 +215,5 @@ cdef class ChunkKeyIterator:
         self.key = hashindex_next_key(self.index, <char *>self.key)
         self.key = hashindex_next_key(self.index, <char *>self.key)
         if not self.key:
         if not self.key:
             raise StopIteration
             raise StopIteration
-        cdef int *value = <int *>(self.key + 32)
-        return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))
+        cdef int *value = <int *>(self.key + self.key_size)
+        return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))