Bladeren bron

hashindex: make key_size not hardcoded

Thomas Waldmann 10 jaar geleden
bovenliggende
commit
6dcf51b6e8
2 gewijzigde bestanden met 25 toevoegingen en 20 verwijderingen
  1. 1 1
      attic/_hashindex.c
  2. 24 19
      attic/hashindex.pyx

+ 1 - 1
attic/_hashindex.c

@@ -355,7 +355,7 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_cs
     void *key = NULL;
 
     while((key = hashindex_next_key(index, key))) {
-        values = key + 32;
+        values = key + index->key_size;
         unique_size += values[1];
         unique_csize += values[2];
         size += values[0] * values[1];

+ 24 - 19
attic/hashindex.pyx

@@ -26,9 +26,10 @@ _NoDefault = object()
 
 cdef class IndexBase:
     cdef HashIndex *index
-    key_size = 32
+    cdef int key_size
 
-    def __cinit__(self, capacity=0, path=None):
+    def __cinit__(self, capacity=0, path=None, key_size=32):
+        self.key_size = key_size
         if path:
             self.index = hashindex_read(<bytes>os.fsencode(path))
             if not self.index:
@@ -61,7 +62,7 @@ cdef class IndexBase:
             self[key] = value
 
     def __delitem__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         if not hashindex_delete(self.index, <char *>key):
             raise Exception('hashindex_delete failed')
 
@@ -90,14 +91,14 @@ cdef class NSIndex(IndexBase):
     value_size = 8
 
     def __getitem__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         if not data:
             raise KeyError
         return _le32toh(data[0]), _le32toh(data[1])
 
     def __setitem__(self, key, value):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         cdef int[2] data
         data[0] = _htole32(value[0])
         data[1] = _htole32(value[1])
@@ -105,20 +106,20 @@ cdef class NSIndex(IndexBase):
             raise Exception('hashindex_set failed')
 
     def __contains__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         return data != NULL
 
     def iteritems(self, marker=None):
         cdef const void *key
-        iter = NSKeyIterator()
+        iter = NSKeyIterator(self.key_size)
         iter.idx = self
         iter.index = self.index
         if marker:
             key = hashindex_get(self.index, <char *>marker)
             if marker is None:
                 raise IndexError
-            iter.key = key - 32
+            iter.key = key - self.key_size
         return iter
 
 
@@ -126,9 +127,11 @@ cdef class NSKeyIterator:
     cdef NSIndex idx
     cdef HashIndex *index
     cdef const void *key
+    cdef int key_size
 
-    def __cinit__(self):
+    def __cinit__(self, key_size):
         self.key = NULL
+        self.key_size = key_size
 
     def __iter__(self):
         return self
@@ -137,8 +140,8 @@ cdef class NSKeyIterator:
         self.key = hashindex_next_key(self.index, <char *>self.key)
         if not self.key:
             raise StopIteration
-        cdef int *value = <int *>(self.key + 32)
-        return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]))
+        cdef int *value = <int *>(self.key + self.key_size)
+        return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]))
 
 
 cdef class ChunkIndex(IndexBase):
@@ -146,14 +149,14 @@ cdef class ChunkIndex(IndexBase):
     value_size = 12
 
     def __getitem__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         if not data:
             raise KeyError
         return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2])
 
     def __setitem__(self, key, value):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         cdef int[3] data
         data[0] = _htole32(value[0])
         data[1] = _htole32(value[1])
@@ -162,20 +165,20 @@ cdef class ChunkIndex(IndexBase):
             raise Exception('hashindex_set failed')
 
     def __contains__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         return data != NULL
 
     def iteritems(self, marker=None):
         cdef const void *key
-        iter = ChunkKeyIterator()
+        iter = ChunkKeyIterator(self.key_size)
         iter.idx = self
         iter.index = self.index
         if marker:
             key = hashindex_get(self.index, <char *>marker)
             if marker is None:
                 raise IndexError
-            iter.key = key - 32
+            iter.key = key - self.key_size
         return iter
 
     def summarize(self):
@@ -188,9 +191,11 @@ cdef class ChunkKeyIterator:
     cdef ChunkIndex idx
     cdef HashIndex *index
     cdef const void *key
+    cdef int key_size
 
-    def __cinit__(self):
+    def __cinit__(self, key_size):
         self.key = NULL
+        self.key_size = key_size
 
     def __iter__(self):
         return self
@@ -199,5 +204,5 @@ cdef class ChunkKeyIterator:
         self.key = hashindex_next_key(self.index, <char *>self.key)
         if not self.key:
             raise StopIteration
-        cdef int *value = <int *>(self.key + 32)
-        return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))
+        cdef int *value = <int *>(self.key + self.key_size)
+        return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))