123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203 |
- # -*- coding: utf-8 -*-
- import os
# Interface version number exported by this extension module.
# NOTE(review): presumably compared by the importing package to detect a
# stale compiled build -- confirm against the caller.
API_VERSION = 2


# C-level hash table API; the implementation is pulled in from _hashindex.c.
cdef extern from "_hashindex.c":
    # Opaque handle: no fields are declared here, so the table is only ever
    # manipulated through the functions below.
    ctypedef struct HashIndex:
        pass

    # Creating, loading, saving and releasing a table.
    HashIndex *hashindex_init(int capacity, int key_size, int value_size)
    HashIndex *hashindex_read(char *path)
    int hashindex_write(HashIndex *index, char *path)
    void hashindex_free(HashIndex *index)

    # Per-entry operations.
    void *hashindex_get(HashIndex *index, void *key)
    int hashindex_set(HashIndex *index, void *key, void *value)
    int hashindex_delete(HashIndex *index, void *key)
    void *hashindex_next_key(HashIndex *index, void *key)

    # Whole-table queries.
    int hashindex_get_size(HashIndex *index)
    void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, long long *unique_size, long long *unique_csize)

    # 32-bit conversion helpers applied to every stored / loaded value.
    # NOTE(review): names suggest host <-> little-endian byte order -- confirm
    # in _hashindex.c.
    int _htole32(int v)
    int _le32toh(int v)
# Sentinel that lets IndexBase.pop() tell "no default supplied" apart from an
# explicit default of None.
_NoDefault = object()


cdef class IndexBase:
    """Shared behaviour of the index types backed by a C ``HashIndex``.

    Keys are 32 bytes long (``key_size``).  Subclasses provide the class
    attribute ``value_size`` as well as ``__getitem__``, ``__setitem__`` and
    ``__contains__``; the dict-like helpers defined here (``get``, ``pop``,
    ``setdefault``) are built on top of those.
    """
    cdef HashIndex *index
    key_size = 32

    def __cinit__(self, capacity=0, path=None):
        # Either load an existing table from ``path`` or create an empty one
        # sized by ``capacity``.  Both C calls return NULL on failure.
        if path:
            self.index = hashindex_read(<bytes>os.fsencode(path))
            if not self.index:
                raise Exception('hashindex_read failed')
        else:
            self.index = hashindex_init(capacity, self.key_size, self.value_size)
            if not self.index:
                raise Exception('hashindex_init failed')

    def __dealloc__(self):
        # index is NULL when __cinit__ (or clear()) failed to allocate.
        if self.index:
            hashindex_free(self.index)

    @classmethod
    def read(cls, path):
        """Return a new index of this class loaded from the file ``path``."""
        return cls(path=path)

    def write(self, path):
        """Save the index to the file ``path``.

        Raises ``Exception`` if the C writer reports failure.
        """
        if not hashindex_write(self.index, <bytes>os.fsencode(path)):
            raise Exception('hashindex_write failed')

    def clear(self):
        """Discard all entries by replacing the table with a fresh empty one.

        Raises ``Exception`` if the replacement table cannot be allocated.
        """
        hashindex_free(self.index)
        self.index = hashindex_init(0, self.key_size, self.value_size)
        if not self.index:
            raise Exception('hashindex_init failed')

    def setdefault(self, key, value):
        """Store ``value`` under ``key`` unless the key is already present.

        Unlike ``dict.setdefault`` this returns ``None``.
        """
        if key not in self:
            self[key] = value

    def __delitem__(self, key):
        assert len(key) == 32
        if not hashindex_delete(self.index, <char *>key):
            raise Exception('hashindex_delete failed')

    def get(self, key, default=None):
        """Return the value stored for ``key``, or ``default`` if absent."""
        try:
            return self[key]
        except KeyError:
            return default

    def pop(self, key, default=_NoDefault):
        """Remove ``key`` and return the value that was stored for it.

        If the key is absent, return ``default`` when one was supplied and
        re-raise the ``KeyError`` otherwise.
        """
        try:
            value = self[key]
        except KeyError:
            # Identity test: the sentinel must never be confused with a
            # caller-supplied default that merely compares equal to it.
            if default is not _NoDefault:
                return default
            raise
        del self[key]
        return value

    def __len__(self):
        return hashindex_get_size(self.index)
cdef class NSIndex(IndexBase):
    """Index mapping 32-byte keys to a pair of 32-bit integers.

    Each integer is passed through ``_htole32`` before it is stored and
    through ``_le32toh`` when it is read back.
    """
    value_size = 8

    def __getitem__(self, key):
        cdef int *data
        assert len(key) == 32
        data = <int *>hashindex_get(self.index, <char *>key)
        if not data:
            raise KeyError(key)
        return _le32toh(data[0]), _le32toh(data[1])

    def __setitem__(self, key, value):
        cdef int[2] data
        assert len(key) == 32
        data[0] = _htole32(value[0])
        data[1] = _htole32(value[1])
        if not hashindex_set(self.index, <char *>key, data):
            raise Exception('hashindex_set failed')

    def __contains__(self, key):
        cdef int *data
        assert len(key) == 32
        data = <int *>hashindex_get(self.index, <char *>key)
        return data != NULL

    def iteritems(self, marker=None):
        """Return an iterator over ``(key, (v0, v1))`` entries.

        When ``marker`` is given, iteration resumes with the entry that
        follows the key ``marker``.

        Raises ``IndexError`` if ``marker`` is not present in the index.
        """
        cdef const void *key
        it = NSKeyIterator()
        it.idx = self
        it.index = self.index
        if marker:
            assert len(marker) == 32
            key = hashindex_get(self.index, <char *>marker)
            # hashindex_get returns NULL for an unknown key; without this
            # check the iterator would be positioned at NULL - 32.
            if key == NULL:
                raise IndexError
            # hashindex_get points at the value; the key sits 32 bytes before.
            it.key = key - 32
        return it
cdef class NSKeyIterator:
    """Iterator over the entries of an ``NSIndex``.

    Yields ``(key, (v0, v1))`` tuples.  Instances are set up by
    ``NSIndex.iteritems``, which fills in ``idx``, ``index`` and optionally
    the starting ``key``.
    """
    # Owning index object.  It is never read here; presumably it is held so
    # the C table behind ``index`` stays alive while iterating.
    cdef NSIndex idx
    cdef HashIndex *index
    # Entry returned last; NULL means "start from the beginning".
    cdef const void *key
    # Set once the end has been reached so later calls keep raising
    # StopIteration instead of restarting from the NULL start position.
    cdef bint exhausted

    def __cinit__(self):
        self.key = NULL
        self.exhausted = False

    def __iter__(self):
        return self

    def __next__(self):
        cdef int *value
        if self.exhausted:
            raise StopIteration
        self.key = hashindex_next_key(self.index, <char *>self.key)
        if not self.key:
            self.exhausted = True
            raise StopIteration
        # The value is stored directly behind the 32-byte key.
        value = <int *>(self.key + 32)
        return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]))
cdef class ChunkIndex(IndexBase):
    """Index mapping 32-byte keys to a triple of 32-bit integers.

    Each integer is passed through ``_htole32`` before it is stored and
    through ``_le32toh`` when it is read back.
    """
    value_size = 12

    def __getitem__(self, key):
        cdef int *data
        assert len(key) == 32
        data = <int *>hashindex_get(self.index, <char *>key)
        if not data:
            raise KeyError(key)
        return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2])

    def __setitem__(self, key, value):
        cdef int[3] data
        assert len(key) == 32
        data[0] = _htole32(value[0])
        data[1] = _htole32(value[1])
        data[2] = _htole32(value[2])
        if not hashindex_set(self.index, <char *>key, data):
            raise Exception('hashindex_set failed')

    def __contains__(self, key):
        cdef int *data
        assert len(key) == 32
        data = <int *>hashindex_get(self.index, <char *>key)
        return data != NULL

    def iteritems(self, marker=None):
        """Return an iterator over ``(key, (v0, v1, v2))`` entries.

        When ``marker`` is given, iteration resumes with the entry that
        follows the key ``marker``.

        Raises ``IndexError`` if ``marker`` is not present in the index.
        """
        cdef const void *key
        it = ChunkKeyIterator()
        it.idx = self
        it.index = self.index
        if marker:
            assert len(marker) == 32
            key = hashindex_get(self.index, <char *>marker)
            # hashindex_get returns NULL for an unknown key; without this
            # check the iterator would be positioned at NULL - 32.
            if key == NULL:
                raise IndexError
            # hashindex_get points at the value; the key sits 32 bytes before.
            it.key = key - 32
        return it

    def summarize(self):
        """Return ``(total_size, total_csize, unique_size, unique_csize)``.

        The four totals are computed by the C helper ``hashindex_summarize``.
        """
        cdef long long total_size, total_csize, unique_size, unique_csize
        hashindex_summarize(self.index, &total_size, &total_csize, &unique_size, &unique_csize)
        return total_size, total_csize, unique_size, unique_csize
cdef class ChunkKeyIterator:
    """Iterator over the entries of a ``ChunkIndex``.

    Yields ``(key, (v0, v1, v2))`` tuples.  Instances are set up by
    ``ChunkIndex.iteritems``, which fills in ``idx``, ``index`` and
    optionally the starting ``key``.
    """
    # Owning index object.  It is never read here; presumably it is held so
    # the C table behind ``index`` stays alive while iterating.
    cdef ChunkIndex idx
    cdef HashIndex *index
    # Entry returned last; NULL means "start from the beginning".
    cdef const void *key
    # Set once the end has been reached so later calls keep raising
    # StopIteration instead of restarting from the NULL start position.
    cdef bint exhausted

    def __cinit__(self):
        self.key = NULL
        self.exhausted = False

    def __iter__(self):
        return self

    def __next__(self):
        cdef int *value
        if self.exhausted:
            raise StopIteration
        self.key = hashindex_next_key(self.index, <char *>self.key)
        if not self.key:
            self.exhausted = True
            raise StopIteration
        # The value is stored directly behind the 32-byte key.
        value = <int *>(self.key + 32)
        return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))
|