2
0

hashindex.pyx 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209
  1. # -*- coding: utf-8 -*-
  2. import os
  3. API_VERSION = 2
  4. cdef extern from "_hashindex.c":
  5. ctypedef struct HashIndex:
  6. pass
  7. HashIndex *hashindex_read(char *path)
  8. HashIndex *hashindex_init(int capacity, int key_size, int value_size)
  9. void hashindex_free(HashIndex *index)
  10. void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, long long *unique_size, long long *unique_csize)
  11. int hashindex_get_size(HashIndex *index)
  12. int hashindex_write(HashIndex *index, char *path)
  13. void *hashindex_get(HashIndex *index, void *key)
  14. void *hashindex_next_key(HashIndex *index, void *key)
  15. int hashindex_delete(HashIndex *index, void *key)
  16. int hashindex_set(HashIndex *index, void *key, void *value)
  17. int _htole32(int v)
  18. int _le32toh(int v)
  19. _NoDefault = object()
  20. cdef class IndexBase:
  21. cdef HashIndex *index
  22. cdef int key_size
  23. def __cinit__(self, capacity=0, path=None, key_size=None):
  24. assert key_size is not None
  25. self.key_size = key_size
  26. if path:
  27. self.index = hashindex_read(<bytes>os.fsencode(path))
  28. if not self.index:
  29. raise Exception('hashindex_read failed')
  30. else:
  31. self.index = hashindex_init(capacity, self.key_size, self.value_size)
  32. if not self.index:
  33. raise Exception('hashindex_init failed')
  34. def __dealloc__(self):
  35. if self.index:
  36. hashindex_free(self.index)
  37. @classmethod
  38. def read(cls, path, key_size=None):
  39. return cls(path=path, key_size=key_size)
  40. def write(self, path):
  41. if not hashindex_write(self.index, <bytes>os.fsencode(path)):
  42. raise Exception('hashindex_write failed')
  43. def clear(self):
  44. hashindex_free(self.index)
  45. self.index = hashindex_init(0, self.key_size, self.value_size)
  46. if not self.index:
  47. raise Exception('hashindex_init failed')
  48. def setdefault(self, key, value):
  49. if not key in self:
  50. self[key] = value
  51. def __delitem__(self, key):
  52. assert len(key) == self.key_size
  53. if not hashindex_delete(self.index, <char *>key):
  54. raise Exception('hashindex_delete failed')
  55. def get(self, key, default=None):
  56. try:
  57. return self[key]
  58. except KeyError:
  59. return default
  60. def pop(self, key, default=_NoDefault):
  61. try:
  62. value = self[key]
  63. del self[key]
  64. return value
  65. except KeyError:
  66. if default != _NoDefault:
  67. return default
  68. raise
  69. def __len__(self):
  70. return hashindex_get_size(self.index)
  71. cdef class NSIndex(IndexBase):
  72. value_size = 8
  73. def __getitem__(self, key):
  74. assert len(key) == self.key_size
  75. data = <int *>hashindex_get(self.index, <char *>key)
  76. if not data:
  77. raise KeyError
  78. return _le32toh(data[0]), _le32toh(data[1])
  79. def __setitem__(self, key, value):
  80. assert len(key) == self.key_size
  81. cdef int[2] data
  82. data[0] = _htole32(value[0])
  83. data[1] = _htole32(value[1])
  84. if not hashindex_set(self.index, <char *>key, data):
  85. raise Exception('hashindex_set failed')
  86. def __contains__(self, key):
  87. assert len(key) == self.key_size
  88. data = <int *>hashindex_get(self.index, <char *>key)
  89. return data != NULL
  90. def iteritems(self, marker=None):
  91. cdef const void *key
  92. iter = NSKeyIterator(self.key_size)
  93. iter.idx = self
  94. iter.index = self.index
  95. if marker:
  96. key = hashindex_get(self.index, <char *>marker)
  97. if marker is None:
  98. raise IndexError
  99. iter.key = key - self.key_size
  100. return iter
  101. cdef class NSKeyIterator:
  102. cdef NSIndex idx
  103. cdef HashIndex *index
  104. cdef const void *key
  105. cdef int key_size
  106. def __cinit__(self, key_size):
  107. self.key = NULL
  108. self.key_size = key_size
  109. def __iter__(self):
  110. return self
  111. def __next__(self):
  112. self.key = hashindex_next_key(self.index, <char *>self.key)
  113. if not self.key:
  114. raise StopIteration
  115. cdef int *value = <int *>(self.key + self.key_size)
  116. return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]))
  117. cdef class ChunkIndex(IndexBase):
  118. value_size = 12
  119. def __getitem__(self, key):
  120. assert len(key) == self.key_size
  121. data = <int *>hashindex_get(self.index, <char *>key)
  122. if not data:
  123. raise KeyError
  124. return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2])
  125. def __setitem__(self, key, value):
  126. assert len(key) == self.key_size
  127. cdef int[3] data
  128. data[0] = _htole32(value[0])
  129. data[1] = _htole32(value[1])
  130. data[2] = _htole32(value[2])
  131. if not hashindex_set(self.index, <char *>key, data):
  132. raise Exception('hashindex_set failed')
  133. def __contains__(self, key):
  134. assert len(key) == self.key_size
  135. data = <int *>hashindex_get(self.index, <char *>key)
  136. return data != NULL
  137. def iteritems(self, marker=None):
  138. cdef const void *key
  139. iter = ChunkKeyIterator(self.key_size)
  140. iter.idx = self
  141. iter.index = self.index
  142. if marker:
  143. key = hashindex_get(self.index, <char *>marker)
  144. if marker is None:
  145. raise IndexError
  146. iter.key = key - self.key_size
  147. return iter
  148. def summarize(self):
  149. cdef long long total_size, total_csize, unique_size, unique_csize
  150. hashindex_summarize(self.index, &total_size, &total_csize, &unique_size, &unique_csize)
  151. return total_size, total_csize, unique_size, unique_csize
  152. cdef class ChunkKeyIterator:
  153. cdef ChunkIndex idx
  154. cdef HashIndex *index
  155. cdef const void *key
  156. cdef int key_size
  157. def __cinit__(self, key_size):
  158. self.key = NULL
  159. self.key_size = key_size
  160. def __iter__(self):
  161. return self
  162. def __next__(self):
  163. self.key = hashindex_next_key(self.index, <char *>self.key)
  164. if not self.key:
  165. raise StopIteration
  166. cdef int *value = <int *>(self.key + self.key_size)
  167. return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))