hashindex.pyx 6.7 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221
  # -*- coding: utf-8 -*-
  import os

  # Version marker exposed by this module (presumably compared by importers
  # against the version they expect -- confirm against callers).
  API_VERSION = 2


  # Declarations for the C hash index implementation included from
  # "_hashindex.c".
  cdef extern from "_hashindex.c":
      # Opaque to Cython: only ever handled through HashIndex pointers.
      ctypedef struct HashIndex:
          pass

      # Creating, loading, storing and releasing an index.  The wrappers in
      # this file treat a NULL result from init/read and a zero result from
      # write as failure.
      HashIndex *hashindex_init(int capacity, int key_size, int value_size)
      HashIndex *hashindex_read(char *path)
      int hashindex_write(HashIndex *index, char *path)
      void hashindex_free(HashIndex *index)

      # Per-entry access.  The wrappers treat a NULL result from get as
      # "key not present", a NULL result from next_key as "no more entries",
      # and a zero result from set/delete as failure.
      void *hashindex_get(HashIndex *index, void *key)
      int hashindex_set(HashIndex *index, void *key, void *value)
      int hashindex_delete(HashIndex *index, void *key)
      void *hashindex_next_key(HashIndex *index, void *key)

      # Whole-index operations.
      int hashindex_get_size(HashIndex *index)
      void hashindex_merge(HashIndex *index, HashIndex *other)
      void hashindex_summarize(HashIndex *index,
                               long long *total_size, long long *total_csize,
                               long long *unique_size, long long *unique_csize,
                               long long *total_unique_chunks, long long *total_chunks)

      # Applied to every stored / loaded 32-bit value field (the names suggest
      # host <-> little-endian conversion -- confirm in the C source).
      int _htole32(int v)
      int _le32toh(int v)


  # Unique sentinel meaning "no default was passed", so that None remains
  # usable as an explicit default value.
  cdef _NoDefault = object()

  cimport cython
  @cython.internal
  cdef class IndexBase:
      """Shared base of the hash index wrappers.

      Owns a C ``HashIndex`` and provides the dict-like helpers that do not
      depend on the value layout.  Subclasses must define a ``value_size``
      class attribute and implement ``__getitem__``, ``__setitem__`` and
      ``__contains__``, all of which the methods below rely on.
      """
      cdef HashIndex *index
      cdef int key_size

      def __cinit__(self, capacity=0, path=None, key_size=32):
          # capacity: passed to hashindex_init when a new index is created.
          # path:     if truthy, the index is loaded from this file instead.
          # key_size: length every key must have (checked via len(key)).
          # Raises Exception when the C layer returns a NULL index.
          self.key_size = key_size
          if path:
              path = os.fsencode(path)
              self.index = hashindex_read(path)
              if not self.index:
                  raise Exception('hashindex_read failed')
          else:
              self.index = hashindex_init(capacity, self.key_size, self.value_size)
              if not self.index:
                  raise Exception('hashindex_init failed')

      def __dealloc__(self):
          # self.index is NULL when __cinit__ failed before creating it.
          if self.index:
              hashindex_free(self.index)

      @classmethod
      def read(cls, path):
          """Return a new index of this class loaded from the file at *path*."""
          return cls(path=path)

      def write(self, path):
          """Store the index in the file at *path*.

          Raises Exception if ``hashindex_write`` reports failure.
          """
          path = os.fsencode(path)
          if not hashindex_write(self.index, path):
              raise Exception('hashindex_write failed')

      def clear(self):
          """Discard all entries by replacing the C index with a fresh one.

          Raises Exception if the replacement index cannot be created.
          """
          hashindex_free(self.index)
          self.index = hashindex_init(0, self.key_size, self.value_size)
          if not self.index:
              raise Exception('hashindex_init failed')

      def setdefault(self, key, value):
          """Store *value* under *key* unless *key* is already present."""
          if key not in self:
              self[key] = value

      def __delitem__(self, key):
          # Raises Exception (not KeyError) when hashindex_delete reports failure.
          assert len(key) == self.key_size
          if not hashindex_delete(self.index, <char *>key):
              raise Exception('hashindex_delete failed')

      def get(self, key, default=None):
          """Return the value stored under *key*, or *default* if it is missing."""
          try:
              return self[key]
          except KeyError:
              return default

      def pop(self, key, default=_NoDefault):
          """Remove *key* and return its value.

          If *key* is missing, return *default* when one was given and raise
          KeyError otherwise.
          """
          try:
              value = self[key]
              del self[key]
              return value
          except KeyError:
              # Identity test: the sentinel must never be compared through the
              # (arbitrary) rich comparison of a caller-supplied default.
              if default is not _NoDefault:
                  return default
              raise

      def __len__(self):
          return hashindex_get_size(self.index)
  cdef class NSIndex(IndexBase):
      """Hash index whose values are pairs of 32-bit integers (8 bytes)."""

      value_size = 8

      def __getitem__(self, key):
          # Returns the stored pair as a 2-tuple; raises KeyError(key) if absent.
          assert len(key) == self.key_size
          data = <int *>hashindex_get(self.index, <char *>key)
          if not data:
              raise KeyError(key)
          return _le32toh(data[0]), _le32toh(data[1])

      def __setitem__(self, key, value):
          # value must be indexable at [0] and [1]; both fields are converted
          # with _htole32 before being handed to the C layer.
          assert len(key) == self.key_size
          cdef int[2] data
          data[0] = _htole32(value[0])
          data[1] = _htole32(value[1])
          if not hashindex_set(self.index, <char *>key, data):
              raise Exception('hashindex_set failed')

      def __contains__(self, key):
          assert len(key) == self.key_size
          data = <int *>hashindex_get(self.index, <char *>key)
          return data != NULL

      def iteritems(self, marker=None):
          """Return an iterator over ``(key, (v0, v1))`` entries.

          If *marker* is truthy, the iterator is positioned on the entry with
          that key, so iteration continues with the entries following it.

          Raises IndexError if *marker* is given but not present in the index.
          """
          cdef const void *key
          key_iter = NSKeyIterator(self.key_size)
          # Keep a reference to this index so the C structure outlives the iterator.
          key_iter.idx = self
          key_iter.index = self.index
          if marker:
              key = hashindex_get(self.index, <char *>marker)
              # A NULL result means the marker key is unknown; it must not be
              # turned into an iterator position.
              if key == NULL:
                  raise IndexError
              # hashindex_get returns the value, which is stored directly
              # behind its key; step back to the start of the key.
              key_iter.key = key - self.key_size
          return key_iter
  cdef class NSKeyIterator:
      """Iterator yielding ``(key, (v0, v1))`` for the entries of an NSIndex.

      ``idx`` and ``index`` are filled in by the code that creates the
      iterator; ``key`` is the current position (NULL before the first entry).
      """
      cdef NSIndex idx
      cdef HashIndex *index
      cdef const void *key
      cdef int key_size

      def __cinit__(self, key_size):
          self.key_size = key_size
          # NULL asks hashindex_next_key to start from the beginning.
          self.key = NULL

      def __iter__(self):
          return self

      def __next__(self):
          cdef int *value
          # Advance from the current position; NULL signals the end.
          self.key = hashindex_next_key(self.index, <char *>self.key)
          if self.key == NULL:
              raise StopIteration
          # The value fields are stored directly behind the key bytes.
          value = <int *>(self.key + self.key_size)
          key_bytes = (<char *>self.key)[:self.key_size]
          return key_bytes, (_le32toh(value[0]), _le32toh(value[1]))
  cdef class ChunkIndex(IndexBase):
      """Hash index whose values are triples of 32-bit integers (12 bytes)."""

      value_size = 12

      def __getitem__(self, key):
          # Returns the stored triple as a 3-tuple; raises KeyError(key) if absent.
          assert len(key) == self.key_size
          data = <int *>hashindex_get(self.index, <char *>key)
          if not data:
              raise KeyError(key)
          return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2])

      def __setitem__(self, key, value):
          # value must be indexable at [0], [1] and [2]; all fields are
          # converted with _htole32 before being handed to the C layer.
          assert len(key) == self.key_size
          cdef int[3] data
          data[0] = _htole32(value[0])
          data[1] = _htole32(value[1])
          data[2] = _htole32(value[2])
          if not hashindex_set(self.index, <char *>key, data):
              raise Exception('hashindex_set failed')

      def __contains__(self, key):
          assert len(key) == self.key_size
          data = <int *>hashindex_get(self.index, <char *>key)
          return data != NULL

      def iteritems(self, marker=None):
          """Return an iterator over ``(key, (v0, v1, v2))`` entries.

          If *marker* is truthy, the iterator is positioned on the entry with
          that key, so iteration continues with the entries following it.

          Raises IndexError if *marker* is given but not present in the index.
          """
          cdef const void *key
          key_iter = ChunkKeyIterator(self.key_size)
          # Keep a reference to this index so the C structure outlives the iterator.
          key_iter.idx = self
          key_iter.index = self.index
          if marker:
              key = hashindex_get(self.index, <char *>marker)
              # A NULL result means the marker key is unknown; it must not be
              # turned into an iterator position.
              if key == NULL:
                  raise IndexError
              # hashindex_get returns the value, which is stored directly
              # behind its key; step back to the start of the key.
              key_iter.key = key - self.key_size
          return key_iter

      def summarize(self):
          """Return the totals computed by ``hashindex_summarize``.

          The result is the 6-tuple ``(total_size, total_csize, unique_size,
          unique_csize, total_unique_chunks, total_chunks)``.
          """
          cdef long long total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks
          hashindex_summarize(self.index, &total_size, &total_csize,
                              &unique_size, &unique_csize,
                              &total_unique_chunks, &total_chunks)
          return total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks

      def merge(self, ChunkIndex other not None):
          """Merge the entries of *other* into this index via ``hashindex_merge``.

          *other* must be a ChunkIndex; None is rejected with TypeError
          because it has no C index to read from.
          """
          hashindex_merge(self.index, other.index)
  cdef class ChunkKeyIterator:
      """Iterator yielding ``(key, (v0, v1, v2))`` for the entries of a ChunkIndex.

      ``idx`` and ``index`` are filled in by the code that creates the
      iterator; ``key`` is the current position (NULL before the first entry).
      """
      cdef ChunkIndex idx
      cdef HashIndex *index
      cdef const void *key
      cdef int key_size

      def __cinit__(self, key_size):
          self.key_size = key_size
          # NULL asks hashindex_next_key to start from the beginning.
          self.key = NULL

      def __iter__(self):
          return self

      def __next__(self):
          cdef int *value
          # Advance from the current position; NULL signals the end.
          self.key = hashindex_next_key(self.index, <char *>self.key)
          if self.key == NULL:
              raise StopIteration
          # The value fields are stored directly behind the key bytes.
          value = <int *>(self.key + self.key_size)
          key_bytes = (<char *>self.key)[:self.key_size]
          return key_bytes, (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))