hashindex.pyx 6.2 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208
  1. # -*- coding: utf-8 -*-
  2. import os
# Version number of this extension module's interface.
# NOTE(review): presumably checked by importers against the version they
# expect -- confirm against the callers.
API_VERSION = 2

# C-level declarations for the hash index implementation.  Cython pulls
# "_hashindex.c" into the generated module, so these names refer directly
# to the C functions defined there.
cdef extern from "_hashindex.c":
    # Opaque handle; the wrappers below only ever pass it around by pointer.
    ctypedef struct HashIndex:
        pass

    # Both constructors are treated as failed by the wrappers when they
    # return NULL.
    HashIndex *hashindex_read(char *path)
    HashIndex *hashindex_init(int capacity, int key_size, int value_size)
    void hashindex_free(HashIndex *index)
    # Writes its four results through the pointer arguments.
    void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, long long *unique_size, long long *unique_csize)
    int hashindex_get_size(HashIndex *index)
    # write/delete/set: the wrappers treat a zero return value as failure.
    int hashindex_write(HashIndex *index, char *path)
    # The wrappers treat a NULL result as "key not present"; otherwise the
    # result is read as the stored value, and the iteritems() methods assume
    # the key bytes sit `key_size` bytes before it.
    void *hashindex_get(HashIndex *index, void *key)
    # Called with NULL to start an iteration; a NULL result ends it.
    void *hashindex_next_key(HashIndex *index, void *key)
    int hashindex_delete(HashIndex *index, void *key)
    int hashindex_set(HashIndex *index, void *key, void *value)
    # Applied to every value int before storing (_htole32) and after loading
    # (_le32toh).  NOTE(review): by name these convert between host and
    # little-endian 32-bit byte order -- confirm in _hashindex.c.
    int _htole32(int v)
    int _le32toh(int v)

# Unique sentinel used as the default of IndexBase.pop()'s `default`
# parameter, so that None can be passed as a real default value.
_NoDefault = object()
  20. cdef class IndexBase:
  21. cdef HashIndex *index
  22. cdef int key_size
  23. def __cinit__(self, capacity=0, path=None, key_size=32):
  24. self.key_size = key_size
  25. if path:
  26. self.index = hashindex_read(<bytes>os.fsencode(path))
  27. if not self.index:
  28. raise Exception('hashindex_read failed')
  29. else:
  30. self.index = hashindex_init(capacity, self.key_size, self.value_size)
  31. if not self.index:
  32. raise Exception('hashindex_init failed')
  33. def __dealloc__(self):
  34. if self.index:
  35. hashindex_free(self.index)
  36. @classmethod
  37. def read(cls, path):
  38. return cls(path=path)
  39. def write(self, path):
  40. if not hashindex_write(self.index, <bytes>os.fsencode(path)):
  41. raise Exception('hashindex_write failed')
  42. def clear(self):
  43. hashindex_free(self.index)
  44. self.index = hashindex_init(0, self.key_size, self.value_size)
  45. if not self.index:
  46. raise Exception('hashindex_init failed')
  47. def setdefault(self, key, value):
  48. if not key in self:
  49. self[key] = value
  50. def __delitem__(self, key):
  51. assert len(key) == self.key_size
  52. if not hashindex_delete(self.index, <char *>key):
  53. raise Exception('hashindex_delete failed')
  54. def get(self, key, default=None):
  55. try:
  56. return self[key]
  57. except KeyError:
  58. return default
  59. def pop(self, key, default=_NoDefault):
  60. try:
  61. value = self[key]
  62. del self[key]
  63. return value
  64. except KeyError:
  65. if default != _NoDefault:
  66. return default
  67. raise
  68. def __len__(self):
  69. return hashindex_get_size(self.index)
  70. cdef class NSIndex(IndexBase):
  71. value_size = 8
  72. def __getitem__(self, key):
  73. assert len(key) == self.key_size
  74. data = <int *>hashindex_get(self.index, <char *>key)
  75. if not data:
  76. raise KeyError
  77. return _le32toh(data[0]), _le32toh(data[1])
  78. def __setitem__(self, key, value):
  79. assert len(key) == self.key_size
  80. cdef int[2] data
  81. data[0] = _htole32(value[0])
  82. data[1] = _htole32(value[1])
  83. if not hashindex_set(self.index, <char *>key, data):
  84. raise Exception('hashindex_set failed')
  85. def __contains__(self, key):
  86. assert len(key) == self.key_size
  87. data = <int *>hashindex_get(self.index, <char *>key)
  88. return data != NULL
  89. def iteritems(self, marker=None):
  90. cdef const void *key
  91. iter = NSKeyIterator(self.key_size)
  92. iter.idx = self
  93. iter.index = self.index
  94. if marker:
  95. key = hashindex_get(self.index, <char *>marker)
  96. if marker is None:
  97. raise IndexError
  98. iter.key = key - self.key_size
  99. return iter
  100. cdef class NSKeyIterator:
  101. cdef NSIndex idx
  102. cdef HashIndex *index
  103. cdef const void *key
  104. cdef int key_size
  105. def __cinit__(self, key_size):
  106. self.key = NULL
  107. self.key_size = key_size
  108. def __iter__(self):
  109. return self
  110. def __next__(self):
  111. self.key = hashindex_next_key(self.index, <char *>self.key)
  112. if not self.key:
  113. raise StopIteration
  114. cdef int *value = <int *>(self.key + self.key_size)
  115. return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]))
  116. cdef class ChunkIndex(IndexBase):
  117. value_size = 12
  118. def __getitem__(self, key):
  119. assert len(key) == self.key_size
  120. data = <int *>hashindex_get(self.index, <char *>key)
  121. if not data:
  122. raise KeyError
  123. return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2])
  124. def __setitem__(self, key, value):
  125. assert len(key) == self.key_size
  126. cdef int[3] data
  127. data[0] = _htole32(value[0])
  128. data[1] = _htole32(value[1])
  129. data[2] = _htole32(value[2])
  130. if not hashindex_set(self.index, <char *>key, data):
  131. raise Exception('hashindex_set failed')
  132. def __contains__(self, key):
  133. assert len(key) == self.key_size
  134. data = <int *>hashindex_get(self.index, <char *>key)
  135. return data != NULL
  136. def iteritems(self, marker=None):
  137. cdef const void *key
  138. iter = ChunkKeyIterator(self.key_size)
  139. iter.idx = self
  140. iter.index = self.index
  141. if marker:
  142. key = hashindex_get(self.index, <char *>marker)
  143. if marker is None:
  144. raise IndexError
  145. iter.key = key - self.key_size
  146. return iter
  147. def summarize(self):
  148. cdef long long total_size, total_csize, unique_size, unique_csize
  149. hashindex_summarize(self.index, &total_size, &total_csize, &unique_size, &unique_csize)
  150. return total_size, total_csize, unique_size, unique_csize
  151. cdef class ChunkKeyIterator:
  152. cdef ChunkIndex idx
  153. cdef HashIndex *index
  154. cdef const void *key
  155. cdef int key_size
  156. def __cinit__(self, key_size):
  157. self.key = NULL
  158. self.key_size = key_size
  159. def __iter__(self):
  160. return self
  161. def __next__(self):
  162. self.key = hashindex_next_key(self.index, <char *>self.key)
  163. if not self.key:
  164. raise StopIteration
  165. cdef int *value = <int *>(self.key + self.key_size)
  166. return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))