hashindex.pyx 5.9 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202
  1. # -*- coding: utf-8 -*-
  2. import os
  3. API_VERSION = 1
  4. cdef extern from "_hashindex.c":
  5. ctypedef struct HashIndex:
  6. pass
  7. HashIndex *hashindex_open(char *path, int readonly)
  8. HashIndex *hashindex_create(char *path, int capacity, int key_size, int value_size)
  9. void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, long long *unique_size, long long *unique_csize)
  10. int hashindex_get_size(HashIndex *index)
  11. int hashindex_clear(HashIndex *index)
  12. int hashindex_close(HashIndex *index)
  13. int hashindex_flush(HashIndex *index)
  14. void *hashindex_get(HashIndex *index, void *key)
  15. void *hashindex_next_key(HashIndex *index, void *key)
  16. int hashindex_delete(HashIndex *index, void *key)
  17. int hashindex_set(HashIndex *index, void *key, void *value)
  18. int _htole32(int v)
  19. int _le32toh(int v)
  20. _NoDefault = object()
  21. cdef class IndexBase:
  22. cdef HashIndex *index
  23. key_size = 32
  24. def __cinit__(self, path, readonly=False):
  25. self.index = hashindex_open(<bytes>os.fsencode(path), readonly)
  26. if not self.index:
  27. raise Exception('Failed to open %s' % path)
  28. def __dealloc__(self):
  29. if self.index:
  30. if not hashindex_close(self.index):
  31. raise Exception('hashindex_close failed')
  32. @classmethod
  33. def create(cls, path, capacity=0):
  34. index = hashindex_create(<bytes>os.fsencode(path), capacity, cls.key_size, cls.value_size)
  35. if not index:
  36. raise Exception('Failed to create %s' % path)
  37. hashindex_close(index)
  38. return cls(path)
  39. def clear(self):
  40. if not hashindex_clear(self.index):
  41. raise Exception('hashindex_clear failed')
  42. def flush(self):
  43. if not hashindex_flush(self.index):
  44. raise Exception('hashindex_flush failed')
  45. def setdefault(self, key, value):
  46. if not key in self:
  47. self[key] = value
  48. def __delitem__(self, key):
  49. assert len(key) == 32
  50. if not hashindex_delete(self.index, <char *>key):
  51. raise Exception('hashindex_delete failed')
  52. def get(self, key, default=None):
  53. try:
  54. return self[key]
  55. except KeyError:
  56. return default
  57. def pop(self, key, default=_NoDefault):
  58. try:
  59. value = self[key]
  60. del self[key]
  61. return value
  62. except KeyError:
  63. if default != _NoDefault:
  64. return default
  65. raise
  66. def __len__(self):
  67. return hashindex_get_size(self.index)
  68. cdef class NSIndex(IndexBase):
  69. value_size = 8
  70. def __getitem__(self, key):
  71. assert len(key) == 32
  72. data = <int *>hashindex_get(self.index, <char *>key)
  73. if not data:
  74. raise KeyError
  75. return _le32toh(data[0]), _le32toh(data[1])
  76. def __setitem__(self, key, value):
  77. assert len(key) == 32
  78. cdef int[2] data
  79. data[0] = _htole32(value[0])
  80. data[1] = _htole32(value[1])
  81. if not hashindex_set(self.index, <char *>key, data):
  82. raise Exception('hashindex_set failed')
  83. def __contains__(self, key):
  84. assert len(key) == 32
  85. data = <int *>hashindex_get(self.index, <char *>key)
  86. return data != NULL
  87. def iteritems(self, marker=None):
  88. cdef const void *key
  89. iter = NSKeyIterator()
  90. iter.idx = self
  91. iter.index = self.index
  92. if marker:
  93. key = hashindex_get(self.index, <char *>marker)
  94. if marker is None:
  95. raise IndexError
  96. iter.key = key - 32
  97. return iter
  98. cdef class NSKeyIterator:
  99. cdef NSIndex idx
  100. cdef HashIndex *index
  101. cdef const void *key
  102. def __cinit__(self):
  103. self.key = NULL
  104. def __iter__(self):
  105. return self
  106. def __next__(self):
  107. self.key = hashindex_next_key(self.index, <char *>self.key)
  108. if not self.key:
  109. raise StopIteration
  110. cdef int *value = <int *>(self.key + 32)
  111. return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]))
  112. cdef class ChunkIndex(IndexBase):
  113. value_size = 12
  114. def __getitem__(self, key):
  115. assert len(key) == 32
  116. data = <int *>hashindex_get(self.index, <char *>key)
  117. if not data:
  118. raise KeyError
  119. return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2])
  120. def __setitem__(self, key, value):
  121. assert len(key) == 32
  122. cdef int[3] data
  123. data[0] = _htole32(value[0])
  124. data[1] = _htole32(value[1])
  125. data[2] = _htole32(value[2])
  126. if not hashindex_set(self.index, <char *>key, data):
  127. raise Exception('hashindex_set failed')
  128. def __contains__(self, key):
  129. assert len(key) == 32
  130. data = <int *>hashindex_get(self.index, <char *>key)
  131. return data != NULL
  132. def iteritems(self, marker=None):
  133. cdef const void *key
  134. iter = ChunkKeyIterator()
  135. iter.idx = self
  136. iter.index = self.index
  137. if marker:
  138. key = hashindex_get(self.index, <char *>marker)
  139. if marker is None:
  140. raise IndexError
  141. iter.key = key - 32
  142. return iter
  143. def summarize(self):
  144. cdef long long total_size, total_csize, unique_size, unique_csize
  145. hashindex_summarize(self.index, &total_size, &total_csize, &unique_size, &unique_csize)
  146. return total_size, total_csize, unique_size, unique_csize
  147. cdef class ChunkKeyIterator:
  148. cdef ChunkIndex idx
  149. cdef HashIndex *index
  150. cdef const void *key
  151. def __cinit__(self):
  152. self.key = NULL
  153. def __iter__(self):
  154. return self
  155. def __next__(self):
  156. self.key = hashindex_next_key(self.index, <char *>self.key)
  157. if not self.key:
  158. raise StopIteration
  159. cdef int *value = <int *>(self.key + 32)
  160. return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))