# hashindex.pyx
  1. # -*- coding: utf-8 -*-
  2. import os
# Version number of the API exposed by this extension module.
# NOTE(review): presumably compared by the importing code to detect a stale
# compiled extension -- confirm against the caller.
API_VERSION = 2
# C-level hash table API, pulled in directly from the _hashindex.c source file.
# The notes below describe only how the wrappers in this file interpret each
# result; the authoritative contract lives in _hashindex.c.
cdef extern from "_hashindex.c":
    # Opaque handle: the fields are never accessed from Cython.
    ctypedef struct HashIndex:
        pass

    # Constructors -- callers here treat a NULL result as failure.
    HashIndex *hashindex_read(char *path)
    HashIndex *hashindex_init(int capacity, int key_size, int value_size)
    void hashindex_free(HashIndex *index)

    # Fills the four output totals; used by ChunkIndex.summarize().
    void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, long long *unique_size, long long *unique_csize)
    # Returned as-is from IndexBase.__len__().
    int hashindex_get_size(HashIndex *index)
    # Callers here treat a zero result as failure.
    int hashindex_write(HashIndex *index, char *path)
    # Callers treat NULL as "key not present"; otherwise the result is read as
    # the value stored for the key.
    void *hashindex_get(HashIndex *index, void *key)
    # Callers treat NULL as "iteration finished"; a NULL key is passed on the
    # first call.
    void *hashindex_next_key(HashIndex *index, void *key)
    # Callers here treat a zero result as failure.
    int hashindex_delete(HashIndex *index, void *key)
    int hashindex_set(HashIndex *index, void *key, void *value)
    # 32-bit host <-> little-endian conversions (per their names); applied to
    # every value written to / read from the table.
    int _htole32(int v)
    int _le32toh(int v)
# Unique sentinel used as the default of IndexBase.pop(), so that "no default
# supplied" can be told apart from any real value, including None.
_NoDefault = object()
  20. cdef class IndexBase:
  21. cdef HashIndex *index
  22. key_size = 32
  23. def __cinit__(self, capacity=0, path=None):
  24. if path:
  25. self.index = hashindex_read(<bytes>os.fsencode(path))
  26. if not self.index:
  27. raise Exception('hashindex_read failed')
  28. else:
  29. self.index = hashindex_init(capacity, self.key_size, self.value_size)
  30. if not self.index:
  31. raise Exception('hashindex_init failed')
  32. def __dealloc__(self):
  33. if self.index:
  34. hashindex_free(self.index)
  35. @classmethod
  36. def read(cls, path):
  37. return cls(path=path)
  38. def write(self, path):
  39. if not hashindex_write(self.index, <bytes>os.fsencode(path)):
  40. raise Exception('hashindex_write failed')
  41. def clear(self):
  42. hashindex_free(self.index)
  43. self.index = hashindex_init(0, self.key_size, self.value_size)
  44. if not self.index:
  45. raise Exception('hashindex_init failed')
  46. def setdefault(self, key, value):
  47. if not key in self:
  48. self[key] = value
  49. def __delitem__(self, key):
  50. assert len(key) == 32
  51. if not hashindex_delete(self.index, <char *>key):
  52. raise Exception('hashindex_delete failed')
  53. def get(self, key, default=None):
  54. try:
  55. return self[key]
  56. except KeyError:
  57. return default
  58. def pop(self, key, default=_NoDefault):
  59. try:
  60. value = self[key]
  61. del self[key]
  62. return value
  63. except KeyError:
  64. if default != _NoDefault:
  65. return default
  66. raise
  67. def __len__(self):
  68. return hashindex_get_size(self.index)
  69. cdef class NSIndex(IndexBase):
  70. value_size = 8
  71. def __getitem__(self, key):
  72. assert len(key) == 32
  73. data = <int *>hashindex_get(self.index, <char *>key)
  74. if not data:
  75. raise KeyError
  76. return _le32toh(data[0]), _le32toh(data[1])
  77. def __setitem__(self, key, value):
  78. assert len(key) == 32
  79. cdef int[2] data
  80. data[0] = _htole32(value[0])
  81. data[1] = _htole32(value[1])
  82. if not hashindex_set(self.index, <char *>key, data):
  83. raise Exception('hashindex_set failed')
  84. def __contains__(self, key):
  85. assert len(key) == 32
  86. data = <int *>hashindex_get(self.index, <char *>key)
  87. return data != NULL
  88. def iteritems(self, marker=None):
  89. cdef const void *key
  90. iter = NSKeyIterator()
  91. iter.idx = self
  92. iter.index = self.index
  93. if marker:
  94. key = hashindex_get(self.index, <char *>marker)
  95. if marker is None:
  96. raise IndexError
  97. iter.key = key - 32
  98. return iter
  99. cdef class NSKeyIterator:
  100. cdef NSIndex idx
  101. cdef HashIndex *index
  102. cdef const void *key
  103. def __cinit__(self):
  104. self.key = NULL
  105. def __iter__(self):
  106. return self
  107. def __next__(self):
  108. self.key = hashindex_next_key(self.index, <char *>self.key)
  109. if not self.key:
  110. raise StopIteration
  111. cdef int *value = <int *>(self.key + 32)
  112. return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]))
  113. cdef class ChunkIndex(IndexBase):
  114. value_size = 12
  115. def __getitem__(self, key):
  116. assert len(key) == 32
  117. data = <int *>hashindex_get(self.index, <char *>key)
  118. if not data:
  119. raise KeyError
  120. return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2])
  121. def __setitem__(self, key, value):
  122. assert len(key) == 32
  123. cdef int[3] data
  124. data[0] = _htole32(value[0])
  125. data[1] = _htole32(value[1])
  126. data[2] = _htole32(value[2])
  127. if not hashindex_set(self.index, <char *>key, data):
  128. raise Exception('hashindex_set failed')
  129. def __contains__(self, key):
  130. assert len(key) == 32
  131. data = <int *>hashindex_get(self.index, <char *>key)
  132. return data != NULL
  133. def iteritems(self, marker=None):
  134. cdef const void *key
  135. iter = ChunkKeyIterator()
  136. iter.idx = self
  137. iter.index = self.index
  138. if marker:
  139. key = hashindex_get(self.index, <char *>marker)
  140. if marker is None:
  141. raise IndexError
  142. iter.key = key - 32
  143. return iter
  144. def summarize(self):
  145. cdef long long total_size, total_csize, unique_size, unique_csize
  146. hashindex_summarize(self.index, &total_size, &total_csize, &unique_size, &unique_csize)
  147. return total_size, total_csize, unique_size, unique_csize
  148. cdef class ChunkKeyIterator:
  149. cdef ChunkIndex idx
  150. cdef HashIndex *index
  151. cdef const void *key
  152. def __cinit__(self):
  153. self.key = NULL
  154. def __iter__(self):
  155. return self
  156. def __next__(self):
  157. self.key = hashindex_next_key(self.index, <char *>self.key)
  158. if not self.key:
  159. raise StopIteration
  160. cdef int *value = <int *>(self.key + 32)
  161. return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))