# chunker.pyx
  # -*- coding: utf-8 -*-

  # Version tag exposed by this extension module.
  # NOTE(review): presumably compared by the importing Python code to detect a
  # stale compiled extension -- confirm against the callers.
  API_VERSION = '1.0_01'
  from libc.stdlib cimport free
  # C declarations taken from "_chunker.c".  Where a C name would collide with a
  # Python-level name defined later in this module, the declaration gives the
  # Cython-side alias first and the real C name in quotes.
  cdef extern from "_chunker.c":
      # NOTE(review): declared as plain ``int``; for extern typedefs Cython is
      # documented to need only the right kind of type (integer vs. float) --
      # confirm this still holds for the Cython version in use.
      ctypedef int uint32_t

      # Opaque handle: C struct "Chunker", known here as _Chunker so that the
      # extension class below can be called Chunker.
      ctypedef struct _Chunker "Chunker":
          pass

      # --- chunker lifecycle and processing ---
      _Chunker *chunker_init(int window_size, int chunk_mask, int min_size, int max_size, uint32_t seed)
      void chunker_set_fd(_Chunker *chunker, object f, int fd)
      void chunker_free(_Chunker *chunker)
      # Returns a Python object.
      object chunker_process(_Chunker *chunker)

      # --- buzhash primitives ---
      # The returned table is released with free() by the wrappers in this module.
      uint32_t *buzhash_init_table(uint32_t seed)
      # C functions "buzhash" / "buzhash_update", aliased with a c_ prefix so the
      # Python-visible wrappers can use the plain names.
      uint32_t c_buzhash "buzhash"(unsigned char *data, size_t len, uint32_t *h)
      uint32_t c_buzhash_update "buzhash_update"(uint32_t sum, unsigned char remove, unsigned char add, size_t len, uint32_t *h)
  cdef class Chunker:
      """
      Iterator wrapper around the C chunker declared in the extern block.

      Typical use is ``for chunk in Chunker(...).chunkify(fd): ...``: the
      constructor creates the C state, ``chunkify`` attaches an input and
      returns the object itself, and each iteration step returns whatever
      ``chunker_process`` yields.
      """
      # Owned C chunker state; created in __cinit__, released in __dealloc__.
      cdef _Chunker *chunker

      def __cinit__(self, int seed, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size):
          # Sizes are given as exponents of two; the mask has the low
          # ``hash_mask_bits`` bits set.
          # NOTE(review): the exponents are not range-checked here -- confirm
          # that callers keep them small enough for the resulting sizes and
          # mask to fit the C ``int`` parameters of chunker_init.
          min_size = 1 << chunk_min_exp
          max_size = 1 << chunk_max_exp
          hash_mask = (1 << hash_mask_bits) - 1
          # Only the low 32 bits of the seed are passed on.
          self.chunker = chunker_init(hash_window_size, hash_mask, min_size, max_size, seed & 0xffffffff)
          # A NULL handle is treated as an allocation failure, so that later
          # calls never hand a NULL pointer to the C functions.
          if self.chunker == NULL:
              raise MemoryError()

      def chunkify(self, fd, fh=-1):
          """
          Cut a file into chunks.

          :param fd: Python file object
          :param fh: OS-level file handle (if available),
                     defaults to -1 which means not to use OS-level fd.
          :return: this object, so the call can be iterated over directly.
          """
          chunker_set_fd(self.chunker, fd, fh)
          return self

      def __dealloc__(self):
          # __dealloc__ also runs when __cinit__ raised, so the handle may be NULL.
          if self.chunker != NULL:
              chunker_free(self.chunker)
              # Clear the handle so a freed pointer is never kept around.
              self.chunker = NULL

      def __iter__(self):
          return self

      def __next__(self):
          # NOTE(review): the end of iteration is presumably signalled by the C
          # code (e.g. by raising StopIteration) -- confirm in _chunker.c.
          return chunker_process(self.chunker)
  def buzhash(data, unsigned long seed):
      """
      Return the buzhash of ``data`` as computed by the C ``buzhash`` function.

      :param data: object that Cython can coerce to ``unsigned char *`` and that
                   supports ``len()`` (presumably ``bytes`` -- confirm with callers).
      :param seed: hash seed; only its low 32 bits are used.
      :return: the 32-bit hash value as a Python int.
      """
      cdef uint32_t *table
      cdef uint32_t digest
      table = buzhash_init_table(seed & 0xffffffff)
      try:
          # Coercing ``data`` to a C pointer and calling len() can both raise
          # for unsuitable arguments; the finally clause makes sure the table
          # is released on that path as well.
          digest = c_buzhash(<const unsigned char *> data, len(data), table)
      finally:
          free(table)
      return digest
  def buzhash_update(uint32_t sum, unsigned char remove, unsigned char add, size_t len, unsigned long seed):
      """
      Python-callable wrapper around the C ``buzhash_update`` function.

      Builds the table for ``seed`` (low 32 bits only), passes ``sum``,
      ``remove``, ``add`` and ``len`` through to the C function unchanged,
      frees the table and returns the C function's result.

      NOTE(review): judging by the parameter names this advances a rolling hash
      by one byte (``remove`` leaving, ``add`` entering, ``len`` being the
      window length) -- confirm against _chunker.c.
      """
      cdef uint32_t *lookup = buzhash_init_table(seed & 0xffffffff)
      cdef uint32_t rolled = c_buzhash_update(sum, remove, add, len, lookup)
      free(lookup)
      return rolled