|
@@ -571,8 +571,9 @@ cdef class Chunker:
|
|
|
cdef readonly float chunking_time
|
|
|
cdef object file_reader # FileReader instance
|
|
|
cdef size_t reader_block_size
|
|
|
+ cdef bint sparse
|
|
|
|
|
|
- def __cinit__(self, int seed, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size):
|
|
|
+ def __cinit__(self, int seed, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size, bint sparse=False):
|
|
|
min_size = 1 << chunk_min_exp
|
|
|
max_size = 1 << chunk_max_exp
|
|
|
assert max_size <= len(zeros)
|
|
@@ -596,6 +597,7 @@ cdef class Chunker:
|
|
|
self._fd = None
|
|
|
self.chunking_time = 0.0
|
|
|
self.reader_block_size = 1024 * 1024
|
|
|
+ self.sparse = sparse
|
|
|
|
|
|
def __dealloc__(self):
|
|
|
"""Free the chunker's resources."""
|
|
@@ -706,17 +708,18 @@ cdef class Chunker:
|
|
|
# Return a memory view of the chunk
|
|
|
return memoryview((self.data + old_last)[:n])
|
|
|
|
|
|
- def chunkify(self, fd, fh=-1):
|
|
|
+ def chunkify(self, fd, fh=-1, fmap=None):
|
|
|
"""
|
|
|
Cut a file into chunks.
|
|
|
|
|
|
:param fd: Python file object
|
|
|
:param fh: OS-level file handle (if available),
|
|
|
defaults to -1 which means not to use OS-level fd.
|
|
|
+ :param fmap: a file map, same format as generated by sparsemap
|
|
|
"""
|
|
|
self._fd = fd
|
|
|
self.fh = fh
|
|
|
- self.file_reader = FileReader(fd=fd, fh=fh, read_size=self.reader_block_size)
|
|
|
+ self.file_reader = FileReader(fd=fd, fh=fh, read_size=self.reader_block_size, sparse=self.sparse, fmap=fmap)
|
|
|
self.done = 0
|
|
|
self.remaining = 0
|
|
|
self.bytes_read = 0
|
|
@@ -765,7 +768,8 @@ def buzhash_update(uint32_t sum, unsigned char remove, unsigned char add, size_t
|
|
|
def get_chunker(algo, *params, **kw):
|
|
|
if algo == 'buzhash':
|
|
|
seed = kw['seed']
|
|
|
- return Chunker(seed, *params)
|
|
|
+ sparse = kw['sparse']
|
|
|
+ return Chunker(seed, *params, sparse=sparse)
|
|
|
if algo == 'fixed':
|
|
|
sparse = kw['sparse']
|
|
|
return ChunkerFixed(*params, sparse=sparse)
|