Преглед изворног кода

Chunker: add sparse/fmap support

Thomas Waldmann пре 1 месец
родитељ
комит
5445a52994
3 измењена фајла са 10 додатих и 6 уклоњених линија
  1. 1 1
      src/borg/archiver/benchmark_cmd.py
  2. 8 4
      src/borg/chunker.pyx
  3. 1 1
      src/borg/testsuite/chunker_test.py

+ 1 - 1
src/borg/archiver/benchmark_cmd.py

@@ -146,7 +146,7 @@ class BenchmarkMixIn:
                     pass
 
         for spec, func in [
-            ("buzhash,19,23,21,4095", lambda: chunkit("buzhash", 19, 23, 21, 4095, seed=0)),
+            ("buzhash,19,23,21,4095", lambda: chunkit("buzhash", 19, 23, 21, 4095, seed=0, sparse=False)),
             ("fixed,1048576", lambda: chunkit("fixed", 1048576, sparse=False)),
         ]:
             print(f"{spec:<24} {size:<10} {timeit(func, number=100):.3f}s")

+ 8 - 4
src/borg/chunker.pyx

@@ -571,8 +571,9 @@ cdef class Chunker:
     cdef readonly float chunking_time
     cdef object file_reader  # FileReader instance
     cdef size_t reader_block_size
+    cdef bint sparse
 
-    def __cinit__(self, int seed, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size):
+    def __cinit__(self, int seed, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size, bint sparse=False):
         min_size = 1 << chunk_min_exp
         max_size = 1 << chunk_max_exp
         assert max_size <= len(zeros)
@@ -596,6 +597,7 @@ cdef class Chunker:
         self._fd = None
         self.chunking_time = 0.0
         self.reader_block_size = 1024 * 1024
+        self.sparse = sparse
 
     def __dealloc__(self):
         """Free the chunker's resources."""
@@ -706,17 +708,18 @@ cdef class Chunker:
         # Return a memory view of the chunk
         return memoryview((self.data + old_last)[:n])
 
-    def chunkify(self, fd, fh=-1):
+    def chunkify(self, fd, fh=-1, fmap=None):
         """
         Cut a file into chunks.
 
         :param fd: Python file object
         :param fh: OS-level file handle (if available),
                    defaults to -1 which means not to use OS-level fd.
+        :param fmap: a file map, same format as generated by sparsemap
         """
         self._fd = fd
         self.fh = fh
-        self.file_reader = FileReader(fd=fd, fh=fh, read_size=self.reader_block_size)
+        self.file_reader = FileReader(fd=fd, fh=fh, read_size=self.reader_block_size, sparse=self.sparse, fmap=fmap)
         self.done = 0
         self.remaining = 0
         self.bytes_read = 0
@@ -765,7 +768,8 @@ def buzhash_update(uint32_t sum, unsigned char remove, unsigned char add, size_t
 def get_chunker(algo, *params, **kw):
     if algo == 'buzhash':
         seed = kw['seed']
-        return Chunker(seed, *params)
+        sparse = kw['sparse']
+        return Chunker(seed, *params, sparse=sparse)
     if algo == 'fixed':
         sparse = kw['sparse']
         return ChunkerFixed(*params, sparse=sparse)

+ 1 - 1
src/borg/testsuite/chunker_test.py

@@ -136,6 +136,6 @@ class ChunkerTestCase(BaseTestCase):
                 self.input = self.input[:-1]
                 return self.input[:1]
 
-        chunker = get_chunker(*CHUNKER_PARAMS, seed=0)
+        chunker = get_chunker(*CHUNKER_PARAMS, seed=0, sparse=False)
         reconstructed = b"".join(cf(chunker.chunkify(SmallReadFile())))
         assert reconstructed == b"a" * 20