Browse Source

get_chunker: give it the key instead of the seed

the buzhash seed only has 32 bits, but we would rather
have 64 bits for buzhash64.

just take them from crypt_key for now.
Thomas Waldmann 1 week ago
parent
commit
544b3f41a9

+ 4 - 4
src/borg/archive.py

@@ -351,7 +351,7 @@ class ChunkBuffer:
         self.packer = msgpack.Packer()
         self.packer = msgpack.Packer()
         self.chunks = []
         self.chunks = []
         self.key = key
         self.key = key
-        self.chunker = get_chunker(*chunker_params, seed=self.key.chunk_seed, sparse=False)
+        self.chunker = get_chunker(*chunker_params, key=self.key, sparse=False)
         self.saved_chunks_len = None
         self.saved_chunks_len = None
 
 
     def add(self, item):
     def add(self, item):
@@ -1227,7 +1227,7 @@ class FilesystemObjectProcessors:
         self.hlm = HardLinkManager(id_type=tuple, info_type=(list, type(None)))  # (dev, ino) -> chunks or None
         self.hlm = HardLinkManager(id_type=tuple, info_type=(list, type(None)))  # (dev, ino) -> chunks or None
         self.stats = Statistics(output_json=log_json, iec=iec)  # threading: done by cache (including progress)
         self.stats = Statistics(output_json=log_json, iec=iec)  # threading: done by cache (including progress)
         self.cwd = os.getcwd()
         self.cwd = os.getcwd()
-        self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed, sparse=sparse)
+        self.chunker = get_chunker(*chunker_params, key=key, sparse=sparse)
 
 
     @contextmanager
     @contextmanager
     def create_helper(self, path, st, status=None, hardlinkable=True, strip_prefix=None):
     def create_helper(self, path, st, status=None, hardlinkable=True, strip_prefix=None):
@@ -1502,7 +1502,7 @@ class TarfileObjectProcessors:
         self.print_file_status = file_status_printer or (lambda *args: None)
         self.print_file_status = file_status_printer or (lambda *args: None)
 
 
         self.stats = Statistics(output_json=log_json, iec=iec)  # threading: done by cache (including progress)
         self.stats = Statistics(output_json=log_json, iec=iec)  # threading: done by cache (including progress)
-        self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed, sparse=False)
+        self.chunker = get_chunker(*chunker_params, key=key, sparse=False)
         self.hlm = HardLinkManager(id_type=str, info_type=list)  # path -> chunks
         self.hlm = HardLinkManager(id_type=str, info_type=list)  # path -> chunks
 
 
     @contextmanager
     @contextmanager
@@ -2325,7 +2325,7 @@ class ArchiveRecreater:
         target.process_file_chunks = ChunksProcessor(
         target.process_file_chunks = ChunksProcessor(
             cache=self.cache, key=self.key, add_item=target.add_item, rechunkify=target.recreate_rechunkify
             cache=self.cache, key=self.key, add_item=target.add_item, rechunkify=target.recreate_rechunkify
         ).process_file_chunks
         ).process_file_chunks
-        target.chunker = get_chunker(*target.chunker_params, seed=self.key.chunk_seed, sparse=False)
+        target.chunker = get_chunker(*target.chunker_params, key=self.key, sparse=False)
         return target
         return target
 
 
     def create_target_archive(self, name):
     def create_target_archive(self, name):

+ 2 - 2
src/borg/archiver/benchmark_cmd.py

@@ -146,8 +146,8 @@ class BenchmarkMixIn:
                     pass
                     pass
 
 
         for spec, func in [
         for spec, func in [
-            ("buzhash,19,23,21,4095", lambda: chunkit("buzhash", 19, 23, 21, 4095, seed=0, sparse=False)),
-            ("buzhash64,19,23,21,4095", lambda: chunkit("buzhash64", 19, 23, 21, 4095, seed=0, sparse=False)),
+            ("buzhash,19,23,21,4095", lambda: chunkit("buzhash", 19, 23, 21, 4095, sparse=False)),
+            ("buzhash64,19,23,21,4095", lambda: chunkit("buzhash64", 19, 23, 21, 4095, sparse=False)),
             ("fixed,1048576", lambda: chunkit("fixed", 1048576, sparse=False)),
             ("fixed,1048576", lambda: chunkit("fixed", 1048576, sparse=False)),
         ]:
         ]:
             print(f"{spec:<24} {size:<10} {timeit(func, number=100):.3f}s")
             print(f"{spec:<24} {size:<10} {timeit(func, number=100):.3f}s")

+ 1 - 1
src/borg/archiver/transfer_cmd.py

@@ -41,7 +41,7 @@ def transfer_chunks(
         file = ChunkIteratorFileWrapper(chunk_iterator)
         file = ChunkIteratorFileWrapper(chunk_iterator)
 
 
         # Create a chunker with the specified parameters
         # Create a chunker with the specified parameters
-        chunker = get_chunker(*chunker_params, seed=archive.key.chunk_seed, sparse=False)
+        chunker = get_chunker(*chunker_params, key=archive.key, sparse=False)
         for chunk in chunker.chunkify(file):
         for chunk in chunker.chunkify(file):
             if not dry_run:
             if not dry_run:
                 chunk_id, data = cached_hash(chunk, archive.key.id_hash)
                 chunk_id, data = cached_hash(chunk, archive.key.id_hash)

+ 11 - 6
src/borg/chunkers/__init__.py

@@ -3,21 +3,26 @@ from .buzhash64 import ChunkerBuzHash64
 from .failing import ChunkerFailing
 from .failing import ChunkerFailing
 from .fixed import ChunkerFixed
 from .fixed import ChunkerFixed
 from .reader import *  # noqa
 from .reader import *  # noqa
+from ..crypto.key import PlaintextKey
 
 
 API_VERSION = "1.2_01"
 API_VERSION = "1.2_01"
 
 
 
 
 def get_chunker(algo, *params, **kw):
 def get_chunker(algo, *params, **kw):
+    key = kw.get("key", None)
+    sparse = kw.get("sparse", False)
+    # key.chunk_seed only has 32bits
+    seed = key.chunk_seed if key is not None else 0
+    # we want 64bits for buzhash64, get them from crypt_key
+    if key is None or isinstance(key, PlaintextKey):
+        seed64 = 0
+    else:
+        seed64 = int.from_bytes(key.crypt_key[:8], byteorder="little")
     if algo == "buzhash":
     if algo == "buzhash":
-        seed = kw["seed"]
-        sparse = kw["sparse"]
         return Chunker(seed, *params, sparse=sparse)
         return Chunker(seed, *params, sparse=sparse)
     if algo == "buzhash64":
     if algo == "buzhash64":
-        seed = kw["seed"]
-        sparse = kw["sparse"]
-        return ChunkerBuzHash64(seed, *params, sparse=sparse)
+        return ChunkerBuzHash64(seed64, *params, sparse=sparse)
     if algo == "fixed":
     if algo == "fixed":
-        sparse = kw["sparse"]
         return ChunkerFixed(*params, sparse=sparse)
         return ChunkerFixed(*params, sparse=sparse)
     if algo == "fail":
     if algo == "fail":
         return ChunkerFailing(*params)
         return ChunkerFailing(*params)

+ 1 - 1
src/borg/testsuite/chunkers/buzhash64_self_test.py

@@ -72,6 +72,6 @@ class ChunkerBuzHash64TestCase(BaseTestCase):
                 self.input = self.input[:-1]
                 self.input = self.input[:-1]
                 return self.input[:1]
                 return self.input[:1]
 
 
-        chunker = get_chunker(*CHUNKER64_PARAMS, seed=0, sparse=False)
+        chunker = get_chunker(*CHUNKER64_PARAMS, sparse=False)
         reconstructed = b"".join(cf(chunker.chunkify(SmallReadFile())))
         reconstructed = b"".join(cf(chunker.chunkify(SmallReadFile())))
         assert reconstructed == b"a" * 20
         assert reconstructed == b"a" * 20

+ 1 - 1
src/borg/testsuite/chunkers/buzhash_self_test.py

@@ -69,6 +69,6 @@ class ChunkerTestCase(BaseTestCase):
                 self.input = self.input[:-1]
                 self.input = self.input[:-1]
                 return self.input[:1]
                 return self.input[:1]
 
 
-        chunker = get_chunker(*CHUNKER_PARAMS, seed=0, sparse=False)
+        chunker = get_chunker(*CHUNKER_PARAMS, sparse=False)
         reconstructed = b"".join(cf(chunker.chunkify(SmallReadFile())))
         reconstructed = b"".join(cf(chunker.chunkify(SmallReadFile())))
         assert reconstructed == b"a" * 20
         assert reconstructed == b"a" * 20