Browse code

buzhash64: use own CSPRNG

Thomas Waldmann, 5 days ago
parent
commit
3617b63336

+ 3 - 1
src/borg/chunkers/__init__.py

@@ -13,7 +13,9 @@ def get_chunker(algo, *params, **kw):
     # key.chunk_seed only has 32 bits
     # key.chunk_seed only has 32 bits
     seed = key.chunk_seed if key is not None else 0
     seed = key.chunk_seed if key is not None else 0
     # for buzhash64, we want a much longer key, so we derive it from the id key
     # for buzhash64, we want a much longer key, so we derive it from the id key
-    bh64_key = key.derive_key(salt=b"", domain=b"buzhash64", size=32, from_id_key=True) if key is not None else b""
+    bh64_key = (
+        key.derive_key(salt=b"", domain=b"buzhash64", size=32, from_id_key=True) if key is not None else b"\0" * 32
+    )
     if algo == "buzhash":
     if algo == "buzhash":
         return Chunker(seed, *params, sparse=sparse)
         return Chunker(seed, *params, sparse=sparse)
     if algo == "buzhash64":
     if algo == "buzhash64":

+ 3 - 2
src/borg/chunkers/buzhash64.pyx

@@ -3,7 +3,6 @@
 API_VERSION = '1.2_01'
 API_VERSION = '1.2_01'
 
 
 import cython
 import cython
-import random
 import time
 import time
 
 
 from cpython.bytes cimport PyBytes_AsString
 from cpython.bytes cimport PyBytes_AsString
@@ -11,6 +10,8 @@ from libc.stdint cimport uint8_t, uint64_t
 from libc.stdlib cimport malloc, free
 from libc.stdlib cimport malloc, free
 from libc.string cimport memcpy, memmove
 from libc.string cimport memcpy, memmove
 
 
+from ..crypto.low_level import CSPRNG
+
 from ..constants import CH_DATA, CH_ALLOC, CH_HOLE, zeros
 from ..constants import CH_DATA, CH_ALLOC, CH_HOLE, zeros
 from .reader import FileReader, Chunk
 from .reader import FileReader, Chunk
 
 
@@ -45,7 +46,7 @@ cdef uint64_t* buzhash64_init_table(bytes key):
     Balanced means that for each bit position 0..63, exactly 50% of the table values have the bit set to 1.
     Balanced means that for each bit position 0..63, exactly 50% of the table values have the bit set to 1.
     """
     """
     # Create deterministic random number generator
     # Create deterministic random number generator
-    rng = random.Random(int.from_bytes(key, 'big'))
+    rng = CSPRNG(key)
 
 
     cdef int i, j, bit_pos
     cdef int i, j, bit_pos
     cdef uint64_t* table = <uint64_t*>malloc(2048)  # 256 * sizeof(uint64_t)
     cdef uint64_t* table = <uint64_t*>malloc(2048)  # 256 * sizeof(uint64_t)

+ 12 - 12
src/borg/testsuite/chunkers/buzhash64_self_test.py

@@ -25,50 +25,50 @@ class ChunkerBuzHash64TestCase(BaseTestCase):
         self.assert_equal(cf(ChunkerBuzHash64(key0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b""))), [])
         self.assert_equal(cf(ChunkerBuzHash64(key0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b""))), [])
         self.assert_equal(
         self.assert_equal(
             cf(ChunkerBuzHash64(key0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b"foobarboobaz" * 3))),
             cf(ChunkerBuzHash64(key0, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b"foobarboobaz" * 3))),
-            [b"foobarboobaz", b"foobarboobaz", b"foobarboobaz"],
+            [b"foobarb", b"ooba", b"zf", b"oobarb", b"ooba", b"zf", b"oobarb", b"oobaz"],
         )
         )
         self.assert_equal(
         self.assert_equal(
             cf(ChunkerBuzHash64(key1, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b"foobarboobaz" * 3))),
             cf(ChunkerBuzHash64(key1, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b"foobarboobaz" * 3))),
-            [b"foobar", b"boob", b"az", b"foobar", b"boob", b"az", b"foobar", b"boobaz"],
+            [b"fo", b"oba", b"rb", b"oob", b"azf", b"ooba", b"rb", b"oob", b"azf", b"ooba", b"rb", b"oobaz"],
         )
         )
         self.assert_equal(
         self.assert_equal(
             cf(ChunkerBuzHash64(key2, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b"foobarboobaz" * 3))),
             cf(ChunkerBuzHash64(key2, 1, CHUNK_MAX_EXP, 2, 2).chunkify(BytesIO(b"foobarboobaz" * 3))),
-            [b"foobarb", b"oob", b"az", b"foobarb", b"oob", b"az", b"foobarb", b"oobaz"],
+            [b"foobar", b"booba", b"zfoobar", b"booba", b"zfoobar", b"boobaz"],
         )
         )
         self.assert_equal(
         self.assert_equal(
             cf(ChunkerBuzHash64(key0, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
             cf(ChunkerBuzHash64(key0, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
-            [b"foobarb", b"oobazf", b"oobarb", b"oobazf", b"oobarb", b"oobaz"],
+            [b"foobarbo", b"obaz", b"foobarbo", b"obaz", b"foobarbo", b"obaz"],
         )
         )
         self.assert_equal(
         self.assert_equal(
             cf(ChunkerBuzHash64(key1, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
             cf(ChunkerBuzHash64(key1, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
-            [b"foobarb", b"oobaz", b"foobarb", b"oobaz", b"foobarb", b"oobaz"],
+            [b"foobarboob", b"azfoobarboob", b"azfoobarboobaz"],
         )
         )
         self.assert_equal(
         self.assert_equal(
             cf(ChunkerBuzHash64(key2, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
             cf(ChunkerBuzHash64(key2, 2, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
-            [b"foobarbooba", b"zfoobarbooba", b"zfoobarboobaz"],
+            [b"foob", b"arboobazfoob", b"arboobazfoob", b"arboobaz"],
         )
         )
         self.assert_equal(
         self.assert_equal(
             cf(ChunkerBuzHash64(key0, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
             cf(ChunkerBuzHash64(key0, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
-            [b"foobarboobazf", b"oobarboobazf", b"oobarboobaz"],
+            [b"foobarbo", b"obazfoobarbo", b"obazfoobarbo", b"obaz"],
         )
         )
         self.assert_equal(
         self.assert_equal(
             cf(ChunkerBuzHash64(key1, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
             cf(ChunkerBuzHash64(key1, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
-            [b"foobarbo", b"obazfoobarb", b"oobazfoobarb", b"oobaz"],
+            [b"foobarboob", b"azfoobarboob", b"azfoobarboobaz"],
         )
         )
         self.assert_equal(
         self.assert_equal(
             cf(ChunkerBuzHash64(key2, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
             cf(ChunkerBuzHash64(key2, 3, CHUNK_MAX_EXP, 2, 3).chunkify(BytesIO(b"foobarboobaz" * 3))),
-            [b"foobarbooba", b"zfoobarbooba", b"zfoobarboobaz"],
+            [b"foobarboobazfoob", b"arboobazfoob", b"arboobaz"],
         )
         )
 
 
     def test_buzhash64(self):
     def test_buzhash64(self):
-        self.assert_equal(buzhash64(b"abcdefghijklmnop", key0), 15080163834872228739)
-        self.assert_equal(buzhash64(b"abcdefghijklmnop", key1), 9505908538285923444)
+        self.assert_equal(buzhash64(b"abcdefghijklmnop", key0), 17414563089559790077)
+        self.assert_equal(buzhash64(b"abcdefghijklmnop", key1), 1397285894609271345)
         expected = buzhash64(b"abcdefghijklmnop", key0)
         expected = buzhash64(b"abcdefghijklmnop", key0)
         previous = buzhash64(b"Xabcdefghijklmno", key0)
         previous = buzhash64(b"Xabcdefghijklmno", key0)
         this = buzhash64_update(previous, ord("X"), ord("p"), 16, key0)
         this = buzhash64_update(previous, ord("X"), ord("p"), 16, key0)
         self.assert_equal(this, expected)
         self.assert_equal(this, expected)
         # Test with more than 63 bytes to make sure our barrel_shift macro works correctly
         # Test with more than 63 bytes to make sure our barrel_shift macro works correctly
-        self.assert_equal(buzhash64(b"abcdefghijklmnopqrstuvwxyz" * 4, key0), 1936382207158378368)
+        self.assert_equal(buzhash64(b"abcdefghijklmnopqrstuvwxyz" * 4, key0), 17683050804041322250)
 
 
     def test_small_reads64(self):
     def test_small_reads64(self):
         class SmallReadFile:
         class SmallReadFile:

+ 3 - 3
src/borg/testsuite/chunkers/buzhash64_test.py

@@ -36,7 +36,7 @@ def test_chunkpoints64_unchanged():
                 if minexp >= maxexp:
                 if minexp >= maxexp:
                     continue
                     continue
                 for maskbits in (4, 7, 10, 12):
                 for maskbits in (4, 7, 10, 12):
-                    for key in (b"first_key", b"second_key"):
+                    for key in (key0, key1):
                         fh = BytesIO(data)
                         fh = BytesIO(data)
                         chunker = ChunkerBuzHash64(key, minexp, maxexp, maskbits, winsize)
                         chunker = ChunkerBuzHash64(key, minexp, maxexp, maskbits, winsize)
                         chunks = [H(c) for c in cf(chunker.chunkify(fh, -1))]
                         chunks = [H(c) for c in cf(chunker.chunkify(fh, -1))]
@@ -46,13 +46,13 @@ def test_chunkpoints64_unchanged():
     # Future chunker optimisations must not change this, or existing repos will bloat.
     # Future chunker optimisations must not change this, or existing repos will bloat.
     overall_hash = H(b"".join(runs))
     overall_hash = H(b"".join(runs))
     print(overall_hash.hex())
     print(overall_hash.hex())
-    assert overall_hash == hex_to_bin("db4b37fbe0cb841d79cfbb52bff8ac2f11040bf83a7d389640c7afb314fc4bfb")
+    assert overall_hash == hex_to_bin("676676133fb3621ada0f6cc1b18002c3e37016c9469217d18f8e382fadaf23fd")
 
 
 
 
 def test_buzhash64_chunksize_distribution():
 def test_buzhash64_chunksize_distribution():
     data = os.urandom(1048576)
     data = os.urandom(1048576)
     min_exp, max_exp, mask = 10, 16, 14  # chunk size target 16kiB, clip at 1kiB and 64kiB
     min_exp, max_exp, mask = 10, 16, 14  # chunk size target 16kiB, clip at 1kiB and 64kiB
-    chunker = ChunkerBuzHash64(b"", min_exp, max_exp, mask, 4095)
+    chunker = ChunkerBuzHash64(key0, min_exp, max_exp, mask, 4095)
     f = BytesIO(data)
     f = BytesIO(data)
     chunks = cf(chunker.chunkify(f))
     chunks = cf(chunker.chunkify(f))
     del chunks[-1]  # get rid of the last chunk, it can be smaller than 2**min_exp
     del chunks[-1]  # get rid of the last chunk, it can be smaller than 2**min_exp