Kaynağa Gözat

Merge pull request #8872 from ThomasWaldmann/chunker-params-reject-even-window-size

ChunkerParams: reject even window size for buzhash, fixes #8868
TW 1 hafta önce
ebeveyn
işleme
2b655fccf7

+ 1 - 1
docs/internals/data-structures.rst

@@ -462,7 +462,7 @@ can be used to tune the chunker parameters, the default is:
 - CHUNK_MIN_EXP = 19 (minimum chunk size = 2^19 B = 512 kiB)
 - CHUNK_MAX_EXP = 23 (maximum chunk size = 2^23 B = 8 MiB)
 - HASH_MASK_BITS = 21 (target chunk size ~= 2^21 B = 2 MiB)
-- HASH_WINDOW_SIZE = 4095 [B] (`0xFFF`)
+- HASH_WINDOW_SIZE = 4095 [B] (`0xFFF`) (must be an odd number)
 
 The buzhash table is altered by XORing it with a seed randomly generated once
 for the repository, and stored encrypted in the keyfile. This is to prevent

+ 1 - 2
docs/misc/create_chunker-params.txt

@@ -18,7 +18,7 @@ determined by the window's contents rather than the min/max. chunk size).
 Default: 21 (statistically, chunks will be about 2^21 == 2MiB in size)
 
 HASH_WINDOW_SIZE: the size of the window used for the rolling hash computation.
-Default: 4095B
+Must be an odd number. Default: 4095B
 
 
 Trying it out
@@ -114,4 +114,3 @@ $ ls -l /extra/repo-xl/index*
 
 $ du -sk /extra/repo-xl/
 14253464    /extra/repo-xl/
-

+ 2 - 0
src/borg/helpers/parseformat.py

@@ -201,6 +201,8 @@ def ChunkerParams(s):
             raise argparse.ArgumentTypeError(
                 "max. chunk size exponent must not be more than 23 (2^23 = 8MiB max. chunk size)"
             )
+        if window_size % 2 == 0:
+            raise argparse.ArgumentTypeError("window_size must be an uneven (odd) number")
         return CH_BUZHASH, chunk_min, chunk_max, chunk_mask, window_size
     raise argparse.ArgumentTypeError("invalid chunker params")
 

+ 3 - 3
src/borg/testsuite/archiver/recreate_cmd_test.py

@@ -138,7 +138,7 @@ def test_recreate_rechunkify(archivers, request):
         fd.write(b"a" * 280)
         fd.write(b"b" * 280)
     cmd(archiver, "repo-create", RK_ENCRYPTION)
-    cmd(archiver, "create", "test1", "input", "--chunker-params", "7,9,8,128")
+    cmd(archiver, "create", "test1", "input", "--chunker-params", "7,9,8,127")
     cmd(archiver, "create", "test2", "input", "--files-cache=disabled")
     num_chunks1 = int(cmd(archiver, "list", "test1", "input/large_file", "--format", "{num_chunks}"))
     num_chunks2 = int(cmd(archiver, "list", "test2", "input/large_file", "--format", "{num_chunks}"))
@@ -159,7 +159,7 @@ def test_recreate_fixed_rechunkify(archivers, request):
     with open(os.path.join(archiver.input_path, "file"), "wb") as fd:
         fd.write(b"a" * 8192)
     cmd(archiver, "repo-create", RK_ENCRYPTION)
-    cmd(archiver, "create", "test", "input", "--chunker-params", "7,9,8,128")
+    cmd(archiver, "create", "test", "input", "--chunker-params", "7,9,8,127")
     output = cmd(archiver, "list", "test", "input/file", "--format", "{num_chunks}")
     num_chunks = int(output)
     assert num_chunks > 2
@@ -175,7 +175,7 @@ def test_recreate_no_rechunkify(archivers, request):
         fd.write(b"a" * 8192)
     cmd(archiver, "repo-create", RK_ENCRYPTION)
     # first create an archive with non-default chunker params:
-    cmd(archiver, "create", "test", "input", "--chunker-params", "7,9,8,128")
+    cmd(archiver, "create", "test", "input", "--chunker-params", "7,9,8,127")
     output = cmd(archiver, "list", "test", "input/file", "--format", "{num_chunks}")
     num_chunks = int(output)
     # now recreate the archive and do NOT specify chunker params:

+ 1 - 0
src/borg/testsuite/helpers/parseformat_test.py

@@ -596,6 +596,7 @@ def test_valid_chunkerparams(chunker_params, expected_return):
         "buzhash,5,7,6,4095",  # too small min. size
         "buzhash,19,24,21,4095",  # too big max. size
         "buzhash,23,19,21,4095",  # violates min <= mask <= max
+        "buzhash,19,23,21,4096",  # even window size
         "fixed,63",  # too small block size
         "fixed,%d,%d" % (MAX_DATA_SIZE + 1, 4096),  # too big block size
         "fixed,%d,%d" % (4096, MAX_DATA_SIZE + 1),  # too big header size