Просмотр исходного кода

tests: check buzhash chunksize distribution, see #7586

Thomas Waldmann 2 лет назад
Родитель
Сommit
965e8a957f
1 измененных файлов с 25 добавлено и 1 удалено
  1. 25 1
      src/borg/testsuite/chunker_pytest.py

+ 25 - 1
src/borg/testsuite/chunker_pytest.py

@@ -5,7 +5,7 @@ import tempfile
 import pytest
 
 from .chunker import cf
-from ..chunker import ChunkerFixed, sparsemap, has_seek_hole
+from ..chunker import Chunker, ChunkerFixed, sparsemap, has_seek_hole
 from ..constants import *  # NOQA
 
 BS = 4096  # fs block size
@@ -136,3 +136,27 @@ def test_chunkify_sparse(tmpdir, fname, sparse_map, header_size, sparse):
     fn = str(tmpdir / fname)
     make_sparsefile(fn, sparse_map, header_size=header_size)
     get_chunks(fn, sparse=sparse, header_size=header_size) == make_content(sparse_map, header_size=header_size)
+
+
+def test_buzhash_chunksize_distribution():
+    data = os.urandom(1048576)
+    min_exp, max_exp, mask = 10, 16, 14  # chunk size target 16kiB, clip at 1kiB and 64kiB
+    chunker = Chunker(0, min_exp, max_exp, mask, 4095)
+    f = BytesIO(data)
+    chunks = cf(chunker.chunkify(f))
+    chunk_sizes = [len(chunk) for chunk in chunks]
+    chunks_count = len(chunks)
+    min_chunksize_observed = min(chunk_sizes)
+    max_chunksize_observed = max(chunk_sizes)
+    min_count = sum((int(size == 2 ** min_exp) for size in chunk_sizes))
+    max_count = sum((int(size == 2 ** max_exp) for size in chunk_sizes))
+    print(f"count: {chunks_count} min: {min_chunksize_observed} max: {max_chunksize_observed} "
+          f"min count: {min_count} max count: {max_count}")
+    # usually there will about 64 chunks
+    assert 32 < chunks_count < 128
+    # chunks always must be between min and max (clipping must work):
+    assert min_chunksize_observed >= 2 ** min_exp
+    assert max_chunksize_observed <= 2 ** max_exp
+    # most chunks should be cut due to buzhash triggering, not due to clipping at min/max size:
+    assert min_count < 10
+    assert max_count < 10