2
0
Эх сурвалжийг харах

chunker params parsing: add more validation

avoiding too large chunks that the repository can not store.

avoiding too small chunks that would create excessively many chunks
and way too much storage and management overhead. we only disallow
extreme cases, this does not mean that everything that is allowed
also makes sense in practice (and does not eat lots of memory and
storage space).
Thomas Waldmann 6 жил өмнө
parent
commit
be2c061733

+ 14 - 0
src/borg/helpers/parseformat.py

@@ -116,12 +116,26 @@ def ChunkerParams(s):
     if algo == 'fixed' and 2 <= count <= 3:  # fixed, block_size[, header_size]
     if algo == 'fixed' and 2 <= count <= 3:  # fixed, block_size[, header_size]
         block_size = int(params[1])
         block_size = int(params[1])
         header_size = int(params[2]) if count == 3 else 0
         header_size = int(params[2]) if count == 3 else 0
+        if block_size < 64:
+            # we are only disallowing the most extreme cases of abuse here - this does NOT imply
+            # that cutting chunks of the minimum allowed size is efficient concerning storage
+            # or in-memory chunk management.
+            # choose the block (chunk) size wisely: if you have a lot of data and you cut
+            # it into very small chunks, you are asking for trouble!
+            raise ValueError('block_size must not be less than 64 Bytes')
+        if block_size > MAX_DATA_SIZE or header_size > MAX_DATA_SIZE:
+            raise ValueError('block_size and header_size must not exceed MAX_DATA_SIZE [%d]' % MAX_DATA_SIZE)
         return algo, block_size, header_size
         return algo, block_size, header_size
     if algo == 'default' and count == 1:  # default
     if algo == 'default' and count == 1:  # default
         return CHUNKER_PARAMS
         return CHUNKER_PARAMS
     # this must stay last as it deals with old-style compat mode (no algorithm, 4 params, buzhash):
     # this must stay last as it deals with old-style compat mode (no algorithm, 4 params, buzhash):
     if algo == 'buzhash' and count == 5 or count == 4:  # [buzhash, ]chunk_min, chunk_max, chunk_mask, window_size
     if algo == 'buzhash' and count == 5 or count == 4:  # [buzhash, ]chunk_min, chunk_max, chunk_mask, window_size
         chunk_min, chunk_max, chunk_mask, window_size = [int(p) for p in params[count - 4:]]
         chunk_min, chunk_max, chunk_mask, window_size = [int(p) for p in params[count - 4:]]
+        if not (chunk_min <= chunk_mask <= chunk_max):
+            raise ValueError('required: chunk_min <= chunk_mask <= chunk_max')
+        if chunk_min < 6:
+            # see comment in 'fixed' algo check
+            raise ValueError('min. chunk size exponent must not be less than 6 (2^6 = 64B min. chunk size)')
         if chunk_max > 23:
         if chunk_max > 23:
             raise ValueError('max. chunk size exponent must not be more than 23 (2^23 = 8MiB max. chunk size)')
             raise ValueError('max. chunk size exponent must not be more than 23 (2^23 = 8MiB max. chunk size)')
         return 'buzhash', chunk_min, chunk_max, chunk_mask, window_size
         return 'buzhash', chunk_min, chunk_max, chunk_mask, window_size

+ 15 - 3
src/borg/testsuite/helpers.py

@@ -9,6 +9,7 @@ from time import sleep
 import pytest
 import pytest
 
 
 from .. import platform
 from .. import platform
+from ..constants import MAX_DATA_SIZE
 from ..helpers import Location
 from ..helpers import Location
 from ..helpers import Buffer
 from ..helpers import Buffer
 from ..helpers import partial_format, format_file_size, parse_file_size, format_timedelta, format_line, PlaceholderError, replace_placeholders
 from ..helpers import partial_format, format_file_size, parse_file_size, format_timedelta, format_line, PlaceholderError, replace_placeholders
@@ -313,12 +314,23 @@ def test_chunkerparams():
     assert ChunkerParams('19,23,21,4095') == ('buzhash', 19, 23, 21, 4095)
     assert ChunkerParams('19,23,21,4095') == ('buzhash', 19, 23, 21, 4095)
     assert ChunkerParams('buzhash,19,23,21,4095') == ('buzhash', 19, 23, 21, 4095)
     assert ChunkerParams('buzhash,19,23,21,4095') == ('buzhash', 19, 23, 21, 4095)
     assert ChunkerParams('10,23,16,4095') == ('buzhash', 10, 23, 16, 4095)
     assert ChunkerParams('10,23,16,4095') == ('buzhash', 10, 23, 16, 4095)
-    with pytest.raises(ValueError):
-        ChunkerParams('19,24,21,4095')
     assert ChunkerParams('fixed,4096') == ('fixed', 4096, 0)
     assert ChunkerParams('fixed,4096') == ('fixed', 4096, 0)
     assert ChunkerParams('fixed,4096,200') == ('fixed', 4096, 200)
     assert ChunkerParams('fixed,4096,200') == ('fixed', 4096, 200)
+    # invalid values checking
+    with pytest.raises(ValueError):
+        ChunkerParams('crap,1,2,3,4')  # invalid algo
+    with pytest.raises(ValueError):
+        ChunkerParams('buzhash,5,7,6,4095')  # too small min. size
+    with pytest.raises(ValueError):
+        ChunkerParams('buzhash,19,24,21,4095')  # too big max. size
+    with pytest.raises(ValueError):
+        ChunkerParams('buzhash,23,19,21,4095')  # violates min <= mask <= max
+    with pytest.raises(ValueError):
+        ChunkerParams('fixed,63')  # too small block size
+    with pytest.raises(ValueError):
+        ChunkerParams('fixed,%d,%d' % (MAX_DATA_SIZE + 1, 4096))  # too big block size
     with pytest.raises(ValueError):
     with pytest.raises(ValueError):
-        ChunkerParams('crap,1,2,3,4')
+        ChunkerParams('fixed,%d,%d' % (4096, MAX_DATA_SIZE + 1))  # too big header size
 
 
 
 
 class MakePathSafeTestCase(BaseTestCase):
 class MakePathSafeTestCase(BaseTestCase):