浏览代码

implement padme chunk size obfuscation (SPEC 250), fixes #8705

---------

Co-authored-by: Divyansh Agrawal <your.email@example.com>
Divyansh Agrawal 2 月之前
父节点
当前提交
d2066ed13d
共有 3 个文件被更改，包括 60 次插入和 2 次删除
  1. 11 1
      src/borg/archiver/help_cmd.py
  2. 19 1
      src/borg/compress.pyx
  3. 30 0
      src/borg/testsuite/compress_test.py

+ 11 - 1
src/borg/archiver/help_cmd.py

@@ -470,6 +470,15 @@ class HelpMixIn:
               ...
               123: 8MiB (max.)
 
+            *Padmé padding* (deterministic)
+
+            ::
+
+              250: pads to sums of powers of 2, max 12% overhead
+
+            Uses the Padmé algorithm to deterministically pad the compressed size to a sum of
+            powers of 2, limiting overhead to 12%. See https://lbarman.ch/blog/padme/ for details.
+
         Examples::
 
             borg create --compression lz4 --repo REPO ARCHIVE data
@@ -481,7 +490,8 @@ class HelpMixIn:
             borg create --compression auto,lzma ...
             borg create --compression obfuscate,110,none ...
             borg create --compression obfuscate,3,auto,zstd,10 ...
-            borg create --compression obfuscate,2,zstd,6 ...\n\n"""
+            borg create --compression obfuscate,2,zstd,6 ...
+            borg create --compression obfuscate,250,zstd,3 ...\n\n"""
     )
 
     def do_help(self, parser, commands, args):

+ 19 - 1
src/borg/compress.pyx

@@ -16,6 +16,7 @@ decompressor.
 """
 
 from argparse import ArgumentTypeError
+import math
 import random
 from struct import Struct
 import zlib
@@ -25,7 +26,6 @@ try:
 except ImportError:
     lzma = None
 
-
 from .constants import MAX_DATA_SIZE
 from .helpers import Buffer, DecompressionError
 
@@ -556,6 +556,8 @@ class ObfuscateSize(CompressorBase):
         elif 110 <= level <= 123:
             self._obfuscate = self._random_padding_obfuscate
             self.max_padding_size = 2 ** (level - 100)  # 1kiB .. 8MiB
+        elif level == 250:  # Padmé
+            self._obfuscate = self._padme_obfuscate
 
     def _obfuscate(self, compr_size):
         # implementations need to return the size of obfuscation data,
@@ -600,6 +602,22 @@ class ObfuscateSize(CompressorBase):
             self.compressor = compressor_cls()
         return self.compressor.decompress(meta, compressed_data)  # decompress data
 
+    def _padme_obfuscate(self, compr_size):  # Padmé padding: returns how many extra bytes to add to compr_size
+        if compr_size < 2:  # log2(E) below needs E >= 1, i.e. compr_size >= 2; smaller sizes get no padding
+            return 0
+
+        E = math.floor(math.log2(compr_size))  # Index of the highest set bit of compr_size
+        S = math.floor(math.log2(E)) + 1       # Number of bits needed to write E in binary
+        lastBits = E - S                       # Count of low-order bits that the rounding forces to zero
+        bitMask = (2 ** lastBits - 1)          # Mask with the lastBits low-order bits set
+
+        padded_size = (compr_size + bitMask) & ~bitMask  # Round up to the next multiple of 2 ** lastBits
+
+        # Ensure max 12% overhead
+        max_allowed = int(compr_size * 1.12)  # float multiplication, truncated toward zero by int()
+        final_size = min(padded_size, max_allowed)  # NOTE(review): if this cap ever binds, final_size is no longer a multiple of 2 ** lastBits - confirm it cannot
+
+        return final_size - compr_size  # Return only the additional padding size
 
 # Maps valid compressor names to their class
 COMPRESSOR_TABLE = {

+ 30 - 0
src/borg/testsuite/compress_test.py

@@ -210,3 +210,33 @@ def test_specified_compression_level(c_type, c_name, c_levels):
 def test_invalid_compression_level(invalid_spec):
     with pytest.raises(argparse.ArgumentTypeError):
         CompressionSpec(invalid_spec)
+
+
+@pytest.mark.parametrize(
+    "data_length, expected_padding",  # input size in bytes, and the extra bytes expected on top of it
+    [
+        (0, 0),  # below the compr_size < 2 guard of _padme_obfuscate: no padding
+        (1, 0),  # below the compr_size < 2 guard of _padme_obfuscate: no padding
+        (10, 0),  # 10 is already a multiple of 2 ** lastBits (lastBits == 1), so nothing is added
+        (100, 4),
+        (1000, 24),
+        (10000, 240),
+        (20000, 480),
+        (50000, 1200),
+        (100000, 352),
+        (1000000, 15808),
+        (5000000, 111808),
+        (10000000, 223616),
+        (20000000, 447232),
+    ],
+)
+def test_padme_obfuscation(data_length, expected_padding):  # checks output length of obfuscate level 250 wrapped around the "none" compressor
+    compressor = Compressor(name="obfuscate", level=250, compressor=Compressor("none"))
+    data = b"x" * data_length  # payload content does not matter here; only lengths are asserted
+    meta, compressed = compressor.compress({}, data)  # meta is not inspected by this test
+
+    expected_padded_size = data_length + expected_padding  # presumably the inner "none" compressor adds no bytes of its own - TODO confirm
+
+    assert (
+        len(compressed) == expected_padded_size
+    ), f"For {data_length}, expected {expected_padded_size}, got {len(compressed)}"