Browse Source

Merge pull request #8765 from ThomasWaldmann/padme-obfuscate-1.4

implement padme chunk size obfuscation (SPEC 250), fixes #8705
TW 6 months ago
parent
commit
51434dd038
3 changed files with 65 additions and 6 deletions
  1. 15 5
      src/borg/archiver.py
  2. 19 1
      src/borg/compress.pyx
  3. 31 0
      src/borg/testsuite/compress.py

+ 15 - 5
src/borg/archiver.py

@@ -2423,13 +2423,13 @@ class Archiver:
         from within a shell, the patterns should be quoted to protect them from
         expansion.
 
-        Patterns matching special characters, e.g. white space, within a shell may 
+        Patterns matching special characters, e.g. white space, within a shell may
         require adjustments, such as putting quotation marks around the arguments.
-        Example: 
+        Example:
         Using bash, the following command line option would match and exclude "item name":
         ``--pattern='-path/item name'``
-        Note that when patterns are used within a pattern file directly read by borg,  
-        e.g. when using ``--exclude-from`` or ``--patterns-from``, there is no shell 
+        Note that when patterns are used within a pattern file directly read by borg,
+        e.g. when using ``--exclude-from`` or ``--patterns-from``, there is no shell
         involved and thus no quotation marks are required.
 
         The ``--exclude-from`` option permits loading exclusion patterns from a text
@@ -2742,6 +2742,15 @@ class Archiver:
               ...
               123: 8MiB (max.)
 
+            *Padmé padding* (deterministic)
+
+            ::
+
+              250: pads to sums of powers of 2, max 12% overhead
+
+            Uses the Padmé algorithm to deterministically pad the compressed size to a sum of
+            powers of 2, limiting overhead to 12%. See https://lbarman.ch/blog/padme/ for details.
+
         Examples::
 
             borg create --compression lz4 REPO::ARCHIVE data
@@ -2753,7 +2762,8 @@ class Archiver:
             borg create --compression auto,lzma ...
             borg create --compression obfuscate,110,none ...
             borg create --compression obfuscate,3,auto,zstd,10 ...
-            borg create --compression obfuscate,2,zstd,6 ...\n\n''')
+            borg create --compression obfuscate,2,zstd,6 ...
+            borg create --compression obfuscate,250,zstd,3 ...\n\n''')
 
     def do_help(self, parser, commands, args):
         if not args.topic:

+ 19 - 1
src/borg/compress.pyx

@@ -16,6 +16,7 @@ decompressor.
 """
 
 from argparse import ArgumentTypeError
+import math
 import random
 from struct import Struct
 import zlib
@@ -25,7 +26,6 @@ try:
 except ImportError:
     lzma = None
 
-
 from .constants import MAX_DATA_SIZE
 from .helpers import Buffer, DecompressionError
 
@@ -459,6 +459,8 @@ class ObfuscateSize(CompressorBase):
         elif 110 <= level <= 123:
             self._obfuscate = self._random_padding_obfuscate
             self.max_padding_size = 2 ** (level - 100)  # 1kiB .. 8MiB
+        elif level == 250:  # Padmé
+            self._obfuscate = self._padme_obfuscate
 
     def _obfuscate(self, compr_size):
         # implementations need to return the size of obfuscation data,
@@ -499,6 +501,22 @@ class ObfuscateSize(CompressorBase):
             self.compressor = Compressor.detect(compressed_data)()
         return self.compressor.decompress(compressed_data)  # decompress data
 
+    def _padme_obfuscate(self, compr_size):
+        """Return the number of padding bytes Padmé adds to *compr_size* bytes.
+
+        The size is rounded up so that its low ``E - S`` bits are zero, where ``E`` is
+        floor(log2(compr_size)) and ``S`` is floor(log2(E)) + 1. Sizes below 2 get no padding.
+        """
+        if compr_size < 2:  # the log2 based math below needs compr_size >= 2
+            return 0
+        # For a positive int n, n.bit_length() - 1 == floor(log2(n)), without float rounding.
+        exponent = compr_size.bit_length() - 1  # E
+        exponent_bits = exponent.bit_length()  # S
+        low_mask = (1 << (exponent - exponent_bits)) - 1  # the low bits that must end up zero
+        padded_size = (compr_size + low_mask) & ~low_mask  # round up to a multiple of low_mask + 1
+        # Never exceed 12% overhead.
+        capped_size = min(padded_size, int(compr_size * 1.12))
+        return capped_size - compr_size  # only the additional padding, not the total size
 
 # Maps valid compressor names to their class
 COMPRESSOR_TABLE = {

+ 31 - 0
src/borg/testsuite/compress.py

@@ -198,6 +198,37 @@ def test_obfuscate():
     assert 6 + 2 + 1100 <= len(compressed) <= 6 + 2 + 1100 + 1024
 
 
+@pytest.mark.parametrize(
+    "data_length, expected_padding",
+    [
+        (10, 0),
+        (100, 4),
+        (1000, 24),
+        (10000, 240),
+        (20000, 480),
+        (50000, 1200),
+        (100000, 352),
+        (1000000, 15808),
+        (5000000, 111808),
+        (10000000, 223616),
+        (20000000, 447232),
+    ],
+)
+def test_padme_obfuscation(data_length, expected_padding):
+    """Check the exact output size of level 250 (Padmé) obfuscation wrapped around "none"."""
+    # The inner compressor adds an inner header of 2 bytes, so the payload is made 2 bytes
+    # shorter; this allows using (almost) the same test cases as in the master branch.
+    payload = b"x" * (data_length - 2)
+    padme = Compressor(name="obfuscate", level=250, compressor=Compressor("none"))
+    compressed = padme.compress(payload)
+
+    # The outer "obfuscate" pseudo-compressor adds an outer header of 6 bytes.
+    expected_padded_size = 6 + data_length + expected_padding
+    assert (
+        len(compressed) == expected_padded_size
+    ), f"For {data_length}, expected {expected_padded_size}, got {len(compressed)}"
+
+
 def test_compression_specs():
     with pytest.raises(argparse.ArgumentTypeError):
         CompressionSpec('')