Browse Source

compression: use the 2 bytes for type and level, fixes #6698

Adapt borg transfer: transferred chunks get their compression level set to "unknown" (0xFF).
Thomas Waldmann 3 years ago
parent
commit
6584a92c81
3 changed files with 42 additions and 29 deletions
  1. 6 1
      src/borg/archiver.py
  2. 32 24
      src/borg/compress.pyx
  3. 4 4
      src/borg/testsuite/key.py

+ 6 - 1
src/borg/archiver.py

@@ -379,8 +379,13 @@ class Archiver:
             return new_item
 
         def upgrade_compressed_chunk(chunk):
+            level = b'\xFF'  # FF means unknown compression level
             if ZLIB_legacy.detect(chunk):
-                chunk = ZLIB.ID + chunk  # get rid of the attic legacy: prepend separate type bytes for zlib
+                ctype = ZLIB.ID
+                chunk = ctype + level + chunk  # get rid of the attic legacy: prepend separate type/level bytes
+            else:
+                ctype = chunk[0:1]
+                chunk = ctype + level + chunk[2:]  # keep type same, but set level
             return chunk
 
         dry_run = args.dry_run

+ 32 - 24
src/borg/compress.pyx

@@ -56,16 +56,21 @@ cdef class CompressorBase:
     also handles compression format auto detection and
     adding/stripping the ID header (which enable auto detection).
     """
-    ID = b'\xFF\xFF'  # reserved and not used
-                      # overwrite with a unique 2-bytes bytestring in child classes
+    ID = b'\xFF'  # reserved and not used
+                  # overwrite with a unique 1-byte bytestring in child classes
     name = 'baseclass'
 
     @classmethod
     def detect(cls, data):
         return data.startswith(cls.ID)
 
-    def __init__(self, **kwargs):
-        pass
+    def __init__(self, level=255, **kwargs):
+        assert 0 <= level <= 255
+        if self.ID is not None:
+            self.id_level = self.ID + bytes((level, ))  # level 255 means "unknown level"
+            assert len(self.id_level) == 2
+        else:
+            self.id_level = None
 
     def decide(self, data):
         """
@@ -85,8 +90,8 @@ cdef class CompressorBase:
         Compress *data* (bytes) and return bytes result. Prepend the ID bytes of this compressor,
         which is needed so that the correct decompressor can be used for decompression.
         """
-        # add ID bytes
-        return self.ID + data
+        # add id_level bytes
+        return self.id_level + data
 
     def decompress(self, data):
         """
@@ -96,7 +101,7 @@ cdef class CompressorBase:
         Only handles input generated by _this_ Compressor - for a general purpose
         decompression method see *Compressor.decompress*.
         """
-        # strip ID bytes
+        # strip id_level bytes
         return data[2:]
 
 cdef class DecidingCompressor(CompressorBase):
@@ -106,8 +111,8 @@ cdef class DecidingCompressor(CompressorBase):
     """
     name = 'decidebaseclass'
 
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
+    def __init__(self, level=255, **kwargs):
+        super().__init__(level=level, **kwargs)
 
     def _decide(self, data):
         """
@@ -148,9 +153,12 @@ class CNONE(CompressorBase):
     """
     none - no compression, just pass through data
     """
-    ID = b'\x00\x00'
+    ID = b'\x00'
     name = 'none'
 
+    def __init__(self, level=255, **kwargs):
+        super().__init__(level=level, **kwargs)  # no defined levels for CNONE, so just say "unknown"
+
     def compress(self, data):
         return super().compress(data)
 
@@ -170,11 +178,11 @@ class LZ4(DecidingCompressor):
         - wrapper releases CPython's GIL to support multithreaded code
         - uses safe lz4 methods that never go beyond the end of the output buffer
     """
-    ID = b'\x01\x00'
+    ID = b'\x01'
     name = 'lz4'
 
-    def __init__(self, **kwargs):
-        pass
+    def __init__(self, level=255, **kwargs):
+        super().__init__(level=level, **kwargs)  # no defined levels for LZ4, so just say "unknown"
 
     def _decide(self, idata):
         """
@@ -235,11 +243,11 @@ class LZMA(DecidingCompressor):
     """
     lzma compression / decompression
     """
-    ID = b'\x02\x00'
+    ID = b'\x02'
     name = 'lzma'
 
     def __init__(self, level=6, **kwargs):
-        super().__init__(**kwargs)
+        super().__init__(level=level, **kwargs)
         self.level = level
         if lzma is None:
             raise ValueError('No lzma support found.')
@@ -270,11 +278,11 @@ class ZSTD(DecidingCompressor):
     # This is a NOT THREAD SAFE implementation.
     # Only ONE python context must be created at a time.
     # It should work flawlessly as long as borg will call ONLY ONE compression job at time.
-    ID = b'\x03\x00'
+    ID = b'\x03'
     name = 'zstd'
 
     def __init__(self, level=3, **kwargs):
-        super().__init__(**kwargs)
+        super().__init__(level=level, **kwargs)
         self.level = level
 
     def _decide(self, idata):
@@ -335,11 +343,11 @@ class ZLIB(DecidingCompressor):
     """
     zlib compression / decompression (python stdlib)
     """
-    ID = b'\x05\x00'
+    ID = b'\x05'
     name = 'zlib'
 
     def __init__(self, level=6, **kwargs):
-        super().__init__(**kwargs)
+        super().__init__(level=level, **kwargs)
         self.level = level
 
     def _decide(self, data):
@@ -373,8 +381,8 @@ class ZLIB_legacy(CompressorBase):
           Newer borg uses the ZLIB class that has separate ID bytes (as all the other
           compressors) and does not need this hack.
     """
-    ID = b'\x08\x00'  # not used here, see detect()
-    # avoid all 0x.8.. IDs elsewhere!
+    ID = b'\x08'  # not used here, see detect()
+    # avoid all 0x.8 IDs elsewhere!
     name = 'zlib_legacy'
 
     @classmethod
@@ -386,7 +394,7 @@ class ZLIB_legacy(CompressorBase):
         return check_ok and is_deflate
 
     def __init__(self, level=6, **kwargs):
-        super().__init__(**kwargs)
+        super().__init__(level=level, **kwargs)
         self.level = level
 
     def compress(self, data):
@@ -478,14 +486,14 @@ class ObfuscateSize(CompressorBase):
     """
     Meta-Compressor that obfuscates the compressed data size.
     """
-    ID = b'\x04\x00'
+    ID = b'\x04'
     name = 'obfuscate'
 
     header_fmt = Struct('>I')
     header_len = len(header_fmt.pack(0))
 
     def __init__(self, level=None, compressor=None):
-        super().__init__()
+        super().__init__(level=level)  # data will be encrypted, so we can tell the level
         self.compressor = compressor
         if level is None:
             pass  # decompression

+ 4 - 4
src/borg/testsuite/key.py

@@ -256,8 +256,8 @@ class TestKey:
         plaintext = b'123456789'
         id = key.id_hash(plaintext)
         authenticated = key.encrypt(id, plaintext)
-        # 0x07 is the key TYPE, \x0000 identifies no compression.
-        assert authenticated == b'\x07\x00\x00' + plaintext
+        # 0x07 is the key TYPE, 0x00ff identifies no compression / unknown level.
+        assert authenticated == b'\x07\x00\xff' + plaintext
 
     def test_blake2_authenticated_encrypt(self, monkeypatch):
         monkeypatch.setenv('BORG_PASSPHRASE', 'test')
@@ -267,8 +267,8 @@ class TestKey:
         plaintext = b'123456789'
         id = key.id_hash(plaintext)
         authenticated = key.encrypt(id, plaintext)
-        # 0x06 is the key TYPE, 0x0000 identifies no compression.
-        assert authenticated == b'\x06\x00\x00' + plaintext
+        # 0x06 is the key TYPE, 0x00ff identifies no compression / unknown level.
+        assert authenticated == b'\x06\x00\xff' + plaintext
 
 
 class TestTAM: