浏览代码

compression: use the 2 bytes for type and level, fixes #6698

Adapt borg transfer: transferred chunks get their compression level set to "unknown" (0xFF).
Thomas Waldmann 3 年之前
父节点
当前提交
6584a92c81
共有 3 个文件被更改,包括 42 次插入和 29 次删除
  1. 6 1
      src/borg/archiver.py
  2. 32 24
      src/borg/compress.pyx
  3. 4 4
      src/borg/testsuite/key.py

+ 6 - 1
src/borg/archiver.py

@@ -379,8 +379,13 @@ class Archiver:
             return new_item
             return new_item
 
 
         def upgrade_compressed_chunk(chunk):
         def upgrade_compressed_chunk(chunk):
+            level = b'\xFF'  # FF means unknown compression level
             if ZLIB_legacy.detect(chunk):
             if ZLIB_legacy.detect(chunk):
-                chunk = ZLIB.ID + chunk  # get rid of the attic legacy: prepend separate type bytes for zlib
+                ctype = ZLIB.ID
+                chunk = ctype + level + chunk  # get rid of the attic legacy: prepend separate type/level bytes
+            else:
+                ctype = chunk[0:1]
+                chunk = ctype + level + chunk[2:]  # keep type same, but set level
             return chunk
             return chunk
 
 
         dry_run = args.dry_run
         dry_run = args.dry_run

+ 32 - 24
src/borg/compress.pyx

@@ -56,16 +56,21 @@ cdef class CompressorBase:
     also handles compression format auto detection and
     also handles compression format auto detection and
     adding/stripping the ID header (which enable auto detection).
     adding/stripping the ID header (which enable auto detection).
     """
     """
-    ID = b'\xFF\xFF'  # reserved and not used
-                      # overwrite with a unique 2-bytes bytestring in child classes
+    ID = b'\xFF'  # reserved and not used
+                  # overwrite with a unique 1-byte bytestring in child classes
     name = 'baseclass'
     name = 'baseclass'
 
 
     @classmethod
     @classmethod
     def detect(cls, data):
     def detect(cls, data):
         return data.startswith(cls.ID)
         return data.startswith(cls.ID)
 
 
-    def __init__(self, **kwargs):
-        pass
+    def __init__(self, level=255, **kwargs):
+        assert 0 <= level <= 255
+        if self.ID is not None:
+            self.id_level = self.ID + bytes((level, ))  # level 255 means "unknown level"
+            assert len(self.id_level) == 2
+        else:
+            self.id_level = None
 
 
     def decide(self, data):
     def decide(self, data):
         """
         """
@@ -85,8 +90,8 @@ cdef class CompressorBase:
         Compress *data* (bytes) and return bytes result. Prepend the ID bytes of this compressor,
         Compress *data* (bytes) and return bytes result. Prepend the ID bytes of this compressor,
         which is needed so that the correct decompressor can be used for decompression.
         which is needed so that the correct decompressor can be used for decompression.
         """
         """
-        # add ID bytes
-        return self.ID + data
+        # add id_level bytes
+        return self.id_level + data
 
 
     def decompress(self, data):
     def decompress(self, data):
         """
         """
@@ -96,7 +101,7 @@ cdef class CompressorBase:
         Only handles input generated by _this_ Compressor - for a general purpose
         Only handles input generated by _this_ Compressor - for a general purpose
         decompression method see *Compressor.decompress*.
         decompression method see *Compressor.decompress*.
         """
         """
-        # strip ID bytes
+        # strip id_level bytes
         return data[2:]
         return data[2:]
 
 
 cdef class DecidingCompressor(CompressorBase):
 cdef class DecidingCompressor(CompressorBase):
@@ -106,8 +111,8 @@ cdef class DecidingCompressor(CompressorBase):
     """
     """
     name = 'decidebaseclass'
     name = 'decidebaseclass'
 
 
-    def __init__(self, **kwargs):
-        super().__init__(**kwargs)
+    def __init__(self, level=255, **kwargs):
+        super().__init__(level=level, **kwargs)
 
 
     def _decide(self, data):
     def _decide(self, data):
         """
         """
@@ -148,9 +153,12 @@ class CNONE(CompressorBase):
     """
     """
     none - no compression, just pass through data
     none - no compression, just pass through data
     """
     """
-    ID = b'\x00\x00'
+    ID = b'\x00'
     name = 'none'
     name = 'none'
 
 
+    def __init__(self, level=255, **kwargs):
+        super().__init__(level=level, **kwargs)  # no defined levels for CNONE, so just say "unknown"
+
     def compress(self, data):
     def compress(self, data):
         return super().compress(data)
         return super().compress(data)
 
 
@@ -170,11 +178,11 @@ class LZ4(DecidingCompressor):
         - wrapper releases CPython's GIL to support multithreaded code
         - wrapper releases CPython's GIL to support multithreaded code
         - uses safe lz4 methods that never go beyond the end of the output buffer
         - uses safe lz4 methods that never go beyond the end of the output buffer
     """
     """
-    ID = b'\x01\x00'
+    ID = b'\x01'
     name = 'lz4'
     name = 'lz4'
 
 
-    def __init__(self, **kwargs):
-        pass
+    def __init__(self, level=255, **kwargs):
+        super().__init__(level=level, **kwargs)  # no defined levels for LZ4, so just say "unknown"
 
 
     def _decide(self, idata):
     def _decide(self, idata):
         """
         """
@@ -235,11 +243,11 @@ class LZMA(DecidingCompressor):
     """
     """
     lzma compression / decompression
     lzma compression / decompression
     """
     """
-    ID = b'\x02\x00'
+    ID = b'\x02'
     name = 'lzma'
     name = 'lzma'
 
 
     def __init__(self, level=6, **kwargs):
     def __init__(self, level=6, **kwargs):
-        super().__init__(**kwargs)
+        super().__init__(level=level, **kwargs)
         self.level = level
         self.level = level
         if lzma is None:
         if lzma is None:
             raise ValueError('No lzma support found.')
             raise ValueError('No lzma support found.')
@@ -270,11 +278,11 @@ class ZSTD(DecidingCompressor):
     # This is a NOT THREAD SAFE implementation.
     # This is a NOT THREAD SAFE implementation.
     # Only ONE python context must be created at a time.
     # Only ONE python context must be created at a time.
     # It should work flawlessly as long as borg will call ONLY ONE compression job at time.
     # It should work flawlessly as long as borg will call ONLY ONE compression job at time.
-    ID = b'\x03\x00'
+    ID = b'\x03'
     name = 'zstd'
     name = 'zstd'
 
 
     def __init__(self, level=3, **kwargs):
     def __init__(self, level=3, **kwargs):
-        super().__init__(**kwargs)
+        super().__init__(level=level, **kwargs)
         self.level = level
         self.level = level
 
 
     def _decide(self, idata):
     def _decide(self, idata):
@@ -335,11 +343,11 @@ class ZLIB(DecidingCompressor):
     """
     """
     zlib compression / decompression (python stdlib)
     zlib compression / decompression (python stdlib)
     """
     """
-    ID = b'\x05\x00'
+    ID = b'\x05'
     name = 'zlib'
     name = 'zlib'
 
 
     def __init__(self, level=6, **kwargs):
     def __init__(self, level=6, **kwargs):
-        super().__init__(**kwargs)
+        super().__init__(level=level, **kwargs)
         self.level = level
         self.level = level
 
 
     def _decide(self, data):
     def _decide(self, data):
@@ -373,8 +381,8 @@ class ZLIB_legacy(CompressorBase):
           Newer borg uses the ZLIB class that has separate ID bytes (as all the other
           Newer borg uses the ZLIB class that has separate ID bytes (as all the other
           compressors) and does not need this hack.
           compressors) and does not need this hack.
     """
     """
-    ID = b'\x08\x00'  # not used here, see detect()
-    # avoid all 0x.8.. IDs elsewhere!
+    ID = b'\x08'  # not used here, see detect()
+    # avoid all 0x.8 IDs elsewhere!
     name = 'zlib_legacy'
     name = 'zlib_legacy'
 
 
     @classmethod
     @classmethod
@@ -386,7 +394,7 @@ class ZLIB_legacy(CompressorBase):
         return check_ok and is_deflate
         return check_ok and is_deflate
 
 
     def __init__(self, level=6, **kwargs):
     def __init__(self, level=6, **kwargs):
-        super().__init__(**kwargs)
+        super().__init__(level=level, **kwargs)
         self.level = level
         self.level = level
 
 
     def compress(self, data):
     def compress(self, data):
@@ -478,14 +486,14 @@ class ObfuscateSize(CompressorBase):
     """
     """
     Meta-Compressor that obfuscates the compressed data size.
     Meta-Compressor that obfuscates the compressed data size.
     """
     """
-    ID = b'\x04\x00'
+    ID = b'\x04'
     name = 'obfuscate'
     name = 'obfuscate'
 
 
     header_fmt = Struct('>I')
     header_fmt = Struct('>I')
     header_len = len(header_fmt.pack(0))
     header_len = len(header_fmt.pack(0))
 
 
     def __init__(self, level=None, compressor=None):
     def __init__(self, level=None, compressor=None):
-        super().__init__()
+        super().__init__(level=level)  # data will be encrypted, so we can tell the level
         self.compressor = compressor
         self.compressor = compressor
         if level is None:
         if level is None:
             pass  # decompression
             pass  # decompression

+ 4 - 4
src/borg/testsuite/key.py

@@ -256,8 +256,8 @@ class TestKey:
         plaintext = b'123456789'
         plaintext = b'123456789'
         id = key.id_hash(plaintext)
         id = key.id_hash(plaintext)
         authenticated = key.encrypt(id, plaintext)
         authenticated = key.encrypt(id, plaintext)
-        # 0x07 is the key TYPE, \x0000 identifies no compression.
-        assert authenticated == b'\x07\x00\x00' + plaintext
+        # 0x07 is the key TYPE, \x00ff identifies no compression / unknown level.
+        assert authenticated == b'\x07\x00\xff' + plaintext
 
 
     def test_blake2_authenticated_encrypt(self, monkeypatch):
     def test_blake2_authenticated_encrypt(self, monkeypatch):
         monkeypatch.setenv('BORG_PASSPHRASE', 'test')
         monkeypatch.setenv('BORG_PASSPHRASE', 'test')
@@ -267,8 +267,8 @@ class TestKey:
         plaintext = b'123456789'
         plaintext = b'123456789'
         id = key.id_hash(plaintext)
         id = key.id_hash(plaintext)
         authenticated = key.encrypt(id, plaintext)
         authenticated = key.encrypt(id, plaintext)
-        # 0x06 is the key TYPE, 0x0000 identifies no compression.
-        assert authenticated == b'\x06\x00\x00' + plaintext
+        # 0x06 is the key TYPE, 0x00ff identifies no compression / unknown level.
+        assert authenticated == b'\x06\x00\xff' + plaintext
 
 
 
 
 class TestTAM:
 class TestTAM: