Browse Source

add zstd compression

based on willyvmm's work in PR #3116, but some changes:

- removed any multithreading changes
- add zstandard in setup.py install_requires
- tests
- fix: minimum compression level is 1 (not 0)
- use 3 for the default compression level
- use ID 03 00 for zstd
- only convert to bytes if we don't have bytes yet
- move zstd code so that code blocks are ordered by ID
- other cosmetic fixes

(cherry picked from commit 11b2311e6ebc43546cef88a39bce744d67940c5a)
Thomas Waldmann 7 năm trước cách đây
mục cha
commit
6bad2395dd
4 tập tin đã thay đổi với 86 bổ sung và 6 xóa
  1. 1 1
      setup.py
  2. 52 3
      src/borg/compress.pyx
  3. 1 1
      src/borg/helpers.py
  4. 32 1
      src/borg/testsuite/compress.py

+ 1 - 1
setup.py

@@ -24,7 +24,7 @@ on_rtd = os.environ.get('READTHEDOCS')
 
 # msgpack pure python data corruption was fixed in 0.4.6.
 # Also, we might use some rather recent API features.
-install_requires = ['msgpack-python>=0.4.6', ]
+install_requires = ['msgpack-python>=0.4.6', 'zstandard', ]
 
 # note for package maintainers: if you package borgbackup for distribution,
 # please add llfuse as a *requirement* on all platforms that have a working

+ 52 - 3
src/borg/compress.pyx

@@ -22,9 +22,15 @@ try:
 except ImportError:
     lzma = None
 
+try:
+    import zstd
+except ImportError:
+    zstd = None
+
+
 from .helpers import Buffer, DecompressionError
 
-API_VERSION = '1.1_03'
+API_VERSION = '1.1_04'
 
 cdef extern from "lz4.h":
     int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
@@ -186,6 +192,38 @@ class LZMA(CompressorBase):
             raise DecompressionError(str(e)) from None
 
 
+class ZSTD(CompressorBase):
+    """zstd compression / decompression (pypi: zstandard, gh: python-zstandard)"""
+    # This is a NOT THREAD SAFE implementation.
+    # Only ONE python context must be created at a time.
+    # It should work flawlessly as long as borg runs ONLY ONE compression job at a time.
+    ID = b'\x03\x00'
+    name = 'zstd'
+
+    def __init__(self, level=3, **kwargs):
+        super().__init__(**kwargs)
+        self.level = level
+        if zstd is None:
+            raise ValueError('No zstd support found.')
+
+    def compress(self, data):
+        if not isinstance(data, bytes):
+            data = bytes(data)  # zstd < 0.9.0 does not work with memoryview
+        cctx = zstd.ZstdCompressor(level=self.level, write_content_size=True)
+        data = cctx.compress(data)
+        return super().compress(data)
+
+    def decompress(self, data):
+        if not isinstance(data, bytes):
+            data = bytes(data)  # zstd < 0.9.0 does not work with memoryview
+        dctx = zstd.ZstdDecompressor()
+        data = super().decompress(data)
+        try:
+            return dctx.decompress(data)
+        except zstd.ZstdError as e:
+            raise DecompressionError(str(e)) from None
+
+
 class ZLIB(CompressorBase):
     """
     zlib compression / decompression (python stdlib)
@@ -289,9 +327,10 @@ COMPRESSOR_TABLE = {
     ZLIB.name: ZLIB,
     LZMA.name: LZMA,
     Auto.name: Auto,
+    ZSTD.name: ZSTD,
 }
 # List of possible compression types. Does not include Auto, since it is a meta-Compressor.
-COMPRESSOR_LIST = [LZ4, CNONE, ZLIB, LZMA, ]  # check fast stuff first
+COMPRESSOR_LIST = [LZ4, ZSTD, CNONE, ZLIB, LZMA, ]  # check fast stuff first
 
 def get_compressor(name, **kwargs):
     cls = COMPRESSOR_TABLE[name]
@@ -344,6 +383,16 @@ class CompressionSpec:
             else:
                 raise ValueError
             self.level = level
+        elif self.name in ('zstd', ):
+            if count < 2:
+                level = 3  # default compression level in zstd
+            elif count == 2:
+                level = int(values[1])
+                if not 1 <= level <= 22:
+                    raise ValueError
+            else:
+                raise ValueError
+            self.level = level
         elif self.name == 'auto':
             if 2 <= count <= 3:
                 compression = ','.join(values[1:])
@@ -357,7 +406,7 @@ class CompressionSpec:
     def compressor(self):
         if self.name in ('none', 'lz4', ):
             return get_compressor(self.name)
-        elif self.name in ('zlib', 'lzma', ):
+        elif self.name in ('zlib', 'lzma', 'zstd', ):
             return get_compressor(self.name, level=self.level)
         elif self.name == 'auto':
             return get_compressor(self.name, compressor=self.inner.compressor)

+ 1 - 1
src/borg/helpers.py

@@ -135,7 +135,7 @@ def check_extension_modules():
         raise ExtensionModuleError
     if chunker.API_VERSION != '1.1_01':
         raise ExtensionModuleError
-    if compress.API_VERSION != '1.1_03':
+    if compress.API_VERSION != '1.1_04':
         raise ExtensionModuleError
     if borg.crypto.low_level.API_VERSION != '1.1_02':
         raise ExtensionModuleError

+ 32 - 1
src/borg/testsuite/compress.py

@@ -5,9 +5,14 @@ try:
 except ImportError:
     lzma = None
 
+try:
+    import zstd
+except ImportError:
+    zstd = None
+
 import pytest
 
-from ..compress import get_compressor, Compressor, CompressionSpec, CNONE, ZLIB, LZ4, LZMA, Auto
+from ..compress import get_compressor, Compressor, CompressionSpec, CNONE, ZLIB, LZ4, LZMA, ZSTD, Auto
 
 
 buffer = bytes(2**16)
@@ -69,6 +74,16 @@ def test_lzma():
     assert data == Compressor(**params).decompress(cdata)  # autodetect
 
 
+def test_zstd():
+    if zstd is None:
+        pytest.skip("No zstd support found.")
+    c = get_compressor(name='zstd')
+    cdata = c.compress(data)
+    assert len(cdata) < len(data)
+    assert data == c.decompress(cdata)
+    assert data == Compressor(**params).decompress(cdata)  # autodetect
+
+
 def test_autodetect_invalid():
     with pytest.raises(ValueError):
         Compressor(**params).decompress(b'\xff\xfftotalcrap')
@@ -104,6 +119,12 @@ def test_compressor():
             dict(name='lzma', level=6),
             # we do not test lzma on level 9 because of the huge memory needs
         ]
+    if zstd:
+        params_list += [
+            dict(name='zstd', level=1),
+            dict(name='zstd', level=3),
+            # also avoiding high zstd levels, memory needs unclear
+        ]
     for params in params_list:
         c = Compressor(**params)
         assert data == c.decompress(c.compress(data))
@@ -154,6 +175,16 @@ def test_compression_specs():
     assert isinstance(lzma, LZMA)
     assert lzma.level == 9
 
+    zstd = CompressionSpec('zstd').compressor
+    assert isinstance(zstd, ZSTD)
+    assert zstd.level == 3
+    zstd = CompressionSpec('zstd,1').compressor
+    assert isinstance(zstd, ZSTD)
+    assert zstd.level == 1
+    zstd = CompressionSpec('zstd,22').compressor
+    assert isinstance(zstd, ZSTD)
+    assert zstd.level == 22
+
     with pytest.raises(ValueError):
         CompressionSpec('lzma,9,invalid')
     with pytest.raises(ValueError):