Jelajahi Sumber

add zstd compression

based on willyvmm's work in PR #3116, but some changes:

- removed any multithreading changes
- add zstandard in setup.py install_requires
- tests
- fix: minimum compression level is 1 (not 0)
- use 3 for the default compression level
- use ID 03 00 for zstd
- only convert to bytes if we don't have bytes yet
- move zstd code so that code blocks are ordered by ID
- other cosmetic fixes
Thomas Waldmann 8 tahun lalu
induk
melakukan
11b2311e6e
4 mengubah file dengan 87 tambahan dan 5 penghapusan
  1. 2 0
      setup.py
  2. 52 3
      src/borg/compress.pyx
  3. 1 1
      src/borg/helpers/checks.py
  4. 32 1
      src/borg/testsuite/compress.py

+ 2 - 0
setup.py

@@ -27,6 +27,8 @@ install_requires = [
     # Also, we might use some rather recent API features.
     'msgpack-python>=0.4.6',
     'pyzmq',
+    # https://github.com/indygreg/python-zstandard
+    'zstandard',
 ]
 
 # note for package maintainers: if you package borgbackup for distribution,

+ 52 - 3
src/borg/compress.pyx

@@ -22,9 +22,15 @@ try:
 except ImportError:
     lzma = None
 
+try:
+    import zstd
+except ImportError:
+    zstd = None
+
+
 from .helpers import Buffer, DecompressionError
 
-API_VERSION = '1.1_03'
+API_VERSION = '1.1_04'
 
 cdef extern from "lz4.h":
     int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
@@ -188,6 +194,38 @@ class LZMA(CompressorBase):
             raise DecompressionError(str(e)) from None
 
 
+class ZSTD(CompressorBase):
+    """zstd compression / decompression (pypi: zstandard, gh: python-zstandard)"""
+    # This is a NOT THREAD SAFE implementation.
+    # Only ONE python context must be created at a time.
+    # It should work flawlessly as long as borg will call ONLY ONE compression job at a time.
+    ID = b'\x03\x00'
+    name = 'zstd'
+
+    def __init__(self, level=3, **kwargs):
+        super().__init__(**kwargs)
+        self.level = level
+        if zstd is None:
+            raise ValueError('No zstd support found.')
+
+    def compress(self, data):
+        if not isinstance(data, bytes):
+            data = bytes(data)  # zstd < 0.9.0 does not work with memoryview
+        cctx = zstd.ZstdCompressor(level=self.level, write_content_size=True)
+        data = cctx.compress(data)
+        return super().compress(data)
+
+    def decompress(self, data):
+        if not isinstance(data, bytes):
+            data = bytes(data)  # zstd < 0.9.0 does not work with memoryview
+        dctx = zstd.ZstdDecompressor()
+        data = super().decompress(data)
+        try:
+            return dctx.decompress(data)
+        except zstd.ZstdError as e:
+            raise DecompressionError(str(e)) from None
+
+
 class ZLIB(CompressorBase):
     """
     zlib compression / decompression (python stdlib)
@@ -291,9 +329,10 @@ COMPRESSOR_TABLE = {
     ZLIB.name: ZLIB,
     LZMA.name: LZMA,
     Auto.name: Auto,
+    ZSTD.name: ZSTD,
 }
 # List of possible compression types. Does not include Auto, since it is a meta-Compressor.
-COMPRESSOR_LIST = [LZ4, CNONE, ZLIB, LZMA, ]  # check fast stuff first
+COMPRESSOR_LIST = [LZ4, ZSTD, CNONE, ZLIB, LZMA, ]  # check fast stuff first
 
 def get_compressor(name, **kwargs):
     cls = COMPRESSOR_TABLE[name]
@@ -346,6 +385,16 @@ class CompressionSpec:
             else:
                 raise ValueError
             self.level = level
+        elif self.name in ('zstd', ):
+            if count < 2:
+                level = 3  # default compression level in zstd
+            elif count == 2:
+                level = int(values[1])
+                if not 1 <= level <= 22:
+                    raise ValueError
+            else:
+                raise ValueError
+            self.level = level
         elif self.name == 'auto':
             if 2 <= count <= 3:
                 compression = ','.join(values[1:])
@@ -359,7 +408,7 @@ class CompressionSpec:
     def compressor(self):
         if self.name in ('none', 'lz4', ):
             return get_compressor(self.name)
-        elif self.name in ('zlib', 'lzma', ):
+        elif self.name in ('zlib', 'lzma', 'zstd', ):
             return get_compressor(self.name, level=self.level)
         elif self.name == 'auto':
             return get_compressor(self.name, compressor=self.inner.compressor)

+ 1 - 1
src/borg/helpers/checks.py

@@ -24,7 +24,7 @@ def check_extension_modules():
         raise ExtensionModuleError
     if chunker.API_VERSION != '1.1_01':
         raise ExtensionModuleError
-    if compress.API_VERSION != '1.1_03':
+    if compress.API_VERSION != '1.1_04':
         raise ExtensionModuleError
     if borg.crypto.low_level.API_VERSION != '1.1_02':
         raise ExtensionModuleError

+ 32 - 1
src/borg/testsuite/compress.py

@@ -5,9 +5,14 @@ try:
 except ImportError:
     lzma = None
 
+try:
+    import zstd
+except ImportError:
+    zstd = None
+
 import pytest
 
-from ..compress import get_compressor, Compressor, CompressionSpec, CNONE, ZLIB, LZ4, LZMA, Auto
+from ..compress import get_compressor, Compressor, CompressionSpec, CNONE, ZLIB, LZ4, LZMA, ZSTD, Auto
 
 
 buffer = bytes(2**16)
@@ -69,6 +74,16 @@ def test_lzma():
     assert data == Compressor(**params).decompress(cdata)  # autodetect
 
 
+def test_zstd():
+    if zstd is None:
+        pytest.skip("No zstd support found.")
+    c = get_compressor(name='zstd')
+    cdata = c.compress(data)
+    assert len(cdata) < len(data)
+    assert data == c.decompress(cdata)
+    assert data == Compressor(**params).decompress(cdata)  # autodetect
+
+
 def test_autodetect_invalid():
     with pytest.raises(ValueError):
         Compressor(**params).decompress(b'\xff\xfftotalcrap')
@@ -104,6 +119,12 @@ def test_compressor():
             dict(name='lzma', level=6),
             # we do not test lzma on level 9 because of the huge memory needs
         ]
+    if zstd:
+        params_list += [
+            dict(name='zstd', level=1),
+            dict(name='zstd', level=3),
+            # also avoiding high zstd levels, memory needs unclear
+        ]
     for params in params_list:
         c = Compressor(**params)
         assert data == c.decompress(c.compress(data))
@@ -154,6 +175,16 @@ def test_compression_specs():
     assert isinstance(lzma, LZMA)
     assert lzma.level == 9
 
+    zstd = CompressionSpec('zstd').compressor
+    assert isinstance(zstd, ZSTD)
+    assert zstd.level == 3
+    zstd = CompressionSpec('zstd,1').compressor
+    assert isinstance(zstd, ZSTD)
+    assert zstd.level == 1
+    zstd = CompressionSpec('zstd,22').compressor
+    assert isinstance(zstd, ZSTD)
+    assert zstd.level == 22
+
     with pytest.raises(ValueError):
         CompressionSpec('lzma,9,invalid')
     with pytest.raises(ValueError):