Browse Source

zstd: use our own Cython-based binding, remove the python-zstandard dependency

Currently this requires an externally available libzstd >= 1.3.0;
zstd is not bundled yet.
Thomas Waldmann 7 years ago
parent
commit
aec36f64a2
3 changed files with 80 additions and 24 deletions
  1. 23 3
      setup.py
  2. 5 0
      src/borg/algorithms/zstd-libselect.h
  3. 52 21
      src/borg/compress.pyx

+ 23 - 3
setup.py

@@ -27,8 +27,6 @@ install_requires = [
     # Also, we might use some rather recent API features.
     'msgpack-python>=0.4.6',
     'pyzmq',
-    # https://github.com/indygreg/python-zstandard
-    'zstandard',
 ]
 
 # note for package maintainers: if you package borgbackup for distribution,
@@ -161,10 +159,20 @@ def detect_libb2(prefixes):
                     return prefix
 
 
+def detect_libzstd(prefixes):
+    for prefix in prefixes:
+        filename = os.path.join(prefix, 'include', 'zstd.h')
+        if os.path.exists(filename):
+            with open(filename, 'r') as fd:
+                if 'ZSTD_getFrameContentSize' in fd.read():
+                    return prefix
+
+
 include_dirs = []
 library_dirs = []
 define_macros = []
 crypto_libraries = ['crypto']
+compression_libraries = ['lz4']
 
 possible_openssl_prefixes = ['/usr', '/usr/local', '/usr/local/opt/openssl', '/usr/local/ssl', '/usr/local/openssl',
                              '/usr/local/borg', '/opt/local', '/opt/pkg', ]
@@ -200,6 +208,18 @@ if libb2_prefix:
     crypto_libraries.append('b2')
     define_macros.append(('BORG_USE_LIBB2', 'YES'))
 
+possible_libzstd_prefixes = ['/usr', '/usr/local', '/usr/local/opt/libzstd', '/usr/local/libzstd',
+                             '/usr/local/borg', '/opt/local', '/opt/pkg', ]
+if os.environ.get('BORG_LIBZSTD_PREFIX'):
+    possible_libzstd_prefixes.insert(0, os.environ.get('BORG_LIBZSTD_PREFIX'))
+libzstd_prefix = detect_libzstd(possible_libzstd_prefixes)
+if libzstd_prefix:
+    print('Detected and preferring libzstd over bundled ZSTD')
+    include_dirs.append(os.path.join(libzstd_prefix, 'include'))
+    library_dirs.append(os.path.join(libzstd_prefix, 'lib'))
+    compression_libraries.append('zstd')
+    define_macros.append(('BORG_USE_LIBZSTD', 'YES'))
+
 
 with open('README.rst', 'r') as fd:
     long_description = fd.read()
@@ -758,7 +778,7 @@ cmdclass = {
 ext_modules = []
 if not on_rtd:
     ext_modules += [
-    Extension('borg.compress', [compress_source], libraries=['lz4'], include_dirs=include_dirs, library_dirs=library_dirs, define_macros=define_macros),
+    Extension('borg.compress', [compress_source], libraries=compression_libraries, include_dirs=include_dirs, library_dirs=library_dirs, define_macros=define_macros),
     Extension('borg.crypto.low_level', [crypto_ll_source, crypto_helpers], libraries=crypto_libraries, include_dirs=include_dirs, library_dirs=library_dirs, define_macros=define_macros),
     Extension('borg.hashindex', [hashindex_source]),
     Extension('borg.item', [item_source]),

+ 5 - 0
src/borg/algorithms/zstd-libselect.h

@@ -0,0 +1,5 @@
+#ifdef BORG_USE_LIBZSTD
+#include <zstd.h>
+#else
+#error "TODO"
+#endif

+ 52 - 21
src/borg/compress.pyx

@@ -22,11 +22,6 @@ try:
 except ImportError:
     lzma = None
 
-try:
-    import zstd
-except ImportError:
-    zstd = None
-
 
 from .helpers import Buffer, DecompressionError
 
@@ -38,6 +33,17 @@ cdef extern from "lz4.h":
     int LZ4_compressBound(int inputSize) nogil
 
 
+cdef extern from "algorithms/zstd-libselect.h":
+    size_t ZSTD_compress(void* dst, size_t dstCapacity, const void* src, size_t srcSize, int  compressionLevel) nogil
+    size_t ZSTD_decompress(void* dst, size_t dstCapacity, const void* src, size_t compressedSize) nogil
+    size_t ZSTD_compressBound(size_t srcSize) nogil
+    unsigned long long ZSTD_CONTENTSIZE_UNKNOWN
+    unsigned long long ZSTD_CONTENTSIZE_ERROR
+    unsigned long long ZSTD_getFrameContentSize(const void *src, size_t srcSize) nogil
+    unsigned ZSTD_isError(size_t code) nogil
+    const char* ZSTD_getErrorName(size_t code) nogil
+
+
 buffer = Buffer(bytearray, size=0)
 
 
@@ -205,25 +211,50 @@ class ZSTD(CompressorBase):
     def __init__(self, level=3, **kwargs):
         super().__init__(**kwargs)
         self.level = level
-        if zstd is None:
-            raise ValueError('No zstd support found.')
 
-    def compress(self, data):
-        if not isinstance(data, bytes):
-            data = bytes(data)  # zstd < 0.9.0 does not work with memoryview
-        cctx = zstd.ZstdCompressor(level=self.level, write_content_size=True)
-        data = cctx.compress(data)
-        return super().compress(data)
+    def compress(self, idata):
+        if not isinstance(idata, bytes):
+            idata = bytes(idata)  # code below does not work with memoryview
+        cdef int isize = len(idata)
+        cdef size_t osize
+        cdef char *source = idata
+        cdef char *dest
+        cdef int level = self.level
+        osize = ZSTD_compressBound(isize)
+        buf = buffer.get(osize)
+        dest = <char *> buf
+        with nogil:
+            osize = ZSTD_compress(dest, osize, source, isize, level)
+        if ZSTD_isError(osize):
+            raise Exception('zstd compress failed: %s' % ZSTD_getErrorName(osize))
+        return super().compress(dest[:osize])
 
-    def decompress(self, data):
-        if not isinstance(data, bytes):
-            data = bytes(data)  # zstd < 0.9.0 does not work with memoryview
-        dctx = zstd.ZstdDecompressor()
-        data = super().decompress(data)
+    def decompress(self, idata):
+        if not isinstance(idata, bytes):
+            idata = bytes(idata)  # code below does not work with memoryview
+        idata = super().decompress(idata)
+        cdef int isize = len(idata)
+        cdef unsigned long long osize
+        cdef unsigned long long rsize
+        cdef char *source = idata
+        cdef char *dest
+        osize = ZSTD_getFrameContentSize(source, isize)
+        if osize == ZSTD_CONTENTSIZE_ERROR:
+            raise DecompressionError('zstd get size failed: data was not compressed by zstd')
+        if osize == ZSTD_CONTENTSIZE_UNKNOWN:
+            raise DecompressionError('zstd get size failed: original size unknown')
         try:
-            return dctx.decompress(data)
-        except zstd.ZstdError as e:
-            raise DecompressionError(str(e)) from None
+            buf = buffer.get(osize)
+        except MemoryError:
+            raise DecompressionError('MemoryError')
+        dest = <char *> buf
+        with nogil:
+            rsize = ZSTD_decompress(dest, osize, source, isize)
+        if ZSTD_isError(rsize):
+            raise DecompressionError('zstd decompress failed: %s' % ZSTD_getErrorName(rsize))
+        if rsize != osize:
+            raise DecompressionError('zstd decompress failed: size mismatch')
+        return dest[:osize]
 
 
 class ZLIB(CompressorBase):