Преглед на файлове

add a wrapper around liblz4

Thomas Waldmann преди 10 години
родител
ревизия
27de1b0a43
променени са 5 файла, в които са добавени 87 реда и са изтрити 4 реда
  1. 1 0
      .gitignore
  2. 67 0
      borg/compress.pyx
  3. 1 0
      docs/global.rst.inc
  4. 11 3
      docs/installation.rst
  5. 7 1
      setup.py

+ 1 - 0
.gitignore

@@ -6,6 +6,7 @@ env
 .tox
 hashindex.c
 chunker.c
+compress.c
 crypto.c
 platform_darwin.c
 platform_freebsd.c

+ 67 - 0
borg/compress.pyx

@@ -0,0 +1,67 @@
+"""
+A thin liblz4 wrapper for raw LZ4 compression / decompression.
+
+Features:
+    - lz4 is super fast
+    - wrapper releases CPython's GIL to support multithreaded code
+    - helper buffer only allocated once at instance creation and then reused
+
+But beware:
+    - this is not very generic, you MUST know the maximum uncompressed input
+      data size you will feed into the compressor / get from the decompressor!
+    - you must not do method calls to the same LZ4 instance from different
+      threads at the same time - create one LZ4 instance per thread!
+    - compress returns raw compressed data without adding any frame metadata
+      (like checksums, magics, length of data, etc.)
+    - decompress expects such raw compressed data as input
+"""
+
+from libc.stdlib cimport malloc, free
+
+
+# C declarations for the liblz4 functions this wrapper calls.
+cdef extern from "lz4.h":
+    # Output buffer size needed for the worst case (not compressible input)
+    # of inputSize bytes; not declared nogil, it is only called with the GIL held.
+    int LZ4_compressBound(int inputSize)
+    # Compresses inputSize bytes from source into dest; the wrapper treats a
+    # return value of 0 as failure, otherwise as the compressed size.
+    # Declared nogil so it can run inside a "with nogil" section.
+    int LZ4_compress(const char* source, char* dest, int inputSize) nogil
+    # Decompresses inputSize bytes from source into dest, given maxOutputSize
+    # as the dest capacity; the wrapper treats a negative return value as
+    # failure, otherwise as the decompressed size. Also declared nogil.
+    int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
+
+
+cdef class LZ4:
+    """
+    Raw LZ4 compressor / decompressor using one preallocated output buffer.
+
+    The buffer is sized at construction time for the worst case of
+    ``max_isize`` input bytes and is reused by every compress / decompress
+    call, so one instance must not be used from multiple threads at once.
+    """
+    cdef char *buffer  # helper buffer for (de)compression output
+    cdef int bufsize  # size of this buffer
+    cdef int max_isize  # maximum compressor input size safe for this bufsize
+
+    def __cinit__(self, int max_isize):
+        """
+        Allocate the output buffer.
+
+        :param max_isize: maximum uncompressed data size (in bytes) that will
+                          be fed into compress / produced by decompress
+        :raises ValueError: if liblz4 can not handle max_isize (negative or
+                            too large)
+        :raises MemoryError: if the buffer can not be allocated
+        """
+        self.max_isize = max_isize
+        # compute worst case bufsize for not compressible data:
+        self.bufsize = LZ4_compressBound(max_isize)
+        if self.bufsize <= 0:
+            # LZ4_compressBound signals an unsupported input size with 0.
+            # Without this check, malloc(0) could succeed and compress would
+            # then write past the end of a zero-sized buffer.
+            raise ValueError('invalid max_isize for lz4: %d' % max_isize)
+        self.buffer = <char *>malloc(self.bufsize)
+        if not self.buffer:
+            raise MemoryError
+
+    def __dealloc__(self):
+        # buffer is NULL if __cinit__ failed before / in malloc; free(NULL) is a no-op.
+        free(self.buffer)
+
+    def compress(self, idata):
+        """
+        Compress idata and return the raw LZ4 block as bytes.
+
+        :param idata: bytes object of at most max_isize bytes
+        :raises Exception: if idata is longer than max_isize or liblz4 fails
+        """
+        # keep the full-width length for the size check, narrowing it to a
+        # C int first could wrap and let oversized input pass the check:
+        cdef Py_ssize_t ilen = len(idata)
+        cdef int isize
+        cdef int osize
+        cdef char *source
+        cdef char *dest = self.buffer
+        if ilen > self.max_isize:
+            raise Exception('lz4 buffer might be too small, increase max_isize!')
+        isize = <int>ilen  # safe: 0 <= ilen <= max_isize, which is an int
+        source = idata  # wants bytes
+        with nogil:
+            osize = LZ4_compress(source, dest, isize)
+        if osize <= 0:
+            raise Exception('lz4 compress failed')
+        # slicing the char* copies the result out of the shared buffer:
+        return dest[:osize]
+
+    def decompress(self, idata):
+        """
+        Decompress a raw LZ4 block and return the original data as bytes.
+
+        :param idata: bytes object as returned by compress
+        :raises Exception: if idata is malformed or the decompressed data
+                           does not fit into the buffer
+        """
+        cdef Py_ssize_t ilen = len(idata)
+        cdef int isize = <int>ilen
+        cdef int osize = self.bufsize
+        cdef char *source
+        cdef char *dest = self.buffer
+        if isize != ilen:
+            # length does not fit into a C int, liblz4 can not process this
+            # (and a wrapped length would make it read the wrong amount).
+            raise Exception('lz4 decompress failed')
+        source = idata  # <-- does not work for memoryview idata, wants bytes
+        with nogil:
+            osize = LZ4_decompress_safe(source, dest, isize, osize)
+        if osize < 0:
+            # malformed input data, buffer too small, ...
+            raise Exception('lz4 decompress failed')
+        # slicing the char* copies the result out of the shared buffer:
+        return dest[:osize]

+ 1 - 0
docs/global.rst.inc

@@ -13,6 +13,7 @@
 .. _PBKDF2: https://en.wikipedia.org/wiki/PBKDF2
 .. _ACL: https://en.wikipedia.org/wiki/Access_control_list
 .. _libacl: http://savannah.nongnu.org/projects/acl/
+.. _liblz4: https://github.com/Cyan4973/lz4
 .. _OpenSSL: https://www.openssl.org/
 .. _Python: http://www.python.org/
 .. _Buzhash: https://en.wikipedia.org/wiki/Buzhash

+ 11 - 3
docs/installation.rst

@@ -9,6 +9,7 @@ Installation
 * Python_ >= 3.2
 * OpenSSL_ >= 1.0.0
 * libacl_
+* liblz4_
 * some python dependencies, see install_requires in setup.py
 
 General notes
@@ -59,6 +60,9 @@ Some of the steps detailled below might be useful also for non-git installs.
     # ACL support Headers + Library
     apt-get install libacl1-dev libacl1
 
+    # lz4 super fast compression support Headers + Library
+    apt-get install liblz4-dev liblz4-1
+
     # if you do not have gcc / make / etc. yet
     apt-get install build-essential
 
@@ -107,13 +111,16 @@ Some of the steps detailled below might be useful also for non-git installs.
 
     # ACL support Headers + Library
     sudo dnf install libacl-devel libacl
-    
+
+    # lz4 super fast compression support Headers + Library
+    sudo dnf install lz4-devel lz4
+
     # optional: lowlevel FUSE py binding - to mount backup archives
     sudo dnf install python3-llfuse fuse
-    
+
     # optional: for unit testing
     sudo dnf install fakeroot
-    
+
     # get |project_name| from github, install it
     git clone |git_url|
 
@@ -148,6 +155,7 @@ You'll need at least (use the cygwin installer to fetch/install these):
     gcc-core
     git
     libopenssl
+    liblz4_1 liblz4-devel  # from cygwinports.org
     make
     openssh
     openssl-devel

+ 7 - 1
setup.py

@@ -19,6 +19,7 @@ if sys.version_info < min_python:
 
 from setuptools import setup, Extension
 
+compress_source = 'borg/compress.pyx'
 crypto_source = 'borg/crypto.pyx'
 chunker_source = 'borg/chunker.pyx'
 hashindex_source = 'borg/hashindex.pyx'
@@ -38,6 +39,7 @@ try:
 
         def make_distribution(self):
             self.filelist.extend([
+                'borg/compress.c',
                 'borg/crypto.c',
                 'borg/chunker.c', 'borg/_chunker.c',
                 'borg/hashindex.c', 'borg/_hashindex.c',
@@ -52,6 +54,7 @@ except ImportError:
         def __init__(self, *args, **kwargs):
             raise Exception('Cython is required to run sdist')
 
+    compress_source = compress_source.replace('.pyx', '.c')
     crypto_source = crypto_source.replace('.pyx', '.c')
     chunker_source = chunker_source.replace('.pyx', '.c')
     hashindex_source = hashindex_source.replace('.pyx', '.c')
@@ -59,7 +62,9 @@ except ImportError:
     platform_freebsd_source = platform_freebsd_source.replace('.pyx', '.c')
     platform_darwin_source = platform_darwin_source.replace('.pyx', '.c')
     from distutils.command.build_ext import build_ext
-    if not all(os.path.exists(path) for path in [crypto_source, chunker_source, hashindex_source, platform_linux_source, platform_freebsd_source]):
+    if not all(os.path.exists(path) for path in [
+        compress_source, crypto_source, chunker_source, hashindex_source,
+        platform_linux_source, platform_freebsd_source]):
         raise ImportError('The GIT version of Borg needs Cython. Install Cython or use a released version')
 
 
@@ -89,6 +94,7 @@ cmdclass = versioneer.get_cmdclass()
 cmdclass.update({'build_ext': build_ext, 'sdist': Sdist})
 
 ext_modules = [
+    Extension('borg.compress', [compress_source], libraries=['lz4']),
     Extension('borg.crypto', [crypto_source], libraries=['crypto'], include_dirs=include_dirs, library_dirs=library_dirs),
     Extension('borg.chunker', [chunker_source]),
     Extension('borg.hashindex', [hashindex_source])