Browse Source

Merge pull request #129 from ThomasWaldmann/compression

compression flexibility, new none, lz4 and lzma compression
TW 10 years ago
parent
commit
fffe509268

+ 1 - 0
.gitignore

@@ -6,6 +6,7 @@ env
 .tox
 hashindex.c
 chunker.c
+compress.c
 crypto.c
 platform_darwin.c
 platform_freebsd.c

+ 4 - 0
.travis/install.sh

@@ -14,6 +14,7 @@ if [[ "$(uname -s)" == 'Darwin' ]]; then
         eval "$(pyenv init -)"
     fi
 
+    brew install lz4
     brew outdated pyenv || brew upgrade pyenv
 
     case "${TOXENV}" in
@@ -34,6 +35,9 @@ if [[ "$(uname -s)" == 'Darwin' ]]; then
     python -m pip install --user virtualenv
 else
     pip install virtualenv
+    sudo add-apt-repository -y ppa:gezakovacs/lz4
+    sudo apt-get update
+    sudo apt-get install -y liblz4-dev
     sudo apt-get install -y libacl1-dev
 fi
 

+ 22 - 3
CHANGES.rst

@@ -5,16 +5,35 @@ Borg Changelog
 Version 0.25.0 (not released yet)
 ---------------------------------
 
-Incompatible changes (compared to 0.24):
+Compatibility notes:
 
-- none yet
+- the new compression code is very compatible: as long as you stay with zlib
+  compression, older borg releases will still be able to read data from a
+  repo/archive made with the new code (note: this is not the case for the
+  default "none" compression, use "zlib,0" if you want a "no compression" mode
+  that can be read by older borg). Also the new code is able to read repos and
+  archives made with older borg versions (for all zlib levels 0..9).
 
 Deprecations:
 
-- none yet
+- --compression N (with N being a number, as in 0.24) is deprecated.
+  We keep --compression 0..9 for now so as not to break scripts, but it is
+  deprecated and will be removed later, so better fix your scripts now:
+  --compression 0 (as in 0.24) is the same as --compression zlib,0 (now).
+  BUT: if you do not want compression, you want --compression none
+  (which is the default).
+  --compression 1 (as in 0.24) is the same as --compression zlib,1 (now).
+  --compression 9 (as in 0.24) is the same as --compression zlib,9 (now).
+
 
 New features:
 
+- create --compression none (default, means: do not compress, just pass through
+  data "as is". This is more efficient than zlib level 0 as used in borg 0.24.)
+- create --compression lz4 (super-fast, but not very high compression)
+  Please note that borgbackup needs the lz4 library as an additional requirement.
+- create --compression zlib,N (slower, higher compression, default for N is 6)
+- create --compression lzma,N (slowest, highest compression, default for N is 6)
 - honor the nodump flag (UF_NODUMP) and do not backup such items
 
 Bug fixes:
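
The zlib compatibility claim above can be checked directly: for zlib, the new code adds no extra ID header, the regular zlib header (0x.8..) doubles as the detection pattern (see ``borg/compress.pyx`` below and ``test_zlib_compat``). A minimal sketch:

    import zlib
    from borg.compress import get_compressor

    data = b'data' * 100
    c = get_compressor(name='zlib', level=6)
    # byte-identical to what borg 0.24 wrote, hence readable by older releases
    assert c.compress(data) == zlib.compress(data, 6)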

+ 2 - 1
README.rst

@@ -51,7 +51,8 @@ Main features
     authenticity is verified using HMAC-SHA256.
 
 **Compression**
-    All data can be compressed by zlib, level 0-9.
+    All data can be compressed by lz4 (super fast, low compression), zlib
+    (medium speed and compression) or lzma (low speed, high compression).
 
 **Off-site backups**
     Borg can store data on any remote host accessible over SSH.  If Borg is

+ 13 - 5
borg/archiver.py

@@ -14,6 +14,7 @@ import traceback
 
 from . import __version__
 from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS
+from .compress import Compressor, COMPR_BUFFER
 from .repository import Repository
 from .cache import Cache
 from .key import key_creator
@@ -21,7 +22,7 @@ from .helpers import Error, location_validator, format_time, format_file_size, \
     format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \
     get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \
     Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
-    is_cachedir, bigint_to_int, ChunkerParams
+    is_cachedir, bigint_to_int, ChunkerParams, CompressionSpec
 from .remote import RepositoryServer, RemoteRepository
 
 has_lchflags = hasattr(os, 'lchflags')
@@ -104,7 +105,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         t0 = datetime.now()
         repository = self.open_repository(args.archive, exclusive=True)
         manifest, key = Manifest.load(repository)
-        key.compression_level = args.compression
+        compr_args = dict(buffer=COMPR_BUFFER)
+        compr_args.update(args.compression)
+        key.compressor = Compressor(**compr_args)
         cache = Cache(repository, key, manifest, do_files=args.cache_files)
         archive = Archive(repository, key, manifest, args.archive.archive, cache=cache,
                           create=True, checkpoint_interval=args.checkpoint_interval,
@@ -670,9 +673,14 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE',
                                help='specify the chunker parameters. default: %d,%d,%d,%d' % CHUNKER_PARAMS)
         subparser.add_argument('-C', '--compression', dest='compression',
-                               type=int, default=0, metavar='N',
-                               help='select compression algorithm and level. 0..9 is supported and means zlib '
-                                    'level 0 (no compression, fast, default) .. zlib level 9 (high compression, slow).')
+                               type=CompressionSpec, default=dict(name='none'), metavar='COMPRESSION',
+                               help='select compression algorithm (and level): '
+                                    'none == no compression (default), '
+                                    'lz4 == lz4, '
+                                    'zlib == zlib (default level 6), '
+                                    'zlib,0 .. zlib,9 == zlib (with level 0..9), '
+                                    'lzma == lzma (default level 6), '
+                                    'lzma,0 .. lzma,9 == lzma (with level 0..9).')
         subparser.add_argument('archive', metavar='ARCHIVE',
                                type=location_validator(archive=True),
                                help='archive to create')
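
The create path above merges the parsed compression spec with the shared output buffer before building the compressor. Reduced to its essence (a sketch of the three lines added to do_create):

    from borg.compress import Compressor, COMPR_BUFFER

    compression = dict(name='zlib', level=6)   # what CompressionSpec returns for "zlib,6"
    compr_args = dict(buffer=COMPR_BUFFER)
    compr_args.update(compression)
    compressor = Compressor(**compr_args)      # becomes key.compressor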

+ 199 - 0
borg/compress.pyx

@@ -0,0 +1,199 @@
+import zlib
+try:
+    import lzma
+except ImportError:
+    lzma = None
+
+cdef extern from "lz4.h":
+    int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
+    int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
+
+
+cdef class CompressorBase:
+    """
+    base class for all (de)compression classes,
+    also handles compression format auto detection and
+    adding/stripping the ID header (which enables auto detection).
+    """
+    ID = b'\xFF\xFF'  # reserved and not used
+                      # overwrite with a unique 2-byte bytestring in child classes
+    name = 'baseclass'
+
+    @classmethod
+    def detect(cls, data):
+        return data.startswith(cls.ID)
+
+    def __init__(self, **kwargs):
+        pass
+
+    def compress(self, data):
+        # add ID bytes
+        return self.ID + data
+
+    def decompress(self, data):
+        # strip ID bytes
+        return data[2:]
+
+
+class CNONE(CompressorBase):
+    """
+    none - no compression, just pass through data
+    """
+    ID = b'\x00\x00'
+    name = 'none'
+
+    def compress(self, data):
+        return super().compress(data)
+
+    def decompress(self, data):
+        data = super().decompress(data)
+        if not isinstance(data, bytes):
+            data = bytes(data)
+        return data
+
+
+cdef class LZ4(CompressorBase):
+    """
+    raw LZ4 compression / decompression (liblz4).
+
+    Features:
+        - lz4 is super fast
+        - wrapper releases CPython's GIL to support multithreaded code
+        - buffer given by caller, avoiding frequent reallocation and buffer duplication
+        - uses safe lz4 methods that never go beyond the end of the output buffer
+
+    But beware:
+        - this is not very generic, the given buffer MUST be large enough to
+          handle all compression or decompression output (or it will fail).
+        - you must not do method calls to the same LZ4 instance from different
+          threads at the same time - create one LZ4 instance per thread!
+    """
+    ID = b'\x01\x00'
+    name = 'lz4'
+
+    cdef char *buffer  # helper buffer for (de)compression output
+    cdef int bufsize  # size of this buffer
+
+    def __cinit__(self, **kwargs):
+        buffer = kwargs['buffer']
+        self.buffer = buffer
+        self.bufsize = len(buffer)
+
+    def compress(self, idata):
+        if not isinstance(idata, bytes):
+            idata = bytes(idata)  # code below does not work with memoryview
+        cdef int isize = len(idata)
+        cdef int osize = self.bufsize
+        cdef char *source = idata
+        cdef char *dest = self.buffer
+        with nogil:
+            osize = LZ4_compress_limitedOutput(source, dest, isize, osize)
+        if not osize:
+            raise Exception('lz4 compress failed')
+        return super().compress(dest[:osize])
+
+    def decompress(self, idata):
+        if not isinstance(idata, bytes):
+            idata = bytes(idata)  # code below does not work with memoryview
+        idata = super().decompress(idata)
+        cdef int isize = len(idata)
+        cdef int osize = self.bufsize
+        cdef char *source = idata
+        cdef char *dest = self.buffer
+        with nogil:
+            osize = LZ4_decompress_safe(source, dest, isize, osize)
+        if osize < 0:
+            # malformed input data, buffer too small, ...
+            raise Exception('lz4 decompress failed')
+        return dest[:osize]
+
+
+class LZMA(CompressorBase):
+    """
+    lzma compression / decompression (python 3.3+ stdlib)
+    """
+    ID = b'\x02\x00'
+    name = 'lzma'
+
+    def __init__(self, level=6, **kwargs):
+        super().__init__(**kwargs)
+        self.level = level
+        if lzma is None:
+            raise ValueError('No lzma support found.')
+
+    def compress(self, data):
+        # we do not need integrity checks in lzma, we do that already
+        data = lzma.compress(data, preset=self.level, check=lzma.CHECK_NONE)
+        return super().compress(data)
+
+    def decompress(self, data):
+        data = super().decompress(data)
+        return lzma.decompress(data)
+
+
+class ZLIB(CompressorBase):
+    """
+    zlib compression / decompression (python stdlib)
+    """
+    ID = b'\x08\x00'  # not used here, see detect()
+                      # avoid all 0x.8.. IDs elsewhere!
+    name = 'zlib'
+
+    @classmethod
+    def detect(cls, data):
+        # matches misc. patterns 0x.8.. used by zlib
+        cmf, flg = data[:2]
+        is_deflate = cmf & 0x0f == 8
+        check_ok = (cmf * 256 + flg) % 31 == 0
+        return check_ok and is_deflate
+
+    def __init__(self, level=6, **kwargs):
+        super().__init__(**kwargs)
+        self.level = level
+
+    def compress(self, data):
+        # note: for compatibility no super call, do not add ID bytes
+        return zlib.compress(data, self.level)
+
+    def decompress(self, data):
+        # note: for compatibility no super call, do not strip ID bytes
+        return zlib.decompress(data)
+
+
+COMPRESSOR_TABLE = {
+    CNONE.name: CNONE,
+    LZ4.name: LZ4,
+    ZLIB.name: ZLIB,
+    LZMA.name: LZMA,
+}
+COMPRESSOR_LIST = [LZ4, CNONE, ZLIB, LZMA, ]  # check fast stuff first
+
+def get_compressor(name, **kwargs):
+    cls = COMPRESSOR_TABLE[name]
+    return cls(**kwargs)
+
+
+class Compressor:
+    """
+    compresses using a compressor with given name and parameters
+    decompresses everything we can handle (autodetect)
+    """
+    def __init__(self, name='none', **kwargs):
+        self.params = kwargs
+        self.compressor = get_compressor(name, **self.params)
+
+    def compress(self, data):
+        return self.compressor.compress(data)
+
+    def decompress(self, data):
+        hdr = bytes(data[:2])  # detect() does not work with memoryview
+        for cls in COMPRESSOR_LIST:
+            if cls.detect(hdr):
+                return cls(**self.params).decompress(data)
+        else:
+            raise ValueError('No decompressor for this data found: %r.' % (data[:2],))
+
+
+# a buffer used for (de)compression result, which can be slightly bigger
+# than the chunk buffer in the worst (incompressible data) case, add 10%:
+COMPR_BUFFER = bytes(int(1.1 * 2 ** 23))  # CHUNK_MAX_EXP == 23
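
The module above can be exercised on its own. A minimal sketch (assumes the compress extension is built; names are taken from the code above):

    from borg.compress import Compressor, COMPR_BUFFER

    data = b'fooooooooobaaaaaaaar' * 10

    # compress with lz4; the 2-byte ID header is prepended automatically
    lz4 = Compressor(name='lz4', buffer=COMPR_BUFFER)
    cdata = lz4.compress(data)

    # any Compressor instance can read it back: decompress() autodetects
    # the format from the ID header, regardless of the configured method
    other = Compressor(name='none', buffer=COMPR_BUFFER)
    assert other.decompress(cdata) == data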

+ 36 - 0
borg/helpers.py

@@ -278,9 +278,45 @@ def timestamp(s):
 
 def ChunkerParams(s):
     window_size, chunk_mask, chunk_min, chunk_max = s.split(',')
+    if int(chunk_max) > 23:
+        # do not go beyond 2**23 (8MB) chunk size now,
+        # COMPR_BUFFER can only cope with up to this size
+        raise ValueError
     return int(window_size), int(chunk_mask), int(chunk_min), int(chunk_max)
 
 
+def CompressionSpec(s):
+    values = s.split(',')
+    count = len(values)
+    if count < 1:
+        raise ValueError
+    compression = values[0]
+    try:
+        compression = int(compression)
+        if count > 1:
+            raise ValueError
+        # DEPRECATED: it is just --compression N
+        if 0 <= compression <= 9:
+            return dict(name='zlib', level=compression)
+        raise ValueError
+    except ValueError:
+        # --compression algo[,...]
+        name = compression
+        if name in ('none', 'lz4', ):
+            return dict(name=name)
+        if name in ('zlib', 'lzma', ):
+            if count < 2:
+                level = 6  # default compression level in py stdlib
+            elif count == 2:
+                level = int(values[1])
+                if not 0 <= level <= 9:
+                    raise ValueError
+            else:
+                raise ValueError
+            return dict(name=name, level=level)
+        raise ValueError
+
+
 def is_cachedir(path):
     """Determines whether the specified path is a cache directory (and
     therefore should potentially be excluded from the backup) according to
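
For illustration, the parser above maps spec strings to the keyword dicts consumed by Compressor like so (a sketch; the same mappings are asserted in the new tests further down):

    from borg.helpers import CompressionSpec

    CompressionSpec('none')    # -> dict(name='none')
    CompressionSpec('lz4')     # -> dict(name='lz4')
    CompressionSpec('zlib')    # -> dict(name='zlib', level=6), stdlib default level
    CompressionSpec('lzma,9')  # -> dict(name='lzma', level=9)
    CompressionSpec('3')       # -> dict(name='zlib', level=3), deprecated numeric form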

+ 6 - 6
borg/key.py

@@ -6,9 +6,9 @@ import msgpack
 import textwrap
 import hmac
 from hashlib import sha256
-import zlib
 
 from .crypto import pbkdf2_sha256, get_random_bytes, AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks
+from .compress import Compressor, COMPR_BUFFER
 from .helpers import IntegrityError, get_keys_dir, Error
 
 PREFIX = b'\0' * 8
@@ -68,7 +68,7 @@ class KeyBase:
         self.TYPE_STR = bytes([self.TYPE])
         self.repository = repository
         self.target = None  # key location file path / repo obj
-        self.compression_level = 0
+        self.compressor = Compressor('none', buffer=COMPR_BUFFER)
 
     def id_hash(self, data):
         """Return HMAC hash using the "id" HMAC key
@@ -99,12 +99,12 @@ class PlaintextKey(KeyBase):
         return sha256(data).digest()
 
     def encrypt(self, data):
-        return b''.join([self.TYPE_STR, zlib.compress(data, self.compression_level)])
+        return b''.join([self.TYPE_STR, self.compressor.compress(data)])
 
     def decrypt(self, id, data):
         if data[0] != self.TYPE:
             raise IntegrityError('Invalid encryption envelope')
-        data = zlib.decompress(memoryview(data)[1:])
+        data = self.compressor.decompress(memoryview(data)[1:])
         if id and sha256(data).digest() != id:
             raise IntegrityError('Chunk id verification failed')
         return data
@@ -131,7 +131,7 @@ class AESKeyBase(KeyBase):
         return HMAC(self.id_key, data, sha256).digest()
 
     def encrypt(self, data):
-        data = zlib.compress(data, self.compression_level)
+        data = self.compressor.compress(data)
         self.enc_cipher.reset()
         data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data)))
         hmac = HMAC(self.enc_hmac_key, data, sha256).digest()
@@ -144,7 +144,7 @@ class AESKeyBase(KeyBase):
         if memoryview(HMAC(self.enc_hmac_key, memoryview(data)[33:], sha256).digest()) != hmac:
             raise IntegrityError('Encryption envelope checksum mismatch')
         self.dec_cipher.reset(iv=PREFIX + data[33:41])
-        data = zlib.decompress(self.dec_cipher.decrypt(data[41:]))  # should use memoryview
+        data = self.compressor.decompress(self.dec_cipher.decrypt(data[41:]))
         if id and HMAC(self.id_key, data, sha256).digest() != id:
             raise IntegrityError('Chunk id verification failed')
         return data
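
The point of storing a Compressor on the key (instead of a zlib level) is that decrypt() now autodetects the compression format per chunk, so one repository can mix methods. A minimal sketch of that property, using zlib so it runs with the stdlib alone:

    from borg.compress import Compressor, COMPR_BUFFER

    writer = Compressor('zlib', level=9, buffer=COMPR_BUFFER)
    reader = Compressor('none', buffer=COMPR_BUFFER)  # configured method is irrelevant when reading
    assert reader.decompress(writer.compress(b'chunk data')) == b'chunk data'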

+ 102 - 0
borg/testsuite/compress.py

@@ -0,0 +1,102 @@
+import zlib
+try:
+    import lzma
+except ImportError:
+    lzma = None
+
+import pytest
+
+from ..compress import get_compressor, Compressor, CNONE, ZLIB, LZ4
+
+
+buffer = bytes(2**16)
+data = b'fooooooooobaaaaaaaar' * 10
+params = dict(name='zlib', level=6, buffer=buffer)
+
+
+def test_get_compressor():
+    c = get_compressor(name='none')
+    assert isinstance(c, CNONE)
+    c = get_compressor(name='lz4', buffer=buffer)
+    assert isinstance(c, LZ4)
+    c = get_compressor(name='zlib')
+    assert isinstance(c, ZLIB)
+    with pytest.raises(KeyError):
+        get_compressor(name='foobar')
+
+
+def test_cnull():
+    c = get_compressor(name='none')
+    cdata = c.compress(data)
+    assert len(cdata) > len(data)
+    assert data in cdata  # it's not compressed and just in there 1:1
+    assert data == c.decompress(cdata)
+    assert data == Compressor(**params).decompress(cdata)  # autodetect
+
+
+def test_lz4():
+    c = get_compressor(name='lz4', buffer=buffer)
+    cdata = c.compress(data)
+    assert len(cdata) < len(data)
+    assert data == c.decompress(cdata)
+    assert data == Compressor(**params).decompress(cdata)  # autodetect
+
+
+def test_zlib():
+    c = get_compressor(name='zlib')
+    cdata = c.compress(data)
+    assert len(cdata) < len(data)
+    assert data == c.decompress(cdata)
+    assert data == Compressor(**params).decompress(cdata)  # autodetect
+
+
+def test_lzma():
+    if lzma is None:
+        pytest.skip("No lzma support found.")
+    c = get_compressor(name='lzma')
+    cdata = c.compress(data)
+    assert len(cdata) < len(data)
+    assert data == c.decompress(cdata)
+    assert data == Compressor(**params).decompress(cdata)  # autodetect
+
+
+def test_autodetect_invalid():
+    with pytest.raises(ValueError):
+        Compressor(**params).decompress(b'\xff\xfftotalcrap')
+    with pytest.raises(ValueError):
+        Compressor(**params).decompress(b'\x08\x00notreallyzlib')
+
+
+def test_zlib_compat():
+    # for compatibility reasons, we do not add an extra header for zlib,
+    # nor do we expect one when decompressing / autodetecting
+    for level in range(10):
+        c = get_compressor(name='zlib', level=level)
+        cdata1 = c.compress(data)
+        cdata2 = zlib.compress(data, level)
+        assert cdata1 == cdata2
+        data2 = c.decompress(cdata2)
+        assert data == data2
+        data2 = Compressor(**params).decompress(cdata2)
+        assert data == data2
+
+
+def test_compressor():
+    params_list = [
+        dict(name='none', buffer=buffer),
+        dict(name='lz4', buffer=buffer),
+        dict(name='zlib', level=0, buffer=buffer),
+        dict(name='zlib', level=6, buffer=buffer),
+        dict(name='zlib', level=9, buffer=buffer),
+    ]
+    if lzma:
+        params_list += [
+            dict(name='lzma', level=0, buffer=buffer),
+            dict(name='lzma', level=6, buffer=buffer),
+            dict(name='lzma', level=9, buffer=buffer),
+        ]
+    for params in params_list:
+        c = Compressor(**params)
+        assert data == c.decompress(c.compress(data))
+
+

+ 26 - 1
borg/testsuite/helpers.py

@@ -2,11 +2,12 @@ import hashlib
 from time import mktime, strptime
 from datetime import datetime, timezone, timedelta
 
+import pytest
 import msgpack
 
 from ..helpers import adjust_patterns, exclude_path, Location, format_timedelta, ExcludePattern, make_path_safe, \
     prune_within, prune_split, \
-    StableDict, int_to_bigint, bigint_to_int, parse_timestamp
+    StableDict, int_to_bigint, bigint_to_int, parse_timestamp, CompressionSpec
 from . import BaseTestCase
 
 
@@ -104,6 +105,30 @@ class PatternTestCase(BaseTestCase):
                           ['/etc/passwd', '/etc/hosts', '/var/log/messages', '/var/log/dmesg'])
 
 
+def test_compression_specs():
+    with pytest.raises(ValueError):
+        CompressionSpec('')
+    assert CompressionSpec('0') == dict(name='zlib', level=0)
+    assert CompressionSpec('1') == dict(name='zlib', level=1)
+    assert CompressionSpec('9') == dict(name='zlib', level=9)
+    with pytest.raises(ValueError):
+        CompressionSpec('10')
+    assert CompressionSpec('none') == dict(name='none')
+    assert CompressionSpec('lz4') == dict(name='lz4')
+    assert CompressionSpec('zlib') == dict(name='zlib', level=6)
+    assert CompressionSpec('zlib,0') == dict(name='zlib', level=0)
+    assert CompressionSpec('zlib,9') == dict(name='zlib', level=9)
+    with pytest.raises(ValueError):
+        CompressionSpec('zlib,9,invalid')
+    assert CompressionSpec('lzma') == dict(name='lzma', level=6)
+    assert CompressionSpec('lzma,0') == dict(name='lzma', level=0)
+    assert CompressionSpec('lzma,9') == dict(name='lzma', level=9)
+    with pytest.raises(ValueError):
+        CompressionSpec('lzma,9,invalid')
+    with pytest.raises(ValueError):
+        CompressionSpec('invalid')
+
+
 class MakePathSafeTestCase(BaseTestCase):
 
     def test(self):

+ 1 - 0
docs/global.rst.inc

@@ -13,6 +13,7 @@
 .. _PBKDF2: https://en.wikipedia.org/wiki/PBKDF2
 .. _ACL: https://en.wikipedia.org/wiki/Access_control_list
 .. _libacl: http://savannah.nongnu.org/projects/acl/
+.. _liblz4: https://github.com/Cyan4973/lz4
 .. _OpenSSL: https://www.openssl.org/
 .. _Python: http://www.python.org/
 .. _Buzhash: https://en.wikipedia.org/wiki/Buzhash

+ 10 - 2
docs/installation.rst

@@ -9,6 +9,7 @@ Installation
 * Python_ >= 3.2
 * OpenSSL_ >= 1.0.0
 * libacl_
+* liblz4_
 * some python dependencies, see install_requires in setup.py
 
 General notes
@@ -59,6 +60,9 @@ Some of the steps detailled below might be useful also for non-git installs.
     # ACL support Headers + Library
     apt-get install libacl1-dev libacl1
 
+    # lz4 super fast compression support Headers + Library
+    apt-get install liblz4-dev liblz4-1
+
     # if you do not have gcc / make / etc. yet
     apt-get install build-essential
 
@@ -106,13 +110,16 @@ Some of the steps detailled below might be useful also for non-git installs.
 
     # ACL support Headers + Library
     sudo dnf install libacl-devel libacl
-    
+
+    # lz4 super fast compression support Headers + Library
+    sudo dnf install lz4
+
     # optional: FUSE support - to mount backup archives
     sudo dnf install fuse-devel fuse
     
     # optional: for unit testing
     sudo dnf install fakeroot
-    
+
     # get |project_name| from github, install it
     git clone |git_url|
 
@@ -148,6 +155,7 @@ You'll need at least (use the cygwin installer to fetch/install these):
     gcc-core
     git
     libopenssl
+    liblz4_1 liblz4-devel  # from cygwinports.org
     make
     openssh
     openssl-devel

+ 30 - 5
docs/internals.rst

@@ -382,10 +382,35 @@ representation of the repository id.
 Compression
 -----------
 
-|project_name| currently always pipes all data through a zlib compressor which
-supports compression levels 0 (no compression, fast) to 9 (high compression, slow).
+|project_name| supports the following compression methods:
 
-See ``borg create --help`` about how to specify the compression level and its default.
+- none (no compression, pass through data 1:1)
+- lz4 (low compression, but super fast)
+- zlib (level 0-9, level 0 is no compression [but still adding zlib overhead],
+  level 1 is low, level 9 is high compression)
+- lzma (level 0-9, level 0 is low, level 9 is high compression).
+
+Speed: none > lz4 > zlib > lzma
+
+Compression: lzma > zlib > lz4 > none
+
+Be careful, higher zlib and especially lzma compression levels might take a
+lot of resources (CPU and memory).
+
+The overall speed of course also depends on the speed of your target storage.
+If that is slow, using a higher compression level might yield better overall
+performance. You need to experiment a bit. Maybe just watch your CPU load: if
+it is relatively low, increase compression until 1 core is 70-100% loaded.
 
-Note: zlib level 0 creates a little bit more output data than it gets as input,
-due to zlib protocol overhead.
+Even if your target storage is rather fast, you might see interesting effects:
+while doing no compression at all (none) is an operation that takes no time,
+it likely will need to store more data to the storage compared to using lz4.
+The time needed to transfer and store the additional data might be much more
+than if you had used lz4 (which is super fast, but still might compress your
+data about 2:1). This assumes your data is compressible (if you back up
+already compressed data, trying to compress it again at backup time is usually
+pointless).
+
+Compression is applied after deduplication, thus using different compression
+methods in one repo does not influence deduplication.
+
+See ``borg create --help`` about how to specify the compression level and its default.
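
To pick a method empirically, the compressors can be timed directly on a representative sample of your data. A rough sketch (the sample path is hypothetical; the input must not exceed the 2**23 byte chunk size COMPR_BUFFER is dimensioned for, and the lzma entry needs the stdlib lzma module):

    import time
    from borg.compress import Compressor, COMPR_BUFFER

    data = open('/path/to/representative.sample', 'rb').read()  # hypothetical sample file
    for spec in (dict(name='none'), dict(name='lz4'),
                 dict(name='zlib', level=6), dict(name='lzma', level=6)):
        c = Compressor(buffer=COMPR_BUFFER, **spec)
        t0 = time.time()
        cdata = c.compress(data)
        print('%-4s ratio=%.2f time=%.3fs' % (spec['name'], len(cdata) / len(data), time.time() - t0))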

+ 25 - 0
docs/quickstart.rst

@@ -89,6 +89,31 @@ certain number of old archives::
     # and 6 monthly archives.
     borg prune -v $REPOSITORY --keep-daily=7 --keep-weekly=4 --keep-monthly=6
 
+.. _backup_compression:
+
+Backup compression
+------------------
+
+Default is no compression, but we support several methods, trading speed
+against compression ratio:
+
+If you have fast repo storage and want a little compression::
+
+    $ borg create --compression lz4 /mnt/backup::repo ~
+
+If you have medium-fast repo storage and want a bit more compression (N=0..9,
+0 means no compression, 9 means high compression)::
+
+    $ borg create --compression zlib,N /mnt/backup::repo ~
+
+If you have very slow repo storage and want high compression (N=0..9, 0 means
+low compression, 9 means high compression)::
+
+    $ borg create --compression lzma,N /mnt/backup::repo ~
+
+You'll need to experiment a bit to find the best compression for your use case.
+Keep an eye on CPU load and throughput.
+
 .. _encrypted_repos:
 
 Repository encryption

+ 3 - 0
docs/support.rst

@@ -4,6 +4,9 @@
 Support
 =======
 
+Please read the docs (including the FAQ section) first; a lot of stuff is
+documented and explained there.
+
 Issue Tracker
 -------------
 

+ 15 - 0
docs/usage.rst

@@ -76,8 +76,11 @@ Resource Usage
 |project_name| might use a lot of resources depending on the size of the data set it is dealing with.
 
 CPU: it won't go beyond 100% of 1 core as the code is currently single-threaded.
+     Especially higher zlib and lzma compression levels use significant amounts of CPU cycles.
 
 Memory (RAM): the chunks index and the files index are read into memory for performance reasons.
+              Compression, especially lzma at high levels, might need substantial
+              amounts of memory.
 
 Temporary files: reading data and metadata from a FUSE mounted repository will consume about the same space as the
                  deduplicated chunks used to represent them in the repository.
@@ -175,6 +178,18 @@ Examples
     # Backup a raw device (must not be active/in use/mounted at that time)
     $ dd if=/dev/sda bs=10M | borg create /mnt/backup::my-sda -
 
+    # No compression (default)
+    $ borg create /mnt/backup::repo ~
+
+    # Super fast, low compression
+    $ borg create --compression lz4 /mnt/backup::repo ~
+
+    # Less fast, higher compression (N = 0..9)
+    $ borg create --compression zlib,N /mnt/backup::repo ~
+
+    # Even slower, even higher compression (N = 0..9)
+    $ borg create --compression lzma,N /mnt/backup::repo ~
+
 
 .. include:: usage/extract.rst.inc
 

+ 7 - 1
setup.py

@@ -19,6 +19,7 @@ if sys.version_info < min_python:
 
 from setuptools import setup, Extension
 
+compress_source = 'borg/compress.pyx'
 crypto_source = 'borg/crypto.pyx'
 chunker_source = 'borg/chunker.pyx'
 hashindex_source = 'borg/hashindex.pyx'
@@ -38,6 +39,7 @@ try:
 
         def make_distribution(self):
             self.filelist.extend([
+                'borg/compress.c',
                 'borg/crypto.c',
                 'borg/chunker.c', 'borg/_chunker.c',
                 'borg/hashindex.c', 'borg/_hashindex.c',
@@ -52,6 +54,7 @@ except ImportError:
         def __init__(self, *args, **kwargs):
             raise Exception('Cython is required to run sdist')
 
+    compress_source = compress_source.replace('.pyx', '.c')
     crypto_source = crypto_source.replace('.pyx', '.c')
     chunker_source = chunker_source.replace('.pyx', '.c')
     hashindex_source = hashindex_source.replace('.pyx', '.c')
@@ -59,7 +62,9 @@ except ImportError:
     platform_freebsd_source = platform_freebsd_source.replace('.pyx', '.c')
     platform_darwin_source = platform_darwin_source.replace('.pyx', '.c')
     from distutils.command.build_ext import build_ext
-    if not all(os.path.exists(path) for path in [crypto_source, chunker_source, hashindex_source, platform_linux_source, platform_freebsd_source]):
+    if not all(os.path.exists(path) for path in [
+        compress_source, crypto_source, chunker_source, hashindex_source,
+        platform_linux_source, platform_freebsd_source]):
         raise ImportError('The GIT version of Borg needs Cython. Install Cython or use a released version')
 
 
@@ -89,6 +94,7 @@ cmdclass = versioneer.get_cmdclass()
 cmdclass.update({'build_ext': build_ext, 'sdist': Sdist})
 
 ext_modules = [
+    Extension('borg.compress', [compress_source], libraries=['lz4']),
     Extension('borg.crypto', [crypto_source], libraries=['crypto'], include_dirs=include_dirs, library_dirs=library_dirs),
     Extension('borg.chunker', [chunker_source]),
     Extension('borg.hashindex', [hashindex_source])