|
@@ -28,17 +28,15 @@ decompressor.
|
|
|
"""
|
|
|
|
|
|
import zlib
|
|
|
-from collections import namedtuple
|
|
|
|
|
|
try:
|
|
|
import lzma
|
|
|
except ImportError:
|
|
|
lzma = None
|
|
|
|
|
|
-from .logger import create_logger
|
|
|
from .helpers import Buffer, DecompressionError
|
|
|
|
|
|
-API_VERSION = '1.1_02'
|
|
|
+API_VERSION = '1.1_03'
|
|
|
|
|
|
cdef extern from "lz4.h":
|
|
|
int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
|
|
@@ -66,11 +64,34 @@ cdef class CompressorBase:
|
|
|
def __init__(self, **kwargs):
    """
    Accept arbitrary keyword arguments and ignore them; this initializer sets no attributes.
    """
    pass
|
|
|
|
|
|
def decide(self, data):
    """
    Tell the caller which compressor would really be used to compress *data*.

    The only user of this is a special case: when borg recreate recompresses with
    Auto compression, it has to know the _actual_ target compression of a chunk to
    find out whether that chunk needs to be recompressed at all.

    Every compressor other than Auto simply answers with itself.
    """
    return self
|
|
|
+
|
|
|
def compress(self, data):
    """
    Return the compressed form of *data* (bytes) as bytes.

    The result starts with the ID bytes of this compressor, so that the matching
    decompressor can be found again when the data is decompressed.
    """
    # the ID bytes always go in front of the payload
    id_bytes = self.ID
    return id_bytes + data
|
|
|
|
|
|
def decompress(self, data):
    """
    Return the decompressed form of *data* (bytes) as bytes.

    *data* must still start with the Compressor ID bytes.

    This only understands input produced by _this_ Compressor - the general purpose
    decompression method is *Compressor.decompress*.
    """
    # drop the 2 leading ID bytes, the rest is the payload
    id_length = 2
    return data[id_length:]
|
|
|
|
|
@@ -222,22 +243,36 @@ class Auto(CompressorBase):
|
|
|
ID = None
|
|
|
name = 'auto'
|
|
|
|
|
|
- logger = create_logger('borg.debug.file-compression')
|
|
|
-
|
|
|
def __init__(self, compressor):
    """
    Set up auto compression around *compressor*.

    *compressor* is the compressor that _decide() selects for data that LZ4
    shrinks to less than 97% of its size.
    """
    super().__init__()
    self.compressor = compressor
    # LZ4 is what _decide() runs on the data to measure how well it compresses
    self.lz4 = get_compressor('lz4')
    # selected by _decide() when LZ4 does not make the data smaller
    self.none = get_compressor('none')
|
|
|
|
|
|
- def compress(self, data):
|
|
|
def _decide(self, data):
    """
    Decides what to do with *data*. Returns (compressor, lz4_data).

    *data* is compressed with LZ4 first and the result size is compared to the
    input size:

    - less than 97% of the input: the configured compressor is chosen,
    - less than 100% of the input: LZ4 is chosen,
    - otherwise (including empty *data*): no compression is chosen.

    *lz4_data* is the LZ4 result if *compressor* is LZ4 as well, otherwise it is None.
    """
    lz4_data = self.lz4.compress(data)
    lz4_len = len(lz4_data)
    data_len = len(data)
    # Compare the lengths directly instead of computing lz4_len / data_len:
    # the quotient raises ZeroDivisionError for empty *data*, while the
    # comparisons below select the same branch for any non-empty input and
    # fall through to "none" for empty input.
    # NOTE(review): this also does not depend on how "/" gets compiled for two
    # len() results in a .pyx file - confirm the module's division settings.
    if lz4_len < 0.97 * data_len:
        return self.compressor, None
    elif lz4_len < data_len:
        return self.lz4, lz4_data
    else:
        return self.none, None
|
|
|
+
|
|
|
def decide(self, data):
    """
    Return the compressor that would perform the actual compression of *data*.

    Only the compressor part of the _decide() result is handed out.
    """
    decision = self._decide(data)
    return decision[0]
|
|
|
+
|
|
|
def compress(self, data):
    """
    Compress *data* with the compressor chosen by _decide() and return the result.

    If _decide() already handed back LZ4 output, that output is returned as is
    instead of compressing *data* a second time.
    """
    chosen, precompressed = self._decide(data)
    if precompressed is not None:
        return precompressed
    return chosen.compress(data)
|
|
|
|
|
|
def decompress(self, data):
    """
    Not supported by this compressor: always raises NotImplementedError.
    """
    raise NotImplementedError
|
|
@@ -288,35 +323,40 @@ class Compressor:
|
|
|
raise ValueError('No decompressor for this data found: %r.', data[:2])
|
|
|
|
|
|
|
|
|
-ComprSpec = namedtuple('ComprSpec', ('name', 'spec', 'compressor'))
|
|
|
-
|
|
|
-
|
|
|
-def CompressionSpec(s):
|
|
|
- values = s.split(',')
|
|
|
- count = len(values)
|
|
|
- if count < 1:
|
|
|
- raise ValueError
|
|
|
- # --compression algo[,level]
|
|
|
- name = values[0]
|
|
|
- if name == 'none':
|
|
|
- return ComprSpec(name=name, spec=None, compressor=CNONE())
|
|
|
- elif name == 'lz4':
|
|
|
- return ComprSpec(name=name, spec=None, compressor=LZ4())
|
|
|
- if name in ('zlib', 'lzma', ):
|
|
|
- if count < 2:
|
|
|
- level = 6 # default compression level in py stdlib
|
|
|
- elif count == 2:
|
|
|
- level = int(values[1])
|
|
|
- if not 0 <= level <= 9:
|
|
|
- raise ValueError
|
|
|
- else:
|
|
|
class CompressionSpec:
    """
    Parsed representation of a compression specification string.

    Supported forms of the specification (--compression algo[,level]):

    - ``none`` or ``lz4``: no level; further comma separated values are ignored.
    - ``zlib[,LEVEL]`` or ``lzma[,LEVEL]``: LEVEL is an integer from 0 to 9, default is 6.
    - ``auto,ALGO[,LEVEL]``: everything after ``auto,`` is parsed as a nested specification.

    Attributes:

    - *name*: the algorithm name (first element of the specification).
    - *level*: the compression level for zlib / lzma, otherwise None.
    - *inner*: the nested CompressionSpec for auto, otherwise None.
    """

    def __init__(self, s):
        """
        Parse the specification string *s*.

        Raises ValueError if the algorithm is unknown, the level is not an integer
        from 0 to 9 or the number of comma separated values does not fit the algorithm.
        """
        # str.split() with a separator always yields at least one element,
        # so values[0] exists even for an empty string.
        values = s.split(',')
        count = len(values)
        # --compression algo[,level]
        self.name = values[0]
        # always define these, so they can be read no matter what algorithm was given
        self.level = None
        self.inner = None
        if self.name in ('none', 'lz4', ):
            return
        elif self.name in ('zlib', 'lzma', ):
            if count < 2:
                level = 6  # default compression level in py stdlib
            elif count == 2:
                level = int(values[1])  # raises ValueError if this is not an integer
                if not 0 <= level <= 9:
                    raise ValueError('compression level must be in 0..9: %r' % s)
            else:
                raise ValueError('too many values in compression spec: %r' % s)
            self.level = level
        elif self.name == 'auto':
            if not 2 <= count <= 3:
                raise ValueError('auto needs an algorithm and an optional level: %r' % s)
            # everything after "auto," is a compression spec of its own
            self.inner = CompressionSpec(','.join(values[1:]))
        else:
            raise ValueError('unknown compression algorithm: %r' % s)

    @property
    def compressor(self):
        """
        Return the compressor for this specification, as given by get_compressor().

        get_compressor() is called on every access; for auto, the compressor of the
        nested specification is passed along as *compressor*.
        """
        if self.name in ('none', 'lz4', ):
            return get_compressor(self.name)
        elif self.name in ('zlib', 'lzma', ):
            return get_compressor(self.name, level=self.level)
        elif self.name == 'auto':
            return get_compressor(self.name, compressor=self.inner.compressor)
        # only reachable if *name* was changed after __init__ validated it;
        # fail loudly instead of silently handing out None
        raise ValueError('unknown compression algorithm: %r' % self.name)
|