瀏覽代碼

use blosc for multithreaded and fast compression, including lz4, lz4hc

Thomas Waldmann 10 年之前
父節點
當前提交
7c66779345
共有 3 個文件被更改,包括 95 次插入1 次删除
  1. 5 0
      attic/archiver.py
  2. 82 0
      attic/key.py
  3. 8 1
      setup.py

+ 5 - 0
attic/archiver.py

@@ -517,6 +517,11 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         - 00      no compression
         - 01..09  zlib levels 1..9 (1 means low compression, 9 max. compression)
         - 10..19  lzma levels 0..9 (0 means low compression, 9 max. compression)
+        - 20..29  lz4 (blosc) levels 0..9 (0 = no, 9 = max. compression)
+        - 30..39  lz4hc (blosc) levels 0..9 (0 = no, 9 = max. compression)
+        - 40..49  blosclz (blosc) levels 0..9 (0 = no, 9 = max. compression)
+        - 50..59  snappy (blosc) levels 0..9 (0 = no, 9 = max. compression)
+        - 60..69  zlib (blosc) levels 0..9 (0 = no, 9 = max. compression)
 
         --cipher METHODs (default: %02d or %02d)
 

+ 82 - 0
attic/key.py

@@ -16,6 +16,11 @@ except ImportError:
     except ImportError:
         lzma = None
 
+try:
+    import blosc
+except ImportError:
+    blosc = None
+
 from attic.crypto import pbkdf2_sha256, get_random_bytes, AES, AES_CTR_MODE, AES_GCM_MODE, \
     bytes_to_int, increment_iv
 from attic.helpers import IntegrityError, get_keys_dir, Error
@@ -195,6 +200,68 @@ class LzmaCompressor(object):  # uses 10..19 in the mapping
         return lzma.decompress(data)
 
 
+class BLOSCCompressor(object):  # base for the blosc-backed compressors; subclasses set TYPE/CNAME
+    TYPE = 0  # override in subclass
+    LEVELS = range(10)  # levels 0..9, added to a subclass TYPE to form its mapping key
+    CNAME = ''  # override in subclass
+
+    def __init__(self):  # raises NotImplementedError if blosc or the codec CNAME is unavailable
+        if blosc is None:
+            raise NotImplementedError("%s compression needs blosc from PyPi" % self.CNAME)
+        if self.CNAME not in blosc.compressor_list():
+            raise NotImplementedError("%s compression is not supported by blosc" % self.CNAME)
+        blosc.set_blocksize(8192)  # maybe 8 threads processing 64KB chunks -> 8KB block
+
+    def _get_level(self):  # subclasses return the value passed to blosc as clevel
+        raise NotImplementedError
+
+    def compress(self, data):  # NOTE(review): the literal 1 is presumably blosc's typesize - confirm
+        return blosc.compress(bytes(data), 1, cname=self.CNAME, clevel=self._get_level())
+
+    def decompress(self, data):  # no codec argument is passed to blosc.decompress
+        return blosc.decompress(data)
+
+
+class LZ4Compressor(BLOSCCompressor):  # uses 20..29 in the mapping
+    TYPE = 20
+    CNAME = 'lz4'
+
+    def _get_level(self):  # level = offset of this (sub)class TYPE from the base TYPE 20
+        return self.TYPE - LZ4Compressor.TYPE
+
+
+class LZ4HCCompressor(BLOSCCompressor):  # uses 30..39 in the mapping
+    TYPE = 30
+    CNAME = 'lz4hc'
+
+    def _get_level(self):  # level = offset of this (sub)class TYPE from the base TYPE 30
+        return self.TYPE - LZ4HCCompressor.TYPE
+
+
+class BLOSCLZCompressor(BLOSCCompressor):  # uses 40..49 in the mapping
+    TYPE = 40
+    CNAME = 'blosclz'
+
+    def _get_level(self):  # level = offset of this (sub)class TYPE from the base TYPE 40
+        return self.TYPE - BLOSCLZCompressor.TYPE
+
+
+class SnappyCompressor(BLOSCCompressor):  # uses 50..59 in the mapping
+    TYPE = 50
+    CNAME = 'snappy'
+
+    def _get_level(self):  # level = offset of this (sub)class TYPE from the base TYPE 50
+        return self.TYPE - SnappyCompressor.TYPE
+
+
+class BLOSCZlibCompressor(BLOSCCompressor):  # uses 60..69 in the mapping
+    TYPE = 60
+    CNAME = 'zlib'
+
+    def _get_level(self):  # level = offset of this (sub)class TYPE from the base TYPE 60
+        return self.TYPE - BLOSCZlibCompressor.TYPE
+
+
 # default is optimized for speed
 COMPR_DEFAULT = NullCompressor.TYPE # no compression
 
@@ -581,6 +648,21 @@ for level in ZlibCompressor.LEVELS:
 for preset in LzmaCompressor.PRESETS:
     compressor_mapping[LzmaCompressor.TYPE + preset] = \
         type('LzmaCompressorPreset%d' % preset, (LzmaCompressor, ), dict(TYPE=LzmaCompressor.TYPE + preset))
+for level in LZ4Compressor.LEVELS:  # lz4: one generated subclass per code 20..29
+    compressor_mapping[LZ4Compressor.TYPE + level] = \
+        type('LZ4CompressorLevel%d' % level, (LZ4Compressor, ), dict(TYPE=LZ4Compressor.TYPE + level))
+for level in LZ4HCCompressor.LEVELS:  # lz4hc: codes 30..39
+    compressor_mapping[LZ4HCCompressor.TYPE + level] = \
+        type('LZ4HCCompressorLevel%d' % level, (LZ4HCCompressor, ), dict(TYPE=LZ4HCCompressor.TYPE + level))
+for level in BLOSCLZCompressor.LEVELS:  # blosclz: codes 40..49
+    compressor_mapping[BLOSCLZCompressor.TYPE + level] = \
+        type('BLOSCLZCompressorLevel%d' % level, (BLOSCLZCompressor, ), dict(TYPE=BLOSCLZCompressor.TYPE + level))
+for level in SnappyCompressor.LEVELS:  # snappy: codes 50..59
+    compressor_mapping[SnappyCompressor.TYPE + level] = \
+        type('SnappyCompressorLevel%d' % level, (SnappyCompressor, ), dict(TYPE=SnappyCompressor.TYPE + level))
+for level in BLOSCZlibCompressor.LEVELS:  # zlib via blosc: codes 60..69
+    compressor_mapping[BLOSCZlibCompressor.TYPE + level] = \
+        type('BLOSCZlibCompressorLevel%d' % level, (BLOSCZlibCompressor, ), dict(TYPE=BLOSCZlibCompressor.TYPE + level))
 # overwrite 0 with NullCompressor
 compressor_mapping[NullCompressor.TYPE] = NullCompressor
 

+ 8 - 1
setup.py

@@ -98,10 +98,16 @@ elif platform == 'Darwin':
 
 # msgpack pure python data corruption was fixed in 0.4.6.
 # Also, we might use some rather recent API features.
-install_requires=['msgpack-python>=0.4.6']
+install_requires=['msgpack-python>=0.4.6', 'blosc>1.2.4']
 if sys.version_info < (3, 3):
     install_requires.append('backports.lzma')
 
+dependency_links=[
+    # blosc 1.2.5 is not released yet, but needed for set_blocksize so we can
+    # get parallel compression even if only feeding it 64KB chunks of data...
+    "https://github.com/Blosc/python-blosc/archive/master.zip#egg=blosc-1.2.5"
+]
+
 setup(
     name='Attic',
     version=versioneer.get_version(),
@@ -129,4 +135,5 @@ setup(
     cmdclass=cmdclass,
     ext_modules=ext_modules,
     install_requires=install_requires,
+    dependency_links=dependency_links,
 )