Pārlūkot izejas kodu

flexible type header parsing, compression levels, sha256 + sha512_256

e.g.:
 attic init --encryption=none --compression 0 --mac 1 repo.attic

Note: Numeric --compression and --mac values are a bit simplistic, but even if one
used lots of string choices there, one would still have to look them up in the docs.
Thomas Waldmann 10 gadi atpakaļ
vecāks
revīzija
ff542e612a
5 mainītis faili ar 252 papildinājumiem un 118 dzēšanām
  1. 6 3
      attic/archiver.py
  2. 232 107
      attic/key.py
  3. 7 2
      attic/testsuite/archive.py
  4. 2 2
      attic/testsuite/archiver.py
  5. 5 4
      attic/testsuite/key.py

+ 6 - 3
attic/archiver.py

@@ -13,7 +13,7 @@ from attic import __version__
 from attic.archive import Archive, ArchiveChecker
 from attic.repository import Repository
 from attic.cache import Cache
-from attic.key import key_creator
+from attic.key import key_creator, COMPR_DEFAULT
 from attic.helpers import Error, location_validator, format_time, \
     format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, \
     get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \
@@ -476,8 +476,11 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                choices=('none', 'passphrase', 'keyfile'), default='none',
                                help='select encryption method')
         subparser.add_argument('-c', '--compression', dest='compression',
-                               choices=('none', 'zlib', 'lzma'), default='zlib',
-                               help='select compression method')
+                               type=int, default=COMPR_DEFAULT, metavar='METHOD',
+                               help='select compression method (0..19)')
+        subparser.add_argument('-m', '--mac', dest='mac',
+                               type=int, default=None, metavar='METHOD',
+                               help='select hash/mac method (0..3)')
 
         check_epilog = textwrap.dedent("""
         The check command verifies the consistency of a repository and the corresponding

+ 232 - 107
attic/key.py

@@ -4,7 +4,7 @@ import os
 import msgpack
 import textwrap
 import hmac
-from hashlib import sha256
+from hashlib import sha256, sha512
 import zlib
 
 try:
@@ -25,6 +25,21 @@ class UnsupportedPayloadError(Error):
     """Unsupported payload type {}. A newer version is required to access this repository.
     """
 
+class sha512_256(object):  # note: can't subclass sha512
+    """sha512, but digest truncated to 256bit - faster than sha256 on 64bit platforms"""
+    def __init__(self, data=b''):
+        self.h = sha512(data)
+
+    def update(self, data):
+        self.h.update(data)
+
+    def digest(self):
+        return self.h.digest()[:32]
+
+    def hexdigest(self):
+        return self.h.hexdigest()[:64]
+
+
 class HMAC(hmac.HMAC):
     """Workaround a bug in Python < 3.4 Where HMAC does not accept memoryviews
     """
@@ -32,108 +47,97 @@ class HMAC(hmac.HMAC):
         self.inner.update(msg)
 
 
-def compressor_creator(args):
-    if args is None:  # used by unit tests
-        return ZlibCompressor.create(args)
-    if args.compression == 'zlib':
-        return ZlibCompressor.create(args)
-    if args.compression == 'lzma':
-        return LzmaCompressor.create(args)
-    if args.compression == 'none':
-        return NullCompressor.create(args)
-    raise NotImplemented(args.compression)
-
-
-def compressor_factory(manifest_data):
-    # compression is determined by 4 upper bits of the type byte
-    compression_type = manifest_data[0] & 0xf0
-    if compression_type == ZlibCompressor.TYPE:
-        return ZlibCompressor()
-    if compression_type == LzmaCompressor.TYPE:
-        return LzmaCompressor()
-    if compression_type == NullCompressor.TYPE:
-        return NullCompressor()
-    raise UnsupportedPayloadError(manifest_data[0])
-
-
-class CompressorBase(object):
-    @classmethod
-    def create(cls, args):
-        return cls()
+class SHA256(object):  # note: can't subclass sha256
+    TYPE = 0x00
 
-    def compress(self, data):
-        pass
+    def __init__(self, key, data=b''):
+        # signature is like for a MAC, we ignore the key as this is a simple hash
+        if key is not None:
+            raise Exception("use a HMAC if you have a key")
+        self.h = sha256(data)
+
+    def update(self, data):
+        self.h.update(data)
+
+    def digest(self):
+        return self.h.digest()
+
+    def hexdigest(self):
+        return self.h.hexdigest()
+
+
+class SHA512_256(sha512_256):
+    """sha512, but digest truncated to 256bit - faster than sha256 on 64bit platforms"""
+    TYPE = 0x01
+
+    def __init__(self, key, data):
+        # signature is like for a MAC, we ignore the key as this is a simple hash
+        if key is not None:
+            raise Exception("use a HMAC if you have a key")
+        super().__init__(data)
+
+
+HASH_DEFAULT = SHA256.TYPE
+
+
+class HMAC_SHA256(HMAC):
+    TYPE = 0x02
+
+    def __init__(self, key, data):
+        if key is None:
+            raise Exception("do not use HMAC if you don't have a key")
+        super().__init__(key, data, sha256)
+
+
+class HMAC_SHA512_256(HMAC):
+    TYPE = 0x03
+
+    def __init__(self, key, data):
+        if key is None:
+            raise Exception("do not use HMAC if you don't have a key")
+        super().__init__(key, data, sha512_256)
 
-    def decompress(self, data):
-        pass
 
+MAC_DEFAULT = HMAC_SHA256.TYPE
 
-class ZlibCompressor(CompressorBase):
-    TYPE = 0x00  # must be 0x00 for backwards compatibility
+
+class ZlibCompressor(object):  # uses 0..9 in the mapping
+    TYPE = 0
+    LEVELS = range(10)
 
     def compress(self, data):
-        return zlib.compress(data)
+        level = self.TYPE - ZlibCompressor.TYPE
+        return zlib.compress(data, level)
 
     def decompress(self, data):
         return zlib.decompress(data)
 
 
-class LzmaCompressor(CompressorBase):
-    TYPE = 0x10
+class LzmaCompressor(object):  # uses 10..19 in the mapping
+    TYPE = 10
+    PRESETS = range(10)
 
     def __init__(self):
         if lzma is None:
             raise NotImplemented("lzma compression needs Python >= 3.3 or backports.lzma from PyPi")
 
     def compress(self, data):
-        return lzma.compress(data)
+        preset = self.TYPE - LzmaCompressor.TYPE
+        return lzma.compress(data, preset=preset)
 
     def decompress(self, data):
         return lzma.decompress(data)
 
 
-class NullCompressor(CompressorBase):
-    TYPE = 0x20
-
-    def compress(self, data):
-        return data
-
-    def decompress(self, data):
-        return data
-
-
-def key_creator(repository, args):
-    if args.encryption == 'keyfile':
-        return KeyfileKey.create(repository, args)
-    if args.encryption == 'passphrase':
-        return PassphraseKey.create(repository, args)
-    if args.encryption == 'none':
-        return PlaintextKey.create(repository, args)
-    raise NotImplemented(args.encryption)
-
-
-def key_factory(repository, manifest_data):
-    # key type is determined by 4 lower bits of the type byte
-    key_type = manifest_data[0] & 0x0f
-    if key_type == KeyfileKey.TYPE:
-        return KeyfileKey.detect(repository, manifest_data)
-    if key_type == PassphraseKey.TYPE:
-        return PassphraseKey.detect(repository, manifest_data)
-    if key_type == PlaintextKey.TYPE:
-        return PlaintextKey.detect(repository, manifest_data)
-    raise UnsupportedPayloadError(manifest_data[0])
+COMPR_DEFAULT = ZlibCompressor.TYPE + 6  # zlib level 6
 
 
 class KeyBase(object):
+    TYPE = 0x00  # override in derived classes
 
-    def __init__(self, compressor):
-        self.compressor = compressor
-        self.TYPE_STR = bytes([self.TYPE | self.compressor.TYPE])
-
-    def type_check(self, type_byte):
-        type_str = bytes([type_byte])
-        if type_str != self.TYPE_STR:
-            raise IntegrityError('Invalid encryption envelope %r' % type_str)
+    def __init__(self, compressor, maccer):
+        self.compressor = compressor()
+        self.maccer = maccer
 
     def id_hash(self, data):
         """Return HMAC hash using the "id" HMAC key
@@ -155,23 +159,27 @@ class PlaintextKey(KeyBase):
     def create(cls, repository, args):
         print('Encryption NOT enabled.\nUse the "--encryption=passphrase|keyfile" to enable encryption.')
         compressor = compressor_creator(args)
-        return cls(compressor)
+        maccer = maccer_creator(args, cls)
+        return cls(compressor, maccer)
 
     @classmethod
     def detect(cls, repository, manifest_data):
-        compressor = compressor_factory(manifest_data)
-        return cls(compressor)
+        offset, compressor, crypter, maccer = parser(manifest_data)
+        return cls(compressor, maccer)
 
     def id_hash(self, data):
-        return sha256(data).digest()
+        return self.maccer(None, data).digest()
 
     def encrypt(self, data):
-        return b''.join([self.TYPE_STR, self.compressor.compress(data)])
+        header = make_header(compr_type=self.compressor.TYPE, crypt_type=self.TYPE, mac_type=self.maccer.TYPE)
+        return b''.join([header, self.compressor.compress(data)])
 
     def decrypt(self, id, data):
-        self.type_check(data[0])
-        data = self.compressor.decompress(memoryview(data)[1:])
-        if id and sha256(data).digest() != id:
+        offset, compressor, crypter, maccer = parser(data)
+        assert isinstance(self, crypter)
+        assert self.maccer is maccer
+        data = self.compressor.decompress(memoryview(data)[offset:])
+        if id and self.id_hash(data) != id:
             raise IntegrityError('Chunk id verification failed')
         return data
 
@@ -181,42 +189,45 @@ class AESKeyBase(KeyBase):
 
     Chunks are encrypted using 256bit AES in Counter Mode (CTR)
 
-    Payload layout: TYPE(1) + HMAC(32) + NONCE(8) + CIPHERTEXT
+    Payload layout: HEADER(4) + HMAC(32) + NONCE(8) + CIPHERTEXT
 
     To reduce payload size only 8 bytes of the 16 bytes nonce is saved
     in the payload, the first 8 bytes are always zeros. This does not
     affect security but limits the maximum repository capacity to
     only 295 exabytes!
     """
-
-    PAYLOAD_OVERHEAD = 1 + 32 + 8  # TYPE + HMAC + NONCE
+    PAYLOAD_OVERHEAD = 4 + 32 + 8  # HEADER + HMAC + NONCE
 
     def id_hash(self, data):
         """Return HMAC hash using the "id" HMAC key
         """
-        return HMAC(self.id_key, data, sha256).digest()
+        return self.maccer(self.id_key, data).digest()
 
     def encrypt(self, data):
         data = self.compressor.compress(data)
         self.enc_cipher.reset()
         data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data)))
-        hmac = HMAC(self.enc_hmac_key, data, sha256).digest()
-        return b''.join((self.TYPE_STR, hmac, data))
+        hmac = self.maccer(self.enc_hmac_key, data).digest()
+        header = make_header(compr_type=self.compressor.TYPE, crypt_type=self.TYPE, mac_type=self.maccer.TYPE)
+        return b''.join((header, hmac, data))
 
     def decrypt(self, id, data):
-        self.type_check(data[0])
-        hmac = memoryview(data)[1:33]
-        if memoryview(HMAC(self.enc_hmac_key, memoryview(data)[33:], sha256).digest()) != hmac:
+        offset, compressor, crypter, maccer = parser(data)
+        assert isinstance(self, crypter)
+        assert self.maccer is maccer
+        hmac = memoryview(data)[offset:offset+32]
+        if memoryview(self.maccer(self.enc_hmac_key, memoryview(data)[offset+32:]).digest()) != hmac:
             raise IntegrityError('Encryption envelope checksum mismatch')
-        self.dec_cipher.reset(iv=PREFIX + data[33:41])
-        data = self.compressor.decompress(self.dec_cipher.decrypt(data[41:]))  # should use memoryview
-        if id and HMAC(self.id_key, data, sha256).digest() != id:
+        self.dec_cipher.reset(iv=PREFIX + data[offset+32:offset+40])
+        data = self.compressor.decompress(self.dec_cipher.decrypt(data[offset+40:]))  # should use memoryview
+        if id and self.id_hash(data) != id:
             raise IntegrityError('Chunk id verification failed')
         return data
 
     def extract_nonce(self, payload):
-        self.type_check(payload[0])
-        nonce = bytes_to_long(payload[33:41])
+        offset, compressor, crypter, maccer = parser(payload)
+        assert isinstance(self, crypter)
+        nonce = bytes_to_long(payload[offset+32:offset+40])
         return nonce
 
     def init_from_random_data(self, data):
@@ -240,7 +251,8 @@ class PassphraseKey(AESKeyBase):
     @classmethod
     def create(cls, repository, args):
         compressor = compressor_creator(args)
-        key = cls(compressor)
+        maccer = maccer_creator(args, cls)
+        key = cls(compressor, maccer)
         passphrase = os.environ.get('ATTIC_PASSPHRASE')
         if passphrase is not None:
             passphrase2 = passphrase
@@ -262,8 +274,8 @@ class PassphraseKey(AESKeyBase):
     @classmethod
     def detect(cls, repository, manifest_data):
         prompt = 'Enter passphrase for %s: ' % repository._location.orig
-        compressor = compressor_factory(manifest_data)
-        key = cls(compressor)
+        offset, compressor, crypter, maccer = parser(manifest_data)
+        key = cls(compressor, maccer)
         passphrase = os.environ.get('ATTIC_PASSPHRASE')
         if passphrase is None:
             passphrase = getpass(prompt)
@@ -271,7 +283,7 @@ class PassphraseKey(AESKeyBase):
             key.init(repository, passphrase)
             try:
                 key.decrypt(None, manifest_data)
-                num_blocks = num_aes_blocks(len(manifest_data) - 41)
+                num_blocks = num_aes_blocks(len(manifest_data) - offset - 40)
                 key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks))
                 return key
             except IntegrityError:
@@ -288,14 +300,14 @@ class KeyfileKey(AESKeyBase):
 
     @classmethod
     def detect(cls, repository, manifest_data):
-        compressor = compressor_factory(manifest_data)
-        key = cls(compressor)
+        offset, compressor, crypter, maccer = parser(manifest_data)
+        key = cls(compressor, maccer)
         path = cls.find_key_file(repository)
         prompt = 'Enter passphrase for key file %s: ' % path
         passphrase = os.environ.get('ATTIC_PASSPHRASE', '')
         while not key.load(path, passphrase):
             passphrase = getpass(prompt)
-        num_blocks = num_aes_blocks(len(manifest_data) - 41)
+        num_blocks = num_aes_blocks(len(manifest_data) - offset - 40)
         key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks))
         return key
 
@@ -398,7 +410,8 @@ class KeyfileKey(AESKeyBase):
             if passphrase != passphrase2:
                 print('Passphrases do not match')
         compressor = compressor_creator(args)
-        key = cls(compressor)
+        maccer = maccer_creator(args, cls)
+        key = cls(compressor, maccer)
         key.repository_id = repository.id
         key.init_from_random_data(get_random_bytes(100))
         key.init_ciphers()
@@ -406,3 +419,115 @@ class KeyfileKey(AESKeyBase):
         print('Key file "%s" created.' % key.path)
         print('Keep this file safe. Your data will be inaccessible without it.')
         return key
+
+
+# note: key 0 nicely maps to a zlib compressor with level 0 which means "no compression"
+compressor_mapping = {}
+for level in ZlibCompressor.LEVELS:
+    compressor_mapping[ZlibCompressor.TYPE + level] = \
+        type('ZlibCompressorLevel%d' % level, (ZlibCompressor, ), dict(TYPE=ZlibCompressor.TYPE + level))
+for preset in LzmaCompressor.PRESETS:
+    compressor_mapping[LzmaCompressor.TYPE + preset] = \
+        type('LzmaCompressorPreset%d' % preset, (LzmaCompressor, ), dict(TYPE=LzmaCompressor.TYPE + preset))
+
+
+crypter_mapping = {
+    KeyfileKey.TYPE: KeyfileKey,
+    PassphraseKey.TYPE: PassphraseKey,
+    PlaintextKey.TYPE: PlaintextKey,
+}
+
+
+maccer_mapping = {
+    # simple hashes, not MACs (but MAC-like signature):
+    SHA256.TYPE: SHA256,
+    SHA512_256.TYPE: SHA512_256,
+    # MACs:
+    HMAC_SHA256.TYPE: HMAC_SHA256,
+    HMAC_SHA512_256.TYPE: HMAC_SHA512_256,
+}
+
+
+def p(offset, compr_type, crypt_type, mac_type):
+    try:
+        compressor = compressor_mapping[compr_type]
+        crypter = crypter_mapping[crypt_type]
+        maccer = maccer_mapping[mac_type]
+    except KeyError:
+        raise UnsupportedPayloadError("compr_type %x crypt_type %x mac_type %x" % (compr_type, crypt_type, mac_type))
+    return offset, compressor, crypter, maccer
+
+
+def parser00(data):  # legacy, hardcoded
+    return p(offset=1, compr_type=6, crypt_type=KeyfileKey.TYPE, mac_type=HMAC_SHA256.TYPE)
+
+
+def parser01(data):  # legacy, hardcoded
+    return p(offset=1, compr_type=6, crypt_type=PassphraseKey.TYPE, mac_type=HMAC_SHA256.TYPE)
+
+
+def parser02(data):  # legacy, hardcoded
+    return p(offset=1, compr_type=6, crypt_type=PlaintextKey.TYPE, mac_type=SHA256.TYPE)
+
+
+def parser03(data):  # new & flexible
+    offset = 4
+    compr_type, crypt_type, mac_type = data[1:offset]
+    return p(offset=offset, compr_type=compr_type, crypt_type=crypt_type, mac_type=mac_type)
+
+
+def parser(data):
+    parser_mapping = {
+        0x00: parser00,
+        0x01: parser01,
+        0x02: parser02,
+        0x03: parser03,
+    }
+    header_type = data[0]
+    parser_func = parser_mapping[header_type]
+    return parser_func(data)
+
+
+def key_factory(repository, manifest_data):
+    offset, compressor, crypter, maccer = parser(manifest_data)
+    return crypter.detect(repository, manifest_data)
+
+
+def make_header(compr_type, crypt_type, mac_type):
+    # always create new-style 0x03 headers
+    return bytes([0x03, compr_type, crypt_type, mac_type])
+
+
+def compressor_creator(args):
+    # args == None is used by unit tests
+    compression = COMPR_DEFAULT if args is None else args.compression
+    compressor = compressor_mapping.get(compression)
+    if compressor is None:
+        raise NotImplementedError("no compression %d" % args.compression)
+    return compressor
+
+
+def key_creator(repository, args):
+    if args.encryption == 'keyfile':
+        return KeyfileKey.create(repository, args)
+    if args.encryption == 'passphrase':
+        return PassphraseKey.create(repository, args)
+    if args.encryption == 'none':
+        return PlaintextKey.create(repository, args)
+    raise NotImplemented("no encryption %s" % args.encryption)
+
+
+def maccer_creator(args, key_cls):
+    # args == None is used by unit tests
+    mac = None if args is None else args.mac
+    if mac is None:
+        if key_cls is PlaintextKey:
+            mac = HASH_DEFAULT
+        elif key_cls in (KeyfileKey, PassphraseKey):
+            mac = MAC_DEFAULT
+        else:
+            raise NotImplementedError("unknown key class")
+    maccer = maccer_mapping.get(mac)
+    if maccer is None:
+        raise NotImplementedError("no mac %d" % args.mac)
+    return maccer

+ 7 - 2
attic/testsuite/archive.py

@@ -1,7 +1,7 @@
 import msgpack
 from attic.testsuite import AtticTestCase
 from attic.archive import CacheChunkBuffer, RobustUnpacker
-from attic.key import PlaintextKey, ZlibCompressor
+from attic.key import PlaintextKey, COMPR_DEFAULT
 
 
 class MockCache:
@@ -16,10 +16,15 @@ class MockCache:
 
 class ChunkBufferTestCase(AtticTestCase):
 
+    class MockArgs(object):
+        repository = None
+        compression = COMPR_DEFAULT
+        mac = None
+
     def test(self):
         data = [{b'foo': 1}, {b'bar': 2}]
         cache = MockCache()
-        key = PlaintextKey(ZlibCompressor())
+        key = PlaintextKey.create(None, self.MockArgs())
         chunks = CacheChunkBuffer(cache, key, None)
         for d in data:
             chunks.add(d)

+ 2 - 2
attic/testsuite/archiver.py

@@ -359,8 +359,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
                 hash = sha256(data).digest()
                 if not hash in seen:
                     seen.add(hash)
-                    num_blocks = num_aes_blocks(len(data) - 41)
-                    nonce = bytes_to_long(data[33:41])
+                    num_blocks = num_aes_blocks(len(data) - 4 - 40)
+                    nonce = bytes_to_long(data[4+32:4+40])
                     for counter in range(nonce, nonce + num_blocks):
                         self.assert_not_in(counter, used)
                         used.add(counter)

+ 5 - 4
attic/testsuite/key.py

@@ -5,7 +5,7 @@ import tempfile
 from binascii import hexlify
 from attic.crypto import bytes_to_long, num_aes_blocks
 from attic.testsuite import AtticTestCase
-from attic.key import PlaintextKey, PassphraseKey, KeyfileKey
+from attic.key import PlaintextKey, PassphraseKey, KeyfileKey, COMPR_DEFAULT
 from attic.helpers import Location, unhexlify
 
 
@@ -13,7 +13,8 @@ class KeyTestCase(AtticTestCase):
 
     class MockArgs(object):
         repository = Location(tempfile.mkstemp()[1])
-        compression = 'zlib'
+        compression = COMPR_DEFAULT
+        mac = None
 
     keyfile2_key_file = """
         ATTIC KEY 0000000000000000000000000000000000000000000000000000000000000000
@@ -46,7 +47,7 @@ class KeyTestCase(AtticTestCase):
         id = bytes(32)
 
     def test_plaintext(self):
-        key = PlaintextKey.create(None, None)
+        key = PlaintextKey.create(None, self.MockArgs())
         data = b'foo'
         self.assert_equal(hexlify(key.id_hash(data)), b'2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae')
         self.assert_equal(data, key.decrypt(key.id_hash(data), key.encrypt(data)))
@@ -79,7 +80,7 @@ class KeyTestCase(AtticTestCase):
 
     def test_passphrase(self):
         os.environ['ATTIC_PASSPHRASE'] = 'test'
-        key = PassphraseKey.create(self.MockRepository(), None)
+        key = PassphraseKey.create(self.MockRepository(), self.MockArgs())
         self.assert_equal(bytes_to_long(key.enc_cipher.iv, 8), 0)
         self.assert_equal(hexlify(key.id_key), b'793b0717f9d8fb01c751a487e9b827897ceea62409870600013fbc6b4d8d7ca6')
         self.assert_equal(hexlify(key.enc_hmac_key), b'b885a05d329a086627412a6142aaeb9f6c54ab7950f996dd65587251f6bc0901')