瀏覽代碼

Merge branch 'merge-all' of ../attic into experimental

Thomas Waldmann 10 年之前
父節點
當前提交
3a38457def

+ 48 - 0
CHANGES-experimental.txt

@@ -0,0 +1,48 @@
+Important note about "experimental" branch
+==========================================
+
+The goal of the "experimental" branch is to merge all of the following:
+- changesets from master branch
+- features that DO IMPACT compatibility
+- play with new technologies
+- etc.
+
+THERE IS NO GUARANTEE THAT IT IS COMPATIBLE WITH MASTER BRANCH OR PREVIOUS
+"experimental" CODE nor THAT YOU CAN SWITCH BACK AND FORTH BETWEEN BRANCHES
+WITHIN THE SAME REPOSITORY WITHOUT ENCOUNTERING SEVERE ISSUES.
+
+Please also see the LICENSE for more information.
+
+
+Stuff in "experimental" that is not in "master" minus minor changes
+===================================================================
+
+added tuning docs
+
+attic init --compression NN --cipher NN --mac NN ...
+(see attic init --help)
+
+new hashes:      sha512-256
+                 sha512
+                 sha1
+                 ghash (default)
+new MACs:        hmac-sha512-256
+                 hmac-sha512
+                 hmac-sha1
+                 gmac (default)
+new ciphers:     aes256-ctr + hmac-sha512-256
+                 aes256-gcm (default)
+new compression: no compression (default)
+                 zlib level 1..9 (previously, level 6 was hardcoded)
+                 lzma preset 0..9
+                 lz4 (and other) multi-threaded algos from blosc library
+
+source: more flexible type 0x03 header format, which allows specifying the
+hash algo, compression algo and level, encryption algo, and key type.
+
+IV is stored in full length, length of stored IV/MAC/hash is flexible.
+Indexing key size (key = id_hash()) is flexible and configurable per repo.
+
+source: less hardcoding, numeric offsets / lengths
+source: flexible hashing, compression, encryption, key dispatching
+

+ 2 - 1
README.rst

@@ -34,7 +34,7 @@ Space efficient storage
 
 
 Optional data encryption
 Optional data encryption
     All data can be protected using 256-bit AES encryption and data integrity
     All data can be protected using 256-bit AES encryption and data integrity
-    and authenticity is verified using HMAC-SHA256.
+    and authenticity is verified using a MAC (message authentication code).
 
 
 Off-site backups
 Off-site backups
     Borg can store data on any remote host accessible over SSH.  This is
     Borg can store data on any remote host accessible over SSH.  This is
@@ -49,6 +49,7 @@ What do I need?
 Borg requires Python 3.2 or above to work.
 Borg requires Python 3.2 or above to work.
 Borg also requires a sufficiently recent OpenSSL (>= 1.0.0).
 Borg also requires a sufficiently recent OpenSSL (>= 1.0.0).
 In order to mount archives as filesystems, llfuse is required.
 In order to mount archives as filesystems, llfuse is required.
+For other Python requirements, please see install_requires in setup.py.
 
 
 How do I install it?
 How do I install it?
 --------------------
 --------------------

+ 1 - 1
borg/_hashindex.c

@@ -366,7 +366,7 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_cs
     void *key = NULL;
     void *key = NULL;
 
 
     while((key = hashindex_next_key(index, key))) {
     while((key = hashindex_next_key(index, key))) {
-        values = key + 32;
+        values = key + index->key_size;
         unique_size += values[1];
         unique_size += values[1];
         unique_csize += values[2];
         unique_csize += values[2];
         size += values[0] * values[1];
         size += values[0] * values[1];

+ 3 - 3
borg/archive.py

@@ -616,7 +616,7 @@ class ArchiveChecker:
         self.repository = repository
         self.repository = repository
         self.init_chunks()
         self.init_chunks()
         self.key = self.identify_key(repository)
         self.key = self.identify_key(repository)
-        if Manifest.MANIFEST_ID not in self.chunks:
+        if Manifest.manifest_id(repository) not in self.chunks:
             self.manifest = self.rebuild_manifest()
             self.manifest = self.rebuild_manifest()
         else:
         else:
             self.manifest, _ = Manifest.load(repository, key=self.key)
             self.manifest, _ = Manifest.load(repository, key=self.key)
@@ -635,7 +635,7 @@ class ArchiveChecker:
         # Explicity set the initial hash table capacity to avoid performance issues
         # Explicity set the initial hash table capacity to avoid performance issues
         # due to hash table "resonance"
         # due to hash table "resonance"
         capacity = int(len(self.repository) * 1.2)
         capacity = int(len(self.repository) * 1.2)
-        self.chunks = ChunkIndex(capacity)
+        self.chunks = ChunkIndex(capacity, key_size=self.repository.key_size)
         marker = None
         marker = None
         while True:
         while True:
             result = self.repository.list(limit=10000, marker=marker)
             result = self.repository.list(limit=10000, marker=marker)
@@ -687,7 +687,7 @@ class ArchiveChecker:
         Missing and/or incorrect data is repaired when detected
         Missing and/or incorrect data is repaired when detected
         """
         """
         # Exclude the manifest from chunks
         # Exclude the manifest from chunks
-        del self.chunks[Manifest.MANIFEST_ID]
+        del self.chunks[Manifest.manifest_id(self.repository)]
 
 
         def mark_as_possibly_superseded(id_):
         def mark_as_possibly_superseded(id_):
             if self.chunks.get(id_, (0,))[0] == 0:
             if self.chunks.get(id_, (0,))[0] == 0:

+ 52 - 10
borg/archiver.py

@@ -16,7 +16,7 @@ from . import __version__
 from .archive import Archive, ArchiveChecker
 from .archive import Archive, ArchiveChecker
 from .repository import Repository
 from .repository import Repository
 from .cache import Cache
 from .cache import Cache
-from .key import key_creator
+from .key import key_creator, maccer_creator, COMPR_DEFAULT, HASH_DEFAULT, MAC_DEFAULT, PLAIN_DEFAULT, CIPHER_DEFAULT
 from .helpers import Error, location_validator, format_time, format_file_size, \
 from .helpers import Error, location_validator, format_time, format_file_size, \
     format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \
     format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \
     get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \
     get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \
@@ -30,11 +30,11 @@ class Archiver:
     def __init__(self):
     def __init__(self):
         self.exit_code = 0
         self.exit_code = 0
 
 
-    def open_repository(self, location, create=False, exclusive=False):
+    def open_repository(self, location, create=False, exclusive=False, key_size=None):
         if location.proto == 'ssh':
         if location.proto == 'ssh':
-            repository = RemoteRepository(location, create=create)
+            repository = RemoteRepository(location, create=create, key_size=key_size)
         else:
         else:
-            repository = Repository(location.path, create=create, exclusive=exclusive)
+            repository = Repository(location.path, create=create, exclusive=exclusive, key_size=key_size)
         repository._location = location
         repository._location = location
         return repository
         return repository
 
 
@@ -59,10 +59,12 @@ class Archiver:
     def do_init(self, args):
     def do_init(self, args):
         """Initialize an empty repository"""
         """Initialize an empty repository"""
         print('Initializing repository at "%s"' % args.repository.orig)
         print('Initializing repository at "%s"' % args.repository.orig)
-        repository = self.open_repository(args.repository, create=True, exclusive=True)
-        key = key_creator(repository, args)
+        key_cls = key_creator(args)
+        maccer_cls = maccer_creator(args, key_cls)
+        repository = self.open_repository(args.repository, create=True, exclusive=True,
+                                          key_size=maccer_cls.digest_size)
+        key = key_cls.create(repository, args)
         manifest = Manifest(key, repository)
         manifest = Manifest(key, repository)
-        manifest.key = key
         manifest.write()
         manifest.write()
         repository.commit()
         repository.commit()
         Cache(repository, key, manifest, warn_if_unencrypted=False)
         Cache(repository, key, manifest, warn_if_unencrypted=False)
@@ -523,8 +525,39 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         init_epilog = textwrap.dedent("""
         init_epilog = textwrap.dedent("""
         This command initializes an empty repository. A repository is a filesystem
         This command initializes an empty repository. A repository is a filesystem
         directory containing the deduplicated data from zero or more archives.
         directory containing the deduplicated data from zero or more archives.
-        Encryption can be enabled at repository init time.
-        """)
+        Encryption can be enabled, compression, cipher and mac method can be chosen at
+        repository init time.
+
+        --compression METHODs (default: %02d):
+
+        - 00      no compression
+        - 01..09  zlib levels 1..9 (1 means low compression, 9 max. compression)
+        - 10..19  lzma levels 0..9 (0 means low compression, 9 max. compression)
+        - 20..29  lz4 (blosc) levels 0..9 (0 = no, 9 = max. compression)
+        - 30..39  lz4hc (blosc) levels 0..9 (0 = no, 9 = max. compression)
+        - 40..49  blosclz (blosc) levels 0..9 (0 = no, 9 = max. compression)
+        - 50..59  snappy (blosc) levels 0..9 (0 = no, 9 = max. compression)
+        - 60..69  zlib (blosc) levels 0..9 (0 = no, 9 = max. compression)
+
+        --cipher METHODs (default: %02d or %02d)
+
+        - 00      No encryption
+        - 01      AEAD: AES-CTR + HMAC-SHA256
+        - 02      AEAD: AES-GCM
+
+        --mac METHODs (default: %02d or %02d):
+
+        - 00      sha256 (simple hash, no MAC, faster on 32bit CPU)
+        - 01      sha512-256 (simple hash, no MAC, faster on 64bit CPU)
+        - 02      ghash (simple hash, no MAC, fastest on CPUs with AES-GCM support)
+        - 03      sha1 (simple hash, no MAC, fastest on CPUs without AES-GCM support)
+        - 04      sha512 (simple hash, no MAC, faster on 64bit CPU)
+        - 10      hmac-sha256 (MAC, faster on 32bit CPU)
+        - 11      hmac-sha512-256 (MAC, faster on 64bit CPU)
+        - 13      hmac-sha1 (MAC, fastest on CPUs without AES-GCM support)
+        - 14      hmac-sha512 (MAC, faster on 64bit CPU)
+        - 20      gmac (MAC, fastest on CPUs with AES-GCM support)
+        """ % (COMPR_DEFAULT, PLAIN_DEFAULT, CIPHER_DEFAULT, HASH_DEFAULT, MAC_DEFAULT))
         subparser = subparsers.add_parser('init', parents=[common_parser],
         subparser = subparsers.add_parser('init', parents=[common_parser],
                                           description=self.do_init.__doc__, epilog=init_epilog,
                                           description=self.do_init.__doc__, epilog=init_epilog,
                                           formatter_class=argparse.RawDescriptionHelpFormatter)
                                           formatter_class=argparse.RawDescriptionHelpFormatter)
@@ -534,7 +567,16 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                help='repository to create')
                                help='repository to create')
         subparser.add_argument('-e', '--encryption', dest='encryption',
         subparser.add_argument('-e', '--encryption', dest='encryption',
                                choices=('none', 'passphrase', 'keyfile'), default='none',
                                choices=('none', 'passphrase', 'keyfile'), default='none',
-                               help='select encryption method')
+                               help='select encryption key method')
+        subparser.add_argument('-C', '--cipher', dest='cipher',
+                               type=int, default=None, metavar='METHOD',
+                               help='select cipher (0..2)')
+        subparser.add_argument('-c', '--compression', dest='compression',
+                               type=int, default=COMPR_DEFAULT, metavar='METHOD',
+                               help='select compression method (0..19)')
+        subparser.add_argument('-m', '--mac', dest='mac',
+                               type=int, default=None, metavar='METHOD',
+                               help='select hash/mac method (0..3)')
 
 
         check_epilog = textwrap.dedent("""
         check_epilog = textwrap.dedent("""
         The check command verifies the consistency of a repository and the corresponding
         The check command verifies the consistency of a repository and the corresponding

+ 7 - 6
borg/cache.py

@@ -95,7 +95,7 @@ class Cache:
         config.set('cache', 'manifest', '')
         config.set('cache', 'manifest', '')
         with open(os.path.join(self.path, 'config'), 'w') as fd:
         with open(os.path.join(self.path, 'config'), 'w') as fd:
             config.write(fd)
             config.write(fd)
-        ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8'))
+        ChunkIndex(key_size=self.repository.key_size).write(os.path.join(self.path, 'chunks').encode('utf-8'))
         with open(os.path.join(self.path, 'chunks.archive'), 'wb') as fd:
         with open(os.path.join(self.path, 'chunks.archive'), 'wb') as fd:
             pass  # empty file
             pass  # empty file
         with open(os.path.join(self.path, 'files'), 'wb') as fd:
         with open(os.path.join(self.path, 'files'), 'wb') as fd:
@@ -118,7 +118,8 @@ class Cache:
         self.timestamp = self.config.get('cache', 'timestamp', fallback=None)
         self.timestamp = self.config.get('cache', 'timestamp', fallback=None)
         self.key_type = self.config.get('cache', 'key_type', fallback=None)
         self.key_type = self.config.get('cache', 'key_type', fallback=None)
         self.previous_location = self.config.get('cache', 'previous_location', fallback=None)
         self.previous_location = self.config.get('cache', 'previous_location', fallback=None)
-        self.chunks = ChunkIndex.read(os.path.join(self.path, 'chunks').encode('utf-8'))
+        self.chunks = ChunkIndex.read(os.path.join(self.path, 'chunks').encode('utf-8'),
+                                      key_size=self.repository.key_size)
         self.files = None
         self.files = None
 
 
     def open(self):
     def open(self):
@@ -272,7 +273,7 @@ class Cache:
             return archive_name
             return archive_name
 
 
         def fetch_and_build_idx(archive_id, repository, key, tmp_dir, tf_out):
         def fetch_and_build_idx(archive_id, repository, key, tmp_dir, tf_out):
-            chunk_idx = ChunkIndex()
+            chunk_idx = ChunkIndex(key_size=repository.key_size)
             cdata = repository.get(archive_id)
             cdata = repository.get(archive_id)
             data = key.decrypt(archive_id, cdata)
             data = key.decrypt(archive_id, cdata)
             add(chunk_idx, archive_id, len(data), len(cdata))
             add(chunk_idx, archive_id, len(data), len(cdata))
@@ -299,13 +300,13 @@ class Cache:
                 tf_out.addfile(tarinfo, f)
                 tf_out.addfile(tarinfo, f)
             os.unlink(file_tmp)
             os.unlink(file_tmp)
 
 
-        def create_master_idx(chunk_idx, tf_in, tmp_dir):
+        def create_master_idx(chunk_idx, repository, tf_in, tmp_dir):
             chunk_idx.clear()
             chunk_idx.clear()
             for tarinfo in tf_in:
             for tarinfo in tf_in:
                 archive_id_hex = tarinfo.name
                 archive_id_hex = tarinfo.name
                 tf_in.extract(archive_id_hex, tmp_dir)
                 tf_in.extract(archive_id_hex, tmp_dir)
                 chunk_idx_path = os.path.join(tmp_dir, archive_id_hex).encode('utf-8')
                 chunk_idx_path = os.path.join(tmp_dir, archive_id_hex).encode('utf-8')
-                archive_chunk_idx = ChunkIndex.read(chunk_idx_path)
+                archive_chunk_idx = ChunkIndex.read(chunk_idx_path, key_size=repository.key_size)
                 for chunk_id, (count, size, csize) in archive_chunk_idx.iteritems():
                 for chunk_id, (count, size, csize) in archive_chunk_idx.iteritems():
                     add(chunk_idx, chunk_id, size, csize, incr=count)
                     add(chunk_idx, chunk_id, size, csize, incr=count)
                 os.unlink(chunk_idx_path)
                 os.unlink(chunk_idx_path)
@@ -334,7 +335,7 @@ class Cache:
             rename_out_archive()
             rename_out_archive()
             print('Merging collection into master chunks cache...')
             print('Merging collection into master chunks cache...')
             in_archive = open_in_archive()
             in_archive = open_in_archive()
-            create_master_idx(self.chunks, in_archive, tmp_dir)
+            create_master_idx(self.chunks, repository, in_archive, tmp_dir)
             close_archive(in_archive)
             close_archive(in_archive)
             print('Done.')
             print('Done.')
 
 

+ 81 - 17
borg/crypto.pyx

@@ -7,6 +7,12 @@ from libc.stdlib cimport malloc, free
 
 
 API_VERSION = 2
 API_VERSION = 2
 
 
+AES_CTR_MODE = 1
+AES_GCM_MODE = 2
+
+MAC_SIZE = 16  # bytes; 128 bits is the maximum allowed value. see "hack" below.
+IV_SIZE = 16  # bytes; 128 bits
+
 cdef extern from "openssl/rand.h":
 cdef extern from "openssl/rand.h":
     int  RAND_bytes(unsigned char *buf, int num)
     int  RAND_bytes(unsigned char *buf, int num)
 
 
@@ -23,6 +29,7 @@ cdef extern from "openssl/evp.h":
         pass
         pass
     const EVP_MD *EVP_sha256()
     const EVP_MD *EVP_sha256()
     const EVP_CIPHER *EVP_aes_256_ctr()
     const EVP_CIPHER *EVP_aes_256_ctr()
+    const EVP_CIPHER *EVP_aes_256_gcm()
     void EVP_CIPHER_CTX_init(EVP_CIPHER_CTX *a)
     void EVP_CIPHER_CTX_init(EVP_CIPHER_CTX *a)
     void EVP_CIPHER_CTX_cleanup(EVP_CIPHER_CTX *a)
     void EVP_CIPHER_CTX_cleanup(EVP_CIPHER_CTX *a)
 
 
@@ -36,20 +43,33 @@ cdef extern from "openssl/evp.h":
                           const unsigned char *in_, int inl)
                           const unsigned char *in_, int inl)
     int EVP_EncryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl)
     int EVP_EncryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl)
     int EVP_DecryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl)
     int EVP_DecryptFinal_ex(EVP_CIPHER_CTX *ctx, unsigned char *out, int *outl)
-
+    int EVP_CIPHER_CTX_ctrl(EVP_CIPHER_CTX *ctx, int type, int arg, unsigned char *ptr)
     int PKCS5_PBKDF2_HMAC(const char *password, int passwordlen,
     int PKCS5_PBKDF2_HMAC(const char *password, int passwordlen,
                           const unsigned char *salt, int saltlen, int iter,
                           const unsigned char *salt, int saltlen, int iter,
                           const EVP_MD *digest,
                           const EVP_MD *digest,
                           int keylen, unsigned char *out)
                           int keylen, unsigned char *out)
+    int EVP_CTRL_GCM_GET_TAG
+    int EVP_CTRL_GCM_SET_TAG
+    int EVP_CTRL_GCM_SET_IVLEN
 
 
 import struct
 import struct
 
 
 _int = struct.Struct('>I')
 _int = struct.Struct('>I')
-_long = struct.Struct('>Q')
+_2long = struct.Struct('>QQ')
 
 
 bytes_to_int = lambda x, offset=0: _int.unpack_from(x, offset)[0]
 bytes_to_int = lambda x, offset=0: _int.unpack_from(x, offset)[0]
-bytes_to_long = lambda x, offset=0: _long.unpack_from(x, offset)[0]
-long_to_bytes = lambda x: _long.pack(x)
+
+
+def bytes16_to_int(b, offset=0):
+    h, l = _2long.unpack_from(b, offset)
+    return (h << 64) + l
+
+
+def int_to_bytes16(i):
+    max_uint64 = 0xffffffffffffffff
+    l = i & max_uint64
+    h = (i >> 64) & max_uint64
+    return _2long.pack(h, l)
 
 
 
 
 def num_aes_blocks(length):
 def num_aes_blocks(length):
@@ -59,6 +79,22 @@ def num_aes_blocks(length):
     return (length + 15) // 16
     return (length + 15) // 16
 
 
 
 
+def increment_iv(iv, amount):
+    """
+    increment the given IV considering that <amount> bytes of data was
+    encrypted based on it. In CTR / GCM mode, the IV is just a counter and
+    must never repeat.
+
+    :param iv: current IV, 16 bytes (128 bit)
+    :param amount: amount of data (in bytes) that was encrypted
+    :return: new IV, 16 bytes (128 bit)
+    """
+    iv = bytes16_to_int(iv)
+    iv += num_aes_blocks(amount)
+    iv = int_to_bytes16(iv)
+    return iv
+
+
 def pbkdf2_sha256(password, salt, iterations, size):
 def pbkdf2_sha256(password, salt, iterations, size):
     """Password based key derivation function 2 (RFC2898)
     """Password based key derivation function 2 (RFC2898)
     """
     """
@@ -93,12 +129,19 @@ cdef class AES:
     """
     """
     cdef EVP_CIPHER_CTX ctx
     cdef EVP_CIPHER_CTX ctx
     cdef int is_encrypt
     cdef int is_encrypt
+    cdef int mode
 
 
-    def __cinit__(self, is_encrypt, key, iv=None):
+    def __cinit__(self, mode, is_encrypt, key, iv=None):
         EVP_CIPHER_CTX_init(&self.ctx)
         EVP_CIPHER_CTX_init(&self.ctx)
+        self.mode = mode
         self.is_encrypt = is_encrypt
         self.is_encrypt = is_encrypt
         # Set cipher type and mode
         # Set cipher type and mode
-        cipher_mode = EVP_aes_256_ctr()
+        if mode == AES_CTR_MODE:
+            cipher_mode = EVP_aes_256_ctr()
+        elif mode == AES_GCM_MODE:
+            cipher_mode = EVP_aes_256_gcm()
+        else:
+            raise Exception('unknown mode')
         if self.is_encrypt:
         if self.is_encrypt:
             if not EVP_EncryptInit_ex(&self.ctx, cipher_mode, NULL, NULL, NULL):
             if not EVP_EncryptInit_ex(&self.ctx, cipher_mode, NULL, NULL, NULL):
                 raise Exception('EVP_EncryptInit_ex failed')
                 raise Exception('EVP_EncryptInit_ex failed')
@@ -117,6 +160,10 @@ cdef class AES:
             key2 = key
             key2 = key
         if iv:
         if iv:
             iv2 = iv
             iv2 = iv
+        if self.mode == AES_GCM_MODE:
+            # Set IV length (bytes)
+            if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_SET_IVLEN, IV_SIZE, NULL):
+                raise Exception('EVP_CIPHER_CTX_ctrl SET IVLEN failed')
         # Initialise key and IV
         # Initialise key and IV
         if self.is_encrypt:
         if self.is_encrypt:
             if not EVP_EncryptInit_ex(&self.ctx, NULL, NULL, key2, iv2):
             if not EVP_EncryptInit_ex(&self.ctx, NULL, NULL, key2, iv2):
@@ -125,16 +172,26 @@ cdef class AES:
             if not EVP_DecryptInit_ex(&self.ctx, NULL, NULL, key2, iv2):
             if not EVP_DecryptInit_ex(&self.ctx, NULL, NULL, key2, iv2):
                 raise Exception('EVP_DecryptInit_ex failed')
                 raise Exception('EVP_DecryptInit_ex failed')
 
 
-    @property
-    def iv(self):
-        return self.ctx.iv[:16]
+    def add(self, aad):
+        cdef int aadl = len(aad)
+        cdef int outl
+        if self.mode != AES_GCM_MODE:
+            raise Exception('additional data only supported for AES GCM mode')
+        # Zero or more calls to specify any AAD
+        if self.is_encrypt:
+            if not EVP_EncryptUpdate(&self.ctx, NULL, &outl, aad, aadl):
+                raise Exception('EVP_EncryptUpdate failed')
+        else:  # decrypt
+            if not EVP_DecryptUpdate(&self.ctx, NULL, &outl, aad, aadl):
+                raise Exception('EVP_DecryptUpdate failed')
 
 
-    def encrypt(self, data):
+    def compute_mac_and_encrypt(self, data):
         cdef int inl = len(data)
         cdef int inl = len(data)
         cdef int ctl = 0
         cdef int ctl = 0
         cdef int outl = 0
         cdef int outl = 0
-        # note: modes that use padding, need up to one extra AES block (16b)
+        # note: modes that use padding, need up to one extra AES block (16B)
         cdef unsigned char *out = <unsigned char *>malloc(inl+16)
         cdef unsigned char *out = <unsigned char *>malloc(inl+16)
+        cdef unsigned char *mac = <unsigned char *>malloc(MAC_SIZE)
         if not out:
         if not out:
             raise MemoryError
             raise MemoryError
         try:
         try:
@@ -144,15 +201,20 @@ cdef class AES:
             if not EVP_EncryptFinal_ex(&self.ctx, out+ctl, &outl):
             if not EVP_EncryptFinal_ex(&self.ctx, out+ctl, &outl):
                 raise Exception('EVP_EncryptFinal failed')
                 raise Exception('EVP_EncryptFinal failed')
             ctl += outl
             ctl += outl
-            return out[:ctl]
+            if self.mode == AES_GCM_MODE:
+                # Get tag (mac) - only GCM mode. for CTR, the returned mac is undefined
+                if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_GET_TAG, MAC_SIZE, mac):
+                    raise Exception('EVP_CIPHER_CTX_ctrl GET TAG failed')
+            return (mac[:MAC_SIZE]), out[:ctl]
         finally:
         finally:
+            free(mac)
             free(out)
             free(out)
 
 
-    def decrypt(self, data):
+    def check_mac_and_decrypt(self, mac, data):
         cdef int inl = len(data)
         cdef int inl = len(data)
         cdef int ptl = 0
         cdef int ptl = 0
         cdef int outl = 0
         cdef int outl = 0
-        # note: modes that use padding, need up to one extra AES block (16b).
+        # note: modes that use padding, need up to one extra AES block (16B).
         # This is what the openssl docs say. I am not sure this is correct,
         # This is what the openssl docs say. I am not sure this is correct,
         # but OTOH it will not cause any harm if our buffer is a little bigger.
         # but OTOH it will not cause any harm if our buffer is a little bigger.
         cdef unsigned char *out = <unsigned char *>malloc(inl+16)
         cdef unsigned char *out = <unsigned char *>malloc(inl+16)
@@ -162,10 +224,12 @@ cdef class AES:
             if not EVP_DecryptUpdate(&self.ctx, out, &outl, data, inl):
             if not EVP_DecryptUpdate(&self.ctx, out, &outl, data, inl):
                 raise Exception('EVP_DecryptUpdate failed')
                 raise Exception('EVP_DecryptUpdate failed')
             ptl = outl
             ptl = outl
+            if self.mode == AES_GCM_MODE:
+                # Set expected tag (mac) value.
+                if not EVP_CIPHER_CTX_ctrl(&self.ctx, EVP_CTRL_GCM_SET_TAG, MAC_SIZE, mac):
+                    raise Exception('EVP_CIPHER_CTX_ctrl SET TAG failed')
             if EVP_DecryptFinal_ex(&self.ctx, out+ptl, &outl) <= 0:
             if EVP_DecryptFinal_ex(&self.ctx, out+ptl, &outl) <= 0:
-                # this error check is very important for modes with padding or
-                # authentication. for them, a failure here means corrupted data.
-                # CTR mode does not use padding nor authentication.
+                # for GCM mode, a failure here means corrupted / tampered tag (mac) or data
                 raise Exception('EVP_DecryptFinal failed')
                 raise Exception('EVP_DecryptFinal failed')
             ptl += outl
             ptl += outl
             return out[:ptl]
             return out[:ptl]

+ 27 - 21
borg/hashindex.pyx

@@ -26,9 +26,11 @@ _NoDefault = object()
 
 
 cdef class IndexBase:
 cdef class IndexBase:
     cdef HashIndex *index
     cdef HashIndex *index
-    key_size = 32
+    cdef int key_size
 
 
-    def __cinit__(self, capacity=0, path=None):
+    def __cinit__(self, capacity=0, path=None, key_size=None):
+        assert key_size is not None
+        self.key_size = key_size
         if path:
         if path:
             self.index = hashindex_read(<bytes>os.fsencode(path))
             self.index = hashindex_read(<bytes>os.fsencode(path))
             if not self.index:
             if not self.index:
@@ -43,8 +45,8 @@ cdef class IndexBase:
             hashindex_free(self.index)
             hashindex_free(self.index)
 
 
     @classmethod
     @classmethod
-    def read(cls, path):
-        return cls(path=path)
+    def read(cls, path, key_size=None):
+        return cls(path=path, key_size=key_size)
 
 
     def write(self, path):
     def write(self, path):
         if not hashindex_write(self.index, <bytes>os.fsencode(path)):
         if not hashindex_write(self.index, <bytes>os.fsencode(path)):
@@ -61,7 +63,7 @@ cdef class IndexBase:
             self[key] = value
             self[key] = value
 
 
     def __delitem__(self, key):
     def __delitem__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         if not hashindex_delete(self.index, <char *>key):
         if not hashindex_delete(self.index, <char *>key):
             raise Exception('hashindex_delete failed')
             raise Exception('hashindex_delete failed')
 
 
@@ -90,14 +92,14 @@ cdef class NSIndex(IndexBase):
     value_size = 8
     value_size = 8
 
 
     def __getitem__(self, key):
     def __getitem__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         data = <int *>hashindex_get(self.index, <char *>key)
         if not data:
         if not data:
             raise KeyError
             raise KeyError
         return _le32toh(data[0]), _le32toh(data[1])
         return _le32toh(data[0]), _le32toh(data[1])
 
 
     def __setitem__(self, key, value):
     def __setitem__(self, key, value):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         cdef int[2] data
         cdef int[2] data
         data[0] = _htole32(value[0])
         data[0] = _htole32(value[0])
         data[1] = _htole32(value[1])
         data[1] = _htole32(value[1])
@@ -105,20 +107,20 @@ cdef class NSIndex(IndexBase):
             raise Exception('hashindex_set failed')
             raise Exception('hashindex_set failed')
 
 
     def __contains__(self, key):
     def __contains__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         data = <int *>hashindex_get(self.index, <char *>key)
         return data != NULL
         return data != NULL
 
 
     def iteritems(self, marker=None):
     def iteritems(self, marker=None):
         cdef const void *key
         cdef const void *key
-        iter = NSKeyIterator()
+        iter = NSKeyIterator(self.key_size)
         iter.idx = self
         iter.idx = self
         iter.index = self.index
         iter.index = self.index
         if marker:
         if marker:
             key = hashindex_get(self.index, <char *>marker)
             key = hashindex_get(self.index, <char *>marker)
             if marker is None:
             if marker is None:
                 raise IndexError
                 raise IndexError
-            iter.key = key - 32
+            iter.key = key - self.key_size
         return iter
         return iter
 
 
 
 
@@ -126,9 +128,11 @@ cdef class NSKeyIterator:
     cdef NSIndex idx
     cdef NSIndex idx
     cdef HashIndex *index
     cdef HashIndex *index
     cdef const void *key
     cdef const void *key
+    cdef int key_size
 
 
-    def __cinit__(self):
+    def __cinit__(self, key_size):
         self.key = NULL
         self.key = NULL
+        self.key_size = key_size
 
 
     def __iter__(self):
     def __iter__(self):
         return self
         return self
@@ -137,8 +141,8 @@ cdef class NSKeyIterator:
         self.key = hashindex_next_key(self.index, <char *>self.key)
         self.key = hashindex_next_key(self.index, <char *>self.key)
         if not self.key:
         if not self.key:
             raise StopIteration
             raise StopIteration
-        cdef int *value = <int *>(self.key + 32)
-        return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]))
+        cdef int *value = <int *>(self.key + self.key_size)
+        return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]))
 
 
 
 
 cdef class ChunkIndex(IndexBase):
 cdef class ChunkIndex(IndexBase):
@@ -146,14 +150,14 @@ cdef class ChunkIndex(IndexBase):
     value_size = 12
     value_size = 12
 
 
     def __getitem__(self, key):
     def __getitem__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         data = <int *>hashindex_get(self.index, <char *>key)
         if not data:
         if not data:
             raise KeyError
             raise KeyError
         return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2])
         return _le32toh(data[0]), _le32toh(data[1]), _le32toh(data[2])
 
 
     def __setitem__(self, key, value):
     def __setitem__(self, key, value):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         cdef int[3] data
         cdef int[3] data
         data[0] = _htole32(value[0])
         data[0] = _htole32(value[0])
         data[1] = _htole32(value[1])
         data[1] = _htole32(value[1])
@@ -162,20 +166,20 @@ cdef class ChunkIndex(IndexBase):
             raise Exception('hashindex_set failed')
             raise Exception('hashindex_set failed')
 
 
     def __contains__(self, key):
     def __contains__(self, key):
-        assert len(key) == 32
+        assert len(key) == self.key_size
         data = <int *>hashindex_get(self.index, <char *>key)
         data = <int *>hashindex_get(self.index, <char *>key)
         return data != NULL
         return data != NULL
 
 
     def iteritems(self, marker=None):
     def iteritems(self, marker=None):
         cdef const void *key
         cdef const void *key
-        iter = ChunkKeyIterator()
+        iter = ChunkKeyIterator(self.key_size)
         iter.idx = self
         iter.idx = self
         iter.index = self.index
         iter.index = self.index
         if marker:
         if marker:
             key = hashindex_get(self.index, <char *>marker)
             key = hashindex_get(self.index, <char *>marker)
             if marker is None:
             if marker is None:
                 raise IndexError
                 raise IndexError
-            iter.key = key - 32
+            iter.key = key - self.key_size
         return iter
         return iter
 
 
     def summarize(self):
     def summarize(self):
@@ -188,9 +192,11 @@ cdef class ChunkKeyIterator:
     cdef ChunkIndex idx
     cdef ChunkIndex idx
     cdef HashIndex *index
     cdef HashIndex *index
     cdef const void *key
     cdef const void *key
+    cdef int key_size
 
 
-    def __cinit__(self):
+    def __cinit__(self, key_size):
         self.key = NULL
         self.key = NULL
+        self.key_size = key_size
 
 
     def __iter__(self):
     def __iter__(self):
         return self
         return self
@@ -199,5 +205,5 @@ cdef class ChunkKeyIterator:
         self.key = hashindex_next_key(self.index, <char *>self.key)
         self.key = hashindex_next_key(self.index, <char *>self.key)
         if not self.key:
         if not self.key:
             raise StopIteration
             raise StopIteration
-        cdef int *value = <int *>(self.key + 32)
-        return (<char *>self.key)[:32], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))
+        cdef int *value = <int *>(self.key + self.key_size)
+        return (<char *>self.key)[:self.key_size], (_le32toh(value[0]), _le32toh(value[1]), _le32toh(value[2]))

+ 6 - 4
borg/helpers.py

@@ -82,18 +82,20 @@ def check_extension_modules():
 
 
 class Manifest:
 class Manifest:
 
 
-    MANIFEST_ID = b'\0' * 32
-
     def __init__(self, key, repository):
     def __init__(self, key, repository):
         self.archives = {}
         self.archives = {}
         self.config = {}
         self.config = {}
         self.key = key
         self.key = key
         self.repository = repository
         self.repository = repository
 
 
+    @classmethod
+    def manifest_id(cls, repository):
+        return b'\0' * repository.key_size
+
     @classmethod
     @classmethod
     def load(cls, repository, key=None):
     def load(cls, repository, key=None):
         from .key import key_factory
         from .key import key_factory
-        cdata = repository.get(cls.MANIFEST_ID)
+        cdata = repository.get(cls.manifest_id(repository))
         if not key:
         if not key:
             key = key_factory(repository, cdata)
             key = key_factory(repository, cdata)
         manifest = cls(key, repository)
         manifest = cls(key, repository)
@@ -118,7 +120,7 @@ class Manifest:
             'config': self.config,
             'config': self.config,
         }))
         }))
         self.id = self.key.id_hash(data)
         self.id = self.key.id_hash(data)
-        self.repository.put(self.MANIFEST_ID, self.key.encrypt(data))
+        self.repository.put(self.manifest_id(self.repository), self.key.encrypt(data))
 
 
     def list_archive_infos(self, sort_by=None, reverse=False):
     def list_archive_infos(self, sort_by=None, reverse=False):
         # inexpensive Archive.list_archives replacement if we just need .name, .id, .ts
         # inexpensive Archive.list_archives replacement if we just need .name, .id, .ts

+ 647 - 95
borg/key.py

@@ -3,14 +3,33 @@ from getpass import getpass
 import os
 import os
 import msgpack
 import msgpack
 import textwrap
 import textwrap
+from collections import namedtuple
 import hmac
 import hmac
-from hashlib import sha256
+from hashlib import sha1, sha256, sha512
 import zlib
 import zlib
 
 
-from .crypto import pbkdf2_sha256, get_random_bytes, AES, bytes_to_long, long_to_bytes, bytes_to_int, num_aes_blocks
+try:
+    import lzma  # python >= 3.3
+except ImportError:
+    try:
+        from backports import lzma  # backports.lzma from pypi
+    except ImportError:
+        lzma = None
+
+try:
+    import blosc
+except ImportError:
+    blosc = None
+
+from .crypto import pbkdf2_sha256, get_random_bytes, AES, AES_CTR_MODE, AES_GCM_MODE, \
+                    bytes_to_int, increment_iv, num_aes_blocks
 from .helpers import IntegrityError, get_keys_dir, Error
 from .helpers import IntegrityError, get_keys_dir, Error
 
 
-PREFIX = b'\0' * 8
+# TODO fix cyclic import:
+#from .archive import CHUNK_MAX
+CHUNK_MAX = 10 * 1024 * 1024
+
+Meta = namedtuple('Meta', 'compr_type, key_type, mac_type, cipher_type, iv, legacy')
 
 
 
 
 class UnsupportedPayloadError(Error):
 class UnsupportedPayloadError(Error):
@@ -22,47 +41,393 @@ class KeyfileNotFoundError(Error):
     """
     """
 
 
 
 
+class sha512_256(object):  # note: can't subclass sha512
+    """sha512, but digest truncated to 256bit - faster than sha256 on 64bit platforms
+
+    Wraps a hashlib sha512 object and offers update / digest / hexdigest / copy,
+    cutting the output down to ``digest_size`` bytes.
+    """
+    # class-level, so that objects made by copy() (which skips __init__) have it too
+    name = 'sha512-256'
+    digestsize = digest_size = 32
+    # NOTE(review): hashlib's sha512 reports block_size 128 and the stdlib hmac module
+    # sizes its key padding by this attribute; changing it would alter the HMAC values
+    # produced so far - confirm whether 64 is intended before touching it.
+    block_size = 64
+
+    def __init__(self, data=None):
+        """Start a new hash, optionally feeding initial *data* (bytes-like)."""
+        self._h = sha512()
+        if data:
+            self.update(data)
+
+    def update(self, data):
+        """Feed more data into the wrapped sha512 object."""
+        self._h.update(data)
+
+    def digest(self):
+        """Return the first ``digest_size`` bytes of the sha512 digest."""
+        return self._h.digest()[:self.digest_size]
+
+    def hexdigest(self):
+        """Return the truncated digest as hex string (2 hex chars per byte)."""
+        return self._h.hexdigest()[:self.digest_size * 2]
+
+    def copy(self):
+        """Return an independent clone carrying the same hash state."""
+        new = sha512_256.__new__(sha512_256)  # no __init__: state is copied below
+        new._h = self._h.copy()
+        return new
+
+
+# HASH / MAC stuff below all has a mac-like interface, so it can be used in the same way.
+# special case: hashes do not use keys (and thus, do not sign/authenticate)
+
+class HASH:  # note: we can't subclass sha1/sha256/sha512
+    """Keyless hash wrapper that takes the same (key, data) arguments as a MAC.
+
+    Concrete hashes derive from this and set TYPE, digest_size and hash_func.
+    """
+    TYPE = 0  # override in subclass
+    digest_size = 0  # override in subclass
+    hash_func = None  # override in subclass
+
+    def __init__(self, key, data=b''):
+        # a simple hash has no use for a key: only None is accepted, anything
+        # else means the caller wanted a keyed MAC
+        if key is None:
+            self.h = self.hash_func(data)
+        else:
+            raise Exception("use a HMAC if you have a key")
+
+    def update(self, data):
+        """Feed more data into the wrapped hash object."""
+        hasher = self.h
+        hasher.update(data)
+
+    def digest(self):
+        """Return the digest of all data fed so far, as bytes."""
+        hasher = self.h
+        return hasher.digest()
+
+    def hexdigest(self):
+        """Return the digest of all data fed so far, as hex string."""
+        hasher = self.h
+        return hasher.hexdigest()
+
+
+class SHA256(HASH):
+    TYPE = 0
+    digest_size = 32
+    hash_func = sha256
+
+
+class SHA512_256(HASH):
+    TYPE = 1
+    digest_size = 32
+    hash_func = sha512_256
+
+
+class GHASH:
+    """Keyless hash with a MAC-like interface, built on an AES-GCM cipher object
+    that is set up with an all-zero key and an all-zero iv."""
+    TYPE = 2
+    digest_size = 16
+
+    def __init__(self, key, data=b''):
+        # signature is like for a MAC; this is a simple hash, so a key other
+        # than None is rejected
+        if key is not None:
+            raise Exception("use a MAC if you have a key")
+        zero_key, zero_iv = b'\0' * 32, b'\0' * 16
+        self.mac_cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=zero_key, iv=zero_iv)
+        if data:
+            self.update(data)
+
+    def update(self, data):
+        # GMAC = aes-gcm with all data as AAD, no data as to-be-encrypted data
+        aad = bytes(data)
+        self.mac_cipher.add(aad)
+
+    def digest(self):
+        # nothing gets encrypted here (empty payload), only the first element
+        # of the returned pair is of interest
+        tag, _ = self.mac_cipher.compute_mac_and_encrypt(b'')
+        return tag
+
+
+class SHA1(HASH):
+    TYPE = 3
+    digest_size = 20
+    hash_func = sha1
+
+
+class SHA512(HASH):
+    TYPE = 4
+    digest_size = 64
+    hash_func = sha512
+
+
 class HMAC(hmac.HMAC):
 class HMAC(hmac.HMAC):
-    """Workaround a bug in Python < 3.4 Where HMAC does not accept memoryviews
-    """
+    TYPE = 0  # override in subclass
+    digest_size = 0  # override in subclass
+    hash_func = None  # override in subclass
+
+    def __init__(self, key, data):
+        if key is None:
+            raise Exception("do not use HMAC if you don't have a key")
+        super().__init__(key, data, self.hash_func)
+
     def update(self, msg):
     def update(self, msg):
+        # Workaround a bug in Python < 3.4 Where HMAC does not accept memoryviews
         self.inner.update(msg)
         self.inner.update(msg)
 
 
 
 
-def key_creator(repository, args):
-    if args.encryption == 'keyfile':
-        return KeyfileKey.create(repository, args)
-    elif args.encryption == 'passphrase':
-        return PassphraseKey.create(repository, args)
-    else:
-        return PlaintextKey.create(repository, args)
+class HMAC_SHA256(HMAC):
+    TYPE = 10
+    digest_size = 32
+    hash_func = sha256
 
 
 
 
-def key_factory(repository, manifest_data):
-    if manifest_data[0] == KeyfileKey.TYPE:
-        return KeyfileKey.detect(repository, manifest_data)
-    elif manifest_data[0] == PassphraseKey.TYPE:
-        return PassphraseKey.detect(repository, manifest_data)
-    elif manifest_data[0] == PlaintextKey.TYPE:
-        return PlaintextKey.detect(repository, manifest_data)
-    else:
-        raise UnsupportedPayloadError(manifest_data[0])
+class HMAC_SHA512_256(HMAC):
+    TYPE = 11
+    digest_size = 32
+    hash_func = sha512_256
+
+
+class HMAC_SHA1(HMAC):
+    TYPE = 13
+    digest_size = 20
+    hash_func = sha1
+
+
+class HMAC_SHA512(HMAC):
+    TYPE = 14
+    digest_size = 64
+    hash_func = sha512
+
 
 
+class GMAC(GHASH):
+    TYPE = 20
+    digest_size = 16
 
 
-class KeyBase:
+    def __init__(self, key, data=b''):
+        if key is None:
+            raise Exception("do not use GMAC if you don't have a key")
+        self.mac_cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=key, iv=b'\0' * 16)
+        if data:
+            self.update(data)
+
+
+# defaults are optimized for speed on modern CPUs with AES hw support
+HASH_DEFAULT = GHASH.TYPE
+MAC_DEFAULT = GMAC.TYPE
+
+
+# compressor classes, all same interface
+
+class NullCompressor(object):  # uses 0 in the mapping
+    TYPE = 0
+
+    def compress(self, data):
+        return bytes(data)
+
+    def decompress(self, data):
+        return bytes(data)
+
+
+class ZlibCompressor(object):  # uses 1..9 in the mapping
+    """zlib compressor; the compression level is the offset of the (sub)class
+    TYPE from ZlibCompressor.TYPE."""
+    TYPE = 0
+    LEVELS = range(10)
+
+    def compress(self, data):
+        """Return *data* zlib-compressed at the level encoded in self.TYPE."""
+        base_type = ZlibCompressor.TYPE
+        return zlib.compress(data, self.TYPE - base_type)
+
+    def decompress(self, data):
+        """Return the zlib-decompressed *data*."""
+        plain = zlib.decompress(data)
+        return plain
+
+
+class LzmaCompressor(object):  # uses 10..19 in the mapping
+    """lzma compressor; the preset is the offset of the (sub)class TYPE from
+    LzmaCompressor.TYPE."""
+    TYPE = 10
+    PRESETS = range(10)
+
+    def __init__(self):
+        """Raise NotImplementedError if no lzma module could be imported."""
+        if lzma is None:
+            # NotImplemented is the comparison sentinel, not an exception class: calling
+            # it fails with a TypeError, NotImplementedError is what is meant here
+            raise NotImplementedError("lzma compression needs Python >= 3.3 or backports.lzma from PyPi")
+
+    def compress(self, data):
+        """Return *data* lzma-compressed with the preset encoded in self.TYPE."""
+        preset = self.TYPE - LzmaCompressor.TYPE
+        return lzma.compress(data, preset=preset)
+
+    def decompress(self, data):
+        """Return the lzma-decompressed *data*."""
+        return lzma.decompress(data)
+
+
+class BLOSCCompressor(object):
+    """Base class for the compressors that go through the blosc library.
+
+    Subclasses set TYPE and CNAME and implement _get_level().
+    """
+    TYPE = 0  # override in subclass
+    LEVELS = range(10)
+    CNAME = ''  # override in subclass
 
 
     def __init__(self):
     def __init__(self):
-        self.TYPE_STR = bytes([self.TYPE])
+        # NotImplemented is the comparison sentinel, not an exception class (calling or
+        # raising it fails with a TypeError), so NotImplementedError is used here
+        if blosc is None:
+            raise NotImplementedError("%s compression needs blosc from PyPi" % self.CNAME)
+        if self.CNAME not in blosc.compressor_list():
+            raise NotImplementedError("%s compression is not supported by blosc" % self.CNAME)
+        blosc.set_blocksize(16384)  # 16kiB is the minimum, so 64kiB are enough for 4 threads
+
+    def _get_level(self):
+        """Return the blosc compression level; must be implemented by subclasses."""
+        raise NotImplementedError
+
+    def compress(self, data):
+        """Return *data* compressed by blosc using CNAME at the level from _get_level()."""
+        return blosc.compress(bytes(data), 1, cname=self.CNAME, clevel=self._get_level())
+
+    def decompress(self, data):
+        """Return the blosc-decompressed *data*."""
+        return blosc.decompress(data)
+
+
+class LZ4Compressor(BLOSCCompressor):
+    TYPE = 20
+    CNAME = 'lz4'
+
+    def _get_level(self):
+        return self.TYPE - LZ4Compressor.TYPE
+
+
+class LZ4HCCompressor(BLOSCCompressor):
+    TYPE = 30
+    CNAME = 'lz4hc'
+
+    def _get_level(self):
+        return self.TYPE - LZ4HCCompressor.TYPE
+
+
+class BLOSCLZCompressor(BLOSCCompressor):
+    TYPE = 40
+    CNAME = 'blosclz'
+
+    def _get_level(self):
+        return self.TYPE - BLOSCLZCompressor.TYPE
+
+
+class SnappyCompressor(BLOSCCompressor):
+    TYPE = 50
+    CNAME = 'snappy'
+
+    def _get_level(self):
+        return self.TYPE - SnappyCompressor.TYPE
+
+
+class BLOSCZlibCompressor(BLOSCCompressor):
+    TYPE = 60
+    CNAME = 'zlib'
+
+    def _get_level(self):
+        return self.TYPE - BLOSCZlibCompressor.TYPE
+
+
+# default is optimized for speed
+COMPR_DEFAULT = NullCompressor.TYPE # no compression
+
+
+# ciphers - AEAD (authenticated encryption with assoc. data) style interface
+# special case: PLAIN dummy does not encrypt / authenticate
+
+class PLAIN:
+    """Dummy with the interface of the AEAD ciphers: does not encrypt, does not
+    authenticate."""
+    TYPE = 0
+    enc_iv = None  # dummy
+
+    def __init__(self, **kw):
+        """Accept any keyword arguments and ignore them."""
+
+    def compute_mac_and_encrypt(self, meta, data):
+        """Return (None, data): no mac is computed, data is handed back as is."""
+        mac = None
+        return mac, data
+
+    def check_mac_and_decrypt(self, mac, meta, data):
+        """Return data as is; mac and meta are not looked at."""
+        plain = data
+        return plain
+
+
+def get_aad(meta):
+    """get additional authenticated data for AEAD ciphers"""
+    if not meta.legacy:
+        # the msgpack-serialized meta tuple as a whole is the AAD
+        return msgpack.packb(meta)
+    # legacy format computed the mac over (iv_last8 +  data)
+    return meta.iv[8:]
+
+
+class AES_CTR_HMAC:
+    """AES in CTR mode, with the mac computed separately as a HMAC over aad + encrypted data."""
+    TYPE = 1
+
+    def __init__(self, enc_key=b'\0' * 32, enc_iv=b'\0' * 16, enc_hmac_key=b'\0' * 32, **kw):
+        # enc_iv is kept on the instance as the iv to use next; other keyword
+        # arguments are accepted and ignored
+        self.hmac_key = enc_hmac_key
+        self.enc_iv = enc_iv
+        self.enc_cipher = AES(mode=AES_CTR_MODE, is_encrypt=True, key=enc_key, iv=enc_iv)
+        self.dec_cipher = AES(mode=AES_CTR_MODE, is_encrypt=False, key=enc_key)
+
+    def compute_mac_and_encrypt(self, meta, data):
+        """Encrypt data using meta.iv and return (mac, encrypted data)."""
+        self.enc_cipher.reset(iv=meta.iv)
+        # the first element returned by the CTR cipher object is not used
+        _, data = self.enc_cipher.compute_mac_and_encrypt(data)
+        # remember the iv following the one just used
+        # (presumably advanced by the number of consumed AES blocks - confirm in crypto module)
+        self.enc_iv = increment_iv(meta.iv, len(data))
+        aad = get_aad(meta)
+        # the mac covers the additional authenticated data and the encrypted data
+        # NOTE(review): always HMAC_SHA256 here, independent of any configured mac type
+        mac = HMAC_SHA256(self.hmac_key, aad + data).digest()  # XXX mac / hash flexibility
+        return mac, data
+
+    def check_mac_and_decrypt(self, mac, meta, data):
+        """Verify mac over aad + data, then decrypt; raises IntegrityError on mismatch."""
+        aad = get_aad(meta)
+        # the mac is checked first, nothing is decrypted if it does not match
+        if HMAC_SHA256(self.hmac_key, aad + data).digest() != mac:  # XXX mac / hash flexibility
+            raise IntegrityError('Encryption envelope checksum mismatch')
+        self.dec_cipher.reset(iv=meta.iv)
+        # mac was already verified above, so None is handed to the cipher object
+        data = self.dec_cipher.check_mac_and_decrypt(None, data)
+        return data
+
+
+class AES_GCM:
+    """AES in GCM mode; the mac comes from the same cipher object that encrypts."""
+    TYPE = 2
+
+    def __init__(self, enc_key=b'\0' * 32, enc_iv=b'\0' * 16, **kw):
+        # note: hmac_key is not used for aes-gcm, it does aes+gmac in 1 pass
+        # (extra keyword arguments are accepted via **kw and ignored)
+        self.enc_iv = enc_iv
+        self.enc_cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=enc_key, iv=enc_iv)
+        self.dec_cipher = AES(mode=AES_GCM_MODE, is_encrypt=False, key=enc_key)
+
+    def compute_mac_and_encrypt(self, meta, data):
+        """Encrypt data using meta.iv and return (mac, encrypted data)."""
+        self.enc_cipher.reset(iv=meta.iv)
+        aad = get_aad(meta)
+        # the additional authenticated data is added before the payload is processed
+        self.enc_cipher.add(aad)
+        mac, data = self.enc_cipher.compute_mac_and_encrypt(data)
+        # remember the iv following the one just used
+        # (presumably advanced by the number of consumed AES blocks - confirm in crypto module)
+        self.enc_iv = increment_iv(meta.iv, len(data))
+        return mac, data
+
+    def check_mac_and_decrypt(self, mac, meta, data):
+        """Decrypt data and check mac; raises IntegrityError if that fails."""
+        self.dec_cipher.reset(iv=meta.iv)
+        aad = get_aad(meta)
+        self.dec_cipher.add(aad)
+        try:
+            data = self.dec_cipher.check_mac_and_decrypt(mac, data)
+        except Exception:
+            # whatever the cipher object raises is reported as an integrity failure
+            raise IntegrityError('Encryption envelope checksum mismatch')
+        return data
+
+
+# cipher default is optimized for speed on modern CPUs with AES hw support
+PLAIN_DEFAULT = PLAIN.TYPE
+CIPHER_DEFAULT = AES_GCM.TYPE
+
+
+# misc. types of keys
+# special case: no keys (thus: no encryption, no signing/authentication)
+
+class KeyBase(object):
+    TYPE = 0x00  # override in derived classes
+
+    def __init__(self, compressor_cls, maccer_cls, cipher_cls):
+        self.compressor = compressor_cls()
+        self.maccer_cls = maccer_cls  # hasher/maccer used by id_hash
+        self.cipher_cls = cipher_cls  # plaintext dummy or AEAD cipher
+        self.cipher = cipher_cls()
+        self.id_key = None
 
 
     def id_hash(self, data):
     def id_hash(self, data):
-        """Return HMAC hash using the "id" HMAC key
+        """Return a HASH (no id_key) or a MAC (using the "id_key" key)
+
+        XXX do we need a cryptographic hash function here or is a keyed hash
+        function like GMAC / GHASH good enough? See NIST SP 800-38D.
+
+        IMPORTANT: in 1 repo, there should be only 1 kind of id_hash, otherwise
+        data hashed/maced with one id_hash might result in same ID as already
+        exists in the repo for other data created with another id_hash method.
+        somehow unlikely considering 128 or 256bits, but still.
         """
         """
+        return self.maccer_cls(self.id_key, data).digest()
 
 
     def encrypt(self, data):
     def encrypt(self, data):
-        pass
+        data = self.compressor.compress(data)
+        meta = Meta(compr_type=self.compressor.TYPE, key_type=self.TYPE,
+                    mac_type=self.maccer_cls.TYPE, cipher_type=self.cipher.TYPE,
+                    iv=self.cipher.enc_iv, legacy=False)
+        mac, data = self.cipher.compute_mac_and_encrypt(meta, data)
+        return generate(mac, meta, data)
 
 
     def decrypt(self, id, data):
     def decrypt(self, id, data):
-        pass
+        mac, meta, data = parser(data)
+        compressor, keyer, maccer, cipher = get_implementations(meta)
+        assert isinstance(self, keyer)
+        assert self.maccer_cls is maccer
+        assert self.cipher_cls is cipher
+        data = self.cipher.check_mac_and_decrypt(mac, meta, data)
+        data = self.compressor.decompress(data)
+        if id and self.id_hash(data) != id:
+            raise IntegrityError('Chunk id verification failed')
+        return data
 
 
 
 
 class PlaintextKey(KeyBase):
 class PlaintextKey(KeyBase):
@@ -73,71 +438,34 @@ class PlaintextKey(KeyBase):
     @classmethod
     @classmethod
     def create(cls, repository, args):
     def create(cls, repository, args):
         print('Encryption NOT enabled.\nUse the "--encryption=passphrase|keyfile" to enable encryption.')
         print('Encryption NOT enabled.\nUse the "--encryption=passphrase|keyfile" to enable encryption.')
-        return cls()
+        compressor = compressor_creator(args)
+        maccer = maccer_creator(args, cls)
+        cipher = cipher_creator(args, cls)
+        return cls(compressor, maccer, cipher)
 
 
     @classmethod
     @classmethod
     def detect(cls, repository, manifest_data):
     def detect(cls, repository, manifest_data):
-        return cls()
-
-    def id_hash(self, data):
-        return sha256(data).digest()
-
-    def encrypt(self, data):
-        return b''.join([self.TYPE_STR, zlib.compress(data)])
-
-    def decrypt(self, id, data):
-        if data[0] != self.TYPE:
-            raise IntegrityError('Invalid encryption envelope')
-        data = zlib.decompress(memoryview(data)[1:])
-        if id and sha256(data).digest() != id:
-            raise IntegrityError('Chunk id verification failed')
-        return data
+        mac, meta, data = parser(manifest_data)
+        compressor, keyer, maccer, cipher = get_implementations(meta)
+        return cls(compressor, maccer, cipher)
 
 
 
 
 class AESKeyBase(KeyBase):
 class AESKeyBase(KeyBase):
     """Common base class shared by KeyfileKey and PassphraseKey
     """Common base class shared by KeyfileKey and PassphraseKey
 
 
-    Chunks are encrypted using 256bit AES in Counter Mode (CTR)
+    Chunks are encrypted using 256bit AES in CTR or GCM mode.
+    Chunks are authenticated by a GCM GMAC or a HMAC.
 
 
-    Payload layout: TYPE(1) + HMAC(32) + NONCE(8) + CIPHERTEXT
+    Payload layout: TYPE(1) + MAC(32) + NONCE(8) + CIPHERTEXT
 
 
     To reduce payload size only 8 bytes of the 16 bytes nonce is saved
     To reduce payload size only 8 bytes of the 16 bytes nonce is saved
     in the payload, the first 8 bytes are always zeros. This does not
     in the payload, the first 8 bytes are always zeros. This does not
     affect security but limits the maximum repository capacity to
     affect security but limits the maximum repository capacity to
     only 295 exabytes!
     only 295 exabytes!
     """
     """
-
-    PAYLOAD_OVERHEAD = 1 + 32 + 8  # TYPE + HMAC + NONCE
-
-    def id_hash(self, data):
-        """Return HMAC hash using the "id" HMAC key
-        """
-        return HMAC(self.id_key, data, sha256).digest()
-
-    def encrypt(self, data):
-        data = zlib.compress(data)
-        self.enc_cipher.reset()
-        data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data)))
-        hmac = HMAC(self.enc_hmac_key, data, sha256).digest()
-        return b''.join((self.TYPE_STR, hmac, data))
-
-    def decrypt(self, id, data):
-        if data[0] != self.TYPE:
-            raise IntegrityError('Invalid encryption envelope')
-        hmac = memoryview(data)[1:33]
-        if memoryview(HMAC(self.enc_hmac_key, memoryview(data)[33:], sha256).digest()) != hmac:
-            raise IntegrityError('Encryption envelope checksum mismatch')
-        self.dec_cipher.reset(iv=PREFIX + data[33:41])
-        data = zlib.decompress(self.dec_cipher.decrypt(data[41:]))  # should use memoryview
-        if id and HMAC(self.id_key, data, sha256).digest() != id:
-            raise IntegrityError('Chunk id verification failed')
-        return data
-
-    def extract_nonce(self, payload):
-        if payload[0] != self.TYPE:
-            raise IntegrityError('Invalid encryption envelope')
-        nonce = bytes_to_long(payload[33:41])
-        return nonce
+    def extract_iv(self, payload):
+        _, meta, _ = parser(payload)
+        return meta.iv
 
 
     def init_from_random_data(self, data):
     def init_from_random_data(self, data):
         self.enc_key = data[0:32]
         self.enc_key = data[0:32]
@@ -148,9 +476,13 @@ class AESKeyBase(KeyBase):
         if self.chunk_seed & 0x80000000:
         if self.chunk_seed & 0x80000000:
             self.chunk_seed = self.chunk_seed - 0xffffffff - 1
             self.chunk_seed = self.chunk_seed - 0xffffffff - 1
 
 
-    def init_ciphers(self, enc_iv=b''):
-        self.enc_cipher = AES(is_encrypt=True, key=self.enc_key, iv=enc_iv)
-        self.dec_cipher = AES(is_encrypt=False, key=self.enc_key)
+    def init_ciphers(self, enc_iv=b'\0' * 16):
+        self.cipher = self.cipher_cls(enc_key=self.enc_key, enc_iv=enc_iv,
+                                      enc_hmac_key=self.enc_hmac_key)
+
+    @property
+    def enc_iv(self):
+        return self.cipher.enc_iv
 
 
 
 
 class PassphraseKey(AESKeyBase):
 class PassphraseKey(AESKeyBase):
@@ -159,7 +491,10 @@ class PassphraseKey(AESKeyBase):
 
 
     @classmethod
     @classmethod
     def create(cls, repository, args):
     def create(cls, repository, args):
-        key = cls()
+        compressor = compressor_creator(args)
+        maccer = maccer_creator(args, cls)
+        cipher = cipher_creator(args, cls)
+        key = cls(compressor, maccer, cipher)
         passphrase = os.environ.get('BORG_PASSPHRASE')
         passphrase = os.environ.get('BORG_PASSPHRASE')
         if passphrase is not None:
         if passphrase is not None:
             passphrase2 = passphrase
             passphrase2 = passphrase
@@ -181,7 +516,9 @@ class PassphraseKey(AESKeyBase):
     @classmethod
     @classmethod
     def detect(cls, repository, manifest_data):
     def detect(cls, repository, manifest_data):
         prompt = 'Enter passphrase for %s: ' % repository._location.orig
         prompt = 'Enter passphrase for %s: ' % repository._location.orig
-        key = cls()
+        mac, meta, data = parser(manifest_data)
+        compressor, keyer, maccer, cipher = get_implementations(meta)
+        key = cls(compressor, maccer, cipher)
         passphrase = os.environ.get('BORG_PASSPHRASE')
         passphrase = os.environ.get('BORG_PASSPHRASE')
         if passphrase is None:
         if passphrase is None:
             passphrase = getpass(prompt)
             passphrase = getpass(prompt)
@@ -189,8 +526,7 @@ class PassphraseKey(AESKeyBase):
             key.init(repository, passphrase)
             key.init(repository, passphrase)
             try:
             try:
                 key.decrypt(None, manifest_data)
                 key.decrypt(None, manifest_data)
-                num_blocks = num_aes_blocks(len(manifest_data) - 41)
-                key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks))
+                key.init_ciphers(increment_iv(key.extract_iv(manifest_data), len(data)))
                 return key
                 return key
             except IntegrityError:
             except IntegrityError:
                 passphrase = getpass(prompt)
                 passphrase = getpass(prompt)
@@ -212,14 +548,15 @@ class KeyfileKey(AESKeyBase):
 
 
     @classmethod
     @classmethod
     def detect(cls, repository, manifest_data):
     def detect(cls, repository, manifest_data):
-        key = cls()
+        mac, meta, data = parser(manifest_data)
+        compressor, keyer, maccer, cipher = get_implementations(meta)
+        key = cls(compressor, maccer, cipher)
         path = cls.find_key_file(repository)
         path = cls.find_key_file(repository)
         prompt = 'Enter passphrase for key file %s: ' % path
         prompt = 'Enter passphrase for key file %s: ' % path
         passphrase = os.environ.get('BORG_PASSPHRASE', '')
         passphrase = os.environ.get('BORG_PASSPHRASE', '')
         while not key.load(path, passphrase):
         while not key.load(path, passphrase):
             passphrase = getpass(prompt)
             passphrase = getpass(prompt)
-        num_blocks = num_aes_blocks(len(manifest_data) - 41)
-        key.init_ciphers(PREFIX + long_to_bytes(key.extract_nonce(manifest_data) + num_blocks))
+        key.init_ciphers(increment_iv(key.extract_iv(manifest_data), len(data)))
         return key
         return key
 
 
     @classmethod
     @classmethod
@@ -254,25 +591,27 @@ class KeyfileKey(AESKeyBase):
     def decrypt_key_file(self, data, passphrase):
     def decrypt_key_file(self, data, passphrase):
         d = msgpack.unpackb(data)
         d = msgpack.unpackb(data)
         assert d[b'version'] == 1
         assert d[b'version'] == 1
-        assert d[b'algorithm'] == b'sha256'
+        assert d[b'algorithm'] == b'gmac'
         key = pbkdf2_sha256(passphrase.encode('utf-8'), d[b'salt'], d[b'iterations'], 32)
         key = pbkdf2_sha256(passphrase.encode('utf-8'), d[b'salt'], d[b'iterations'], 32)
-        data = AES(is_encrypt=False, key=key).decrypt(d[b'data'])
-        if HMAC(key, data, sha256).digest() != d[b'hash']:
+        try:
+            cipher = AES(mode=AES_GCM_MODE, is_encrypt=False, key=key, iv=b'\0'*16)
+            data = cipher.check_mac_and_decrypt(d[b'hash'], d[b'data'])
+            return data
+        except Exception:
             return None
             return None
-        return data
 
 
     def encrypt_key_file(self, data, passphrase):
     def encrypt_key_file(self, data, passphrase):
         salt = get_random_bytes(32)
         salt = get_random_bytes(32)
         iterations = 100000
         iterations = 100000
         key = pbkdf2_sha256(passphrase.encode('utf-8'), salt, iterations, 32)
         key = pbkdf2_sha256(passphrase.encode('utf-8'), salt, iterations, 32)
-        hash = HMAC(key, data, sha256).digest()
-        cdata = AES(is_encrypt=True, key=key).encrypt(data)
+        cipher = AES(mode=AES_GCM_MODE, is_encrypt=True, key=key, iv=b'\0'*16)
+        mac, cdata = cipher.compute_mac_and_encrypt(data)
         d = {
         d = {
             'version': 1,
             'version': 1,
             'salt': salt,
             'salt': salt,
             'iterations': iterations,
             'iterations': iterations,
-            'algorithm': 'sha256',
-            'hash': hash,
+            'algorithm': 'gmac',
+            'hash': mac,
             'data': cdata,
             'data': cdata,
         }
         }
         return msgpack.packb(d)
         return msgpack.packb(d)
@@ -321,7 +660,10 @@ class KeyfileKey(AESKeyBase):
             passphrase2 = getpass('Enter same passphrase again: ')
             passphrase2 = getpass('Enter same passphrase again: ')
             if passphrase != passphrase2:
             if passphrase != passphrase2:
                 print('Passphrases do not match')
                 print('Passphrases do not match')
-        key = cls()
+        compressor = compressor_creator(args)
+        maccer = maccer_creator(args, cls)
+        cipher = cipher_creator(args, cls)
+        key = cls(compressor, maccer, cipher)
         key.repository_id = repository.id
         key.repository_id = repository.id
         key.init_from_random_data(get_random_bytes(100))
         key.init_from_random_data(get_random_bytes(100))
         key.init_ciphers()
         key.init_ciphers()
@@ -329,3 +671,213 @@ class KeyfileKey(AESKeyBase):
         print('Key file "%s" created.' % key.path)
         print('Key file "%s" created.' % key.path)
         print('Keep this file safe. Your data will be inaccessible without it.')
         print('Keep this file safe. Your data will be inaccessible without it.')
         return key
         return key
+
+
+# Registry: numeric compression type -> compressor class.
+# For each level/preset an algorithm class offers, a subclass is created on
+# the fly whose TYPE is the algorithm's base TYPE plus that level/preset, and
+# it is registered under that same number.
+# Later assignments win if two computed numbers collide, so statement order
+# matters here.
+# note: key 0 nicely maps to a zlib compressor with level 0 which means "no compression"
+compressor_mapping = {}
+for level in ZlibCompressor.LEVELS:
+    compressor_mapping[ZlibCompressor.TYPE + level] = \
+        type('ZlibCompressorLevel%d' % level, (ZlibCompressor, ), dict(TYPE=ZlibCompressor.TYPE + level))
+for preset in LzmaCompressor.PRESETS:
+    compressor_mapping[LzmaCompressor.TYPE + preset] = \
+        type('LzmaCompressorPreset%d' % preset, (LzmaCompressor, ), dict(TYPE=LzmaCompressor.TYPE + preset))
+for level in LZ4Compressor.LEVELS:
+    compressor_mapping[LZ4Compressor.TYPE + level] = \
+        type('LZ4CompressorLevel%d' % level, (LZ4Compressor, ), dict(TYPE=LZ4Compressor.TYPE + level))
+for level in LZ4HCCompressor.LEVELS:
+    compressor_mapping[LZ4HCCompressor.TYPE + level] = \
+        type('LZ4HCCompressorLevel%d' % level, (LZ4HCCompressor, ), dict(TYPE=LZ4HCCompressor.TYPE + level))
+for level in BLOSCLZCompressor.LEVELS:
+    compressor_mapping[BLOSCLZCompressor.TYPE + level] = \
+        type('BLOSCLZCompressorLevel%d' % level, (BLOSCLZCompressor, ), dict(TYPE=BLOSCLZCompressor.TYPE + level))
+for level in SnappyCompressor.LEVELS:
+    compressor_mapping[SnappyCompressor.TYPE + level] = \
+        type('SnappyCompressorLevel%d' % level, (SnappyCompressor, ), dict(TYPE=SnappyCompressor.TYPE + level))
+for level in BLOSCZlibCompressor.LEVELS:
+    compressor_mapping[BLOSCZlibCompressor.TYPE + level] = \
+        type('BLOSCZlibCompressorLevel%d' % level, (BLOSCZlibCompressor, ), dict(TYPE=BLOSCZlibCompressor.TYPE + level))
+# overwrite 0 with NullCompressor
+# (must stay last so it replaces whatever the loops registered under that key)
+compressor_mapping[NullCompressor.TYPE] = NullCompressor
+
+
+# Registry: numeric key type (each class's TYPE) -> key class.
+keyer_mapping = {
+    KeyfileKey.TYPE: KeyfileKey,
+    PassphraseKey.TYPE: PassphraseKey,
+    PlaintextKey.TYPE: PlaintextKey,
+}
+
+
+# Registry: numeric mac type (each class's TYPE) -> hash / MAC class.
+maccer_mapping = {
+    # simple hashes, not MACs (but MAC-like class __init__ method signature):
+    SHA1.TYPE: SHA1,
+    SHA256.TYPE: SHA256,
+    SHA512_256.TYPE: SHA512_256,
+    SHA512.TYPE: SHA512,
+    GHASH.TYPE: GHASH,
+    # MACs:
+    HMAC_SHA1.TYPE: HMAC_SHA1,
+    HMAC_SHA256.TYPE: HMAC_SHA256,
+    HMAC_SHA512_256.TYPE: HMAC_SHA512_256,
+    HMAC_SHA512.TYPE: HMAC_SHA512,
+    GMAC.TYPE: GMAC,
+}
+
+
+# Registry: numeric cipher type (each class's TYPE) -> cipher class.
+cipher_mapping = {
+    # no cipher (but cipher-like class __init__ method signature):
+    PLAIN.TYPE: PLAIN,
+    # AEAD cipher implementations
+    AES_CTR_HMAC.TYPE: AES_CTR_HMAC,
+    AES_GCM.TYPE: AES_GCM,
+}
+
+
+def get_implementations(meta):
+    """
+    Resolve the type numbers stored in meta to their implementation classes.
+
+    Returns a (compressor, keyer, maccer, cipher) tuple of classes.
+    Raises UnsupportedPayloadError if any of the four type numbers is not
+    present in its registry.
+    """
+    lookups = (
+        (compressor_mapping, meta.compr_type),
+        (keyer_mapping, meta.key_type),
+        (maccer_mapping, meta.mac_type),
+        (cipher_mapping, meta.cipher_type),
+    )
+    try:
+        compressor, keyer, maccer, cipher = [registry[type_no] for registry, type_no in lookups]
+    except KeyError:
+        type_numbers = (meta.compr_type, meta.key_type, meta.mac_type, meta.cipher_type)
+        raise UnsupportedPayloadError("compr_type %x key_type %x mac_type %x cipher_type %x" % type_numbers)
+    return compressor, keyer, maccer, cipher
+
+
+def legacy_parser(all_data, key_type):  # all rather hardcoded
+    """
+    Split a legacy payload into (mac, meta, data).
+
+    Payload layout:
+    no encryption:   TYPE(1) + data
+    with encryption: TYPE(1) + HMAC(32) + NONCE(8) + data
+    data is compressed with zlib level 6 and (in the 2nd case) encrypted.
+
+    To reduce payload size only 8 bytes of the 16 bytes nonce is saved
+    in the payload, the first 8 bytes are always zeros. This does not
+    affect security but limits the maximum repository capacity to
+    only 295 exabytes!
+
+    key_type decides which of the two layouts is assumed. For the
+    unencrypted layout, mac and meta.iv are None.
+    """
+    type_len, mac_len, nonce_len = 1, 32, 8
+    if key_type != PlaintextKey.TYPE:
+        mac_end = type_len + mac_len
+        nonce_end = mac_end + nonce_len
+        mac_type, cipher_type = HMAC_SHA256.TYPE, AES_CTR_HMAC.TYPE
+        mac = all_data[type_len:mac_end]
+        # legacy attic did not store the full IV on disk, as the upper 8 bytes
+        # are expected to be zero anyway as the full IV is a 128bit counter.
+        iv = b'\0' * 8 + all_data[mac_end:nonce_end]
+        data = all_data[nonce_end:]
+    else:
+        mac_type, cipher_type = SHA256.TYPE, PLAIN.TYPE
+        mac = iv = None
+        data = all_data[type_len:]
+    # compr_type 6 is hardcoded for legacy payloads (see docstring)
+    meta = Meta(compr_type=6, key_type=key_type, mac_type=mac_type,
+                cipher_type=cipher_type, iv=iv, legacy=True)
+    return mac, meta, data
+
+def parser00(all_data):
+    """Parse a legacy payload, tagging the result with KeyfileKey.TYPE as key type."""
+    return legacy_parser(all_data, KeyfileKey.TYPE)
+
+def parser01(all_data):
+    """Parse a legacy payload, tagging the result with PassphraseKey.TYPE as key type."""
+    return legacy_parser(all_data, PassphraseKey.TYPE)
+
+def parser02(all_data):
+    """Parse a legacy payload, tagging the result with PlaintextKey.TYPE as key type."""
+    return legacy_parser(all_data, PlaintextKey.TYPE)
+
+
+def parser03(all_data):  # new & flexible
+    """
+    Payload layout:
+    always: TYPE(1) + MSGPACK((mac, meta, data))
+
+    meta is a Meta namedtuple and contains all required information about data.
+    data is maybe compressed (see meta) and maybe encrypted (see meta).
+
+    Returns a (mac, meta, data) tuple, with meta rebuilt as a Meta instance
+    from the unpacked sequence.
+    """
+    unpacker = msgpack.Unpacker(
+        use_list=False,
+        # avoid memory allocation issues caused by tampered input data.
+        max_buffer_size=CHUNK_MAX + 1000,  # does not work in 0.4.6 unpackb C implementation
+        max_array_len=10,  # meta_tuple
+        max_bin_len=CHUNK_MAX,  # data
+        max_str_len=0,  # not used yet
+        max_map_len=0,  # not used yet
+        max_ext_len=0,  # not used yet
+        )
+    # skip the leading TYPE byte, everything after it is the msgpack document
+    unpacker.feed(all_data[1:])
+    mac, meta_tuple, data = unpacker.unpack()
+    meta = Meta(*meta_tuple)
+    return mac, meta, data
+
+
+def parser(data):
+    """
+    Parse a payload by dispatching on its first byte (the header type).
+
+    Returns the result of the selected parser function. A header type that
+    has no entry in the dispatch table raises KeyError.
+    """
+    handlers = {
+        0x00: parser00,
+        0x01: parser01,
+        0x02: parser02,
+        0x03: parser03,
+    }
+    return handlers[data[0]](data)
+
+
+def key_factory(repository, manifest_data):
+    """
+    Pick the key class named by the manifest's header and let it detect the key.
+
+    Returns whatever the selected key class's detect(repository, manifest_data)
+    returns.
+    """
+    _mac, meta, _data = parser(manifest_data)
+    # only the key class is needed here; the other lookups still run, so an
+    # unknown compressor / mac / cipher type is rejected at this point too
+    _compressor, keyer, _maccer, _cipher = get_implementations(meta)
+    return keyer.detect(repository, manifest_data)
+
+
+def generate(mac, meta, data):
+    """Serialize (mac, meta, data) as header byte 0x03 followed by the msgpack-packed tuple."""
+    # always create new-style 0x03 format
+    return b'\x03' + msgpack.packb((mac, meta, data), use_bin_type=True)
+
+
+def compressor_creator(args):
+    """
+    Return the compressor class for the compression type requested in args.
+
+    args may be None (used by unit tests); COMPR_DEFAULT is used then,
+    otherwise args.compression.
+    Raises NotImplementedError if no compressor is registered for that type.
+    """
+    # args == None is used by unit tests
+    compression = COMPR_DEFAULT if args is None else args.compression
+    compressor = compressor_mapping.get(compression)
+    if compressor is None:
+        # Format the local value, not args.compression: args may be None here,
+        # and %s (unlike %d) also copes with a non-integer value.
+        raise NotImplementedError("no compression %s" % (compression, ))
+    return compressor
+
+
+def key_creator(args):
+    """
+    Return the key class selected by args.encryption.
+
+    Known values are 'keyfile', 'passphrase' and 'none'.
+    Raises NotImplementedError for any other value.
+    """
+    if args.encryption == 'keyfile':
+        return KeyfileKey
+    if args.encryption == 'passphrase':
+        return PassphraseKey
+    if args.encryption == 'none':
+        return PlaintextKey
+    # NotImplemented is a non-callable constant, the exception class is
+    # NotImplementedError.
+    raise NotImplementedError("no encryption %s" % args.encryption)
+
+
+def maccer_creator(args, key_cls):
+    """
+    Return the hash / MAC class to use for key class key_cls.
+
+    If args is None (used by unit tests) or args.mac is None, a default is
+    chosen: HASH_DEFAULT for PlaintextKey, MAC_DEFAULT for KeyfileKey and
+    PassphraseKey.
+    Raises NotImplementedError for an unknown key class or if no class is
+    registered for the resulting mac type.
+    """
+    # args == None is used by unit tests
+    mac = None if args is None else args.mac
+    if mac is None:
+        if key_cls is PlaintextKey:
+            mac = HASH_DEFAULT
+        elif key_cls in (KeyfileKey, PassphraseKey):
+            mac = MAC_DEFAULT
+        else:
+            raise NotImplementedError("unknown key class")
+    maccer = maccer_mapping.get(mac)
+    if maccer is None:
+        # Format the resolved value, not args.mac: args may be None here and
+        # args.mac may have been None (default selected above).
+        raise NotImplementedError("no mac %s" % (mac, ))
+    return maccer
+
+
+def cipher_creator(args, key_cls):
+    """
+    Return the cipher class to use for key class key_cls.
+
+    If args is None (used by unit tests) or args.cipher is None, a default is
+    chosen: PLAIN_DEFAULT for PlaintextKey, CIPHER_DEFAULT for KeyfileKey and
+    PassphraseKey.
+    Raises NotImplementedError for an unknown key class or if no class is
+    registered for the resulting cipher type.
+    """
+    # args == None is used by unit tests
+    cipher_type = None if args is None else args.cipher
+    if cipher_type is None:
+        if key_cls is PlaintextKey:
+            cipher_type = PLAIN_DEFAULT
+        elif key_cls in (KeyfileKey, PassphraseKey):
+            cipher_type = CIPHER_DEFAULT
+        else:
+            raise NotImplementedError("unknown key class")
+    cipher = cipher_mapping.get(cipher_type)
+    if cipher is None:
+        # Format the resolved value, not args.cipher: args may be None here
+        # and args.cipher may have been None (default selected above).
+        raise NotImplementedError("no cipher %s" % (cipher_type, ))
+    return cipher

+ 7 - 6
borg/remote.py

@@ -89,7 +89,7 @@ class RepositoryServer:
     def negotiate(self, versions):
     def negotiate(self, versions):
         return 1
         return 1
 
 
-    def open(self, path, create=False):
+    def open(self, path, create=False, key_size=None):
         path = os.fsdecode(path)
         path = os.fsdecode(path)
         if path.startswith('/~'):
         if path.startswith('/~'):
             path = path[1:]
             path = path[1:]
@@ -100,8 +100,8 @@ class RepositoryServer:
                     break
                     break
             else:
             else:
                 raise PathNotAllowed(path)
                 raise PathNotAllowed(path)
-        self.repository = Repository(path, create)
-        return self.repository.id
+        self.repository = Repository(path, create, key_size=key_size)
+        return self.repository.id, self.repository.key_size
 
 
 
 
 class RemoteRepository:
 class RemoteRepository:
@@ -112,7 +112,7 @@ class RemoteRepository:
         def __init__(self, name):
         def __init__(self, name):
             self.name = name
             self.name = name
 
 
-    def __init__(self, location, create=False):
+    def __init__(self, location, create=False, key_size=None):
         self.location = location
         self.location = location
         self.preload_ids = []
         self.preload_ids = []
         self.msgid = 0
         self.msgid = 0
@@ -144,7 +144,7 @@ class RemoteRepository:
         version = self.call('negotiate', 1)
         version = self.call('negotiate', 1)
         if version != 1:
         if version != 1:
             raise Exception('Server insisted on using unsupported protocol version %d' % version)
             raise Exception('Server insisted on using unsupported protocol version %d' % version)
-        self.id = self.call('open', location.path, create)
+        self.id, self.key_size = self.call('open', location.path, create, key_size)
 
 
     def __del__(self):
     def __del__(self):
         self.close()
         self.close()
@@ -303,7 +303,8 @@ class RepositoryCache:
 
 
     def initialize(self):
     def initialize(self):
         self.tmppath = tempfile.mkdtemp()
         self.tmppath = tempfile.mkdtemp()
-        self.index = NSIndex()
+        self.key_size = self.repository.key_size
+        self.index = NSIndex(key_size=self.key_size)
         self.data_fd = open(os.path.join(self.tmppath, 'data'), 'a+b')
         self.data_fd = open(os.path.join(self.tmppath, 'data'), 'a+b')
 
 
     def cleanup(self):
     def cleanup(self):

+ 17 - 11
borg/repository.py

@@ -47,22 +47,23 @@ class Repository:
     class ObjectNotFound(Error):
     class ObjectNotFound(Error):
         """Object with key {} not found in repository {}."""
         """Object with key {} not found in repository {}."""
 
 
-    def __init__(self, path, create=False, exclusive=False):
+    def __init__(self, path, create=False, exclusive=False, key_size=None):
         self.path = path
         self.path = path
         self.io = None
         self.io = None
         self.lock = None
         self.lock = None
         self.index = None
         self.index = None
         self._active_txn = False
         self._active_txn = False
         if create:
         if create:
-            self.create(path)
+            self.create(path, key_size)
         self.open(path, exclusive)
         self.open(path, exclusive)
 
 
     def __del__(self):
     def __del__(self):
         self.close()
         self.close()
 
 
-    def create(self, path):
+    def create(self, path, key_size):
         """Create a new empty repository at `path`
         """Create a new empty repository at `path`
         """
         """
+        assert key_size is not None
         if os.path.exists(path) and (not os.path.isdir(path) or os.listdir(path)):
         if os.path.exists(path) and (not os.path.isdir(path) or os.listdir(path)):
             raise self.AlreadyExists(path)
             raise self.AlreadyExists(path)
         if not os.path.exists(path):
         if not os.path.exists(path):
@@ -75,6 +76,7 @@ class Repository:
         config.set('repository', 'version', '1')
         config.set('repository', 'version', '1')
         config.set('repository', 'segments_per_dir', self.DEFAULT_SEGMENTS_PER_DIR)
         config.set('repository', 'segments_per_dir', self.DEFAULT_SEGMENTS_PER_DIR)
         config.set('repository', 'max_segment_size', self.DEFAULT_MAX_SEGMENT_SIZE)
         config.set('repository', 'max_segment_size', self.DEFAULT_MAX_SEGMENT_SIZE)
+        config.set('repository', 'key_size', key_size)
         config.set('repository', 'id', hexlify(os.urandom(32)).decode('ascii'))
         config.set('repository', 'id', hexlify(os.urandom(32)).decode('ascii'))
         with open(os.path.join(path, 'config'), 'w') as fd:
         with open(os.path.join(path, 'config'), 'w') as fd:
             config.write(fd)
             config.write(fd)
@@ -117,10 +119,12 @@ class Repository:
         if 'repository' not in self.config.sections() or self.config.getint('repository', 'version') != 1:
         if 'repository' not in self.config.sections() or self.config.getint('repository', 'version') != 1:
             raise self.InvalidRepository(path)
             raise self.InvalidRepository(path)
         self.lock = UpgradableLock(os.path.join(path, 'config'), exclusive)
         self.lock = UpgradableLock(os.path.join(path, 'config'), exclusive)
+        # legacy attic repositories always have key size 32B (256b)
+        self.key_size = self.config.getint('repository', 'key_size', fallback=32)
         self.max_segment_size = self.config.getint('repository', 'max_segment_size')
         self.max_segment_size = self.config.getint('repository', 'max_segment_size')
         self.segments_per_dir = self.config.getint('repository', 'segments_per_dir')
         self.segments_per_dir = self.config.getint('repository', 'segments_per_dir')
         self.id = unhexlify(self.config.get('repository', 'id').strip())
         self.id = unhexlify(self.config.get('repository', 'id').strip())
-        self.io = LoggedIO(self.path, self.max_segment_size, self.segments_per_dir)
+        self.io = LoggedIO(self.path, self.max_segment_size, self.segments_per_dir, self.key_size)
 
 
     def close(self):
     def close(self):
         if self.lock:
         if self.lock:
@@ -140,8 +144,9 @@ class Repository:
 
 
     def open_index(self, transaction_id):
     def open_index(self, transaction_id):
         if transaction_id is None:
         if transaction_id is None:
-            return NSIndex()
-        return NSIndex.read((os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8'))
+            return NSIndex(key_size=self.key_size)
+        return NSIndex.read((os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8'),
+                            key_size=self.key_size)
 
 
     def prepare_txn(self, transaction_id, do_cleanup=True):
     def prepare_txn(self, transaction_id, do_cleanup=True):
         self._active_txn = True
         self._active_txn = True
@@ -397,8 +402,6 @@ class LoggedIO:
 
 
     header_fmt = struct.Struct('<IIB')
     header_fmt = struct.Struct('<IIB')
     assert header_fmt.size == 9
     assert header_fmt.size == 9
-    put_header_fmt = struct.Struct('<IIB32s')
-    assert put_header_fmt.size == 41
     header_no_crc_fmt = struct.Struct('<IB')
     header_no_crc_fmt = struct.Struct('<IB')
     assert header_no_crc_fmt.size == 5
     assert header_no_crc_fmt.size == 5
     crc_fmt = struct.Struct('<I')
     crc_fmt = struct.Struct('<I')
@@ -407,13 +410,16 @@ class LoggedIO:
     _commit = header_no_crc_fmt.pack(9, TAG_COMMIT)
     _commit = header_no_crc_fmt.pack(9, TAG_COMMIT)
     COMMIT = crc_fmt.pack(crc32(_commit)) + _commit
     COMMIT = crc_fmt.pack(crc32(_commit)) + _commit
 
 
-    def __init__(self, path, limit, segments_per_dir, capacity=90):
+    def __init__(self, path, limit, segments_per_dir, key_size, capacity=90):
         self.path = path
         self.path = path
         self.fds = LRUCache(capacity)
         self.fds = LRUCache(capacity)
         self.segment = 0
         self.segment = 0
         self.limit = limit
         self.limit = limit
         self.segments_per_dir = segments_per_dir
         self.segments_per_dir = segments_per_dir
+        self.key_size = key_size
         self.offset = 0
         self.offset = 0
+        self.put_header_fmt = struct.Struct('<IIB%ds' % key_size)
+        assert self.put_header_fmt.size == self.header_fmt.size + key_size
         self._write_fd = None
         self._write_fd = None
 
 
     def close(self):
     def close(self):
@@ -519,9 +525,9 @@ class LoggedIO:
                 raise IntegrityError('Invalid segment entry header')
                 raise IntegrityError('Invalid segment entry header')
             key = None
             key = None
             if tag in (TAG_PUT, TAG_DELETE):
             if tag in (TAG_PUT, TAG_DELETE):
-                key = rest[:32]
+                key = rest[:self.key_size]
             if include_data:
             if include_data:
-                yield tag, key, offset, rest[32:]
+                yield tag, key, offset, rest[self.key_size:]
             else:
             else:
                 yield tag, key, offset
                 yield tag, key, offset
             offset += size
             offset += size

+ 15 - 3
borg/testsuite/archive.py

@@ -3,7 +3,7 @@ from datetime import datetime, timezone
 import msgpack
 import msgpack
 
 
 from ..archive import Archive, CacheChunkBuffer, RobustUnpacker
 from ..archive import Archive, CacheChunkBuffer, RobustUnpacker
-from ..key import PlaintextKey
+from ..key import PlaintextKey, COMPR_DEFAULT
 from ..helpers import Manifest
 from ..helpers import Manifest
 from . import BaseTestCase
 from . import BaseTestCase
 from .mock import Mock
 from .mock import Mock
@@ -21,9 +21,15 @@ class MockCache:
 
 
 class ArchiveTimestampTestCase(BaseTestCase):
 class ArchiveTimestampTestCase(BaseTestCase):
 
 
+    class MockArgs(object):
+        # Stand-in for the parsed command line arguments handed to
+        # PlaintextKey.create() below; only these attributes are provided.
+        repository = None
+        compression = COMPR_DEFAULT
+        mac = None
+        cipher = None
+
     def _test_timestamp_parsing(self, isoformat, expected):
     def _test_timestamp_parsing(self, isoformat, expected):
         repository = Mock()
         repository = Mock()
-        key = PlaintextKey()
+        key = PlaintextKey.create(None, self.MockArgs())
         manifest = Manifest(repository, key)
         manifest = Manifest(repository, key)
         a = Archive(repository, key, manifest, 'test', create=True)
         a = Archive(repository, key, manifest, 'test', create=True)
         a.metadata = {b'time': isoformat}
         a.metadata = {b'time': isoformat}
@@ -42,10 +48,16 @@ class ArchiveTimestampTestCase(BaseTestCase):
 
 
 class ChunkBufferTestCase(BaseTestCase):
 class ChunkBufferTestCase(BaseTestCase):
 
 
+    class MockArgs(object):
+        # Stand-in for the parsed command line arguments handed to
+        # PlaintextKey.create() below; only these attributes are provided.
+        repository = None
+        compression = COMPR_DEFAULT
+        mac = None
+        cipher = None
+
     def test(self):
     def test(self):
         data = [{b'foo': 1}, {b'bar': 2}]
         data = [{b'foo': 1}, {b'bar': 2}]
         cache = MockCache()
         cache = MockCache()
-        key = PlaintextKey()
+        key = PlaintextKey.create(None, self.MockArgs())
         chunks = CacheChunkBuffer(cache, key, None)
         chunks = CacheChunkBuffer(cache, key, None)
         for d in data:
         for d in data:
             chunks.add(d)
             chunks.add(d)

+ 7 - 5
borg/testsuite/archiver.py

@@ -15,8 +15,9 @@ from .. import xattr
 from ..archive import Archive, ChunkBuffer, CHUNK_MAX
 from ..archive import Archive, ChunkBuffer, CHUNK_MAX
 from ..archiver import Archiver
 from ..archiver import Archiver
 from ..cache import Cache
 from ..cache import Cache
-from ..crypto import bytes_to_long, num_aes_blocks
+from ..crypto import bytes16_to_int, num_aes_blocks
 from ..helpers import Manifest
 from ..helpers import Manifest
+from ..key import parser
 from ..remote import RemoteRepository, PathNotAllowed
 from ..remote import RemoteRepository, PathNotAllowed
 from ..repository import Repository
 from ..repository import Repository
 from . import BaseTestCase
 from . import BaseTestCase
@@ -496,8 +497,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
                 hash = sha256(data).digest()
                 hash = sha256(data).digest()
                 if hash not in seen:
                 if hash not in seen:
                     seen.add(hash)
                     seen.add(hash)
-                    num_blocks = num_aes_blocks(len(data) - 41)
-                    nonce = bytes_to_long(data[33:41])
+                    mac, meta, data = parser(data)
+                    num_blocks = num_aes_blocks(len(data))
+                    nonce = bytes16_to_int(meta.iv)
                     for counter in range(nonce, nonce + num_blocks):
                     for counter in range(nonce, nonce + num_blocks):
                         self.assert_not_in(counter, used)
                         self.assert_not_in(counter, used)
                         used.add(counter)
                         used.add(counter)
@@ -576,7 +578,7 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
 
 
     def test_missing_manifest(self):
     def test_missing_manifest(self):
         archive, repository = self.open_archive('archive1')
         archive, repository = self.open_archive('archive1')
-        repository.delete(Manifest.MANIFEST_ID)
+        repository.delete(Manifest.manifest_id(repository))
         repository.commit()
         repository.commit()
         self.cmd('check', self.repository_location, exit_code=1)
         self.cmd('check', self.repository_location, exit_code=1)
         output = self.cmd('check', '--repair', self.repository_location, exit_code=0)
         output = self.cmd('check', '--repair', self.repository_location, exit_code=0)
@@ -587,7 +589,7 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
     def test_extra_chunks(self):
     def test_extra_chunks(self):
         self.cmd('check', self.repository_location, exit_code=0)
         self.cmd('check', self.repository_location, exit_code=0)
         repository = Repository(self.repository_location)
         repository = Repository(self.repository_location)
-        repository.put(b'01234567890123456789012345678901', b'xxxx')
+        repository.put(b'0123456789012345', b'xxxx')
         repository.commit()
         repository.commit()
         repository.close()
         repository.close()
         self.cmd('check', self.repository_location, exit_code=1)
         self.cmd('check', self.repository_location, exit_code=1)

+ 48 - 14
borg/testsuite/crypto.py

@@ -1,6 +1,7 @@
 from binascii import hexlify
 from binascii import hexlify
 
 
-from ..crypto import AES, bytes_to_long, bytes_to_int, long_to_bytes, pbkdf2_sha256, get_random_bytes
+from ..crypto import AES, AES_GCM_MODE, AES_CTR_MODE, pbkdf2_sha256, get_random_bytes, \
+                     bytes_to_int, bytes16_to_int, int_to_bytes16, increment_iv
 from . import BaseTestCase
 from . import BaseTestCase
 
 
 
 
@@ -9,9 +10,27 @@ class CryptoTestCase(BaseTestCase):
     def test_bytes_to_int(self):
     def test_bytes_to_int(self):
         self.assert_equal(bytes_to_int(b'\0\0\0\1'), 1)
         self.assert_equal(bytes_to_int(b'\0\0\0\1'), 1)
 
 
-    def test_bytes_to_long(self):
-        self.assert_equal(bytes_to_long(b'\0\0\0\0\0\0\0\1'), 1)
-        self.assert_equal(long_to_bytes(1), b'\0\0\0\0\0\0\0\1')
+    def test_bytes16_to_int(self):
+        # each int must convert to its 16 byte big-endian form and back again
+        cases = [
+            (1, b'\0\0\0\0\0\0\0\0\0\0\0\0\0\0\0\1'),
+            ((1 << 64) + 2, b'\0\0\0\0\0\0\0\1\0\0\0\0\0\0\0\2'),
+        ]
+        for number, raw in cases:
+            self.assert_equal(bytes16_to_int(raw), number)
+            self.assert_equal(int_to_bytes16(number), raw)
+
+    def test_increment_iv(self):
+        # (start iv, amount, expected iv) - ivs are given as ints and converted
+        # to their 16 byte form for the call.
+        # NOTE(review): judging by the cases, amount looks like a byte count
+        # that is rounded up to whole 16 byte blocks - confirm in increment_iv.
+        cases = (
+            (0, 0, 0),
+            (0, 15, 1),
+            (0, 16, 1),
+            (0, 17, 2),
+            (0xffffffffffffffff, 32, 0x10000000000000001),
+        )
+        for start, amount, expected in cases:
+            start_bytes = int_to_bytes16(start)
+            expected_bytes = int_to_bytes16(expected)
+            self.assert_equal(increment_iv(start_bytes, amount), expected_bytes)
 
 
     def test_pbkdf2_sha256(self):
     def test_pbkdf2_sha256(self):
         self.assert_equal(hexlify(pbkdf2_sha256(b'password', b'salt', 1, 32)),
         self.assert_equal(hexlify(pbkdf2_sha256(b'password', b'salt', 1, 32)),
@@ -28,18 +47,33 @@ class CryptoTestCase(BaseTestCase):
         self.assert_equal(len(bytes2), 10)
         self.assert_equal(len(bytes2), 10)
         self.assert_not_equal(bytes, bytes2)
         self.assert_not_equal(bytes, bytes2)
 
 
-    def test_aes(self):
+    def test_aes_ctr(self):
         key = b'X' * 32
         key = b'X' * 32
+        iv = b'\0' * 16
         data = b'foo' * 10
         data = b'foo' * 10
         # encrypt
         # encrypt
-        aes = AES(is_encrypt=True, key=key)
-        self.assert_equal(bytes_to_long(aes.iv, 8), 0)
-        cdata = aes.encrypt(data)
+        aes = AES(mode=AES_CTR_MODE, is_encrypt=True, key=key, iv=iv)
+        _, cdata = aes.compute_mac_and_encrypt(data)
         self.assert_equal(hexlify(cdata), b'c6efb702de12498f34a2c2bbc8149e759996d08bf6dc5c610aefc0c3a466')
         self.assert_equal(hexlify(cdata), b'c6efb702de12498f34a2c2bbc8149e759996d08bf6dc5c610aefc0c3a466')
-        self.assert_equal(bytes_to_long(aes.iv, 8), 2)
-        # decrypt
-        aes = AES(is_encrypt=False, key=key)
-        self.assert_equal(bytes_to_long(aes.iv, 8), 0)
-        pdata = aes.decrypt(cdata)
+        # decrypt (correct mac/cdata)
+        aes = AES(mode=AES_CTR_MODE, is_encrypt=False, key=key, iv=iv)
+        pdata = aes.check_mac_and_decrypt(None, cdata)
+        self.assert_equal(data, pdata)
+
+    def test_aes_gcm(self):
+        key = b'X' * 32
+        iv = b'A' * 16
+        data = b'foo' * 10
+        # encrypt
+        aes = AES(mode=AES_GCM_MODE, is_encrypt=True, key=key, iv=iv)
+        mac, cdata = aes.compute_mac_and_encrypt(data)
+        self.assert_equal(hexlify(mac), b'c98aa10eb6b7031bcc2160878d9438fb')
+        self.assert_equal(hexlify(cdata), b'841bcce405df769d22ee9f7f012edf5dc7fb2594d924c7400ffd050f2741')
+        # decrypt (correct mac/cdata)
+        aes = AES(mode=AES_GCM_MODE, is_encrypt=False, key=key, iv=iv)
+        pdata = aes.check_mac_and_decrypt(mac, cdata)
         self.assert_equal(data, pdata)
         self.assert_equal(data, pdata)
-        self.assert_equal(bytes_to_long(aes.iv, 8), 2)
+        # decrypt (incorrect mac/cdata)
+        aes = AES(mode=AES_GCM_MODE, is_encrypt=False, key=key, iv=iv)
+        cdata = b'x' + cdata[1:]  # corrupt cdata
+        self.assertRaises(Exception, aes.check_mac_and_decrypt, mac, cdata)

+ 5 - 5
borg/testsuite/hashindex.py

@@ -9,7 +9,7 @@ from . import BaseTestCase
 class HashIndexTestCase(BaseTestCase):
 class HashIndexTestCase(BaseTestCase):
 
 
     def _generic_test(self, cls, make_value, sha):
     def _generic_test(self, cls, make_value, sha):
-        idx = cls()
+        idx = cls(key_size=32)
         self.assert_equal(len(idx), 0)
         self.assert_equal(len(idx), 0)
         # Test set
         # Test set
         for x in range(100):
         for x in range(100):
@@ -34,7 +34,7 @@ class HashIndexTestCase(BaseTestCase):
         with open(idx_name.name, 'rb') as fd:
         with open(idx_name.name, 'rb') as fd:
             self.assert_equal(hashlib.sha256(fd.read()).hexdigest(), sha)
             self.assert_equal(hashlib.sha256(fd.read()).hexdigest(), sha)
         # Make sure we can open the file
         # Make sure we can open the file
-        idx = cls.read(idx_name.name)
+        idx = cls.read(idx_name.name, key_size=32)
         self.assert_equal(len(idx), 50)
         self.assert_equal(len(idx), 50)
         for x in range(50, 100):
         for x in range(50, 100):
             self.assert_equal(idx[bytes('%-32d' % x, 'ascii')], make_value(x * 2))
             self.assert_equal(idx[bytes('%-32d' % x, 'ascii')], make_value(x * 2))
@@ -42,7 +42,7 @@ class HashIndexTestCase(BaseTestCase):
         self.assert_equal(len(idx), 0)
         self.assert_equal(len(idx), 0)
         idx.write(idx_name.name)
         idx.write(idx_name.name)
         del idx
         del idx
-        self.assert_equal(len(cls.read(idx_name.name)), 0)
+        self.assert_equal(len(cls.read(idx_name.name, key_size=32)), 0)
 
 
     def test_nsindex(self):
     def test_nsindex(self):
         self._generic_test(NSIndex, lambda x: (x, x),
         self._generic_test(NSIndex, lambda x: (x, x),
@@ -55,7 +55,7 @@ class HashIndexTestCase(BaseTestCase):
     def test_resize(self):
     def test_resize(self):
         n = 2000  # Must be >= MIN_BUCKETS
         n = 2000  # Must be >= MIN_BUCKETS
         idx_name = tempfile.NamedTemporaryFile()
         idx_name = tempfile.NamedTemporaryFile()
-        idx = NSIndex()
+        idx = NSIndex(key_size=32)
         idx.write(idx_name.name)
         idx.write(idx_name.name)
         initial_size = os.path.getsize(idx_name.name)
         initial_size = os.path.getsize(idx_name.name)
         self.assert_equal(len(idx), 0)
         self.assert_equal(len(idx), 0)
@@ -70,7 +70,7 @@ class HashIndexTestCase(BaseTestCase):
         self.assert_equal(initial_size, os.path.getsize(idx_name.name))
         self.assert_equal(initial_size, os.path.getsize(idx_name.name))
 
 
     def test_iteritems(self):
     def test_iteritems(self):
-        idx = NSIndex()
+        idx = NSIndex(key_size=32)
         for x in range(100):
         for x in range(100):
             idx[bytes('%-0.32d' % x, 'ascii')] = x, x
             idx[bytes('%-0.32d' % x, 'ascii')] = x, x
         all = list(idx.iteritems())
         all = list(idx.iteritems())

+ 44 - 27
borg/testsuite/key.py

@@ -4,8 +4,7 @@ import shutil
 import tempfile
 import tempfile
 from binascii import hexlify
 from binascii import hexlify
 
 
-from ..crypto import bytes_to_long, num_aes_blocks
-from ..key import PlaintextKey, PassphraseKey, KeyfileKey
+from ..key import PlaintextKey, PassphraseKey, KeyfileKey, COMPR_DEFAULT, increment_iv
 from ..helpers import Location, unhexlify
 from ..helpers import Location, unhexlify
 from . import BaseTestCase
 from . import BaseTestCase
 
 
@@ -14,22 +13,26 @@ class KeyTestCase(BaseTestCase):
 
 
     class MockArgs:
     class MockArgs:
         repository = Location(tempfile.mkstemp()[1])
         repository = Location(tempfile.mkstemp()[1])
+        compression = COMPR_DEFAULT
+        mac = None
+        cipher = None
 
 
     keyfile2_key_file = """
     keyfile2_key_file = """
-        BORG_KEY 0000000000000000000000000000000000000000000000000000000000000000
-        hqppdGVyYXRpb25zzgABhqCkaGFzaNoAIMyonNI+7Cjv0qHi0AOBM6bLGxACJhfgzVD2oq
-        bIS9SFqWFsZ29yaXRobaZzaGEyNTakc2FsdNoAINNK5qqJc1JWSUjACwFEWGTdM7Nd0a5l
-        1uBGPEb+9XM9p3ZlcnNpb24BpGRhdGHaANAYDT5yfPpU099oBJwMomsxouKyx/OG4QIXK2
-        hQCG2L2L/9PUu4WIuKvGrsXoP7syemujNfcZws5jLp2UPva4PkQhQsrF1RYDEMLh2eF9Ol
-        rwtkThq1tnh7KjWMG9Ijt7/aoQtq0zDYP/xaFF8XXSJxiyP5zjH5+spB6RL0oQHvbsliSh
-        /cXJq7jrqmrJ1phd6dg4SHAM/i+hubadZoS6m25OQzYAW09wZD/phG8OVa698Z5ed3HTaT
-        SmrtgJL3EoOKgUI9d6BLE4dJdBqntifo""".strip()
+BORG_KEY 0000000000000000000000000000000000000000000000000000000000000000
+hqRzYWx02gAgA1l4jfyv22y6U/mxxDT8HodSWAcX0g3nOESrQcNnBsundmVyc2lvbgGqaX
+RlcmF0aW9uc84AAYagqWFsZ29yaXRobaRnbWFjpGhhc2iw7eaB54JssAOnM1S4S9CeTaRk
+YXRh2gDQzmuyg3iYjMeTLObY+ybI+QfngB+5mmHeEAfBa42fuEZgqM3rYyMj2XfgvamF+O
+0asvhEyy9om190FaOxQ4RiiTMNqSP0FKLmd1i5ZyDMfRyp7JbscRFs9Ryk28yXWkv0MgQy
+EAYlaycY+6lWdRSgEPxidyPl9t9dr2AI/UuiQytwqmcmXgWD6Px6wgpOS/4AcRmEvDqIIl
+Rc2xsu+RevGAxk5rnrIIRPr7WB5R2cinzEn9ylDgBDt9LZbq706ELgtwVTnjWB8FBTPwVI
+vLTTXQ==
+""".strip()
 
 
     keyfile2_cdata = unhexlify(re.sub('\W', '', """
     keyfile2_cdata = unhexlify(re.sub('\W', '', """
-        0055f161493fcfc16276e8c31493c4641e1eb19a79d0326fad0291e5a9c98e5933
-        00000000000003e8d21eaf9b86c297a8cd56432e1915bb
+        0393c4102e5ce8f5e9477c9e4ce2de453121aa139600001402c41000000000000000000000000000000000
+        c2c407b0147a64a379d1
         """))
         """))
-    keyfile2_id = unhexlify('c3fbf14bc001ebcc3cd86e696c13482ed071740927cd7cbe1b01b4bfcee49314')
+    keyfile2_id = unhexlify('dd9451069663931c8abd85452d016733')
 
 
     def setUp(self):
     def setUp(self):
         self.tmppath = tempfile.mkdtemp()
         self.tmppath = tempfile.mkdtemp()
@@ -45,25 +48,36 @@ class KeyTestCase(BaseTestCase):
         _location = _Location()
         _location = _Location()
         id = bytes(32)
         id = bytes(32)
 
 
+    def _test_make_testdata(self):
+        # modify tearDown to not kill the key file first, before using this
+        os.environ['ATTIC_PASSPHRASE'] = 'passphrase'
+        key = KeyfileKey.create(self.MockRepository(), self.MockArgs())
+        print("keyfile2_key_file: find the it in the filesystem, see location in test log output")
+        print("keyfile2_cdata:", hexlify(key.encrypt(b'payload')))
+        print("keyfile2_id:", hexlify(key.id_hash(b'payload')))
+        assert False
+
     def test_plaintext(self):
     def test_plaintext(self):
-        key = PlaintextKey.create(None, None)
+        key = PlaintextKey.create(None, self.MockArgs())
         data = b'foo'
         data = b'foo'
-        self.assert_equal(hexlify(key.id_hash(data)), b'2c26b46b68ffc68ff99b453c1d30413413422d706483bfa0f98a5e886266e7ae')
+        self.assert_equal(hexlify(key.id_hash(data)), b'4c9137bc0dd3ddb31de4e138a49d7eb3')
         self.assert_equal(data, key.decrypt(key.id_hash(data), key.encrypt(data)))
         self.assert_equal(data, key.decrypt(key.id_hash(data), key.encrypt(data)))
 
 
     def test_keyfile(self):
     def test_keyfile(self):
         os.environ['BORG_PASSPHRASE'] = 'test'
         os.environ['BORG_PASSPHRASE'] = 'test'
         key = KeyfileKey.create(self.MockRepository(), self.MockArgs())
         key = KeyfileKey.create(self.MockRepository(), self.MockArgs())
-        self.assert_equal(bytes_to_long(key.enc_cipher.iv, 8), 0)
+        self.assert_equal(key.enc_iv, b'\0'*16)
         manifest = key.encrypt(b'XXX')
         manifest = key.encrypt(b'XXX')
-        self.assert_equal(key.extract_nonce(manifest), 0)
+        self.assert_equal(key.extract_iv(manifest), b'\0'*16)
         manifest2 = key.encrypt(b'XXX')
         manifest2 = key.encrypt(b'XXX')
         self.assert_not_equal(manifest, manifest2)
         self.assert_not_equal(manifest, manifest2)
         self.assert_equal(key.decrypt(None, manifest), key.decrypt(None, manifest2))
         self.assert_equal(key.decrypt(None, manifest), key.decrypt(None, manifest2))
-        self.assert_equal(key.extract_nonce(manifest2), 1)
-        iv = key.extract_nonce(manifest)
+        self.assert_equal(key.extract_iv(manifest2), b'\0'*15+b'\x01')
+        iv = key.extract_iv(manifest)
         key2 = KeyfileKey.detect(self.MockRepository(), manifest)
         key2 = KeyfileKey.detect(self.MockRepository(), manifest)
-        self.assert_equal(bytes_to_long(key2.enc_cipher.iv, 8), iv + num_aes_blocks(len(manifest) - KeyfileKey.PAYLOAD_OVERHEAD))
+        # we assume that the payload fits into one 16B AES block (which is given for b'XXX').
+        iv_plus_1 = increment_iv(iv, 16)
+        self.assert_equal(key2.enc_iv, iv_plus_1)
         # Key data sanity check
         # Key data sanity check
         self.assert_equal(len(set([key2.id_key, key2.enc_key, key2.enc_hmac_key])), 3)
         self.assert_equal(len(set([key2.id_key, key2.enc_key, key2.enc_hmac_key])), 3)
         self.assert_equal(key2.chunk_seed == 0, False)
         self.assert_equal(key2.chunk_seed == 0, False)
@@ -79,25 +93,28 @@ class KeyTestCase(BaseTestCase):
 
 
     def test_passphrase(self):
     def test_passphrase(self):
         os.environ['BORG_PASSPHRASE'] = 'test'
         os.environ['BORG_PASSPHRASE'] = 'test'
-        key = PassphraseKey.create(self.MockRepository(), None)
-        self.assert_equal(bytes_to_long(key.enc_cipher.iv, 8), 0)
+        key = PassphraseKey.create(self.MockRepository(), self.MockArgs())
+        # XXX self.assert_equal(bytes_to_long(key.enc_cipher.iv, 8), 0)
+        self.assert_equal(key.enc_iv, b'\0'*16)
         self.assert_equal(hexlify(key.id_key), b'793b0717f9d8fb01c751a487e9b827897ceea62409870600013fbc6b4d8d7ca6')
         self.assert_equal(hexlify(key.id_key), b'793b0717f9d8fb01c751a487e9b827897ceea62409870600013fbc6b4d8d7ca6')
         self.assert_equal(hexlify(key.enc_hmac_key), b'b885a05d329a086627412a6142aaeb9f6c54ab7950f996dd65587251f6bc0901')
         self.assert_equal(hexlify(key.enc_hmac_key), b'b885a05d329a086627412a6142aaeb9f6c54ab7950f996dd65587251f6bc0901')
         self.assert_equal(hexlify(key.enc_key), b'2ff3654c6daf7381dbbe718d2b20b4f1ea1e34caa6cc65f6bb3ac376b93fed2a')
         self.assert_equal(hexlify(key.enc_key), b'2ff3654c6daf7381dbbe718d2b20b4f1ea1e34caa6cc65f6bb3ac376b93fed2a')
         self.assert_equal(key.chunk_seed, -775740477)
         self.assert_equal(key.chunk_seed, -775740477)
         manifest = key.encrypt(b'XXX')
         manifest = key.encrypt(b'XXX')
-        self.assert_equal(key.extract_nonce(manifest), 0)
+        self.assert_equal(key.extract_iv(manifest), b'\0'*16)
         manifest2 = key.encrypt(b'XXX')
         manifest2 = key.encrypt(b'XXX')
         self.assert_not_equal(manifest, manifest2)
         self.assert_not_equal(manifest, manifest2)
         self.assert_equal(key.decrypt(None, manifest), key.decrypt(None, manifest2))
         self.assert_equal(key.decrypt(None, manifest), key.decrypt(None, manifest2))
-        self.assert_equal(key.extract_nonce(manifest2), 1)
-        iv = key.extract_nonce(manifest)
+        self.assert_equal(key.extract_iv(manifest2), b'\0'*15+b'\x01')
+        iv = key.extract_iv(manifest)
         key2 = PassphraseKey.detect(self.MockRepository(), manifest)
         key2 = PassphraseKey.detect(self.MockRepository(), manifest)
-        self.assert_equal(bytes_to_long(key2.enc_cipher.iv, 8), iv + num_aes_blocks(len(manifest) - PassphraseKey.PAYLOAD_OVERHEAD))
+        # we assume that the payload fits into one 16B AES block (which is given for b'XXX').
+        iv_plus_1 = increment_iv(iv, 16)
+        self.assert_equal(key2.enc_iv, iv_plus_1)
         self.assert_equal(key.id_key, key2.id_key)
         self.assert_equal(key.id_key, key2.id_key)
         self.assert_equal(key.enc_hmac_key, key2.enc_hmac_key)
         self.assert_equal(key.enc_hmac_key, key2.enc_hmac_key)
         self.assert_equal(key.enc_key, key2.enc_key)
         self.assert_equal(key.enc_key, key2.enc_key)
         self.assert_equal(key.chunk_seed, key2.chunk_seed)
         self.assert_equal(key.chunk_seed, key2.chunk_seed)
         data = b'foo'
         data = b'foo'
-        self.assert_equal(hexlify(key.id_hash(data)), b'818217cf07d37efad3860766dcdf1d21e401650fed2d76ed1d797d3aae925990')
+        self.assert_equal(hexlify(key.id_hash(data)), b'a409d69859b8a07625f066e42cde0501')
         self.assert_equal(data, key2.decrypt(key2.id_hash(data), key.encrypt(data)))
         self.assert_equal(data, key2.decrypt(key2.id_hash(data), key.encrypt(data)))

+ 11 - 9
borg/testsuite/repository.py

@@ -9,16 +9,15 @@ from ..repository import Repository
 from . import BaseTestCase
 from . import BaseTestCase
 from .mock import patch
 from .mock import patch
 
 
-
 class RepositoryTestCaseBase(BaseTestCase):
 class RepositoryTestCaseBase(BaseTestCase):
     key_size = 32
     key_size = 32
 
 
-    def open(self, create=False):
-        return Repository(os.path.join(self.tmppath, 'repository'), create=create)
+    def open(self, create=False, key_size=None):
+        return Repository(os.path.join(self.tmppath, 'repository'), create=create, key_size=key_size)
 
 
     def setUp(self):
     def setUp(self):
         self.tmppath = tempfile.mkdtemp()
         self.tmppath = tempfile.mkdtemp()
-        self.repository = self.open(create=True)
+        self.repository = self.open(create=True, key_size=self.key_size)
 
 
     def tearDown(self):
     def tearDown(self):
         self.repository.close()
         self.repository.close()
@@ -209,7 +208,8 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
         return sorted(int(n) for n in os.listdir(os.path.join(self.tmppath, 'repository', 'data', '0')) if n.isdigit())[-1]
         return sorted(int(n) for n in os.listdir(os.path.join(self.tmppath, 'repository', 'data', '0')) if n.isdigit())[-1]
 
 
     def open_index(self):
     def open_index(self):
-        return NSIndex.read(os.path.join(self.tmppath, 'repository', 'index.{}'.format(self.get_head())))
+        return NSIndex.read(os.path.join(self.tmppath, 'repository', 'index.{}'.format(self.get_head())),
+                            key_size=self.key_size)
 
 
     def corrupt_object(self, id_):
     def corrupt_object(self, id_):
         idx = self.open_index()
         idx = self.open_index()
@@ -317,8 +317,9 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
 
 
 class RemoteRepositoryTestCase(RepositoryTestCase):
 class RemoteRepositoryTestCase(RepositoryTestCase):
 
 
-    def open(self, create=False):
-        return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create)
+    def open(self, create=False, key_size=None):
+        return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')),
+                                create=create, key_size=key_size)
 
 
     def test_invalid_rpc(self):
     def test_invalid_rpc(self):
         self.assert_raises(InvalidRPCMethod, lambda: self.repository.call('__init__', None))
         self.assert_raises(InvalidRPCMethod, lambda: self.repository.call('__init__', None))
@@ -326,5 +327,6 @@ class RemoteRepositoryTestCase(RepositoryTestCase):
 
 
 class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase):
 class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase):
 
 
-    def open(self, create=False):
-        return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create)
+    def open(self, create=False, key_size=None):
+        return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')),
+                                create=create, key_size=key_size)

+ 1 - 0
docs/index.rst

@@ -53,6 +53,7 @@ User's Guide
    quickstart
    quickstart
    usage
    usage
    faq
    faq
+   tuning
    internals
    internals
 
 
 Getting help
 Getting help

+ 147 - 0
docs/tuning.rst

@@ -0,0 +1,147 @@
+.. _tuning:
+.. include:: global.rst.inc
+
+Tuning
+======
+
+General hints
+-------------
+CPU load, backup speed, memory and storage usage are covered below.
+
+As performance and resource usage depend on a lot of factors, you may need to
+tweak the parameters a bit and retry until you find the best ones for your
+setup.
+
+Usually, the default parameters are selected for best speed under the assumption
+that you run a modern machine with fast CPU, fast I/O and a good amount of RAM.
+
+If you run an older or low-resource machine or your backup target or connection
+to it is slow, tweaking parameters might give significant speedups.
+
+Exclude crap data
+-----------------
+Maybe you don't want to back up:
+
+* cache / temporary files (they can be rebuilt / are useless)
+* specific directories / filenames / file extensions you do not need
+* backups (some people make backups of backups...)
+
+You can exclude these, so they don't waste time and space.
+
+Avoid scrolling
+---------------
+If you do benchmarks, avoid creating a lot of log output, especially if it
+means scrolling text in a window on a graphical user interface.
+
+Rather, use much less log output or at least redirect the output to a log
+file; that is also much faster than scrolling.
+
+Speed (in general)
+------------------
+Keep an eye on CPU and I/O bounds. Try to find the sweet spot in the middle
+where it is not too much I/O bound and not too much CPU bound.
+
+I/O bound
+~~~~~~~~~
+If CPU load does not sum up to 1 core fully loaded while backing up, the
+process is likely I/O bound (can't read or write data fast enough).
+
+Maybe you want to try higher compression then so it has less data to write.
+Or get faster I/O, if possible.
+
+CPU bound
+~~~~~~~~~
+If you have 1 core fully loaded most of the time, but your backup seems slow,
+the process is likely CPU bound (can't compute fast enough).
+
+Maybe you want to try lower compression then so it has less to compute.
+Using a faster MAC or cipher method might also be an option.
+Or get a faster CPU.
+
+I/O speed
+---------
+From fast to slower:
+
+* fast local filesystem, SSD or HDD, via PCIe, SATA, USB
+* ssh connection to a remote server's borg instance
+* mounted network filesystems of a remote server
+
+Not only throughput influences timing; latency does, too.
+
+Backup space needed
+-------------------
+If you mostly back up the same data each time, you will often save a lot of
+space due to deduplication - this works independently of compression.
+
+To avoid running out of space, regularly prune your backup archives according
+to your needs. Backups of the same machine that are close in time are usually
+very cheap (because most of the data is the same and gets deduplicated).
+
+Compression
+-----------
+If you have a fast backup source and destination and you are not low on backup space:
+Switch off compression, your backup will run faster and with less cpu load.
+
+If you just want to save a bit of space, but stay relatively fast:
+Try zlib level 1.
+
+If you have a very slow source or destination (e.g. a remote backup space via a
+network connection that is considerably slower than your local and remote storage):
+Try a higher zlib level or lzma.
+
+Authentication & MAC selection
+------------------------------
+Real MACs (Message Authentication Codes) can only be used when a secret key is
+available. A MAC authenticates your backup data with that key, so it can detect
+malicious tampering.
+Without a key, a simple hash will be used (which helps to detect accidental
+data corruption, but cannot detect malicious data tampering).
+
+Older or simple 32bit machine architecture
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Use sha256 (no key) or hmac-sha256 (key).
+
+64bit architecture, but no AES hardware acceleration in the CPU
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Use sha512-256 (no key) or hmac-sha512-256 (key).
+
+Modern 64bit CPU with AES hardware acceleration (AES-NI, PCLMULQDQ)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Use ghash (no key) or gmac (key).
+
+Encryption & Cipher selection
+-----------------------------
+Always encrypt your backups (and keep passphrase and key file [if any] safe).
+
+The cipher selection chooses between various AEAD ciphers (authenticated
+encryption with associated data); it is EtM (encrypt-then-MAC):
+
+Older or simple 32bit machine architecture
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Use aes256-ctr + hmac-sha256.
+
+64bit architecture, but no AES hardware acceleration in the CPU
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Use aes256-ctr + hmac-sha512-256.
+
+Modern 64bit CPU with AES hardware acceleration (AES-NI, PCLMULQDQ)
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+Use aes256-gcm (AEAD 1-pass cipher).
+
+RAM usage
+---------
+Depending on the number of files and chunks in the repository, memory usage
+varies:
+
+* about 250+B RAM per file (for "files" cache)
+* about 44B RAM per 64kiB chunk (for "chunks" cache)
+* about 40B RAM per 64kiB chunk (for repository index, if remote repo is used,
+  this will be allocated on remote side)
+
+If you run into memory usage issues, your options are:
+
+* get more RAM (or more swapspace, speed will be slower)
+* disable the "files" cache, speed will be slower
+* have fewer files / chunks per repo
+
+Note: RAM compression likely won't help as a lot of that data is using
+msgpack, which is already rather efficient.

+ 7 - 3
setup.py

@@ -102,6 +102,12 @@ elif sys.platform.startswith('freebsd'):
 elif sys.platform == 'darwin':
 elif sys.platform == 'darwin':
     ext_modules.append(Extension('borg.platform_darwin', [platform_darwin_source]))
     ext_modules.append(Extension('borg.platform_darwin', [platform_darwin_source]))
 
 
+# msgpack pure python data corruption was fixed in 0.4.6.
+# Also, we might use some rather recent API features.
+install_requires=['msgpack-python>=0.4.6', 'blosc>=1.2.5']
+if sys.version_info < (3, 3):
+    install_requires.append('backports.lzma')
+
 setup(
 setup(
     name='borgbackup',
     name='borgbackup',
     version=versioneer.get_version(),
     version=versioneer.get_version(),
@@ -132,7 +138,5 @@ setup(
     scripts=['scripts/borg'],
     scripts=['scripts/borg'],
     cmdclass=cmdclass,
     cmdclass=cmdclass,
     ext_modules=ext_modules,
     ext_modules=ext_modules,
-    # msgpack pure python data corruption was fixed in 0.4.6.
-    # Also, we might use some rather recent API features.
-    install_requires=['msgpack-python>=0.4.6']
+    install_requires=install_requires,
 )
 )