Browse Source

Make the repository key_size flexible ("key" as in index key, i.e. the id_hash() digest)

Thomas Waldmann 10 years ago
parent
commit
0c183acb55

+ 3 - 3
attic/archive.py

@@ -577,7 +577,7 @@ class ArchiveChecker:
         self.repository = repository
         self.init_chunks()
         self.key = self.identify_key(repository)
-        if Manifest.MANIFEST_ID not in self.chunks:
+        if Manifest.manifest_id(repository) not in self.chunks:
             self.manifest = self.rebuild_manifest()
         else:
             self.manifest, _ = Manifest.load(repository, key=self.key)
@@ -596,7 +596,7 @@ class ArchiveChecker:
         # Explicitly set the initial hash table capacity to avoid performance issues
         # due to hash table "resonance"
         capacity = int(len(self.repository) * 1.2)
-        self.chunks = ChunkIndex(capacity)
+        self.chunks = ChunkIndex(capacity, key_size=self.repository.key_size)
         marker = None
         while True:
             result = self.repository.list(limit=10000, marker=marker)
@@ -648,7 +648,7 @@ class ArchiveChecker:
         Missing and/or incorrect data is repaired when detected
         """
         # Exclude the manifest from chunks
-        del self.chunks[Manifest.MANIFEST_ID]
+        del self.chunks[Manifest.manifest_id(self.repository)]
 
         def mark_as_possibly_superseded(id_):
             if self.chunks.get(id_, (0,))[0] == 0:

+ 7 - 5
attic/archiver.py

@@ -13,7 +13,7 @@ from attic import __version__
 from attic.archive import Archive, ArchiveChecker
 from attic.repository import Repository
 from attic.cache import Cache
-from attic.key import key_creator, COMPR_DEFAULT, HASH_DEFAULT, MAC_DEFAULT, PLAIN_DEFAULT, CIPHER_DEFAULT
+from attic.key import key_creator, maccer_creator, COMPR_DEFAULT, HASH_DEFAULT, MAC_DEFAULT, PLAIN_DEFAULT, CIPHER_DEFAULT
 from attic.helpers import Error, location_validator, format_time, \
     format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, \
     get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \
@@ -27,11 +27,11 @@ class Archiver:
     def __init__(self):
         self.exit_code = 0
 
-    def open_repository(self, location, create=False, exclusive=False):
+    def open_repository(self, location, create=False, exclusive=False, key_size=None):
         if location.proto == 'ssh':
-            repository = RemoteRepository(location, create=create)
+            repository = RemoteRepository(location, create=create, key_size=key_size)
         else:
-            repository = Repository(location.path, create=create, exclusive=exclusive)
+            repository = Repository(location.path, create=create, exclusive=exclusive, key_size=key_size)
         repository._location = location
         return repository
 
@@ -56,8 +56,10 @@ class Archiver:
     def do_init(self, args):
         """Initialize an empty repository"""
         print('Initializing repository at "%s"' % args.repository.orig)
-        repository = self.open_repository(args.repository, create=True, exclusive=True)
         key_cls = key_creator(args)
+        maccer_cls = maccer_creator(args, key_cls)
+        repository = self.open_repository(args.repository, create=True, exclusive=True,
+                                          key_size=maccer_cls.digest_size)
         key = key_cls.create(repository, args)
         manifest = Manifest(key, repository)
         manifest.write()

+ 3 - 2
attic/cache.py

@@ -51,7 +51,7 @@ class Cache:
         config.set('cache', 'manifest', '')
         with open(os.path.join(self.path, 'config'), 'w') as fd:
             config.write(fd)
-        ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8'))
+        ChunkIndex(key_size=self.repository.key_size).write(os.path.join(self.path, 'chunks').encode('utf-8'))
         with open(os.path.join(self.path, 'files'), 'w') as fd:
             pass  # empty file
 
@@ -67,7 +67,8 @@ class Cache:
         self.id = self.config.get('cache', 'repository')
         self.manifest_id = unhexlify(self.config.get('cache', 'manifest'))
         self.timestamp = self.config.get('cache', 'timestamp', fallback=None)
-        self.chunks = ChunkIndex.read(os.path.join(self.path, 'chunks').encode('utf-8'))
+        self.chunks = ChunkIndex.read(os.path.join(self.path, 'chunks').encode('utf-8'),
+                                      key_size=self.repository.key_size)
         self.files = None
 
     def close(self):

+ 4 - 3
attic/hashindex.pyx

@@ -28,7 +28,8 @@ cdef class IndexBase:
     cdef HashIndex *index
     cdef int key_size
 
-    def __cinit__(self, capacity=0, path=None, key_size=32):
+    def __cinit__(self, capacity=0, path=None, key_size=None):
+        assert key_size is not None
         self.key_size = key_size
         if path:
             self.index = hashindex_read(<bytes>os.fsencode(path))
@@ -44,8 +45,8 @@ cdef class IndexBase:
             hashindex_free(self.index)
 
     @classmethod
-    def read(cls, path):
-        return cls(path=path)
+    def read(cls, path, key_size=None):
+        return cls(path=path, key_size=key_size)
 
     def write(self, path):
         if not hashindex_write(self.index, <bytes>os.fsencode(path)):

+ 6 - 4
attic/helpers.py

@@ -81,18 +81,20 @@ def check_extension_modules():
 
 class Manifest:
 
-    MANIFEST_ID = b'\0' * 32
-
     def __init__(self, key, repository):
         self.archives = {}
         self.config = {}
         self.key = key
         self.repository = repository
 
+    @classmethod
+    def manifest_id(cls, repository):
+        return b'\0' * repository.key_size
+
     @classmethod
     def load(cls, repository, key=None):
         from .key import key_factory
-        cdata = repository.get(cls.MANIFEST_ID)
+        cdata = repository.get(cls.manifest_id(repository))
         if not key:
             key = key_factory(repository, cdata)
         manifest = cls(key, repository)
@@ -117,7 +119,7 @@ class Manifest:
             'config': self.config,
         }))
         self.id = self.key.id_hash(data)
-        self.repository.put(self.MANIFEST_ID, self.key.encrypt(data))
+        self.repository.put(self.manifest_id(self.repository), self.key.encrypt(data))
 
 
 def prune_within(archives, within):

+ 7 - 1
attic/key.py

@@ -69,6 +69,7 @@ class HMAC(hmac.HMAC):
 
 class SHA256(object):  # note: can't subclass sha256
     TYPE = 0
+    digest_size = 32
 
     def __init__(self, key, data=b''):
         # signature is like for a MAC, we ignore the key as this is a simple hash
@@ -89,6 +90,7 @@ class SHA256(object):  # note: can't subclass sha256
 class SHA512_256(sha512_256):
     """sha512, but digest truncated to 256bit - faster than sha256 on 64bit platforms"""
     TYPE = 1
+    digest_size = 32
 
     def __init__(self, key, data):
         # signature is like for a MAC, we ignore the key as this is a simple hash
@@ -99,6 +101,7 @@ class SHA512_256(sha512_256):
 
 class GHASH:
     TYPE = 2
+    digest_size = 16
 
     def __init__(self, key, data):
         # signature is like for a MAC, we ignore the key as this is a simple hash
@@ -112,11 +115,12 @@ class GHASH:
         # GMAC = aes-gcm with all data as AAD, no data as to-be-encrypted data
         mac_cipher.add(bytes(self.data))
         hash, _ = mac_cipher.compute_mac_and_encrypt(b'')
-        return hash + b'\0'*16  # XXX hashindex code wants 32 bytes (256 bit)
+        return hash
 
 
 class HMAC_SHA256(HMAC):
     TYPE = 10
+    digest_size = 32
 
     def __init__(self, key, data):
         if key is None:
@@ -126,6 +130,7 @@ class HMAC_SHA256(HMAC):
 
 class HMAC_SHA512_256(HMAC):
     TYPE = 11
+    digest_size = 32
 
     def __init__(self, key, data):
         if key is None:
@@ -135,6 +140,7 @@ class HMAC_SHA512_256(HMAC):
 
 class GMAC(GHASH):
     TYPE = 20
+    digest_size = 16
 
     def __init__(self, key, data):
         super().__init__(None, data)

+ 6 - 6
attic/remote.py

@@ -89,7 +89,7 @@ class RepositoryServer:
     def negotiate(self, versions):
         return 1
 
-    def open(self, path, create=False):
+    def open(self, path, create=False, key_size=None):
         path = os.fsdecode(path)
         if path.startswith('/~'):
             path = path[1:]
@@ -100,8 +100,8 @@ class RepositoryServer:
                     break
             else:
                 raise PathNotAllowed(path)
-        self.repository = Repository(path, create)
-        return self.repository.id
+        self.repository = Repository(path, create, key_size=key_size)
+        return self.repository.id, self.repository.key_size
 
 
 class RemoteRepository:
@@ -112,7 +112,7 @@ class RemoteRepository:
         def __init__(self, name):
             self.name = name
 
-    def __init__(self, location, create=False):
+    def __init__(self, location, create=False, key_size=None):
         self.location = location
         self.preload_ids = []
         self.msgid = 0
@@ -144,7 +144,7 @@ class RemoteRepository:
         version = self.call('negotiate', 1)
         if version != 1:
             raise Exception('Server insisted on using unsupported protocol version %d' % version)
-        self.id = self.call('open', location.path, create)
+        self.id, self.key_size = self.call('open', location.path, create, key_size)
 
     def __del__(self):
         self.close()
@@ -299,7 +299,7 @@ class RepositoryCache:
 
     def initialize(self):
         self.tmppath = tempfile.mkdtemp()
-        self.index = NSIndex()
+        self.index = NSIndex(key_size=self.repository.key_size)
         self.data_fd = open(os.path.join(self.tmppath, 'data'), 'a+b')
 
     def cleanup(self):

+ 17 - 11
attic/repository.py

@@ -46,22 +46,23 @@ class Repository:
     class ObjectNotFound(Error):
         """Object with key {} not found in repository {}."""
 
-    def __init__(self, path, create=False, exclusive=False):
+    def __init__(self, path, create=False, exclusive=False, key_size=None):
         self.path = path
         self.io = None
         self.lock = None
         self.index = None
         self._active_txn = False
         if create:
-            self.create(path)
+            self.create(path, key_size)
         self.open(path, exclusive)
 
     def __del__(self):
         self.close()
 
-    def create(self, path):
+    def create(self, path, key_size):
         """Create a new empty repository at `path`
         """
+        assert key_size is not None
         if os.path.exists(path) and (not os.path.isdir(path) or os.listdir(path)):
             raise self.AlreadyExists(path)
         if not os.path.exists(path):
@@ -74,6 +75,7 @@ class Repository:
         config.set('repository', 'version', '1')
         config.set('repository', 'segments_per_dir', self.DEFAULT_SEGMENTS_PER_DIR)
         config.set('repository', 'max_segment_size', self.DEFAULT_MAX_SEGMENT_SIZE)
+        config.set('repository', 'key_size', key_size)
         config.set('repository', 'id', hexlify(os.urandom(32)).decode('ascii'))
         with open(os.path.join(path, 'config'), 'w') as fd:
             config.write(fd)
@@ -109,10 +111,12 @@ class Repository:
         if 'repository' not in self.config.sections() or self.config.getint('repository', 'version') != 1:
             raise self.InvalidRepository(path)
         self.lock = UpgradableLock(os.path.join(path, 'config'), exclusive)
+        # legacy attic repositories always have key size 32B (256b)
+        self.key_size = self.config.getint('repository', 'key_size', fallback=32)
         self.max_segment_size = self.config.getint('repository', 'max_segment_size')
         self.segments_per_dir = self.config.getint('repository', 'segments_per_dir')
         self.id = unhexlify(self.config.get('repository', 'id').strip())
-        self.io = LoggedIO(self.path, self.max_segment_size, self.segments_per_dir)
+        self.io = LoggedIO(self.path, self.max_segment_size, self.segments_per_dir, self.key_size)
 
     def close(self):
         if self.lock:
@@ -132,8 +136,9 @@ class Repository:
 
     def open_index(self, transaction_id):
         if transaction_id is None:
-            return NSIndex()
-        return NSIndex.read((os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8'))
+            return NSIndex(key_size=self.key_size)
+        return NSIndex.read((os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8'),
+                            key_size=self.key_size)
 
     def prepare_txn(self, transaction_id, do_cleanup=True):
         self._active_txn = True
@@ -382,8 +387,6 @@ class LoggedIO:
 
     header_fmt = struct.Struct('<IIB')
     assert header_fmt.size == 9
-    put_header_fmt = struct.Struct('<IIB32s')
-    assert put_header_fmt.size == 41
     header_no_crc_fmt = struct.Struct('<IB')
     assert header_no_crc_fmt.size == 5
     crc_fmt = struct.Struct('<I')
@@ -392,13 +395,16 @@ class LoggedIO:
     _commit = header_no_crc_fmt.pack(9, TAG_COMMIT)
     COMMIT = crc_fmt.pack(crc32(_commit)) + _commit
 
-    def __init__(self, path, limit, segments_per_dir, capacity=90):
+    def __init__(self, path, limit, segments_per_dir, key_size, capacity=90):
         self.path = path
         self.fds = LRUCache(capacity)
         self.segment = 0
         self.limit = limit
         self.segments_per_dir = segments_per_dir
+        self.key_size = key_size
         self.offset = 0
+        self.put_header_fmt = struct.Struct('<IIB%ds' % key_size)
+        assert self.put_header_fmt.size == self.header_fmt.size + key_size
         self._write_fd = None
 
     def close(self):
@@ -504,9 +510,9 @@ class LoggedIO:
                 raise IntegrityError('Invalid segment entry header')
             key = None
             if tag in (TAG_PUT, TAG_DELETE):
-                key = rest[:32]
+                key = rest[:self.key_size]
             if include_data:
-                yield tag, key, offset, rest[32:]
+                yield tag, key, offset, rest[self.key_size:]
             else:
                 yield tag, key, offset
             offset += size

+ 2 - 2
attic/testsuite/archiver.py

@@ -464,7 +464,7 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
 
     def test_missing_manifest(self):
         archive, repository = self.open_archive('archive1')
-        repository.delete(Manifest.MANIFEST_ID)
+        repository.delete(Manifest.manifest_id(repository))
         repository.commit()
         self.attic('check', self.repository_location, exit_code=1)
         output = self.attic('check', '--repair', self.repository_location, exit_code=0)
@@ -475,7 +475,7 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
     def test_extra_chunks(self):
         self.attic('check', self.repository_location, exit_code=0)
         repository = Repository(self.repository_location)
-        repository.put(b'01234567890123456789012345678901', b'xxxx')
+        repository.put(b'0123456789012345', b'xxxx')
         repository.commit()
         repository.close()
         self.attic('check', self.repository_location, exit_code=1)

+ 5 - 5
attic/testsuite/hashindex.py

@@ -8,7 +8,7 @@ from attic.testsuite import AtticTestCase
 class HashIndexTestCase(AtticTestCase):
 
     def _generic_test(self, cls, make_value, sha):
-        idx = cls()
+        idx = cls(key_size=32)
         self.assert_equal(len(idx), 0)
         # Test set
         for x in range(100):
@@ -33,7 +33,7 @@ class HashIndexTestCase(AtticTestCase):
         with open(idx_name.name, 'rb') as fd:
             self.assert_equal(hashlib.sha256(fd.read()).hexdigest(), sha)
         # Make sure we can open the file
-        idx = cls.read(idx_name.name)
+        idx = cls.read(idx_name.name, key_size=32)
         self.assert_equal(len(idx), 50)
         for x in range(50, 100):
             self.assert_equal(idx[bytes('%-32d' % x, 'ascii')], make_value(x * 2))
@@ -41,7 +41,7 @@ class HashIndexTestCase(AtticTestCase):
         self.assert_equal(len(idx), 0)
         idx.write(idx_name.name)
         del idx
-        self.assert_equal(len(cls.read(idx_name.name)), 0)
+        self.assert_equal(len(cls.read(idx_name.name, key_size=32)), 0)
 
     def test_nsindex(self):
         self._generic_test(NSIndex, lambda x: (x, x), '369a18ae6a52524eb2884a3c0fdc2824947edd017a2688c5d4d7b3510c245ab9')
@@ -52,7 +52,7 @@ class HashIndexTestCase(AtticTestCase):
     def test_resize(self):
         n = 2000  # Must be >= MIN_BUCKETS
         idx_name = tempfile.NamedTemporaryFile()
-        idx = NSIndex()
+        idx = NSIndex(key_size=32)
         idx.write(idx_name.name)
         initial_size = os.path.getsize(idx_name.name)
         self.assert_equal(len(idx), 0)
@@ -67,7 +67,7 @@ class HashIndexTestCase(AtticTestCase):
         self.assert_equal(initial_size, os.path.getsize(idx_name.name))
 
     def test_iteritems(self):
-        idx = NSIndex()
+        idx = NSIndex(key_size=32)
         for x in range(100):
             idx[bytes('%-0.32d' % x, 'ascii')] = x, x
         all = list(idx.iteritems())

+ 12 - 12
attic/testsuite/key.py

@@ -19,20 +19,20 @@ class KeyTestCase(AtticTestCase):
 
     keyfile2_key_file = """
 ATTIC KEY 0000000000000000000000000000000000000000000000000000000000000000
-hqlhbGdvcml0aG2kZ21hY6RoYXNo2gAgY7jwSMnBwpqD3Fk/aAdSAgAAAAAAAAAAAAAAAA
-AAAACqaXRlcmF0aW9uc84AAYagp3ZlcnNpb24BpHNhbHTaACASqCq8G6a/K/W+bOrNDW65
-Sfl9ZHrTEtq6l+AMUmATxKRkYXRh2gDQuDVCijDzeZDD/JLPrOtsQL/vrZEWvCt5RuXFOt
-tTZfbCJDmv2nt4KvYToVsp82pffZDcsLaOOBCTGurpkdefsdiLMgGiLlbrsXlES+fbKZfq
-Tx2x2DjU4L1bFxuoypDIdk2lB3S98ZpFZ6yd1XtDBVTQ34FZTlDXIZ5HyuxAJBrGKYj/Un
-Fk24N5xSoPfeQhE3r7hqEsGwEEX0s6sg0LHMGyc4xSBb13iZxWRlSdnvBC7teIeevhT/DU
-scOrlrX0NO2eqe5jQF+zj1Q6OtBvRA==
+hqRzYWx02gAgA1l4jfyv22y6U/mxxDT8HodSWAcX0g3nOESrQcNnBsundmVyc2lvbgGqaX
+RlcmF0aW9uc84AAYagqWFsZ29yaXRobaRnbWFjpGhhc2iw7eaB54JssAOnM1S4S9CeTaRk
+YXRh2gDQzmuyg3iYjMeTLObY+ybI+QfngB+5mmHeEAfBa42fuEZgqM3rYyMj2XfgvamF+O
+0asvhEyy9om190FaOxQ4RiiTMNqSP0FKLmd1i5ZyDMfRyp7JbscRFs9Ryk28yXWkv0MgQy
+EAYlaycY+6lWdRSgEPxidyPl9t9dr2AI/UuiQytwqmcmXgWD6Px6wgpOS/4AcRmEvDqIIl
+Rc2xsu+RevGAxk5rnrIIRPr7WB5R2cinzEn9ylDgBDt9LZbq706ELgtwVTnjWB8FBTPwVI
+vLTTXQ==
 """.strip()
 
     keyfile2_cdata = unhexlify(re.sub('\W', '', """
-        0393c420fd6e9ac6f8c49c4789d1c924c14c309200000000000000000000000000000000
-        9600001402c41000000000000000000000000000000000c2c4071352fe2286e3ed
+        0393c4102e5ce8f5e9477c9e4ce2de453121aa139600001402c41000000000000000000000000000000000
+        c2c407b0147a64a379d1
         """))
-    keyfile2_id = unhexlify('d4954bcf8d7b1762356e91b2611c727800000000000000000000000000000000')
+    keyfile2_id = unhexlify('dd9451069663931c8abd85452d016733')
 
     def setUp(self):
         self.tmppath = tempfile.mkdtemp()
@@ -60,7 +60,7 @@ scOrlrX0NO2eqe5jQF+zj1Q6OtBvRA==
     def test_plaintext(self):
         key = PlaintextKey.create(None, self.MockArgs())
         data = b'foo'
-        self.assert_equal(hexlify(key.id_hash(data)), b'4c9137bc0dd3ddb31de4e138a49d7eb300000000000000000000000000000000')
+        self.assert_equal(hexlify(key.id_hash(data)), b'4c9137bc0dd3ddb31de4e138a49d7eb3')
         self.assert_equal(data, key.decrypt(key.id_hash(data), key.encrypt(data)))
 
     def test_keyfile(self):
@@ -115,5 +115,5 @@ scOrlrX0NO2eqe5jQF+zj1Q6OtBvRA==
         self.assert_equal(key.enc_key, key2.enc_key)
         self.assert_equal(key.chunk_seed, key2.chunk_seed)
         data = b'foo'
-        self.assert_equal(hexlify(key.id_hash(data)), b'a409d69859b8a07625f066e42cde050100000000000000000000000000000000')
+        self.assert_equal(hexlify(key.id_hash(data)), b'a409d69859b8a07625f066e42cde0501')
         self.assert_equal(data, key2.decrypt(key2.id_hash(data), key.encrypt(data)))

+ 12 - 8
attic/testsuite/repository.py

@@ -10,13 +10,14 @@ from attic.testsuite import AtticTestCase
 
 
 class RepositoryTestCaseBase(AtticTestCase):
+    key_size = 32
 
-    def open(self, create=False):
-        return Repository(os.path.join(self.tmppath, 'repository'), create=create)
+    def open(self, create=False, key_size=None):
+        return Repository(os.path.join(self.tmppath, 'repository'), create=create, key_size=key_size)
 
     def setUp(self):
         self.tmppath = tempfile.mkdtemp()
-        self.repository = self.open(create=True)
+        self.repository = self.open(create=True, key_size=self.key_size)
 
     def tearDown(self):
         self.repository.close()
@@ -207,7 +208,8 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
         return sorted(int(n) for n in os.listdir(os.path.join(self.tmppath, 'repository', 'data', '0')) if n.isdigit())[-1]
 
     def open_index(self):
-        return NSIndex.read(os.path.join(self.tmppath, 'repository', 'index.{}'.format(self.get_head())))
+        return NSIndex.read(os.path.join(self.tmppath, 'repository', 'index.{}'.format(self.get_head())),
+                            key_size=self.key_size)
 
     def corrupt_object(self, id_):
         idx = self.open_index()
@@ -315,8 +317,9 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
 
 class RemoteRepositoryTestCase(RepositoryTestCase):
 
-    def open(self, create=False):
-        return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create)
+    def open(self, create=False, key_size=None):
+        return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')),
+                                create=create, key_size=key_size)
 
     def test_invalid_rpc(self):
         self.assert_raises(InvalidRPCMethod, lambda: self.repository.call('__init__', None))
@@ -324,5 +327,6 @@ class RemoteRepositoryTestCase(RepositoryTestCase):
 
 class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase):
 
-    def open(self, create=False):
-        return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create)
+    def open(self, create=False, key_size=None):
+        return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')),
+                                create=create, key_size=key_size)