Forráskód Böngészése

Switch to pure AES256 encryption and improved metadata storage

Jonas Borgström 14 éve
szülő
commit
b294ceba67
10 módosított fájl, 401 hozzáadás és 449 törlés
  1. 0 10
      darc/__init__.py
  2. 108 93
      darc/archive.py
  3. 34 45
      darc/archiver.py
  4. 42 29
      darc/cache.py
  5. 56 0
      darc/hashindex.pyx
  6. 0 11
      darc/helpers.py
  7. 160 0
      darc/key.py
  8. 0 189
      darc/keychain.py
  9. 0 71
      darc/oaep.py
  10. 1 1
      darc/store.py

+ 0 - 10
darc/__init__.py

@@ -2,13 +2,3 @@
 
 NS_CHUNK = 0
 NS_ARCHIVE_METADATA = 1
-NS_ARCHIVE_CHUNKS = 2
-NS_ARCHIVE_ITEMS = 3
-
-PACKET_ENCRYPT_READ   = 2 ** 7
-PACKET_ENCRYPT_CREATE = 2 ** 6
-PACKET_CHUNK            = 1 | PACKET_ENCRYPT_READ
-PACKET_ARCHIVE_METADATA = 2 | PACKET_ENCRYPT_READ
-PACKET_ARCHIVE_ITEMS    = 3 | PACKET_ENCRYPT_READ
-PACKET_ARCHIVE_CHUNKS   = 1 | PACKET_ENCRYPT_CREATE
-

+ 108 - 93
darc/archive.py

@@ -6,11 +6,10 @@ import os
 import socket
 import stat
 import sys
-from itertools import izip
+from os.path import dirname
 from xattr import xattr, XATTR_NOFOLLOW
 
-from . import NS_ARCHIVE_METADATA, NS_ARCHIVE_ITEMS, NS_ARCHIVE_CHUNKS, NS_CHUNK, \
-    PACKET_ARCHIVE_METADATA, PACKET_ARCHIVE_ITEMS, PACKET_ARCHIVE_CHUNKS, PACKET_CHUNK
+from . import NS_ARCHIVE_METADATA, NS_CHUNK
 from ._speedups import chunkify
 from .helpers import uid2user, user2uid, gid2group, group2gid, IntegrityError
 
@@ -26,22 +25,24 @@ class Archive(object):
     class DoesNotExist(Exception):
         pass
 
-    def __init__(self, store, keychain, name=None):
-        self.keychain = keychain
+    def __init__(self, store, key, name=None, cache=None):
+        self.key = key
         self.store = store
-        self.items = []
+        self.cache = cache
+        self.items = ''
+        self.items_refs = []
+        self.items_prefix = ''
         self.items_ids = []
         self.hard_links = {}
         if name:
-            self.load(self.keychain.id_hash(name))
+            self.load(self.key.archive_hash(name))
 
     def load(self, id):
         self.id = id
         try:
-            kind, data, self.hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_METADATA, self.id))
+            data, self.hash = self.key.decrypt(self.store.get(NS_ARCHIVE_METADATA, self.id))
         except self.store.DoesNotExist:
             raise self.DoesNotExist
-        assert kind == PACKET_ARCHIVE_METADATA
         self.metadata = msgpack.unpackb(data)
         assert self.metadata['version'] == 1
 
@@ -51,80 +52,90 @@ class Archive(object):
         t, f = self.metadata['time'].split('.', 1)
         return datetime.strptime(t, '%Y-%m-%dT%H:%M:%S') + timedelta(seconds=float('.' + f))
 
-    def get_chunks(self):
-        for id in self.metadata['chunks_ids']:
-            magic, data, hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_CHUNKS, id))
-            assert magic == PACKET_ARCHIVE_CHUNKS
-            assert hash == id
-            chunks = msgpack.unpackb(data)
-            for chunk in chunks:
-                yield chunk
-
     def get_items(self):
-        for id in self.metadata['items_ids']:
-            magic, data, items_hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_ITEMS, id))
-            assert magic == PACKET_ARCHIVE_ITEMS
-            assert items_hash == id
-            items = msgpack.unpackb(data)
-            for item in items:
+        unpacker = msgpack.Unpacker()
+        for id, size, csize in self.metadata['items']:
+            data, items_hash = self.key.decrypt(self.store.get(NS_CHUNK, id))
+            assert self.key.id_hash(data) == id
+            unpacker.feed(data)
+            for item in unpacker:
                 yield item
 
-    def add_item(self, item):
-        self.items.append(item)
-        if len(self.items) > 100000:
+    def add_item(self, item, refs=None):
+        data = msgpack.packb(item)
+        prefix = dirname(item['path'])
+        if self.items_prefix and self.items_prefix != prefix:
             self.flush_items()
+        if refs:
+            self.items_refs += refs
+        self.items += data
+        self.items_prefix = prefix
 
     def flush_items(self):
-        data, hash = self.keychain.encrypt(PACKET_ARCHIVE_ITEMS, msgpack.packb(self.items))
-        self.store.put(NS_ARCHIVE_ITEMS, hash, data)
-        self.items_ids.append(hash)
-        self.items = []
-
-    def save_chunks(self, cache):
-        chunks = []
-        ids = []
-        def flush(chunks):
-            data, hash = self.keychain.encrypt(PACKET_ARCHIVE_CHUNKS, msgpack.packb(chunks))
-            self.store.put(NS_ARCHIVE_CHUNKS, hash, data)
-            ids.append(hash)
-        for id, (count, size) in cache.chunks.iteritems():
-            if count > 1000000:
-                chunks.append((id, size))
-            if len(chunks) > 100000:
-                flush(chunks)
-                chunks = []
-        flush(chunks)
-        return ids
+        if not self.items:
+            return
+        id = self.key.id_hash(self.items)
+        if self.cache.seen_chunk(id):
+            self.items_ids.append(self.cache.chunk_incref(id))
+            for id in self.items_refs:
+                self.cache.chunk_decref(id)
+        else:
+            self.items_ids.append(self.cache.add_chunk(id, self.items))
+        self.items = ''
+        self.items_refs = []
+        self.items_prefix = ''
 
     def save(self, name, cache):
-        self.id = self.keychain.id_hash(name)
-        chunks_ids = self.save_chunks(cache)
+        self.id = self.key.archive_hash(name)
         self.flush_items()
         metadata = {
             'version': 1,
             'name': name,
-            'chunks_ids': chunks_ids,
-            'items_ids': self.items_ids,
+            'items': self.items_ids,
             'cmdline': sys.argv,
             'hostname': socket.gethostname(),
             'username': getuser(),
             'time': datetime.utcnow().isoformat(),
         }
-        data, self.hash = self.keychain.encrypt(PACKET_ARCHIVE_METADATA, msgpack.packb(metadata))
+        data, self.hash = self.key.encrypt(msgpack.packb(metadata))
         self.store.put(NS_ARCHIVE_METADATA, self.id, data)
         self.store.commit()
         cache.commit()
 
-    def stats(self, cache):
-        osize = csize = usize = 0
+    def get_chunks(self):
         for item in self.get_items():
-            if stat.S_ISREG(item['mode']) and not 'source' in item:
-                osize += item['size']
-        for id, size in self.get_chunks():
-            csize += size
-            if cache.seen_chunk(id) == 1:
-                usize += size
-        return osize, csize, usize
+            try:
+                for chunk in item['chunks']:
+                    yield chunk
+            except KeyError:
+                pass
+
+    def stats(self, cache):
+        # This function is a bit evil since it abuses the cache to calculate
+        # the stats. The cache transaction must be rolled back afterwards
+        unpacker = msgpack.Unpacker()
+        cache.begin_txn()
+        osize = zsize = usize = 0
+        for id, size, csize in self.metadata['items']:
+            osize += size
+            zsize += csize
+            unique = self.cache.seen_chunk(id) == 1
+            if unique:
+                usize += csize
+            data, items_hash = self.key.decrypt(self.store.get(NS_CHUNK, id))
+            assert self.key.id_hash(data) == id
+            unpacker.feed(data)
+            for item in unpacker:
+                try:
+                    for id, size, csize in item['chunks']:
+                        osize += size
+                        zsize += csize
+                        if unique and self.cache.seen_chunk(id) == 1:
+                            usize += csize
+                except KeyError:
+                    pass
+        cache.rollback()
+        return osize, zsize, usize
 
     def extract_item(self, item, dest=None, start_cb=None):
         dest = dest or os.getcwdu()
@@ -163,14 +174,13 @@ class Archive(object):
                     if i==0:
                         start_cb(item)
                     assert not error
-                    magic, data, hash = self.keychain.decrypt(chunk)
-                    assert magic == PACKET_CHUNK
-                    if self.keychain.id_hash(data) != id:
+                    data, hash = self.key.decrypt(chunk)
+                    if self.key.id_hash(data) != id:
                         raise IntegrityError('chunk hash did not match')
                     fd.write(data)
                     if last:
-                        self.restore_attrs(path, item)
                         fd.close()
+                        self.restore_attrs(path, item)
 
                 fd = open(path, 'wb')
                 n = len(item['chunks'])
@@ -179,7 +189,7 @@ class Archive(object):
                     self.restore_attrs(path, item)
                     fd.close()
                 else:
-                    for i, id in enumerate(item['chunks']):
+                    for i, (id, size, csize) in enumerate(item['chunks']):
                         self.store.get(NS_CHUNK, id, callback=extract_cb, callback_data=(id, i, i==n-1))
 
         else:
@@ -206,16 +216,15 @@ class Archive(object):
             pass
         if not symlink:
             # FIXME: We should really call futimes here (c extension required)
-            os.utime(path, (item['atime'], item['mtime']))
+            os.utime(path, (item['mtime'], item['mtime']))
 
     def verify_file(self, item, start, result):
         def verify_chunk(chunk, error, (id, i, last)):
             if i == 0:
                 start(item)
             assert not error
-            magic, data, hash = self.keychain.decrypt(chunk)
-            assert magic == PACKET_CHUNK
-            if self.keychain.id_hash(data) != id:
+            data, hash = self.key.decrypt(chunk)
+            if self.key.id_hash(data) != id:
                 result(item, False)
             elif last:
                 result(item, True)
@@ -224,17 +233,24 @@ class Archive(object):
             start(item)
             result(item, True)
         else:
-            for i, id in enumerate(item['chunks']):
+            for i, (id, size, csize) in enumerate(item['chunks']):
                 self.store.get(NS_CHUNK, id, callback=verify_chunk, callback_data=(id, i, i==n-1))
 
     def delete(self, cache):
-        for id, size in self.get_chunks():
-            cache.chunk_decref(id)
+        unpacker = msgpack.Unpacker()
+        for id, size, csize in self.metadata['items']:
+            if self.cache.seen_chunk(id) == 1:
+                data, items_hash = self.key.decrypt(self.store.get(NS_CHUNK, id))
+                assert self.key.id_hash(data) == id
+                unpacker.feed(data)
+                for item in unpacker:
+                    try:
+                        for chunk_id, size, csize in item['chunks']:
+                            self.cache.chunk_decref(chunk_id)
+                    except KeyError:
+                        pass
+            self.cache.chunk_decref(id)
         self.store.delete(NS_ARCHIVE_METADATA, self.id)
-        for id in self.metadata['chunks_ids']:
-            self.store.delete(NS_ARCHIVE_CHUNKS, id)
-        for id in self.metadata['items_ids']:
-            self.store.delete(NS_ARCHIVE_ITEMS, id)
         self.store.commit()
         cache.commit()
 
@@ -243,7 +259,7 @@ class Archive(object):
             'mode': st.st_mode,
             'uid': st.st_uid, 'user': uid2user(st.st_uid),
             'gid': st.st_gid, 'group': gid2group(st.st_gid),
-            'atime': st.st_atime, 'mtime': st.st_mtime,
+            'mtime': st.st_mtime,
         }
         try:
             xa = xattr(path, XATTR_NOFOLLOW)
@@ -287,34 +303,33 @@ class Archive(object):
                 return
             else:
                 self.hard_links[st.st_ino, st.st_dev] = safe_path
-        path_hash = self.keychain.id_hash(path.encode('utf-8'))
-        ids, size = cache.file_known_and_unchanged(path_hash, st)
+        path_hash = self.key.id_hash(path.encode('utf-8'))
+        ids = cache.file_known_and_unchanged(path_hash, st)
+        chunks = None
         if ids is not None:
             # Make sure all ids are available
             for id in ids:
                 if not cache.seen_chunk(id):
-                    ids = None
                     break
             else:
-                for id in ids:
-                    cache.chunk_incref(id)
+                chunks = [cache.chunk_incref(id) for id in ids]
         # Only chunkify the file if needed
-        if ids is None:
+        if chunks is None:
             with open(path, 'rb') as fd:
-                size = 0
-                ids = []
+                chunks = []
                 for chunk in chunkify(fd, CHUNK_SIZE, WINDOW_SIZE,
-                                      self.keychain.get_chunkify_seed()):
-                    ids.append(cache.add_chunk(self.keychain.id_hash(chunk), chunk))
-                    size += len(chunk)
+                                      self.key.chunk_seed):
+                    chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk))
+            ids = [id for id, _, _ in chunks]
             cache.memorize_file(path_hash, st, ids)
-        item = {'path': safe_path, 'chunks': ids, 'size': size}
+        item = {'path': safe_path, 'chunks': chunks}
         item.update(self.stat_attrs(st, path))
-        self.add_item(item)
+        self.add_item(item, ids)
 
     @staticmethod
-    def list_archives(store, keychain):
+    def list_archives(store, key):
         for id in list(store.list(NS_ARCHIVE_METADATA)):
-            archive = Archive(store, keychain)
+            archive = Archive(store, key)
             archive.load(id)
             yield archive
+

+ 34 - 45
darc/archiver.py

@@ -8,7 +8,7 @@ import sys
 from .archive import Archive
 from .store import Store
 from .cache import Cache
-from .keychain import Keychain
+from .key import Key
 from .helpers import location_validator, format_file_size, format_time,\
     format_file_mode, IncludePattern, ExcludePattern, exclude_path, to_localtime
 from .remote import StoreServer, RemoteStore
@@ -44,18 +44,22 @@ class Archiver(object):
     def do_serve(self, args):
         return StoreServer().serve()
 
+    def do_init(self, args):
+        store = self.open_store(args.store, create=True)
+        key = Key.create(store)
+
     def do_create(self, args):
-        store = self.open_store(args.archive, create=True)
-        keychain = Keychain(args.keychain)
+        store = self.open_store(args.archive)
+        key = Key(store)
         try:
-            Archive(store, keychain, args.archive.archive)
+            Archive(store, key, args.archive.archive)
         except Archive.DoesNotExist:
             pass
         else:
             self.print_error('Archive already exists')
             return self.exit_code
-        archive = Archive(store, keychain)
-        cache = Cache(store, keychain)
+        cache = Cache(store, key)
+        archive = Archive(store, key, cache=cache)
         # Add darc cache dir to inode_skip list
         skip_inodes = set()
         try:
@@ -112,8 +116,8 @@ class Archiver(object):
         def start_cb(item):
             self.print_verbose(item['path'].decode('utf-8'))
         store = self.open_store(args.archive)
-        keychain = Keychain(args.keychain)
-        archive = Archive(store, keychain, args.archive.archive)
+        key = Key(store)
+        archive = Archive(store, key, args.archive.archive)
         dirs = []
         for item in archive.get_items():
             if exclude_path(item['path'], args.patterns):
@@ -131,22 +135,24 @@ class Archiver(object):
 
     def do_delete(self, args):
         store = self.open_store(args.archive)
-        keychain = Keychain(args.keychain)
-        archive = Archive(store, keychain, args.archive.archive)
-        cache = Cache(store, keychain)
+        key = Key(store)
+        cache = Cache(store, key)
+        archive = Archive(store, key, args.archive.archive, cache=cache)
         archive.delete(cache)
         return self.exit_code
 
     def do_list(self, args):
         store = self.open_store(args.src)
-        keychain = Keychain(args.keychain)
+        key = Key(store)
         if args.src.archive:
             tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 010: '-', 012: 'l', 014: 's'}
-            archive = Archive(store, keychain, args.src.archive)
+            archive = Archive(store, key, args.src.archive)
             for item in archive.get_items():
                 type = tmap.get(item['mode'] / 4096, '?')
                 mode = format_file_mode(item['mode'])
-                size = item.get('size', 0)
+                size = 0
+                if type == '-':
+                    size = sum(size for _, size, _ in item['chunks'])
                 mtime = format_time(datetime.fromtimestamp(item['mtime']))
                 if 'source' in item:
                     if type == 'l':
@@ -160,14 +166,14 @@ class Archiver(object):
                                                   item['group'], size, mtime,
                                                   item['path'], extra)
         else:
-            for archive in sorted(Archive.list_archives(store, keychain), key=attrgetter('ts')):
+            for archive in sorted(Archive.list_archives(store, key), key=attrgetter('ts')):
                 print '%-20s %s' % (archive.metadata['name'], to_localtime(archive.ts).strftime('%c'))
         return self.exit_code
 
     def do_verify(self, args):
         store = self.open_store(args.archive)
-        keychain = Keychain(args.keychain)
-        archive = Archive(store, keychain, args.archive.archive)
+        key = Key(store)
+        archive = Archive(store, key, args.archive.archive)
         def start_cb(item):
             self.print_verbose('%s ...', item['path'].decode('utf-8'), newline=False)
         def result_cb(item, success):
@@ -187,9 +193,9 @@ class Archiver(object):
 
     def do_info(self, args):
         store = self.open_store(args.archive)
-        keychain = Keychain(args.keychain)
-        archive = Archive(store, keychain, args.archive.archive)
-        cache = Cache(store, keychain)
+        key = Key(store)
+        cache = Cache(store, key)
+        archive = Archive(store, key, args.archive.archive, cache=cache)
         osize, csize, usize = archive.stats(cache)
         print 'Name:', archive.metadata['name']
         print 'Hostname:', archive.metadata['hostname']
@@ -201,45 +207,28 @@ class Archiver(object):
         print 'Unique data:', format_file_size(usize)
         return self.exit_code
 
-    def do_init_keychain(self, args):
-        return Keychain.generate(args.keychain)
-
-    def do_export_restricted(self, args):
-        keychain = Keychain(args.keychain)
-        keychain.restrict(args.output)
-        return self.exit_code
-
-    def do_keychain_chpass(self, args):
-        return Keychain(args.keychain).chpass()
-
     def run(self, args=None):
         dot_path = os.path.join(os.path.expanduser('~'), '.darc')
         if not os.path.exists(dot_path):
             os.mkdir(dot_path)
-        default_keychain = os.path.join(os.path.expanduser('~'),
-                                        '.darc', 'keychain')
+            os.mkdir(os.path.join(dot_path, 'keys'))
+            os.mkdir(os.path.join(dot_path, 'cache'))
         parser = argparse.ArgumentParser(description='DARC - Deduplicating Archiver')
-        parser.add_argument('-k', '--keychain', dest='keychain', type=str,
-                            default=default_keychain,
-                            help='Keychain to use')
         parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
                             default=False,
                             help='Verbose output')
 
-
         subparsers = parser.add_subparsers(title='Available subcommands')
-        subparser = subparsers.add_parser('init-keychain')
-        subparser.set_defaults(func=self.do_init_keychain)
-        subparser = subparsers.add_parser('export-restricted')
-        subparser.add_argument('output', metavar='OUTPUT', type=str,
-                               help='Keychain to create')
-        subparser.set_defaults(func=self.do_export_restricted)
-        subparser = subparsers.add_parser('change-password')
-        subparser.set_defaults(func=self.do_keychain_chpass)
 
         subparser = subparsers.add_parser('serve')
         subparser.set_defaults(func=self.do_serve)
 
+        subparser = subparsers.add_parser('init')
+        subparser.set_defaults(func=self.do_init)
+        subparser.add_argument('store', metavar='ARCHIVE',
+                               type=location_validator(archive=False),
+                               help='Store to create')
+
         subparser = subparsers.add_parser('create')
         subparser.set_defaults(func=self.do_create)
         subparser.add_argument('-i', '--include', dest='patterns',

+ 42 - 29
darc/cache.py

@@ -5,19 +5,19 @@ import msgpack
 import os
 import shutil
 
-from . import NS_ARCHIVE_CHUNKS, NS_CHUNK, PACKET_ARCHIVE_CHUNKS, PACKET_CHUNK
+from . import NS_CHUNK, NS_ARCHIVE_METADATA
 from .helpers import error_callback
-from .hashindex import NSIndex
+from .hashindex import ChunkIndex
 
 
 class Cache(object):
     """Client Side cache
     """
 
-    def __init__(self, store, keychain):
+    def __init__(self, store, key):
         self.txn_active = False
         self.store = store
-        self.keychain = keychain
+        self.key = key
         self.path = os.path.join(Cache.cache_dir_path(), self.store.id.encode('hex'))
         if not os.path.exists(self.path):
             self.create()
@@ -25,6 +25,7 @@ class Cache(object):
         assert self.id == store.id
         if self.tid != store.tid:
             self.sync()
+            self.commit()
 
     @staticmethod
     def cache_dir_path():
@@ -44,7 +45,7 @@ class Cache(object):
         config.set('cache', 'tid', '0')
         with open(os.path.join(self.path, 'config'), 'wb') as fd:
             config.write(fd)
-        NSIndex.create(os.path.join(self.path, 'chunks'))
+        ChunkIndex.create(os.path.join(self.path, 'chunks'))
         with open(os.path.join(self.path, 'files'), 'wb') as fd:
             pass # empty file
 
@@ -60,7 +61,7 @@ class Cache(object):
             raise Exception('%s Does not look like a darc cache')
         self.id = self.config.get('cache', 'store_id').decode('hex')
         self.tid = self.config.getint('cache', 'tid')
-        self.chunks = NSIndex(os.path.join(self.path, 'chunks'))
+        self.chunks = ChunkIndex(os.path.join(self.path, 'chunks'))
         self.files = None
 
     def _read_files(self):
@@ -96,9 +97,6 @@ class Cache(object):
             with open(os.path.join(self.path, 'files'), 'wb') as fd:
                 for item in self.files.iteritems():
                     msgpack.pack(item, fd)
-        for id, (count, size) in self.chunks.iteritems():
-            if count > 1000000:
-                self.chunks[id] = count - 1000000, size
         self.config.set('cache', 'tid', self.store.tid)
         with open(os.path.join(self.path, 'config'), 'w') as fd:
             self.config.write(fd)
@@ -129,48 +127,63 @@ class Cache(object):
         self.begin_txn()
         print 'Initializing cache...'
         self.chunks.clear()
-        for id in self.store.list(NS_ARCHIVE_CHUNKS):
-            magic, data, hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_CHUNKS, id))
-            assert magic == PACKET_ARCHIVE_CHUNKS
-            chunks = msgpack.unpackb(data)
-            for id, size in chunks:
+        unpacker = msgpack.Unpacker()
+        for id in self.store.list(NS_ARCHIVE_METADATA):
+            data, hash = self.key.decrypt(self.store.get(NS_ARCHIVE_METADATA, id))
+            archive = msgpack.unpackb(data)
+            print 'Analyzing archive:', archive['name']
+            for id, size, csize in archive['items']:
+                data, hash = self.key.decrypt(self.store.get(NS_CHUNK, id))
+                assert self.key.id_hash(data) == id
                 try:
-                    count, size = self.chunks[id]
-                    self.chunks[id] = count + 1, size
+                    count, size, csize = self.chunks[id]
+                    self.chunks[id] = count + 1, size, csize
                 except KeyError:
-                    self.chunks[id] = 1, size
+                    self.chunks[id] = 1, size, csize
+                    unpacker.feed(data)
+                    for item in unpacker:
+                        try:
+                            for id, size, csize in item['chunks']:
+                                try:
+                                    count, size, csize = self.chunks[id]
+                                    self.chunks[id] = count + 1, size, csize
+                                except KeyError:
+                                    self.chunks[id] = 1, size, csize
+                                pass
+                        except KeyError:
+                            pass
 
     def add_chunk(self, id, data):
         if not self.txn_active:
             self.begin_txn()
         if self.seen_chunk(id):
             return self.chunk_incref(id)
-        data, hash = self.keychain.encrypt(PACKET_CHUNK, data)
+        size = len(data)
+        data, hash = self.key.encrypt(data)
         csize = len(data)
         self.store.put(NS_CHUNK, id, data, callback=error_callback)
-        self.chunks[id] = (1000001, csize)
-        return id
+        self.chunks[id] = (1, size, csize)
+        return id, size, csize
 
     def seen_chunk(self, id):
-        return self.chunks.get(id, (0, 0))[0]
+        return self.chunks.get(id, (0, 0, 0))[0]
 
     def chunk_incref(self, id):
         if not self.txn_active:
             self.begin_txn()
-        count, size = self.chunks[id]
-        if count < 1000000:
-            self.chunks[id] = (count + 1000001, size)
-        return id
+        count, size, csize = self.chunks[id]
+        self.chunks[id] = (count + 1, size, csize)
+        return id, size, csize
 
     def chunk_decref(self, id):
         if not self.txn_active:
             self.begin_txn()
-        count, size = self.chunks[id]
+        count, size, csize = self.chunks[id]
         if count == 1:
             del self.chunks[id]
             self.store.delete(NS_CHUNK, id, callback=error_callback)
         else:
-            self.chunks[id] = (count - 1, size)
+            self.chunks[id] = (count - 1, size, csize)
 
     def file_known_and_unchanged(self, path_hash, st):
         if self.files is None:
@@ -180,9 +193,9 @@ class Cache(object):
             and entry[2] == st.st_size and entry[1] == st.st_ino):
             # reset entry age
             self.files[path_hash] = (0,) + entry[1:]
-            return entry[4], entry[2]
+            return entry[4]
         else:
-            return None, 0
+            return None
 
     def memorize_file(self, path_hash, st, ids):
         # Entry: Age, inode, size, mtime, chunk ids

+ 56 - 0
darc/hashindex.pyx

@@ -113,6 +113,62 @@ cdef class NSKeyIterator:
         return self.key[:32], (value[0], value[1])
 
 
+cdef class ChunkIndex(IndexBase):
+
+    @classmethod
+    def create(cls, path, capacity=16):
+        index = hashindex_create(path, capacity, 32, 12)
+        hashindex_close(index)
+        return cls(path)
+
+    def __getitem__(self, key):
+        assert len(key) == 32
+        data = <int *>hashindex_get(self.index, <char *>key)
+        if not data:
+            raise KeyError
+        return data[0], data[1], data[2]
+
+    def __delitem__(self, key):
+        assert len(key) == 32
+        hashindex_delete(self.index, <char *>key)
+
+    def __setitem__(self, key, value):
+        assert len(key) == 32
+        cdef int[3] data
+        data[0] = value[0]
+        data[1] = value[1]
+        data[2] = value[2]
+        hashindex_set(self.index, <char *>key, data)
+
+    def __contains__(self, key):
+        assert len(key) == 32
+        data = <int *>hashindex_get(self.index, <char *>key)
+        return data != NULL
+
+    def iteritems(self, marker=None, limit=0):
+        iter = ChunkKeyIterator()
+        iter.index = self.index
+        return iter
+
+
+cdef class ChunkKeyIterator:
+    cdef HashIndex *index
+    cdef char *key
+
+    def __cinit__(self):
+        self.key = NULL
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        self.key = <char *>hashindex_next_key(self.index, <char *>self.key)
+        if not self.key:
+            raise StopIteration
+        cdef int *value = <int *>(self.key + 32)
+        return self.key[:32], (value[0], value[1], value[2])
+
+
 cdef class BandIndex(IndexBase):
 
     @classmethod

+ 0 - 11
darc/helpers.py

@@ -70,17 +70,6 @@ def decode_long(bytes):
             return v + (b << base)
 
 
-def zero_pad(data, length):
-    """Make sure data is `length` bytes long by prepending zero bytes
-
-    >>> zero_pad('foo', 5)
-    '\\x00\\x00foo'
-    >>> zero_pad('foo', 3)
-    'foo'
-    """
-    return '\0' * (length - len(data)) + data
-
-
 def exclude_path(path, patterns):
     """Used by create and extract sub-commands to determine
     if an item should be processed or not

+ 160 - 0
darc/key.py

@@ -0,0 +1,160 @@
+from __future__ import with_statement
+from getpass import getpass
+import hashlib
+import os
+import msgpack
+import zlib
+
+from pbkdf2 import pbkdf2
+from Crypto.Cipher import AES
+from Crypto.Hash import SHA256, HMAC
+from Crypto.Util import Counter
+from Crypto.Util.number import bytes_to_long, long_to_bytes
+from Crypto.Random import get_random_bytes
+
+from .helpers import IntegrityError
+
+
+class Key(object):
+    FILE_ID = 'DARC KEY'
+
+    def __init__(self, store=None):
+        if store:
+            self.open(store)
+
+    def open(self, store):
+        path = os.path.join(os.path.expanduser('~'),
+                            '.darc', 'keys', store.id.encode('hex'))
+        with open(path, 'rb') as fd:
+            lines = fd.readlines()
+            if not lines[0].startswith(self.FILE_ID):
+                raise ValueError('Not a DARC key file')
+            self.store_id = lines[0][len(self.FILE_ID):].strip().decode('hex')
+            cdata = (''.join(lines[1:])).decode('base64')
+        self.password = ''
+        data = self.decrypt_key_file(cdata, '')
+        while not data:
+            self.password = getpass('Key password: ')
+            if not self.password:
+                raise Exception('Key decryption failed')
+            data = self.decrypt_key_file(cdata, self.password)
+            if not data:
+                print 'Incorrect password'
+        key = msgpack.unpackb(data)
+        assert key['version'] == 1
+        self.store_id = key['store_id']
+        self.enc_key = key['enc_key']
+        self.enc_hmac_key = key['enc_hmac_key']
+        self.id_key = key['id_key']
+        self.archive_key = key['archive_key']
+        self.chunk_seed = key['chunk_seed']
+        self.counter = Counter.new(128, initial_value=bytes_to_long(os.urandom(16)), allow_wraparound=True)
+
+    def encrypt_key_file(self, data, password):
+        salt = get_random_bytes(32)
+        iterations = 2000
+        key = pbkdf2(password, salt, 32, iterations, hashlib.sha256)
+        hash = HMAC.new(key, data, SHA256).digest()
+        cdata = AES.new(key, AES.MODE_CTR, counter=Counter.new(128)).encrypt(data)
+        d = {
+            'version': 1,
+            'salt': salt,
+            'iterations': iterations,
+            'algorithm': 'SHA256',
+            'hash': hash,
+            'data': cdata,
+        }
+        return msgpack.packb(d)
+
+    def decrypt_key_file(self, data, password):
+        d = msgpack.unpackb(data)
+        assert d['version'] == 1
+        assert d['algorithm'] == 'SHA256'
+        key = pbkdf2(password, d['salt'], 32, d['iterations'], hashlib.sha256)
+        data = AES.new(key, AES.MODE_CTR, counter=Counter.new(128)).decrypt(d['data'])
+        if HMAC.new(key, data, SHA256).digest() != d['hash']:
+            return None
+        return data
+
+    def save(self, path, password):
+        key = {
+            'version': 1,
+            'store_id': self.store_id,
+            'enc_key': self.enc_key,
+            'enc_hmac_key': self.enc_hmac_key,
+            'id_key': self.id_key,
+            'archive_key': self.archive_key,
+            'chunk_seed': self.chunk_seed,
+        }
+        data = self.encrypt_key_file(msgpack.packb(key), password)
+        with open(path, 'wb') as fd:
+            fd.write('%s %s\n' % (self.FILE_ID, self.store_id.encode('hex')))
+            fd.write(data.encode('base64'))
+            print 'Key chain "%s" created' % path
+
+    def chpass(self):
+        password, password2 = 1, 2
+        while password != password2:
+            password = getpass('New password: ')
+            password2 = getpass('New password again: ')
+            if password != password2:
+                print 'Passwords do not match'
+        self.save(self.path, password)
+        return 0
+
+    @staticmethod
+    def create(store):
+        path = os.path.join(os.path.expanduser('~'),
+                            '.darc', 'keys', store.id.encode('hex'))
+        if os.path.exists(path):
+            print '%s already exists' % path
+            return 1
+        password, password2 = 1, 2
+        while password != password2:
+            password = getpass('Keychain password: ')
+            password2 = getpass('Keychain password again: ')
+            if password != password2:
+                print 'Passwords do not match'
+        key = Key()
+        key.store_id = store.id
+        # Chunk AES256 encryption key
+        key.enc_key = get_random_bytes(32)
+        # Chunk encryption HMAC key
+        key.enc_hmac_key = get_random_bytes(32)
+        # Chunk id HMAC key
+        key.id_key = get_random_bytes(32)
+        # Archive name HMAC key
+        key.archive_key = get_random_bytes(32)
+        # Chunkifier seed
+        key.chunk_seed = bytes_to_long(get_random_bytes(4)) & 0x7fffffff
+        key.save(path, password)
+        return 0
+
+    def id_hash(self, data):
+        """Return HMAC hash using the "id" HMAC key
+        """
+        return HMAC.new(self.id_key, data, SHA256).digest()
+
+    def archive_hash(self, data):
+        """Return HMAC hash using the "archive" HMAC key
+        """
+        return HMAC.new(self.archive_key, data, SHA256).digest()
+
+    def encrypt(self, data):
+        data = zlib.compress(data)
+        nonce = long_to_bytes(self.counter.next_value(), 16)
+        data = ''.join((nonce, AES.new(self.enc_key, AES.MODE_CTR, '',
+                                       counter=self.counter).encrypt(data)))
+        hash = HMAC.new(self.enc_hmac_key, data, SHA256).digest()
+        return ''.join(('\0', hash, data)), hash
+
+    def decrypt(self, data):
+        assert data[0] == '\0'
+        hash = data[1:33]
+        if HMAC.new(self.enc_hmac_key, data[33:], SHA256).digest() != hash:
+            raise IntegrityError('Encryption integrity error')
+        nonce = bytes_to_long(data[33:49])
+        counter = Counter.new(128, initial_value=nonce, allow_wraparound=True)
+        data = AES.new(self.enc_key, AES.MODE_CTR, counter=counter).decrypt(data[49:])
+        return zlib.decompress(data), hash
+

+ 0 - 189
darc/keychain.py

@@ -1,189 +0,0 @@
-from __future__ import with_statement
-from getpass import getpass
-import hashlib
-import os
-import msgpack
-import zlib
-
-from pbkdf2 import pbkdf2
-from Crypto.Cipher import AES
-from Crypto.Hash import SHA256, HMAC
-from Crypto.PublicKey import RSA
-from Crypto.Util import Counter
-from Crypto.Util.number import bytes_to_long, long_to_bytes
-
-from . import PACKET_ENCRYPT_READ, PACKET_ENCRYPT_CREATE
-from .helpers import IntegrityError, zero_pad
-from .oaep import OAEP
-
-
-class Keychain(object):
-    FILE_ID = 'DARC KEYCHAIN'
-
-    CREATE = '\1'
-    READ = '\2'
-
-    def __init__(self, path=None):
-        self._key_cache = {}
-        self.read_key = os.urandom(32)
-        self.create_key = os.urandom(32)
-        self.counter = Counter.new(64, prefix='\0' * 8)
-        self.aes_id = self.rsa_read = self.rsa_create = None
-        self.path = path
-        if path:
-            self.open(path)
-
-    def get_chunkify_seed(self):
-        return bytes_to_long(self.aes_id[:4]) & 0x7fffffff
-
-    def open(self, path):
-        print 'Opening keychain "%s"' % path
-        with open(path, 'rb') as fd:
-            if fd.read(len(self.FILE_ID)) != self.FILE_ID:
-                raise ValueError('Not a keychain')
-            cdata = fd.read()
-        self.password = ''
-        data = self.decrypt_keychain(cdata, '')
-        while not data:
-            self.password = getpass('Keychain password: ')
-            if not self.password:
-                raise Exception('Keychain decryption failed')
-            data = self.decrypt_keychain(cdata, self.password)
-            if not data:
-                print 'Incorrect password'
-        chain = msgpack.unpackb(data)
-        assert chain['version'] == 1
-        self.aes_id = chain['aes_id']
-        self.rsa_read = RSA.importKey(chain['rsa_read'])
-        self.rsa_create = RSA.importKey(chain['rsa_create'])
-        self.read_encrypted = OAEP(256, hash=SHA256).encode(self.read_key, os.urandom(32))
-        self.read_encrypted = zero_pad(self.rsa_read.encrypt(self.read_encrypted, '')[0], 256)
-        self.create_encrypted = OAEP(256, hash=SHA256).encode(self.create_key, os.urandom(32))
-        self.create_encrypted = zero_pad(self.rsa_create.encrypt(self.create_encrypted, '')[0], 256)
-
-    def encrypt_keychain(self, data, password):
-        salt = os.urandom(32)
-        iterations = 2000
-        key = pbkdf2(password, salt, 32, iterations, hashlib.sha256)
-        hash = HMAC.new(key, data, SHA256).digest()
-        cdata = AES.new(key, AES.MODE_CTR, counter=Counter.new(128)).encrypt(data)
-        d = {
-            'version': 1,
-            'salt': salt,
-            'iterations': iterations,
-            'algorithm': 'SHA256',
-            'hash': hash,
-            'data': cdata,
-        }
-        return msgpack.packb(d)
-
-    def decrypt_keychain(self, data, password):
-        d = msgpack.unpackb(data)
-        assert d['version'] == 1
-        assert d['algorithm'] == 'SHA256'
-        key = pbkdf2(password, d['salt'], 32, d['iterations'], hashlib.sha256)
-        data = AES.new(key, AES.MODE_CTR, counter=Counter.new(128)).decrypt(d['data'])
-        if HMAC.new(key, data, SHA256).digest() != d['hash']:
-            return None
-        return data
-
-    def save(self, path, password):
-        chain = {
-            'version': 1,
-            'aes_id': self.aes_id,
-            'rsa_read': self.rsa_read.exportKey('PEM'),
-            'rsa_create': self.rsa_create.exportKey('PEM'),
-        }
-        data = self.encrypt_keychain(msgpack.packb(chain), password)
-        with open(path, 'wb') as fd:
-            fd.write(self.FILE_ID)
-            fd.write(data)
-            print 'Key chain "%s" saved' % path
-
-    def restrict(self, path):
-        if os.path.exists(path):
-            print '%s already exists' % path
-            return 1
-        self.rsa_read = self.rsa_read.publickey()
-        self.save(path, self.password)
-        return 0
-
-    def chpass(self):
-        password, password2 = 1, 2
-        while password != password2:
-            password = getpass('New password: ')
-            password2 = getpass('New password again: ')
-            if password != password2:
-                print 'Passwords do not match'
-        self.save(self.path, password)
-        return 0
-
-    @staticmethod
-    def generate(path):
-        if os.path.exists(path):
-            print '%s already exists' % path
-            return 1
-        password, password2 = 1, 2
-        while password != password2:
-            password = getpass('Keychain password: ')
-            password2 = getpass('Keychain password again: ')
-            if password != password2:
-                print 'Passwords do not match'
-        chain = Keychain()
-        print 'Generating keychain'
-        chain.aes_id = os.urandom(32)
-        chain.rsa_read = RSA.generate(2048)
-        chain.rsa_create = RSA.generate(2048)
-        chain.save(path, password)
-        return 0
-
-    def id_hash(self, data):
-        """Return HMAC hash using the "id" AES key
-        """
-        return HMAC.new(self.aes_id, data, SHA256).digest()
-
-    def encrypt(self, magic, data):
-        """Helper function used by `encrypt_read` and `encrypt_create`
-        """
-        data = zlib.compress(data)
-        nonce = long_to_bytes(self.counter.next_value(), 8)
-        if magic & PACKET_ENCRYPT_READ:
-            data = ''.join((nonce, self.read_encrypted,
-                            AES.new(self.read_key, AES.MODE_CTR, '',
-                                    counter=self.counter).encrypt(data)))
-        elif magic & PACKET_ENCRYPT_CREATE:
-            data = ''.join((nonce, self.create_encrypted,
-                            AES.new(self.create_key, AES.MODE_CTR, '',
-                                    counter=self.counter).encrypt(data)))
-        hash = self.id_hash(data)
-        return ''.join((chr(magic), hash, data)), hash
-
-    def _decrypt_key(self, data, rsa_key):
-        """Helper function used by `decrypt`
-        """
-        try:
-            return self._key_cache[data]
-        except KeyError:
-            self._key_cache[data] = OAEP(256, hash=SHA256).decode(rsa_key.decrypt(data))
-            return self._key_cache[data]
-
-    def decrypt(self, data):
-        """Decrypt `data` previously encrypted by `encrypt_create` or `encrypt_read`
-        """
-        magic = ord(data[0])
-        hash = data[1:33]
-        if self.id_hash(data[33:]) != hash:
-            raise IntegrityError('Encryption integrity error')
-        nonce = bytes_to_long(data[33:41])
-        counter = Counter.new(64, prefix='\0' * 8, initial_value=nonce)
-        if magic & PACKET_ENCRYPT_READ:
-            key = self._decrypt_key(data[41:297], self.rsa_read)
-        elif magic & PACKET_ENCRYPT_CREATE:
-            key = self._decrypt_key(data[41:297], self.rsa_create)
-        else:
-            raise Exception('Unknown pack magic %d found' % magic)
-        data = AES.new(key, AES.MODE_CTR, counter=counter).decrypt(data[297:])
-        return magic, zlib.decompress(data), hash
-
-
-

+ 0 - 71
darc/oaep.py

@@ -1,71 +0,0 @@
-from Crypto.Util.number import long_to_bytes
-from Crypto.Hash import SHA
-
-from .helpers import IntegrityError
-
-def _xor_bytes(a, b):
-    return ''.join(chr(ord(x[0]) ^ ord(x[1])) for x in zip(a, b))
-
-
-def MGF1(seed, mask_len, hash=SHA):
-    """MGF1 is a Mask Generation Function based on hash function
-    """
-    T = ''.join(hash.new(seed + long_to_bytes(c, 4)).digest()
-                for c in range(1 + mask_len / hash.digest_size))
-    return T[:mask_len]
-
-
-class OAEP(object):
-    """Optimal Asymmetric Encryption Padding
-    """
-    def __init__(self, k, hash=SHA, MGF=MGF1):
-        self.k = k
-        self.hash = hash
-        self.MGF = MGF
-
-    def encode(self, msg, seed, label=''):
-        # FIXME: length checks
-        if len(msg) > self.k - 2 * self.hash.digest_size - 2:
-            raise ValueError('message too long')
-        label_hash = self.hash.new(label).digest()
-        padding = '\0' * (self.k - len(msg) - 2 * self.hash.digest_size - 2)
-        datablock = '%s%s\1%s' % (label_hash, padding, msg)
-        datablock_mask = self.MGF(seed, self.k - self.hash.digest_size - 1, self.hash)
-        masked_db = _xor_bytes(datablock, datablock_mask)
-        seed_mask = self.MGF(masked_db, self.hash.digest_size, self.hash)
-        masked_seed = _xor_bytes(seed, seed_mask)
-        return '\0%s%s' % (masked_seed, masked_db)
-
-    def decode(self, ciphertext, label=''):
-        if len(ciphertext) < self.k:
-            ciphertext = ('\0' * (self.k - len(ciphertext))) + ciphertext
-        label_hash = self.hash.new(label).digest()
-        masked_seed = ciphertext[1:self.hash.digest_size + 1]
-        masked_db = ciphertext[-(self.k - self.hash.digest_size - 1):]
-        seed_mask = self.MGF(masked_db, self.hash.digest_size, self.hash)
-        seed = _xor_bytes(masked_seed, seed_mask)
-        datablock_mask = self.MGF(seed, self.k - self.hash.digest_size - 1, self.hash)
-        datablock = _xor_bytes(masked_db, datablock_mask)
-        label_hash2 = datablock[:self.hash.digest_size]
-        data = datablock[self.hash.digest_size:].lstrip('\0')
-        if (ciphertext[0] != '\0' or
-            label_hash != label_hash2 or
-            data[0] != '\1'):
-            raise IntegrityError('decryption error')
-        return data[1:]
-
-
-def test():
-    from Crypto.Hash import SHA256
-    import os
-    import random
-    oaep = OAEP(256, SHA256)
-    for x in range(1000):
-        M = os.urandom(random.randint(0, 100))
-        EM = oaep.encode(M, os.urandom(32))
-        assert len(EM) == oaep.k
-        assert oaep.decode(EM) == M
-
-if __name__ == '__main__':
-    test()
-

+ 1 - 1
darc/store.py

@@ -32,7 +32,7 @@ class Store(object):
 
     def __init__(self, path, create=False):
         self.txn_active = False
-        if not os.path.exists(path) and create:
+        if create:
             self.create(path)
         self.open(path)