Browse Source

Switch to pure AES256 encryption and improved metadata storage

Jonas Borgström cách đây 14 năm
mục cha
commit
b294ceba67
10 tập tin đã thay đổi với 401 bổ sung và 449 xóa
  1. 0 10
      darc/__init__.py
  2. 108 93
      darc/archive.py
  3. 34 45
      darc/archiver.py
  4. 42 29
      darc/cache.py
  5. 56 0
      darc/hashindex.pyx
  6. 0 11
      darc/helpers.py
  7. 160 0
      darc/key.py
  8. 0 189
      darc/keychain.py
  9. 0 71
      darc/oaep.py
  10. 1 1
      darc/store.py

+ 0 - 10
darc/__init__.py

@@ -2,13 +2,3 @@
 
 NS_CHUNK = 0
 NS_ARCHIVE_METADATA = 1
-NS_ARCHIVE_CHUNKS = 2
-NS_ARCHIVE_ITEMS = 3
-
-PACKET_ENCRYPT_READ   = 2 ** 7
-PACKET_ENCRYPT_CREATE = 2 ** 6
-PACKET_CHUNK            = 1 | PACKET_ENCRYPT_READ
-PACKET_ARCHIVE_METADATA = 2 | PACKET_ENCRYPT_READ
-PACKET_ARCHIVE_ITEMS    = 3 | PACKET_ENCRYPT_READ
-PACKET_ARCHIVE_CHUNKS   = 1 | PACKET_ENCRYPT_CREATE
-

+ 108 - 93
darc/archive.py

@@ -6,11 +6,10 @@ import os
 import socket
 import stat
 import sys
-from itertools import izip
+from os.path import dirname
 from xattr import xattr, XATTR_NOFOLLOW
 
-from . import NS_ARCHIVE_METADATA, NS_ARCHIVE_ITEMS, NS_ARCHIVE_CHUNKS, NS_CHUNK, \
-    PACKET_ARCHIVE_METADATA, PACKET_ARCHIVE_ITEMS, PACKET_ARCHIVE_CHUNKS, PACKET_CHUNK
+from . import NS_ARCHIVE_METADATA, NS_CHUNK
 from ._speedups import chunkify
 from .helpers import uid2user, user2uid, gid2group, group2gid, IntegrityError
 
@@ -26,22 +25,24 @@ class Archive(object):
     class DoesNotExist(Exception):
         pass
 
-    def __init__(self, store, keychain, name=None):
-        self.keychain = keychain
+    def __init__(self, store, key, name=None, cache=None):
+        self.key = key
         self.store = store
-        self.items = []
+        self.cache = cache
+        self.items = ''
+        self.items_refs = []
+        self.items_prefix = ''
         self.items_ids = []
         self.hard_links = {}
         if name:
-            self.load(self.keychain.id_hash(name))
+            self.load(self.key.archive_hash(name))
 
     def load(self, id):
         self.id = id
         try:
-            kind, data, self.hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_METADATA, self.id))
+            data, self.hash = self.key.decrypt(self.store.get(NS_ARCHIVE_METADATA, self.id))
         except self.store.DoesNotExist:
             raise self.DoesNotExist
-        assert kind == PACKET_ARCHIVE_METADATA
         self.metadata = msgpack.unpackb(data)
         assert self.metadata['version'] == 1
 
@@ -51,80 +52,90 @@ class Archive(object):
         t, f = self.metadata['time'].split('.', 1)
         return datetime.strptime(t, '%Y-%m-%dT%H:%M:%S') + timedelta(seconds=float('.' + f))
 
-    def get_chunks(self):
-        for id in self.metadata['chunks_ids']:
-            magic, data, hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_CHUNKS, id))
-            assert magic == PACKET_ARCHIVE_CHUNKS
-            assert hash == id
-            chunks = msgpack.unpackb(data)
-            for chunk in chunks:
-                yield chunk
-
     def get_items(self):
-        for id in self.metadata['items_ids']:
-            magic, data, items_hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_ITEMS, id))
-            assert magic == PACKET_ARCHIVE_ITEMS
-            assert items_hash == id
-            items = msgpack.unpackb(data)
-            for item in items:
+        unpacker = msgpack.Unpacker()
+        for id, size, csize in self.metadata['items']:
+            data, items_hash = self.key.decrypt(self.store.get(NS_CHUNK, id))
+            assert self.key.id_hash(data) == id
+            unpacker.feed(data)
+            for item in unpacker:
                 yield item
 
-    def add_item(self, item):
-        self.items.append(item)
-        if len(self.items) > 100000:
+    def add_item(self, item, refs=None):
+        data = msgpack.packb(item)
+        prefix = dirname(item['path'])
+        if self.items_prefix and self.items_prefix != prefix:
             self.flush_items()
+        if refs:
+            self.items_refs += refs
+        self.items += data
+        self.items_prefix = prefix
 
     def flush_items(self):
-        data, hash = self.keychain.encrypt(PACKET_ARCHIVE_ITEMS, msgpack.packb(self.items))
-        self.store.put(NS_ARCHIVE_ITEMS, hash, data)
-        self.items_ids.append(hash)
-        self.items = []
-
-    def save_chunks(self, cache):
-        chunks = []
-        ids = []
-        def flush(chunks):
-            data, hash = self.keychain.encrypt(PACKET_ARCHIVE_CHUNKS, msgpack.packb(chunks))
-            self.store.put(NS_ARCHIVE_CHUNKS, hash, data)
-            ids.append(hash)
-        for id, (count, size) in cache.chunks.iteritems():
-            if count > 1000000:
-                chunks.append((id, size))
-            if len(chunks) > 100000:
-                flush(chunks)
-                chunks = []
-        flush(chunks)
-        return ids
+        if not self.items:
+            return
+        id = self.key.id_hash(self.items)
+        if self.cache.seen_chunk(id):
+            self.items_ids.append(self.cache.chunk_incref(id))
+            for id in self.items_refs:
+                self.cache.chunk_decref(id)
+        else:
+            self.items_ids.append(self.cache.add_chunk(id, self.items))
+        self.items = ''
+        self.items_refs = []
+        self.items_prefix = ''
 
     def save(self, name, cache):
-        self.id = self.keychain.id_hash(name)
-        chunks_ids = self.save_chunks(cache)
+        self.id = self.key.archive_hash(name)
         self.flush_items()
         metadata = {
             'version': 1,
             'name': name,
-            'chunks_ids': chunks_ids,
-            'items_ids': self.items_ids,
+            'items': self.items_ids,
             'cmdline': sys.argv,
             'hostname': socket.gethostname(),
             'username': getuser(),
             'time': datetime.utcnow().isoformat(),
         }
-        data, self.hash = self.keychain.encrypt(PACKET_ARCHIVE_METADATA, msgpack.packb(metadata))
+        data, self.hash = self.key.encrypt(msgpack.packb(metadata))
         self.store.put(NS_ARCHIVE_METADATA, self.id, data)
         self.store.commit()
         cache.commit()
 
-    def stats(self, cache):
-        osize = csize = usize = 0
+    def get_chunks(self):
         for item in self.get_items():
-            if stat.S_ISREG(item['mode']) and not 'source' in item:
-                osize += item['size']
-        for id, size in self.get_chunks():
-            csize += size
-            if cache.seen_chunk(id) == 1:
-                usize += size
-        return osize, csize, usize
+            try:
+                for chunk in item['chunks']:
+                    yield chunk
+            except KeyError:
+                pass
+
+    def stats(self, cache):
+        # This function is a bit evil since it abuses the cache to calculate
+        # the stats. The cache transaction must be rolled back afterwards
+        unpacker = msgpack.Unpacker()
+        cache.begin_txn()
+        osize = zsize = usize = 0
+        for id, size, csize in self.metadata['items']:
+            osize += size
+            zsize += csize
+            unique = self.cache.seen_chunk(id) == 1
+            if unique:
+                usize += csize
+            data, items_hash = self.key.decrypt(self.store.get(NS_CHUNK, id))
+            assert self.key.id_hash(data) == id
+            unpacker.feed(data)
+            for item in unpacker:
+                try:
+                    for id, size, csize in item['chunks']:
+                        osize += size
+                        zsize += csize
+                        if unique and self.cache.seen_chunk(id) == 1:
+                            usize += csize
+                except KeyError:
+                    pass
+        cache.rollback()
+        return osize, zsize, usize
 
     def extract_item(self, item, dest=None, start_cb=None):
         dest = dest or os.getcwdu()
@@ -163,14 +174,13 @@ class Archive(object):
                     if i==0:
                         start_cb(item)
                     assert not error
-                    magic, data, hash = self.keychain.decrypt(chunk)
-                    assert magic == PACKET_CHUNK
-                    if self.keychain.id_hash(data) != id:
+                    data, hash = self.key.decrypt(chunk)
+                    if self.key.id_hash(data) != id:
                         raise IntegrityError('chunk hash did not match')
                     fd.write(data)
                     if last:
-                        self.restore_attrs(path, item)
                         fd.close()
+                        self.restore_attrs(path, item)
 
                 fd = open(path, 'wb')
                 n = len(item['chunks'])
@@ -179,7 +189,7 @@ class Archive(object):
                     self.restore_attrs(path, item)
                     fd.close()
                 else:
-                    for i, id in enumerate(item['chunks']):
+                    for i, (id, size, csize) in enumerate(item['chunks']):
                         self.store.get(NS_CHUNK, id, callback=extract_cb, callback_data=(id, i, i==n-1))
 
         else:
@@ -206,16 +216,15 @@ class Archive(object):
             pass
         if not symlink:
             # FIXME: We should really call futimes here (c extension required)
-            os.utime(path, (item['atime'], item['mtime']))
+            os.utime(path, (item['mtime'], item['mtime']))
 
     def verify_file(self, item, start, result):
         def verify_chunk(chunk, error, (id, i, last)):
             if i == 0:
                 start(item)
             assert not error
-            magic, data, hash = self.keychain.decrypt(chunk)
-            assert magic == PACKET_CHUNK
-            if self.keychain.id_hash(data) != id:
+            data, hash = self.key.decrypt(chunk)
+            if self.key.id_hash(data) != id:
                 result(item, False)
             elif last:
                 result(item, True)
@@ -224,17 +233,24 @@ class Archive(object):
             start(item)
             result(item, True)
         else:
-            for i, id in enumerate(item['chunks']):
+            for i, (id, size, csize) in enumerate(item['chunks']):
                 self.store.get(NS_CHUNK, id, callback=verify_chunk, callback_data=(id, i, i==n-1))
 
     def delete(self, cache):
-        for id, size in self.get_chunks():
-            cache.chunk_decref(id)
+        unpacker = msgpack.Unpacker()
+        for id, size, csize in self.metadata['items']:
+            if self.cache.seen_chunk(id) == 1:
+                data, items_hash = self.key.decrypt(self.store.get(NS_CHUNK, id))
+                assert self.key.id_hash(data) == id
+                unpacker.feed(data)
+                for item in unpacker:
+                    try:
+                        for chunk_id, size, csize in item['chunks']:
+                            self.cache.chunk_decref(chunk_id)
+                    except KeyError:
+                        pass
+            self.cache.chunk_decref(id)
         self.store.delete(NS_ARCHIVE_METADATA, self.id)
-        for id in self.metadata['chunks_ids']:
-            self.store.delete(NS_ARCHIVE_CHUNKS, id)
-        for id in self.metadata['items_ids']:
-            self.store.delete(NS_ARCHIVE_ITEMS, id)
         self.store.commit()
         cache.commit()
 
@@ -243,7 +259,7 @@ class Archive(object):
             'mode': st.st_mode,
             'uid': st.st_uid, 'user': uid2user(st.st_uid),
             'gid': st.st_gid, 'group': gid2group(st.st_gid),
-            'atime': st.st_atime, 'mtime': st.st_mtime,
+            'mtime': st.st_mtime,
         }
         try:
             xa = xattr(path, XATTR_NOFOLLOW)
@@ -287,34 +303,33 @@ class Archive(object):
                 return
             else:
                 self.hard_links[st.st_ino, st.st_dev] = safe_path
-        path_hash = self.keychain.id_hash(path.encode('utf-8'))
-        ids, size = cache.file_known_and_unchanged(path_hash, st)
+        path_hash = self.key.id_hash(path.encode('utf-8'))
+        ids = cache.file_known_and_unchanged(path_hash, st)
+        chunks = None
         if ids is not None:
             # Make sure all ids are available
             for id in ids:
                 if not cache.seen_chunk(id):
-                    ids = None
                     break
             else:
-                for id in ids:
-                    cache.chunk_incref(id)
+                chunks = [cache.chunk_incref(id) for id in ids]
         # Only chunkify the file if needed
-        if ids is None:
+        if chunks is None:
             with open(path, 'rb') as fd:
-                size = 0
-                ids = []
+                chunks = []
                 for chunk in chunkify(fd, CHUNK_SIZE, WINDOW_SIZE,
-                                      self.keychain.get_chunkify_seed()):
-                    ids.append(cache.add_chunk(self.keychain.id_hash(chunk), chunk))
-                    size += len(chunk)
+                                      self.key.chunk_seed):
+                    chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk))
+            ids = [id for id, _, _ in chunks]
             cache.memorize_file(path_hash, st, ids)
-        item = {'path': safe_path, 'chunks': ids, 'size': size}
+        item = {'path': safe_path, 'chunks': chunks}
         item.update(self.stat_attrs(st, path))
-        self.add_item(item)
+        self.add_item(item, ids)
 
     @staticmethod
-    def list_archives(store, keychain):
+    def list_archives(store, key):
         for id in list(store.list(NS_ARCHIVE_METADATA)):
-            archive = Archive(store, keychain)
+            archive = Archive(store, key)
             archive.load(id)
             yield archive
+

+ 34 - 45
darc/archiver.py

@@ -8,7 +8,7 @@ import sys
 from .archive import Archive
 from .store import Store
 from .cache import Cache
-from .keychain import Keychain
+from .key import Key
 from .helpers import location_validator, format_file_size, format_time,\
     format_file_mode, IncludePattern, ExcludePattern, exclude_path, to_localtime
 from .remote import StoreServer, RemoteStore
@@ -44,18 +44,22 @@ class Archiver(object):
     def do_serve(self, args):
         return StoreServer().serve()
 
+    def do_init(self, args):
+        store = self.open_store(args.store, create=True)
+        key = Key.create(store)
+
     def do_create(self, args):
-        store = self.open_store(args.archive, create=True)
-        keychain = Keychain(args.keychain)
+        store = self.open_store(args.archive)
+        key = Key(store)
         try:
-            Archive(store, keychain, args.archive.archive)
+            Archive(store, key, args.archive.archive)
         except Archive.DoesNotExist:
             pass
         else:
             self.print_error('Archive already exists')
             return self.exit_code
-        archive = Archive(store, keychain)
-        cache = Cache(store, keychain)
+        cache = Cache(store, key)
+        archive = Archive(store, key, cache=cache)
         # Add darc cache dir to inode_skip list
         skip_inodes = set()
         try:
@@ -112,8 +116,8 @@ class Archiver(object):
         def start_cb(item):
             self.print_verbose(item['path'].decode('utf-8'))
         store = self.open_store(args.archive)
-        keychain = Keychain(args.keychain)
-        archive = Archive(store, keychain, args.archive.archive)
+        key = Key(store)
+        archive = Archive(store, key, args.archive.archive)
         dirs = []
         for item in archive.get_items():
             if exclude_path(item['path'], args.patterns):
@@ -131,22 +135,24 @@ class Archiver(object):
 
     def do_delete(self, args):
         store = self.open_store(args.archive)
-        keychain = Keychain(args.keychain)
-        archive = Archive(store, keychain, args.archive.archive)
-        cache = Cache(store, keychain)
+        key = Key(store)
+        cache = Cache(store, key)
+        archive = Archive(store, key, args.archive.archive, cache=cache)
         archive.delete(cache)
         return self.exit_code
 
     def do_list(self, args):
         store = self.open_store(args.src)
-        keychain = Keychain(args.keychain)
+        key = Key(store)
         if args.src.archive:
             tmap = {1: 'p', 2: 'c', 4: 'd', 6: 'b', 010: '-', 012: 'l', 014: 's'}
-            archive = Archive(store, keychain, args.src.archive)
+            archive = Archive(store, key, args.src.archive)
             for item in archive.get_items():
                 type = tmap.get(item['mode'] / 4096, '?')
                 mode = format_file_mode(item['mode'])
-                size = item.get('size', 0)
+                size = 0
+                if type == '-':
+                    size = sum(size for _, size, _ in item['chunks'])
                 mtime = format_time(datetime.fromtimestamp(item['mtime']))
                 if 'source' in item:
                     if type == 'l':
@@ -160,14 +166,14 @@ class Archiver(object):
                                                   item['group'], size, mtime,
                                                   item['path'], extra)
         else:
-            for archive in sorted(Archive.list_archives(store, keychain), key=attrgetter('ts')):
+            for archive in sorted(Archive.list_archives(store, key), key=attrgetter('ts')):
                 print '%-20s %s' % (archive.metadata['name'], to_localtime(archive.ts).strftime('%c'))
         return self.exit_code
 
     def do_verify(self, args):
         store = self.open_store(args.archive)
-        keychain = Keychain(args.keychain)
-        archive = Archive(store, keychain, args.archive.archive)
+        key = Key(store)
+        archive = Archive(store, key, args.archive.archive)
         def start_cb(item):
             self.print_verbose('%s ...', item['path'].decode('utf-8'), newline=False)
         def result_cb(item, success):
@@ -187,9 +193,9 @@ class Archiver(object):
 
     def do_info(self, args):
         store = self.open_store(args.archive)
-        keychain = Keychain(args.keychain)
-        archive = Archive(store, keychain, args.archive.archive)
-        cache = Cache(store, keychain)
+        key = Key(store)
+        cache = Cache(store, key)
+        archive = Archive(store, key, args.archive.archive, cache=cache)
         osize, csize, usize = archive.stats(cache)
         print 'Name:', archive.metadata['name']
         print 'Hostname:', archive.metadata['hostname']
@@ -201,45 +207,28 @@ class Archiver(object):
         print 'Unique data:', format_file_size(usize)
         return self.exit_code
 
-    def do_init_keychain(self, args):
-        return Keychain.generate(args.keychain)
-
-    def do_export_restricted(self, args):
-        keychain = Keychain(args.keychain)
-        keychain.restrict(args.output)
-        return self.exit_code
-
-    def do_keychain_chpass(self, args):
-        return Keychain(args.keychain).chpass()
-
     def run(self, args=None):
         dot_path = os.path.join(os.path.expanduser('~'), '.darc')
         if not os.path.exists(dot_path):
             os.mkdir(dot_path)
-        default_keychain = os.path.join(os.path.expanduser('~'),
-                                        '.darc', 'keychain')
+            os.mkdir(os.path.join(dot_path, 'keys'))
+            os.mkdir(os.path.join(dot_path, 'cache'))
         parser = argparse.ArgumentParser(description='DARC - Deduplicating Archiver')
-        parser.add_argument('-k', '--keychain', dest='keychain', type=str,
-                            default=default_keychain,
-                            help='Keychain to use')
         parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
                             default=False,
                             help='Verbose output')
 
-
         subparsers = parser.add_subparsers(title='Available subcommands')
-        subparser = subparsers.add_parser('init-keychain')
-        subparser.set_defaults(func=self.do_init_keychain)
-        subparser = subparsers.add_parser('export-restricted')
-        subparser.add_argument('output', metavar='OUTPUT', type=str,
-                               help='Keychain to create')
-        subparser.set_defaults(func=self.do_export_restricted)
-        subparser = subparsers.add_parser('change-password')
-        subparser.set_defaults(func=self.do_keychain_chpass)
 
         subparser = subparsers.add_parser('serve')
         subparser.set_defaults(func=self.do_serve)
 
+        subparser = subparsers.add_parser('init')
+        subparser.set_defaults(func=self.do_init)
+        subparser.add_argument('store', metavar='ARCHIVE',
+                               type=location_validator(archive=False),
+                               help='Store to create')
+
         subparser = subparsers.add_parser('create')
         subparser.set_defaults(func=self.do_create)
         subparser.add_argument('-i', '--include', dest='patterns',

+ 42 - 29
darc/cache.py

@@ -5,19 +5,19 @@ import msgpack
 import os
 import shutil
 
-from . import NS_ARCHIVE_CHUNKS, NS_CHUNK, PACKET_ARCHIVE_CHUNKS, PACKET_CHUNK
+from . import NS_CHUNK, NS_ARCHIVE_METADATA
 from .helpers import error_callback
-from .hashindex import NSIndex
+from .hashindex import ChunkIndex
 
 
 class Cache(object):
     """Client Side cache
     """
 
-    def __init__(self, store, keychain):
+    def __init__(self, store, key):
         self.txn_active = False
         self.store = store
-        self.keychain = keychain
+        self.key = key
         self.path = os.path.join(Cache.cache_dir_path(), self.store.id.encode('hex'))
         if not os.path.exists(self.path):
             self.create()
@@ -25,6 +25,7 @@ class Cache(object):
         assert self.id == store.id
         if self.tid != store.tid:
             self.sync()
+            self.commit()
 
     @staticmethod
     def cache_dir_path():
@@ -44,7 +45,7 @@ class Cache(object):
         config.set('cache', 'tid', '0')
         with open(os.path.join(self.path, 'config'), 'wb') as fd:
             config.write(fd)
-        NSIndex.create(os.path.join(self.path, 'chunks'))
+        ChunkIndex.create(os.path.join(self.path, 'chunks'))
         with open(os.path.join(self.path, 'files'), 'wb') as fd:
             pass # empty file
 
@@ -60,7 +61,7 @@ class Cache(object):
             raise Exception('%s Does not look like a darc cache')
         self.id = self.config.get('cache', 'store_id').decode('hex')
         self.tid = self.config.getint('cache', 'tid')
-        self.chunks = NSIndex(os.path.join(self.path, 'chunks'))
+        self.chunks = ChunkIndex(os.path.join(self.path, 'chunks'))
         self.files = None
 
     def _read_files(self):
@@ -96,9 +97,6 @@ class Cache(object):
             with open(os.path.join(self.path, 'files'), 'wb') as fd:
                 for item in self.files.iteritems():
                     msgpack.pack(item, fd)
-        for id, (count, size) in self.chunks.iteritems():
-            if count > 1000000:
-                self.chunks[id] = count - 1000000, size
         self.config.set('cache', 'tid', self.store.tid)
         with open(os.path.join(self.path, 'config'), 'w') as fd:
             self.config.write(fd)
@@ -129,48 +127,63 @@ class Cache(object):
         self.begin_txn()
         print 'Initializing cache...'
         self.chunks.clear()
-        for id in self.store.list(NS_ARCHIVE_CHUNKS):
-            magic, data, hash = self.keychain.decrypt(self.store.get(NS_ARCHIVE_CHUNKS, id))
-            assert magic == PACKET_ARCHIVE_CHUNKS
-            chunks = msgpack.unpackb(data)
-            for id, size in chunks:
+        unpacker = msgpack.Unpacker()
+        for id in self.store.list(NS_ARCHIVE_METADATA):
+            data, hash = self.key.decrypt(self.store.get(NS_ARCHIVE_METADATA, id))
+            archive = msgpack.unpackb(data)
+            print 'Analyzing archive:', archive['name']
+            for id, size, csize in archive['items']:
+                data, hash = self.key.decrypt(self.store.get(NS_CHUNK, id))
+                assert self.key.id_hash(data) == id
                 try:
-                    count, size = self.chunks[id]
-                    self.chunks[id] = count + 1, size
+                    count, size, csize = self.chunks[id]
+                    self.chunks[id] = count + 1, size, csize
                 except KeyError:
-                    self.chunks[id] = 1, size
+                    self.chunks[id] = 1, size, csize
+                    unpacker.feed(data)
+                    for item in unpacker:
+                        try:
+                            for id, size, csize in item['chunks']:
+                                try:
+                                    count, size, csize = self.chunks[id]
+                                    self.chunks[id] = count + 1, size, csize
+                                except KeyError:
+                                    self.chunks[id] = 1, size, csize
+                                pass
+                        except KeyError:
+                            pass
 
     def add_chunk(self, id, data):
         if not self.txn_active:
             self.begin_txn()
         if self.seen_chunk(id):
             return self.chunk_incref(id)
-        data, hash = self.keychain.encrypt(PACKET_CHUNK, data)
+        size = len(data)
+        data, hash = self.key.encrypt(data)
         csize = len(data)
         self.store.put(NS_CHUNK, id, data, callback=error_callback)
-        self.chunks[id] = (1000001, csize)
-        return id
+        self.chunks[id] = (1, size, csize)
+        return id, size, csize
 
     def seen_chunk(self, id):
-        return self.chunks.get(id, (0, 0))[0]
+        return self.chunks.get(id, (0, 0, 0))[0]
 
     def chunk_incref(self, id):
         if not self.txn_active:
             self.begin_txn()
-        count, size = self.chunks[id]
-        if count < 1000000:
-            self.chunks[id] = (count + 1000001, size)
-        return id
+        count, size, csize = self.chunks[id]
+        self.chunks[id] = (count + 1, size, csize)
+        return id, size, csize
 
     def chunk_decref(self, id):
         if not self.txn_active:
             self.begin_txn()
-        count, size = self.chunks[id]
+        count, size, csize = self.chunks[id]
         if count == 1:
             del self.chunks[id]
             self.store.delete(NS_CHUNK, id, callback=error_callback)
         else:
-            self.chunks[id] = (count - 1, size)
+            self.chunks[id] = (count - 1, size, csize)
 
     def file_known_and_unchanged(self, path_hash, st):
         if self.files is None:
@@ -180,9 +193,9 @@ class Cache(object):
             and entry[2] == st.st_size and entry[1] == st.st_ino):
             # reset entry age
             self.files[path_hash] = (0,) + entry[1:]
-            return entry[4], entry[2]
+            return entry[4]
         else:
-            return None, 0
+            return None
 
     def memorize_file(self, path_hash, st, ids):
         # Entry: Age, inode, size, mtime, chunk ids

+ 56 - 0
darc/hashindex.pyx

@@ -113,6 +113,62 @@ cdef class NSKeyIterator:
         return self.key[:32], (value[0], value[1])
 
 
+cdef class ChunkIndex(IndexBase):
+
+    @classmethod
+    def create(cls, path, capacity=16):
+        index = hashindex_create(path, capacity, 32, 12)
+        hashindex_close(index)
+        return cls(path)
+
+    def __getitem__(self, key):
+        assert len(key) == 32
+        data = <int *>hashindex_get(self.index, <char *>key)
+        if not data:
+            raise KeyError
+        return data[0], data[1], data[2]
+
+    def __delitem__(self, key):
+        assert len(key) == 32
+        hashindex_delete(self.index, <char *>key)
+
+    def __setitem__(self, key, value):
+        assert len(key) == 32
+        cdef int[3] data
+        data[0] = value[0]
+        data[1] = value[1]
+        data[2] = value[2]
+        hashindex_set(self.index, <char *>key, data)
+
+    def __contains__(self, key):
+        assert len(key) == 32
+        data = <int *>hashindex_get(self.index, <char *>key)
+        return data != NULL
+
+    def iteritems(self, marker=None, limit=0):
+        iter = ChunkKeyIterator()
+        iter.index = self.index
+        return iter
+
+
+cdef class ChunkKeyIterator:
+    cdef HashIndex *index
+    cdef char *key
+
+    def __cinit__(self):
+        self.key = NULL
+
+    def __iter__(self):
+        return self
+
+    def __next__(self):
+        self.key = <char *>hashindex_next_key(self.index, <char *>self.key)
+        if not self.key:
+            raise StopIteration
+        cdef int *value = <int *>(self.key + 32)
+        return self.key[:32], (value[0], value[1], value[2])
+
+
 cdef class BandIndex(IndexBase):
 
     @classmethod

+ 0 - 11
darc/helpers.py

@@ -70,17 +70,6 @@ def decode_long(bytes):
             return v + (b << base)
 
 
-def zero_pad(data, length):
-    """Make sure data is `length` bytes long by prepending zero bytes
-
-    >>> zero_pad('foo', 5)
-    '\\x00\\x00foo'
-    >>> zero_pad('foo', 3)
-    'foo'
-    """
-    return '\0' * (length - len(data)) + data
-
-
 def exclude_path(path, patterns):
     """Used by create and extract sub-commands to determine
     if an item should be processed or not

+ 160 - 0
darc/key.py

@@ -0,0 +1,160 @@
+from __future__ import with_statement
+from getpass import getpass
+import hashlib
+import os
+import msgpack
+import zlib
+
+from pbkdf2 import pbkdf2
+from Crypto.Cipher import AES
+from Crypto.Hash import SHA256, HMAC
+from Crypto.Util import Counter
+from Crypto.Util.number import bytes_to_long, long_to_bytes
+from Crypto.Random import get_random_bytes
+
+from .helpers import IntegrityError
+
+
+class Key(object):
+    FILE_ID = 'DARC KEY'
+
+    def __init__(self, store=None):
+        if store:
+            self.open(store)
+
+    def open(self, store):
+        path = os.path.join(os.path.expanduser('~'),
+                            '.darc', 'keys', store.id.encode('hex'))
+        with open(path, 'rb') as fd:
+            lines = fd.readlines()
+            if not lines[0].startswith(self.FILE_ID) != self.FILE_ID:
+                raise ValueError('Not a DARC key file')
+            self.store_id = lines[0][len(self.FILE_ID):].strip().decode('hex')
+            cdata = (''.join(lines[1:])).decode('base64')
+        self.password = ''
+        data = self.decrypt_key_file(cdata, '')
+        while not data:
+            self.password = getpass('Key password: ')
+            if not self.password:
+                raise Exception('Key decryption failed')
+            data = self.decrypt_key_file(cdata, self.password)
+            if not data:
+                print 'Incorrect password'
+        key = msgpack.unpackb(data)
+        assert key['version'] == 1
+        self.store_id = key['store_id']
+        self.enc_key = key['enc_key']
+        self.enc_hmac_key = key['enc_hmac_key']
+        self.id_key = key['id_key']
+        self.archive_key = key['archive_key']
+        self.chunk_seed = key['chunk_seed']
+        self.counter = Counter.new(128, initial_value=bytes_to_long(os.urandom(16)), allow_wraparound=True)
+
+    def encrypt_key_file(self, data, password):
+        salt = get_random_bytes(32)
+        iterations = 2000
+        key = pbkdf2(password, salt, 32, iterations, hashlib.sha256)
+        hash = HMAC.new(key, data, SHA256).digest()
+        cdata = AES.new(key, AES.MODE_CTR, counter=Counter.new(128)).encrypt(data)
+        d = {
+            'version': 1,
+            'salt': salt,
+            'iterations': iterations,
+            'algorithm': 'SHA256',
+            'hash': hash,
+            'data': cdata,
+        }
+        return msgpack.packb(d)
+
+    def decrypt_key_file(self, data, password):
+        d = msgpack.unpackb(data)
+        assert d['version'] == 1
+        assert d['algorithm'] == 'SHA256'
+        key = pbkdf2(password, d['salt'], 32, d['iterations'], hashlib.sha256)
+        data = AES.new(key, AES.MODE_CTR, counter=Counter.new(128)).decrypt(d['data'])
+        if HMAC.new(key, data, SHA256).digest() != d['hash']:
+            return None
+        return data
+
+    def save(self, path, password):
+        key = {
+            'version': 1,
+            'store_id': self.store_id,
+            'enc_key': self.enc_key,
+            'enc_hmac_key': self.enc_hmac_key,
+            'id_key': self.enc_key,
+            'archive_key': self.enc_key,
+            'chunk_seed': self.chunk_seed,
+        }
+        data = self.encrypt_key_file(msgpack.packb(key), password)
+        with open(path, 'wb') as fd:
+            fd.write('%s %s\n' % (self.FILE_ID, self.store_id.encode('hex')))
+            fd.write(data.encode('base64'))
+            print 'Key chain "%s" created' % path
+
+    def chpass(self):
+        password, password2 = 1, 2
+        while password != password2:
+            password = getpass('New password: ')
+            password2 = getpass('New password again: ')
+            if password != password2:
+                print 'Passwords do not match'
+        self.save(self.path, password)
+        return 0
+
+    @staticmethod
+    def create(store):
+        path = os.path.join(os.path.expanduser('~'),
+                            '.darc', 'keys', store.id.encode('hex'))
+        if os.path.exists(path):
+            print '%s already exists' % path
+            return 1
+        password, password2 = 1, 2
+        while password != password2:
+            password = getpass('Keychain password: ')
+            password2 = getpass('Keychain password again: ')
+            if password != password2:
+                print 'Passwords do not match'
+        key = Key()
+        key.store_id = store.id
+        # Chunk AES256 encryption key
+        key.enc_key = get_random_bytes(32)
+        # Chunk encryption HMAC key
+        key.enc_hmac_key = get_random_bytes(32)
+        # Chunk id HMAC key
+        key.id_key = get_random_bytes(32)
+        # Archive name HMAC key
+        key.archive_key = get_random_bytes(32)
+        # Chunkifier seed
+        key.chunk_seed = bytes_to_long(get_random_bytes(4)) & 0x7fffffff
+        key.save(path, password)
+        return 0
+
+    def id_hash(self, data):
+        """Return HMAC hash using the "id" HMAC key
+        """
+        return HMAC.new(self.id_key, data, SHA256).digest()
+
+    def archive_hash(self, data):
+        """Return HMAC hash using the "archive" HMAC key
+        """
+        return HMAC.new(self.archive_key, data, SHA256).digest()
+
+    def encrypt(self, data):
+        data = zlib.compress(data)
+        nonce = long_to_bytes(self.counter.next_value(), 16)
+        data = ''.join((nonce, AES.new(self.enc_key, AES.MODE_CTR, '',
+                                       counter=self.counter).encrypt(data)))
+        hash = HMAC.new(self.enc_hmac_key, data, SHA256).digest()
+        return ''.join(('\0', hash, data)), hash
+
+    def decrypt(self, data):
+        assert data[0] == '\0'
+        hash = data[1:33]
+        if HMAC.new(self.enc_hmac_key, data[33:], SHA256).digest() != hash:
+            raise IntegrityError('Encryption integrity error')
+        nonce = bytes_to_long(data[33:49])
+        counter = Counter.new(128, initial_value=nonce, allow_wraparound=True)
+        data = AES.new(self.enc_key, AES.MODE_CTR, counter=counter).decrypt(data[49:])
+        return zlib.decompress(data), hash
+

+ 0 - 189
darc/keychain.py

@@ -1,189 +0,0 @@
-from __future__ import with_statement
-from getpass import getpass
-import hashlib
-import os
-import msgpack
-import zlib
-
-from pbkdf2 import pbkdf2
-from Crypto.Cipher import AES
-from Crypto.Hash import SHA256, HMAC
-from Crypto.PublicKey import RSA
-from Crypto.Util import Counter
-from Crypto.Util.number import bytes_to_long, long_to_bytes
-
-from . import PACKET_ENCRYPT_READ, PACKET_ENCRYPT_CREATE
-from .helpers import IntegrityError, zero_pad
-from .oaep import OAEP
-
-
-class Keychain(object):
-    FILE_ID = 'DARC KEYCHAIN'
-
-    CREATE = '\1'
-    READ = '\2'
-
-    def __init__(self, path=None):
-        self._key_cache = {}
-        self.read_key = os.urandom(32)
-        self.create_key = os.urandom(32)
-        self.counter = Counter.new(64, prefix='\0' * 8)
-        self.aes_id = self.rsa_read = self.rsa_create = None
-        self.path = path
-        if path:
-            self.open(path)
-
-    def get_chunkify_seed(self):
-        return bytes_to_long(self.aes_id[:4]) & 0x7fffffff
-
-    def open(self, path):
-        print 'Opening keychain "%s"' % path
-        with open(path, 'rb') as fd:
-            if fd.read(len(self.FILE_ID)) != self.FILE_ID:
-                raise ValueError('Not a keychain')
-            cdata = fd.read()
-        self.password = ''
-        data = self.decrypt_keychain(cdata, '')
-        while not data:
-            self.password = getpass('Keychain password: ')
-            if not self.password:
-                raise Exception('Keychain decryption failed')
-            data = self.decrypt_keychain(cdata, self.password)
-            if not data:
-                print 'Incorrect password'
-        chain = msgpack.unpackb(data)
-        assert chain['version'] == 1
-        self.aes_id = chain['aes_id']
-        self.rsa_read = RSA.importKey(chain['rsa_read'])
-        self.rsa_create = RSA.importKey(chain['rsa_create'])
-        self.read_encrypted = OAEP(256, hash=SHA256).encode(self.read_key, os.urandom(32))
-        self.read_encrypted = zero_pad(self.rsa_read.encrypt(self.read_encrypted, '')[0], 256)
-        self.create_encrypted = OAEP(256, hash=SHA256).encode(self.create_key, os.urandom(32))
-        self.create_encrypted = zero_pad(self.rsa_create.encrypt(self.create_encrypted, '')[0], 256)
-
-    def encrypt_keychain(self, data, password):
-        salt = os.urandom(32)
-        iterations = 2000
-        key = pbkdf2(password, salt, 32, iterations, hashlib.sha256)
-        hash = HMAC.new(key, data, SHA256).digest()
-        cdata = AES.new(key, AES.MODE_CTR, counter=Counter.new(128)).encrypt(data)
-        d = {
-            'version': 1,
-            'salt': salt,
-            'iterations': iterations,
-            'algorithm': 'SHA256',
-            'hash': hash,
-            'data': cdata,
-        }
-        return msgpack.packb(d)
-
-    def decrypt_keychain(self, data, password):
-        d = msgpack.unpackb(data)
-        assert d['version'] == 1
-        assert d['algorithm'] == 'SHA256'
-        key = pbkdf2(password, d['salt'], 32, d['iterations'], hashlib.sha256)
-        data = AES.new(key, AES.MODE_CTR, counter=Counter.new(128)).decrypt(d['data'])
-        if HMAC.new(key, data, SHA256).digest() != d['hash']:
-            return None
-        return data
-
-    def save(self, path, password):
-        chain = {
-            'version': 1,
-            'aes_id': self.aes_id,
-            'rsa_read': self.rsa_read.exportKey('PEM'),
-            'rsa_create': self.rsa_create.exportKey('PEM'),
-        }
-        data = self.encrypt_keychain(msgpack.packb(chain), password)
-        with open(path, 'wb') as fd:
-            fd.write(self.FILE_ID)
-            fd.write(data)
-            print 'Key chain "%s" saved' % path
-
-    def restrict(self, path):
-        if os.path.exists(path):
-            print '%s already exists' % path
-            return 1
-        self.rsa_read = self.rsa_read.publickey()
-        self.save(path, self.password)
-        return 0
-
-    def chpass(self):
-        password, password2 = 1, 2
-        while password != password2:
-            password = getpass('New password: ')
-            password2 = getpass('New password again: ')
-            if password != password2:
-                print 'Passwords do not match'
-        self.save(self.path, password)
-        return 0
-
-    @staticmethod
-    def generate(path):
-        if os.path.exists(path):
-            print '%s already exists' % path
-            return 1
-        password, password2 = 1, 2
-        while password != password2:
-            password = getpass('Keychain password: ')
-            password2 = getpass('Keychain password again: ')
-            if password != password2:
-                print 'Passwords do not match'
-        chain = Keychain()
-        print 'Generating keychain'
-        chain.aes_id = os.urandom(32)
-        chain.rsa_read = RSA.generate(2048)
-        chain.rsa_create = RSA.generate(2048)
-        chain.save(path, password)
-        return 0
-
-    def id_hash(self, data):
-        """Return HMAC hash using the "id" AES key
-        """
-        return HMAC.new(self.aes_id, data, SHA256).digest()
-
-    def encrypt(self, magic, data):
-        """Helper function used by `encrypt_read` and `encrypt_create`
-        """
-        data = zlib.compress(data)
-        nonce = long_to_bytes(self.counter.next_value(), 8)
-        if magic & PACKET_ENCRYPT_READ:
-            data = ''.join((nonce, self.read_encrypted,
-                            AES.new(self.read_key, AES.MODE_CTR, '',
-                                    counter=self.counter).encrypt(data)))
-        elif magic & PACKET_ENCRYPT_CREATE:
-            data = ''.join((nonce, self.create_encrypted,
-                            AES.new(self.create_key, AES.MODE_CTR, '',
-                                    counter=self.counter).encrypt(data)))
-        hash = self.id_hash(data)
-        return ''.join((chr(magic), hash, data)), hash
-
-    def _decrypt_key(self, data, rsa_key):
-        """Helper function used by `decrypt`
-        """
-        try:
-            return self._key_cache[data]
-        except KeyError:
-            self._key_cache[data] = OAEP(256, hash=SHA256).decode(rsa_key.decrypt(data))
-            return self._key_cache[data]
-
-    def decrypt(self, data):
-        """Decrypt `data` previously encrypted by `encrypt_create` or `encrypt_read`
-        """
-        magic = ord(data[0])
-        hash = data[1:33]
-        if self.id_hash(data[33:]) != hash:
-            raise IntegrityError('Encryption integrity error')
-        nonce = bytes_to_long(data[33:41])
-        counter = Counter.new(64, prefix='\0' * 8, initial_value=nonce)
-        if magic & PACKET_ENCRYPT_READ:
-            key = self._decrypt_key(data[41:297], self.rsa_read)
-        elif magic & PACKET_ENCRYPT_CREATE:
-            key = self._decrypt_key(data[41:297], self.rsa_create)
-        else:
-            raise Exception('Unknown pack magic %d found' % magic)
-        data = AES.new(key, AES.MODE_CTR, counter=counter).decrypt(data[297:])
-        return magic, zlib.decompress(data), hash
-
-
-

+ 0 - 71
darc/oaep.py

@@ -1,71 +0,0 @@
-from Crypto.Util.number import long_to_bytes
-from Crypto.Hash import SHA
-
-from .helpers import IntegrityError
-
-def _xor_bytes(a, b):
-    return ''.join(chr(ord(x[0]) ^ ord(x[1])) for x in zip(a, b))
-
-
-def MGF1(seed, mask_len, hash=SHA):
-    """MGF1 is a Mask Generation Function based on hash function
-    """
-    T = ''.join(hash.new(seed + long_to_bytes(c, 4)).digest()
-                for c in range(1 + mask_len / hash.digest_size))
-    return T[:mask_len]
-
-
-class OAEP(object):
-    """Optimal Asymmetric Encryption Padding
-    """
-    def __init__(self, k, hash=SHA, MGF=MGF1):
-        self.k = k
-        self.hash = hash
-        self.MGF = MGF
-
-    def encode(self, msg, seed, label=''):
-        # FIXME: length checks
-        if len(msg) > self.k - 2 * self.hash.digest_size - 2:
-            raise ValueError('message too long')
-        label_hash = self.hash.new(label).digest()
-        padding = '\0' * (self.k - len(msg) - 2 * self.hash.digest_size - 2)
-        datablock = '%s%s\1%s' % (label_hash, padding, msg)
-        datablock_mask = self.MGF(seed, self.k - self.hash.digest_size - 1, self.hash)
-        masked_db = _xor_bytes(datablock, datablock_mask)
-        seed_mask = self.MGF(masked_db, self.hash.digest_size, self.hash)
-        masked_seed = _xor_bytes(seed, seed_mask)
-        return '\0%s%s' % (masked_seed, masked_db)
-
-    def decode(self, ciphertext, label=''):
-        if len(ciphertext) < self.k:
-            ciphertext = ('\0' * (self.k - len(ciphertext))) + ciphertext
-        label_hash = self.hash.new(label).digest()
-        masked_seed = ciphertext[1:self.hash.digest_size + 1]
-        masked_db = ciphertext[-(self.k - self.hash.digest_size - 1):]
-        seed_mask = self.MGF(masked_db, self.hash.digest_size, self.hash)
-        seed = _xor_bytes(masked_seed, seed_mask)
-        datablock_mask = self.MGF(seed, self.k - self.hash.digest_size - 1, self.hash)
-        datablock = _xor_bytes(masked_db, datablock_mask)
-        label_hash2 = datablock[:self.hash.digest_size]
-        data = datablock[self.hash.digest_size:].lstrip('\0')
-        if (ciphertext[0] != '\0' or
-            label_hash != label_hash2 or
-            data[0] != '\1'):
-            raise IntegrityError('decryption error')
-        return data[1:]
-
-
-def test():
-    from Crypto.Hash import SHA256
-    import os
-    import random
-    oaep = OAEP(256, SHA256)
-    for x in range(1000):
-        M = os.urandom(random.randint(0, 100))
-        EM = oaep.encode(M, os.urandom(32))
-        assert len(EM) == oaep.k
-        assert oaep.decode(EM) == M
-
-if __name__ == '__main__':
-    test()
-

+ 1 - 1
darc/store.py

@@ -32,7 +32,7 @@ class Store(object):
 
     def __init__(self, path, create=False):
         self.txn_active = False
-        if not os.path.exists(path) and create:
+        if create:
             self.create(path)
         self.open(path)