Procházet zdrojové kódy

Initial AES encryption support

Jonas Borgström před 14 roky
rodič
revize
1ab4ebf2cc
Změněno 5 souborů: 107 přidání a 47 odebrání
  1. dedupestore/archive.py (+14 -13)
  2. dedupestore/archiver.py (+3 -3)
  3. dedupestore/cache.py (+8 -13)
  4. dedupestore/crypt.py (+82 -0)
  5. dedupestore/helpers.py (+0 -18)

+ 14 - 13
dedupestore/archive.py

@@ -1,14 +1,13 @@
 from datetime import datetime
-import hashlib
 import logging
 import os
 import stat
 import sys
-import zlib
 
 from .cache import NS_ARCHIVES, NS_CHUNKS, NS_CINDEX
 from .chunkifier import chunkify
-from .helpers import uid2user, user2uid, gid2group, group2gid, pack, unpack
+from .crypt import CryptoManager
+from .helpers import uid2user, user2uid, gid2group, group2gid
 
 CHUNK_SIZE = 55001
 
@@ -16,29 +15,30 @@ CHUNK_SIZE = 55001
 class Archive(object):
 
     def __init__(self, store, name=None):
+        self.crypt = CryptoManager(store)
         self.store = store
         self.items = []
         self.chunks = []
         self.chunk_idx = {}
         self.hard_links = {}
         if name:
-            self.load(hashlib.sha256(name).digest())
+            self.load(self.crypt.id_hash(name))
 
     def load(self, id):
         self.id = id
-        archive = unpack(self.store.get(NS_ARCHIVES, self.id))
+        archive = self.crypt.unpack_read(self.store.get(NS_ARCHIVES, self.id))
         if archive['version'] != 1:
             raise Exception('Archive version %r not supported' % archive['version'])
         self.items = archive['items']
         self.name = archive['name']
-        cindex = unpack(self.store.get(NS_CINDEX, self.id))
+        cindex = self.crypt.unpack_create(self.store.get(NS_CINDEX, self.id))
         assert cindex['version'] == 1
         self.chunks = cindex['chunks']
         for i, chunk in enumerate(self.chunks):
             self.chunk_idx[i] = chunk[0]
 
     def save(self, name):
-        self.id = hashlib.sha256(name).digest()
+        self.id = self.crypt.id_hash(name)
         archive = {
             'version': 1,
             'name': name,
@@ -46,14 +46,15 @@ class Archive(object):
             'ts': datetime.utcnow().isoformat(),
             'items': self.items,
         }
-        _, data = pack(archive)
+        data = self.crypt.pack_read(archive)
         self.store.put(NS_ARCHIVES, self.id, data)
         cindex = {
             'version': 1,
             'chunks': self.chunks,
         }
-        _, data = pack(cindex)
+        data = self.crypt.pack_create(cindex)
         self.store.put(NS_CINDEX, self.id, data)
+        self.crypt.store_key()
         self.store.commit()
 
     def add_chunk(self, id, size):
@@ -117,7 +118,7 @@ class Archive(object):
                     for chunk in item['chunks']:
                         id = self.chunk_idx[chunk]
                         try:
-                            fd.write(unpack(self.store.get(NS_CHUNKS, id)))
+                            fd.write(self.crypt.unpack_read(self.store.get(NS_CHUNKS, id)))
                         except ValueError:
                             raise Exception('Invalid chunk checksum')
                 self.restore_stat(path, item)
@@ -145,7 +146,7 @@ class Archive(object):
                 for chunk in item['chunks']:
                     id = self.chunk_idx[chunk]
                     try:
-                        unpack(self.store.get(NS_CHUNKS, id))
+                        self.crypt.unpack_read(self.store.get(NS_CHUNKS, id))
                     except ValueError:
                         logging.error('%s ... ERROR', item['path'])
                         break
@@ -242,12 +243,12 @@ class Archive(object):
         })
 
     def process_chunk(self, data, cache):
-        id = hashlib.sha256(data).digest()
+        id = self.crypt.id_hash(data)
         try:
             return self.chunk_idx[id]
         except KeyError:
             idx = len(self.chunks)
-            size = cache.add_chunk(id, data)
+            size = cache.add_chunk(id, data, self.crypt)
             self.chunks.append((id, size))
             self.chunk_idx[id] = idx
             return idx

+ 3 - 3
dedupestore/archiver.py

@@ -19,8 +19,8 @@ class Archiver(object):
 
     def do_create(self, args):
         store = self.open_store(args.archive)
-        cache = Cache(store)
         archive = Archive(store)
+        cache = Cache(store, archive.crypt)
         archive.create(args.archive.archive, args.paths, cache)
         return self.exit_code_from_logger()
 
@@ -32,8 +32,8 @@ class Archiver(object):
 
     def do_delete(self, args):
         store = self.open_store(args.archive)
-        cache = Cache(store)
         archive = Archive(store, args.archive.archive)
+        cache = Cache(store, archive.crypt)
         archive.delete(cache)
         return self.exit_code_from_logger()
 
@@ -55,8 +55,8 @@ class Archiver(object):
 
     def do_info(self, args):
         store = self.open_store(args.archive)
-        cache = Cache(store)
         archive = Archive(store, args.archive.archive)
+        cache = Cache(store, archive.crypt)
         osize, csize, usize = archive.stats(cache)
         print 'Original size:', pretty_size(osize)
         print 'Compressed size:', pretty_size(csize)

+ 8 - 13
dedupestore/cache.py

@@ -1,10 +1,6 @@
-import hashlib
 import logging
 import msgpack
 import os
-import zlib
-
-from .helpers import pack, unpack
 
 NS_ARCHIVES = 'A'
 NS_CHUNKS = 'C'
@@ -15,20 +11,19 @@ class Cache(object):
     """Client Side cache
     """
 
-    def __init__(self, store):
+    def __init__(self, store, crypt):
         self.store = store
         self.path = os.path.join(os.path.expanduser('~'), '.dedupestore', 'cache',
                                  '%s.cache' % self.store.uuid)
         self.tid = -1
         self.open()
         if self.tid != self.store.tid:
-            self.init()
+            self.init(crypt)
 
     def open(self):
         if not os.path.exists(self.path):
             return
-        data = open(self.path, 'rb').read()
-        cache = unpack(data)
+        cache = msgpack.unpackb(open(self.path, 'rb').read())
         version = cache.get('version')
         if version != 1:
             logging.error('Unsupported cache version %r' % version)
@@ -38,7 +33,7 @@ class Cache(object):
         self.chunkmap = cache['chunkmap']
         self.tid = cache['tid']
 
-    def init(self):
+    def init(self, crypt):
         """Initializes cache by fetching and reading all archive indicies
         """
         logging.info('Initializing cache...')
@@ -47,7 +42,7 @@ class Cache(object):
         if self.store.tid == 0:
             return
         for id in list(self.store.list(NS_CINDEX)):
-            cindex = unpack(self.store.get(NS_CINDEX, id))
+            cindex = crypt.unpack_create(self.store.get(NS_CINDEX, id))
             for id, size in cindex['chunks']:
                 try:
                     count, size = self.chunkmap[id]
@@ -63,17 +58,17 @@ class Cache(object):
                 'chunkmap': self.chunkmap,
                 'tid': self.store.tid,
         }
-        _, data = pack(cache)
+        data = msgpack.packb(cache)
         cachedir = os.path.dirname(self.path)
         if not os.path.exists(cachedir):
             os.makedirs(cachedir)
         with open(self.path, 'wb') as fd:
             fd.write(data)
 
-    def add_chunk(self, id, data):
+    def add_chunk(self, id, data, crypt):
         if self.seen_chunk(id):
             return self.chunk_incref(id)
-        _, data = pack(data)
+        data = crypt.pack_read(data)
         csize = len(data)
         self.store.put(NS_CHUNKS, id, data)
         self.chunkmap[id] = (1, csize)

+ 82 - 0
dedupestore/crypt.py

@@ -0,0 +1,82 @@
+import hashlib
+import hmac
+import msgpack
+import os
+import zlib
+
+from Crypto.Cipher import AES
+
+
+class CryptoManager(object):
+
+    KEY_CREATE = 1
+    KEY_READ = 2
+    KEY_ID = 3
+    KEY_ARCHIVE = 4
+    KEY_CINDEX = 5
+
+    def __init__(self, store):
+        self.key_cache = {}
+        self.store = store
+        self.tid = store.tid
+        self.id_key = '0' * 32
+        self.read_key = os.urandom(32)
+        self.create_key = os.urandom(32)
+
+    def get_key(self, tid):
+        try:
+            return self.key_cache[tid]
+        except KeyError:
+            keys = self.load_key(tid)
+            self.key_cache[tid] = keys
+            return keys
+
+    def load_key(self, tid):
+        data = self.store.get('K', str(tid))
+        id = data[:32]
+        if self.id_hash(data[32:]) != id:
+            raise Exception('Invalid key object found')
+        key = msgpack.unpackb(data[32:])
+        return key['create'], key['read']
+
+    def store_key(self):
+        key = {
+            'version': 1,
+            'read': self.read_key,
+            'create': self.create_key,
+        }
+        data = msgpack.packb(key)
+        id = self.id_hash(data)
+        self.store.put('K', str(self.tid), id + data)
+
+    def id_hash(self, data):
+        return hmac.new(self.id_key, data, hashlib.sha256).digest()
+
+    def pack(self, data, key):
+        data = zlib.compress(msgpack.packb(data))
+        id = hmac.new(key, data, hashlib.sha256).digest()
+        data = AES.new(key, AES.MODE_CFB, id[:16]).encrypt(data)
+        return id + msgpack.packb((1, self.tid, data))
+
+    def pack_read(self, data):
+        return self.pack(data, self.read_key)
+
+    def pack_create(self, data):
+        return self.pack(data, self.create_key)
+
+    def unpack(self, data, key_idx):
+        id = data[:32]
+        version, tid, data = msgpack.unpackb(data[32:])
+        assert version == 1
+        key = self.get_key(tid)[key_idx]
+        data = AES.new(key, AES.MODE_CFB, id[:16]).decrypt(data)
+        if hmac.new(key, data, hashlib.sha256).digest() != id:
+            raise ValueError
+        return msgpack.unpackb(zlib.decompress(data))
+
+    def unpack_read(self, data):
+        return self.unpack(data, 1)
+
+    def unpack_create(self, data):
+        return self.unpack(data, 0)
+

+ 0 - 18
dedupestore/helpers.py

@@ -1,26 +1,8 @@
 import argparse
 import grp
-import hashlib
 import logging
-import msgpack
 import pwd
 import re
-import zlib
-
-
-def pack(data):
-    data = zlib.compress(msgpack.packb(data))
-    id = hashlib.sha256(data).digest()
-    tid = 0
-    return id, msgpack.packb((1, tid, id, data))
-
-
-def unpack(data):
-    version, tid, id, data = msgpack.unpackb(data)
-    assert version == 1
-    if hashlib.sha256(data).digest() != id:
-        raise ValueError
-    return msgpack.unpackb(zlib.decompress(data))
 
 
 def memoize(function):