Kaynağa Gözat

Switch from sha1 to sha256

Jonas Borgström 14 yıl önce
ebeveyn
işleme
98b1b5e0ce
2 değiştirilmiş dosya ile 61 ekleme ve 41 silme
  1. 41 24
      dedupestore/archiver.py
  2. 20 17
      dedupestore/cache.py

+ 41 - 24
dedupestore/archiver.py

@@ -15,25 +15,21 @@ CHUNK_SIZE = 55001
 
 class Archive(object):
 
-    def __init__(self, store, name=None):
+    def __init__(self, store, cache, name=None):
         self.store = store
+        self.cache = cache
         self.items = []
         self.chunks = []
         self.chunk_idx = {}
         if name:
             self.open(name)
 
-    def add_chunk(self, id, csize, osize):
-        try:
-            return self.chunk_idx[id]
-        except KeyError:
-            idx = len(self.chunks)
-            self.chunks.append((id, csize, osize))
-            self.chunk_idx[id] = idx
-            return idx
-
     def open(self, name):
-        archive = cPickle.loads(zlib.decompress(self.store.get(NS_ARCHIVES, name)))
+        id = self.cache.archives[name]
+        data = self.store.get(NS_ARCHIVES, id)
+        if hashlib.sha256(data).digest() != id:
+            raise Exception('Archive hash did not match')
+        archive = cPickle.loads(zlib.decompress(data))
         self.items = archive['items']
         self.name = archive['name']
         self.chunks = archive['chunks']
@@ -42,9 +38,20 @@ class Archive(object):
 
     def save(self, name):
         archive = {'name': name, 'items': self.items, 'chunks': self.chunks}
-        self.store.put(NS_ARCHIVES, name, zlib.compress(cPickle.dumps(archive)))
+        data = zlib.compress(cPickle.dumps(archive))
+        self.id = hashlib.sha256(data).digest()
+        self.store.put(NS_ARCHIVES, self.id, data)
         self.store.commit()
 
+    def add_chunk(self, id, csize, osize):
+        try:
+            return self.chunk_idx[id]
+        except KeyError:
+            idx = len(self.chunks)
+            self.chunks.append((id, csize, osize))
+            self.chunk_idx[id] = idx
+            return idx
+
     def stats(self, cache):
         total_osize = 0
         total_csize = 0
@@ -84,7 +91,11 @@ class Archive(object):
                     for chunk in item['chunks']:
                         id = self.chunk_idx[chunk]
                         data = self.store.get(NS_CHUNKS, id)
-                        if hashlib.sha1(data).digest() != id:
+                        cid = data[:32]
+                        data = data[32:]
+                        if hashlib.sha256(data).digest() != cid:
+                            raise Exception('Invalid chunk checksum')
+                        if hashlib.sha256(zlib.decompress(data)).digest() != id:
                             raise Exception('Invalid chunk checksum')
                         fd.write(zlib.decompress(data))
 
@@ -94,24 +105,30 @@ class Archive(object):
                 for chunk in item['chunks']:
                     id = self.chunk_idx[chunk]
                    data = self.store.get(NS_CHUNKS, id)
-                    if hashlib.sha1(data).digest() != id:
-                        logging.ERROR('%s ... ERROR', item['path'])
+                    cid = data[:32]
+                    data = data[32:]
+                    if (hashlib.sha256(data).digest() != cid or
+                        hashlib.sha256(zlib.decompress(data)).digest() != id):
+                        logging.error('%s ... ERROR', item['path'])
                         break
                 else:
                     logging.info('%s ... OK', item['path'])
 
     def delete(self, cache):
-        self.store.delete(NS_ARCHIVES, self.name)
+        self.store.delete(NS_ARCHIVES, self.cache.archives[self.name])
         for item in self.items:
             if item['type'] == 'FILE':
                 for c in item['chunks']:
                     id = self.chunk_idx[c]
                     cache.chunk_decref(id)
         self.store.commit()
-        cache.archives.remove(self.name)
+        del cache.archives[self.name]
         cache.save()
 
     def create(self, name, paths, cache):
+        if name in cache.archives:
+            raise NameError('Archive already exists')
         for path in paths:
             for root, dirs, files in os.walk(path):
                 for d in dirs:
@@ -123,7 +140,7 @@ class Archive(object):
                     if entry:
                         self.items.append(entry)
         self.save(name)
-        cache.archives.append(name)
+        cache.archives[name] = self.id
         cache.save()
 
     def process_dir(self, path, cache):
@@ -167,23 +184,23 @@ class Archiver(object):
 
     def do_create(self, args):
         store, cache = self.open_store(args.archive)
-        archive = Archive(store)
+        archive = Archive(store, cache)
         archive.create(args.archive.archive, args.paths, cache)
 
     def do_extract(self, args):
         store, cache = self.open_store(args.archive)
-        archive = Archive(store, args.archive.archive)
+        archive = Archive(store, cache, args.archive.archive)
         archive.extract(args.dest)
 
     def do_delete(self, args):
         store, cache = self.open_store(args.archive)
-        archive = Archive(store, args.archive.archive)
+        archive = Archive(store, cache, args.archive.archive)
         archive.delete(cache)
 
     def do_list(self, args):
         store, cache = self.open_store(args.src)
         if args.src.archive:
-            archive = Archive(store, args.src.archive)
+            archive = Archive(store, cache, args.src.archive)
             archive.list()
         else:
             for archive in sorted(cache.archives):
@@ -191,12 +208,12 @@ class Archiver(object):
 
     def do_verify(self, args):
         store, cache = self.open_store(args.archive)
-        archive = Archive(store, args.archive.archive)
+        archive = Archive(store, cache, args.archive.archive)
         archive.verify()
 
     def do_info(self, args):
         store, cache = self.open_store(args.archive)
-        archive = Archive(store, args.archive.archive)
+        archive = Archive(store, cache, args.archive.archive)
         stats = archive.stats(cache)
         print 'Original size:', self.pretty_size(stats['osize'])
         print 'Compressed size:', self.pretty_size(stats['csize'])

+ 20 - 17
dedupestore/cache.py

@@ -24,55 +24,58 @@ class Cache(object):
     def open(self):
         if not os.path.exists(self.path):
             return
-        print 'Loading cache: ', self.path, '...'
-        data = cPickle.loads(zlib.decompress(open(self.path, 'rb').read()))
+        data = open(self.path, 'rb').read()
+        id = data[:32]
+        data = data[32:]
+        if hashlib.sha256(data).digest() != id:
+            raise Exception('Cache hash did not match')
+        data = cPickle.loads(zlib.decompress(data))
         if data['uuid'] != self.store.uuid:
-            print >> sys.stderr, 'Cache UUID mismatch'
-            return
+            raise Exception('Cache UUID mismatch')
         self.chunkmap = data['chunkmap']
         self.archives = data['archives']
         self.tid = data['tid']
-        print 'done'
 
     def init(self):
         """Initializes cache by fetching and reading all archive indicies
         """
         self.chunkmap = {}
-        self.archives = []
+        self.archives = {}
         self.tid = self.store.tid
         if self.store.tid == 0:
             return
-        print 'Recreating cache...'
         for id in list(self.store.list(NS_ARCHIVES)):
-            archive = cPickle.loads(zlib.decompress(self.store.get(NS_ARCHIVES, id)))
-            self.archives.append(archive['name'])
+            data = self.store.get(NS_ARCHIVES, id)
+            if hashlib.sha256(data).digest() != id:
+                raise Exception('Archive hash did not match')
+            archive = cPickle.loads(zlib.decompress(data))
+            self.archives[archive['name']] = id
             for id, csize, osize in archive['chunks']:
                 if self.seen_chunk(id):
                     self.chunk_incref(id)
                 else:
                     self.init_chunk(id, csize, osize)
-        print 'done'
 
     def save(self):
         assert self.store.state == self.store.OPEN
-        print 'saving cache'
         data = {'uuid': self.store.uuid,
                 'chunkmap': self.chunkmap,
                 'tid': self.store.tid, 'archives': self.archives}
-        print 'Saving cache as:', self.path
         cachedir = os.path.dirname(self.path)
         if not os.path.exists(cachedir):
             os.makedirs(cachedir)
         with open(self.path, 'wb') as fd:
-            fd.write(zlib.compress(cPickle.dumps(data)))
-        print 'done'
+            data = zlib.compress(cPickle.dumps(data))
+            id = hashlib.sha256(data).digest()
+            fd.write(id + data)
 
     def add_chunk(self, data):
-        osize = len(data)
-        data = zlib.compress(data)
-        id = hashlib.sha1(data).digest()
+        id = hashlib.sha256(data).digest()
         if self.seen_chunk(id):
             return self.chunk_incref(id)
+        osize = len(data)
+        data = zlib.compress(data)
+        data = hashlib.sha256(data).digest() + data
         csize = len(data)
         self.store.put(NS_CHUNKS, id, data)
         return self.init_chunk(id, csize, osize)