浏览代码

Some code refactoring.

Jonas Borgström 15 年之前
父节点
当前提交
98f738fe7f
共有 3 个文件被更改,包括 78 次插入和 61 次删除
  1. 3 2
      dedupestore/_speedups.c
  2. 74 58
      dedupestore/archiver.py
  3. 1 1
      dedupestore/chunkifier.py

+ 3 - 2
dedupestore/_speedups.c

@@ -46,7 +46,7 @@ ChunkifyIter_dealloc(PyObject *self)
 {
 {
     ChunkifyIter *c = (ChunkifyIter *)self;
     ChunkifyIter *c = (ChunkifyIter *)self;
     Py_DECREF(c->fd);
     Py_DECREF(c->fd);
-    Py_DECREF(c->chunks);
+    Py_XDECREF(c->chunks);
     free(c->data);
     free(c->data);
     self->ob_type->tp_free(self);
     self->ob_type->tp_free(self);
 }
 }
@@ -65,6 +65,7 @@ ChunkifyIter_iternext(PyObject *self)
     if(c->extra)
     if(c->extra)
     {
     {
         c->done = 1;
         c->done = 1;
+	Py_INCREF(c->extra);
         return c->extra;
         return c->extra;
     }
     }
     for(;;)
     for(;;)
@@ -259,4 +260,4 @@ init_speedups(void)
 
 
   Py_INCREF(&ChunkifyIterType);
   Py_INCREF(&ChunkifyIterType);
   PyModule_AddObject(m, "_ChunkifyIter", (PyObject *)&ChunkifyIterType);
   PyModule_AddObject(m, "_ChunkifyIter", (PyObject *)&ChunkifyIterType);
-}
+}

+ 74 - 58
dedupestore/archiver.py

@@ -10,6 +10,65 @@ from cache import Cache
 from store import Store, NS_ARCHIVES, NS_CHUNKS, CHUNK_SIZE
 from store import Store, NS_ARCHIVES, NS_CHUNKS, CHUNK_SIZE
 
 
 
 
+class Archive(object):
+
+    def __init__(self, store, name=None):
+        self.store = store
+        self.items = []
+        if name:
+            self.open(name)
+
+    def open(self, name):
+        archive = cPickle.loads(zlib.decompress(self.store.get(NS_ARCHIVES, name)))
+        self.items = archive['items']
+
+    def save(self, name):
+        archive = {'name': name, 'items': self.items}
+        self.store.put(NS_ARCHIVES, name, zlib.compress(cPickle.dumps(archive)))
+        self.store.commit()
+
+    def list(self):
+        for item in self.items:
+            print item['path']
+
+    def extract(self):
+        for item in self.items:
+            assert item['path'][0] not in ('/', '\\', ':')
+            print item['path']
+            if item['type'] == 'DIR':
+                if not os.path.exists(item['path']):
+                    os.makedirs(item['path'])
+            if item['type'] == 'FILE':
+                with open(item['path'], 'wb') as fd:
+                    for chunk in item['chunks']:
+                        data = self.store.get(NS_CHUNKS, chunk)
+                        if hashlib.sha1(data).digest() != chunk[4:]:
+                            raise Exception('Invalid chunk checksum')
+                        fd.write(zlib.decompress(data))
+
+    def verify(self):
+        for item in self.items:
+            if item['type'] == 'FILE':
+                print item['path'], '...',
+                for chunk in item['chunks']:
+                    data = self.store.get(NS_CHUNKS, chunk)
+                    if hashlib.sha1(data).digest() != chunk[4:]:
+                        print 'ERROR'
+                        break
+                else:
+                    print 'OK'
+
+    def delete(self, cache):
+        self.store.delete(NS_ARCHIVES, self.name)
+        for item in self.items:
+            if item['type'] == 'FILE':
+                for c in item['chunks']:
+                    cache.chunk_decref(c)
+        self.store.commit()
+        cache.archives.remove(self.name)
+        cache.save()
+
+
 class Archiver(object):
 class Archiver(object):
 
 
     def create_archive(self, archive_name, paths):
     def create_archive(self, archive_name, paths):
@@ -19,34 +78,22 @@ class Archiver(object):
             pass
             pass
         else:
         else:
             raise Exception('Archive "%s" already exists' % archive_name)
             raise Exception('Archive "%s" already exists' % archive_name)
-        items = []
+        archive = Archive(self.store)
         for path in paths:
         for path in paths:
             for root, dirs, files in os.walk(path):
             for root, dirs, files in os.walk(path):
                 for d in dirs:
                 for d in dirs:
                     name = os.path.join(root, d)
                     name = os.path.join(root, d)
-                    items.append(self.process_dir(name, self.cache))
+                    archive.items.append(self.process_dir(name, self.cache))
                 for f in files:
                 for f in files:
                     name = os.path.join(root, f)
                     name = os.path.join(root, f)
-                    items.append(self.process_file(name, self.cache))
-        archive = {'name': archive_name, 'items': items}
-        self.store.put(NS_ARCHIVES, archive_name, zlib.compress(cPickle.dumps(archive)))
-        self.store.commit()
+                    archive.items.append(self.process_file(name, self.cache))
+        archive.save(archive_name)
         self.cache.archives.append(archive_name)
         self.cache.archives.append(archive_name)
         self.cache.save()
         self.cache.save()
 
 
     def delete_archive(self, archive_name):
     def delete_archive(self, archive_name):
-        try:
-            archive = cPickle.loads(zlib.decompress(self.store.get(NS_ARCHIVES, archive_name)))
-        except Store.DoesNotExist:
-            raise Exception('Archive "%s" does not exist' % archive_name)
-        self.store.delete(NS_ARCHIVES, archive_name)
-        for item in archive['items']:
-            if item['type'] == 'FILE':
-                for c in item['chunks']:
-                    self.cache.chunk_decref(c)
-        self.store.commit()
-        self.cache.archives.remove(archive_name)
-        self.cache.save()
+        archive = Archive(self.store, archive_name)
+        archive.delete(self.cache)
 
 
     def list_archives(self):
     def list_archives(self):
         print 'Archives:'
         print 'Archives:'
@@ -54,47 +101,16 @@ class Archiver(object):
             print archive
             print archive
 
 
     def list_archive(self, archive_name):
     def list_archive(self, archive_name):
-        try:
-            archive = cPickle.loads(zlib.decompress(self.store.get(NS_ARCHIVES, archive_name)))
-        except Store.DoesNotExist:
-            raise Exception('Archive "%s" does not exist' % archive_name)
-        for item in archive['items']:
-            print item['path']
+        archive = Archive(self.store, archive_name)
+        archive.list()
 
 
     def verify_archive(self, archive_name):
     def verify_archive(self, archive_name):
-        try:
-            archive = cPickle.loads(zlib.decompress(self.store.get(NS_ARCHIVES, archive_name)))
-        except Store.DoesNotExist:
-            raise Exception('Archive "%s" does not exist' % archive_name)
-        for item in archive['items']:
-            if item['type'] == 'FILE':
-                print item['path'], '...',
-                for chunk in item['chunks']:
-                    data = self.store.get(NS_CHUNKS, chunk)
-                    if hashlib.sha1(data).digest() != chunk[4:]:
-                        print 'ERROR'
-                        break
-                else:
-                    print 'OK'
+        archive = Archive(self.store, archive_name)
+        archive.verify()
 
 
     def extract_archive(self, archive_name):
     def extract_archive(self, archive_name):
-        try:
-            archive = cPickle.loads(zlib.decompress(self.store.get(NS_ARCHIVES, archive_name)))
-        except Store.DoesNotExist:
-            raise Exception('Archive "%s" does not exist' % archive_name)
-        for item in archive['items']:
-            assert item['path'][0] not in ('/', '\\', ':')
-            print item['path']
-            if item['type'] == 'DIR':
-                if not os.path.exists(item['path']):
-                    os.makedirs(item['path'])
-            if item['type'] == 'FILE':
-                with open(item['path'], 'wb') as fd:
-                    for chunk in item['chunks']:
-                        data = self.store.get(NS_CHUNKS, chunk)
-                        if hashlib.sha1(data).digest() != chunk[4:]:
-                            raise Exception('Invalid chunk checksum')
-                        fd.write(zlib.decompress(data))
+        archive = Archive(self.store, archive_name)
+        archive.extract()
 
 
     def process_dir(self, path, cache):
     def process_dir(self, path, cache):
         path = path.lstrip('/\\:')
         path = path.lstrip('/\\:')
@@ -102,9 +118,10 @@ class Archiver(object):
         return {'type': 'DIR', 'path': path}
         return {'type': 'DIR', 'path': path}
 
 
     def process_file(self, path, cache):
     def process_file(self, path, cache):
-        print 'Adding: %s...' % path,
-        sys.stdout.flush()
         with open(path, 'rb') as fd:
         with open(path, 'rb') as fd:
+            path = path.lstrip('/\\:')
+            print 'Adding: %s...' % path,
+            sys.stdout.flush()
             origsize = 0
             origsize = 0
             compsize = 0
             compsize = 0
             chunks = []
             chunks = []
@@ -113,7 +130,6 @@ class Archiver(object):
                 id, size = cache.add_chunk(chunk)
                 id, size = cache.add_chunk(chunk)
                 compsize += size
                 compsize += size
                 chunks.append(id)
                 chunks.append(id)
-        path = path.lstrip('/\\:')
         ratio = origsize and compsize * 100 / origsize or 0
         ratio = origsize and compsize * 100 / origsize or 0
         print '(%d chunks: %d%%)' % (len(chunks), ratio)
         print '(%d chunks: %d%%)' % (len(chunks), ratio)
         return {'type': 'FILE', 'path': path, 'size': origsize, 'chunks': chunks}
         return {'type': 'FILE', 'path': path, 'size': origsize, 'chunks': chunks}

+ 1 - 1
dedupestore/chunkifier.py

@@ -29,6 +29,7 @@ def roll_checksum(sum, remove, add, len):
 
 
 
 
 class ChunkifyIter(object):
 class ChunkifyIter(object):
+
     def __init__(self, fd, chunk_size, chunks):
     def __init__(self, fd, chunk_size, chunks):
         self.fd = fd
         self.fd = fd
         self.chunk_size = chunk_size
         self.chunk_size = chunk_size
@@ -140,6 +141,5 @@ except ImportError:
 
 
 
 
 if __name__ == '__main__':
 if __name__ == '__main__':
-    import StringIO
     import doctest
     import doctest
     doctest.testmod()
     doctest.testmod()