
Improved robustness

Jonas Borgström 14 years ago
parent
commit
d0d8dc5f1e
3 changed files with 41 additions and 24 deletions
  1. dedupestore/archiver.py  +32 -14
  2. dedupestore/cache.py  +0 -10
  3. doc/design.txt  +9 -0

+ 32 - 14
dedupestore/archiver.py

@@ -1,5 +1,7 @@
 import os
+import sys
 import hashlib
+import logging
 import zlib
 import cPickle
 from optparse import OptionParser
@@ -70,7 +72,7 @@ class Archive(object):
     def extract(self):
         for item in self.items:
             assert item['path'][0] not in ('/', '\\', ':')
-            print item['path']
+            logging.info(item['path'])
             if item['type'] == 'DIR':
                 if not os.path.exists(item['path']):
                     os.makedirs(item['path'])
@@ -89,15 +91,14 @@ class Archive(object):
     def verify(self):
         for item in self.items:
             if item['type'] == 'FILE':
-                print item['path'], '...',
                 for chunk in item['chunks']:
                     id = self.chunk_idx[chunk]
                     data = self.store.get(NS_CHUNKS, id)
                     if hashlib.sha1(data).digest() != id:
-                        print 'ERROR'
+                        logging.error('%s ... ERROR', item['path'])
                         break
                 else:
-                    print 'OK'
+                    logging.info('%s ... OK', item['path'])
 
     def delete(self, cache):
         self.store.delete(NS_ARCHIVES, self.name)
@@ -115,23 +116,30 @@ class Archive(object):
             for root, dirs, files in os.walk(path):
                 for d in dirs:
                     p = os.path.join(root, d)
-                    print p
                     self.items.append(self.process_dir(p, cache))
                 for f in files:
                     p = os.path.join(root, f)
-                    print p
-                    self.items.append(self.process_file(p, cache))
+                    entry = self.process_file(p, cache)
+                    if entry:
+                        self.items.append(entry)
         self.save(name)
         cache.archives.append(name)
         cache.save()
 
     def process_dir(self, path, cache):
         path = path.lstrip('/\\:')
+        logging.info(path)
         return {'type': 'DIR', 'path': path}
 
     def process_file(self, path, cache):
-        with open(path, 'rb') as fd:
+        try:
+            fd = open(path, 'rb')
+        except IOError, e:
+            logging.error(e)
+            return
+        with fd:
             path = path.lstrip('/\\:')
+            logging.info(path)
             chunks = []
             size = 0
             for chunk in chunkify(fd, CHUNK_SIZE, 30):
@@ -186,6 +194,9 @@ class Archiver(object):
 
     def run(self):
         parser = OptionParser()
+        parser.add_option("-v", "--verbose",
+                          action="store_true", dest="verbose", default=False,
+                          help="Print status messages to stdout")
         parser.add_option("-s", "--store", dest="store",
                           help="path to dedupe store", metavar="STORE")
         parser.add_option("-c", "--create", dest="create_archive",
@@ -204,25 +215,32 @@ class Archiver(object):
         parser.add_option("-S", "--stats", dest="archive_stats",
                         help="Display archive statistics", metavar="ARCHIVE")
         (options, args) = parser.parse_args()
+        if options.verbose:
+            logging.basicConfig(level=logging.INFO, format='%(message)s')
+        else:
+            logging.basicConfig(level=logging.WARNING, format='%(message)s')
         if options.store:
             self.store = BandStore(options.store)
         else:
             parser.error('No store path specified')
         self.cache = Cache(self.store)
-        if options.list_archives:
+        if options.list_archives and not args:
             self.list_archives()
-        elif options.list_archive:
+        elif options.list_archive and not args:
             self.list_archive(options.list_archive)
-        elif options.verify_archive:
+        elif options.verify_archive and not args:
             self.verify_archive(options.verify_archive)
-        elif options.extract_archive:
+        elif options.extract_archive and not args:
             self.extract_archive(options.extract_archive)
-        elif options.delete_archive:
+        elif options.delete_archive and not args:
             self.delete_archive(options.delete_archive)
         elif options.create_archive:
             self.create_archive(options.create_archive, args)
-        elif options.archive_stats:
+        elif options.archive_stats and not args:
             self.archive_stats(options.archive_stats)
+        else:
+            parser.error('Invalid usage')
+            sys.exit(1)
 
 def main():
     archiver = Archiver()
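
The most interesting robustness change is in process_file(): an unreadable file is now logged and skipped instead of aborting the whole run, and create() filters out the None result before appending. The same pattern in isolation, as a minimal sketch (read_entry and the sample paths are hypothetical, not part of this commit):

    import logging

    logging.basicConfig(level=logging.INFO, format='%(message)s')

    def read_entry(path):
        # Log and skip unreadable files instead of letting IOError
        # abort the entire archive run.
        try:
            fd = open(path, 'rb')
        except IOError as e:
            logging.error(e)
            return None
        with fd:
            return fd.read()

    entries = []
    for path in ['/etc/hostname', '/no/such/file']:
        entry = read_entry(path)
        if entry is not None:  # mirrors the "if entry:" filter in create()
            entries.append(entry)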

+ 0 - 10
dedupestore/cache.py

@@ -18,8 +18,6 @@ class Cache(object):
                                  '%s.cache' % self.store.uuid)
         self.tid = -1
         self.open()
-        self.total = 0
-        self.max = 0
         if self.tid != self.store.tid:
             self.init()
 
@@ -47,8 +45,6 @@ class Cache(object):
             return
         print 'Recreating cache...'
         for id in self.store.list(NS_ARCHIVES):
-
-
             archive = cPickle.loads(zlib.decompress(self.store.get(NS_ARCHIVES, id)))
             self.archives.append(archive['name'])
             for id, sum, csize, osize in archive['chunks']:
@@ -76,12 +72,7 @@ class Cache(object):
         osize = len(data)
         data = zlib.compress(data)
         id = hashlib.sha1(data).digest()
-        self.total += 1
-        if osize == 55001* 4:
-            self.max += 1
-            print 'rate = %.2f' % (100.*self.max/self.total)
         if self.seen_chunk(id):
-            print 'yay %d bytes' % osize
             return self.chunk_incref(id)
         csize = len(data)
         self.store.put(NS_CHUNKS, id, data)
@@ -104,7 +95,6 @@ class Cache(object):
         count, csize, osize = self.chunkmap[id]
         if count == 1:
             del self.chunkmap[id]
-            print 'deleting chunk: ', id.encode('hex')
             self.store.delete(NS_CHUNKS, id)
         else:
             self.chunkmap[id] = (count - 1, csize, osize)
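
For context, the counters deleted above lived inside the cache's chunk reference counting: chunkmap maps a chunk id to a (count, csize, osize) tuple, chunk_incref bumps the count when a known chunk is seen again, and the decref path in the last hunk deletes the chunk from the store once the final reference is dropped. A minimal sketch of that bookkeeping with the store factored out (this class is an illustration, not the actual Cache):

    class ChunkRefs(object):
        """Reference-counted chunk index: id -> (count, csize, osize)."""

        def __init__(self):
            self.chunkmap = {}

        def add(self, id, csize, osize):
            self.chunkmap[id] = (1, csize, osize)

        def incref(self, id):
            count, csize, osize = self.chunkmap[id]
            self.chunkmap[id] = (count + 1, csize, osize)

        def decref(self, id):
            # True means the last reference is gone and the chunk
            # should also be deleted from the backing store.
            count, csize, osize = self.chunkmap[id]
            if count == 1:
                del self.chunkmap[id]
                return True
            self.chunkmap[id] = (count - 1, csize, osize)
            return False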

+ 9 - 0
doc/design.txt

@@ -2,6 +2,15 @@
 Dedupstore
 ==========
 
+TODO
+----
+* Symbolic links
+* Owner, group, mode
+* msgpack
+* Hard links
+
+
+
 cache = Cache(path,)
 
 for file in files:
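
The design snippet is truncated here, but together with the cache.py hunks above it shows the core dedup loop: compress each chunk, use the SHA-1 of the compressed bytes as its id, and store a chunk only if that id has not been seen before. A toy version with a plain dict standing in for the band store (names and the fixed read size are hypothetical; the real code splits files with chunkify()):

    import hashlib
    import zlib

    store = {}               # stands in for the on-disk store: id -> data
    CHUNK_SIZE = 64 * 1024   # illustrative fixed read size

    def add_chunk(data):
        # Content-addressed insert: identical chunks map to the same
        # SHA-1 id, so each unique chunk is stored exactly once.
        data = zlib.compress(data)
        id = hashlib.sha1(data).digest()
        if id not in store:
            store[id] = data
        return id

    def backup(fd):
        ids = []
        while True:
            chunk = fd.read(CHUNK_SIZE)
            if not chunk:
                break
            ids.append(add_chunk(chunk))
        return ids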