
Added creation time stats output using the --stats flag

Jonas Borgström, 14 years ago
commit 8616df7f32
4 changed files with 68 additions and 24 deletions
  1. darc/archive.py (+14, -16)
  2. darc/archiver.py (+17, -5)
  3. darc/cache.py (+5, -3)
  4. darc/helpers.py (+32, -0)

darc/archive.py (+14, -16)

@@ -12,8 +12,9 @@ from xattr import xattr, XATTR_NOFOLLOW
 from . import NS_ARCHIVE_METADATA, NS_CHUNK
 from ._speedups import chunkify
 from .helpers import uid2user, user2uid, gid2group, group2gid, IntegrityError, \
-    Counter, encode_filename
+    Counter, encode_filename, Statistics
 
+ITEMS_BUFFER = 1024 * 1024
 CHUNK_SIZE = 64 * 1024
 WINDOW_SIZE = 4096
 
@@ -33,6 +34,7 @@ class Archive(object):
         self.items = StringIO()
         self.items_ids = []
         self.hard_links = {}
+        self.stats = Statistics()
         if name:
             self.load(self.key.archive_hash(name))
 
@@ -74,7 +76,7 @@ class Archive(object):
 
     def add_item(self, item):
         self.items.write(msgpack.packb(item))
-        if self.items.tell() > 1024 * 1024:
+        if self.items.tell() > ITEMS_BUFFER:
             self.flush_items()
 
     def flush_items(self, flush=False):
@@ -85,9 +87,11 @@ class Archive(object):
         self.items.seek(0)
         self.items.truncate()
         for chunk in chunks[:-1]:
-            self.items_ids.append(self.cache.add_chunk(self.key.id_hash(chunk), chunk))
+            self.items_ids.append(self.cache.add_chunk(self.key.id_hash(chunk),
+                                  chunk, self.stats))
         if flush or len(chunks) == 1:
-            self.items_ids.append(self.cache.add_chunk(self.key.id_hash(chunks[-1]), chunks[-1]))
+            self.items_ids.append(self.cache.add_chunk(self.key.id_hash(chunks[-1]),
+                                  chunks[-1], self.stats))
         else:
             self.items.write(chunks[-1])
 
@@ -108,7 +112,7 @@ class Archive(object):
         self.store.commit()
         cache.commit()
 
-    def stats(self, cache):
+    def calc_stats(self, cache):
         # This function is a bit evil since it abuses the cache to calculate
         # the stats. The cache transaction must be rolled back afterwards
         def cb(chunk, error, id):
@@ -120,21 +124,15 @@ class Archive(object):
                 try:
                     for id, size, csize in item['chunks']:
                         count, _, _ = self.cache.chunks[id]
-                        stats['osize'] += size
-                        stats['csize'] += csize
-                        if count == 1:
-                            stats['usize'] += csize
+                        stats.update(size, csize, count == 1)
                         self.cache.chunks[id] = count - 1, size, csize
                 except KeyError:
                     pass
         unpacker = msgpack.Unpacker()
         cache.begin_txn()
-        stats = {'osize': 0, 'csize': 0, 'usize': 0}
+        stats = Statistics()
         for id, size, csize in self.metadata['items']:
-            stats['osize'] += size
-            stats['csize'] += csize
-            if self.cache.seen_chunk(id) == 1:
-                stats['usize'] += csize
+            stats.update(size, csize, self.cache.seen_chunk(id) == 1)
             self.store.get(NS_CHUNK, id, callback=cb, callback_data=id)
             self.cache.chunk_decref(id)
         self.store.flush_rpc()
@@ -323,14 +321,14 @@ class Archive(object):
                 if not cache.seen_chunk(id):
                     break
             else:
-                chunks = [cache.chunk_incref(id) for id in ids]
+                chunks = [cache.chunk_incref(id, self.stats) for id in ids]
         # Only chunkify the file if needed
         if chunks is None:
             with open(path, 'rb') as fd:
                 chunks = []
                 for chunk in chunkify(fd, CHUNK_SIZE, WINDOW_SIZE,
                                       self.key.chunk_seed):
-                    chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk))
+                    chunks.append(cache.add_chunk(self.key.id_hash(chunk), chunk, self.stats))
             ids = [id for id, _, _ in chunks]
             cache.memorize_file(path_hash, st, ids)
         item = {'path': safe_path, 'chunks': chunks}
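
Since calc_stats now feeds the new Statistics helper instead of an ad-hoc dict, here is a minimal sketch of the accounting rule it applies; the chunk ids, refcounts, and sizes are made up, and the plain dict stands in for cache.chunks:

    from darc.helpers import Statistics

    chunks = {'a': (1, 100, 60),   # refcount 1: only this archive references it
              'b': (3, 200, 120)}  # refcount 3: shared with other archives
    stats = Statistics()
    for id, (count, size, csize) in chunks.items():
        # csize only counts as unique data when we hold the last reference
        stats.update(size, csize, count == 1)
    stats.print_()
    # Original size: 300, Compressed size: 180, Unique data: 60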

darc/archiver.py (+17, -5)

@@ -11,7 +11,7 @@ from .cache import Cache
 from .key import Key
 from .helpers import location_validator, format_file_size, format_time,\
     format_file_mode, IncludePattern, ExcludePattern, exclude_path, to_localtime, \
-    get_cache_dir, day_of_year
+    get_cache_dir, day_of_year, format_timedelta
 from .remote import StoreServer, RemoteStore
 
 class Archiver(object):
@@ -48,6 +48,7 @@ class Archiver(object):
         return self.exit_code
 
     def do_create(self, args):
+        t0 = datetime.now()
         store = self.open_store(args.archive)
         key = Key(store)
         try:
@@ -76,6 +77,16 @@ class Archiver(object):
         for path in args.paths:
             self._process(archive, cache, args.patterns, skip_inodes, path)
         archive.save(args.archive.archive, cache)
+        if args.stats:
+            t = datetime.now()
+            diff = t - t0
+            print '-' * 40
+            print 'Archive name: %s' % args.archive.archive
+            print 'Start time: %s' % t0.strftime('%c')
+            print 'End time: %s' % t.strftime('%c')
+            print 'Duration: %.2f (%s)' % (diff.total_seconds(), format_timedelta(diff))
+            archive.stats.print_()
+            print '-' * 40
         return self.exit_code
 
     def _process(self, archive, cache, patterns, skip_inodes, path):
@@ -204,15 +215,13 @@ class Archiver(object):
         key = Key(store)
         cache = Cache(store, key)
         archive = Archive(store, key, args.archive.archive, cache=cache)
-        stats = archive.stats(cache)
+        stats = archive.calc_stats(cache)
         print 'Name:', archive.metadata['name']
         print 'Hostname:', archive.metadata['hostname']
         print 'Username:', archive.metadata['username']
         print 'Time:', archive.metadata['time']
         print 'Command line:', ' '.join(archive.metadata['cmdline'])
-        print 'Original size:', format_file_size(stats['osize'])
-        print 'Compressed size:', format_file_size(stats['csize'])
-        print 'Unique data:', format_file_size(stats['usize'])
+        stats.print_()
         return self.exit_code
 
     def do_purge(self, args):
@@ -291,6 +300,9 @@ class Archiver(object):
 
         subparser = subparsers.add_parser('create')
         subparser.set_defaults(func=self.do_create)
+        subparser.add_argument('-s', '--stats', dest='stats',
+                               action='store_true', default=False,
+                               help='Print statistics for the created archive')
         subparser.add_argument('-i', '--include', dest='patterns',
                                type=IncludePattern, action='append',
                                help='Include condition')
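
With the new flag set, do_create prints a footer after the archive is saved. A rough illustration with made-up archive name, times, and sizes (the human-readable sizes depend on format_file_size, so treat the exact formatting as an assumption):

    ----------------------------------------
    Archive name: mybackup
    Start time: Mon Jun  6 10:00:00 2011
    End time: Mon Jun  6 10:02:05 2011
    Duration: 125.00 (2 minutes 5.00 seconds)
    Original size: 1048576 (1.05 MB)
    Compressed size: 524288 (524.29 kB)
    Unique data: 262144 (262.14 kB)
    ----------------------------------------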

darc/cache.py (+5, -3)

@@ -152,26 +152,28 @@ class Cache(object):
                 self.store.get(NS_CHUNK, id, callback=cb, callback_data=id)
             self.store.flush_rpc()
 
-    def add_chunk(self, id, data):
+    def add_chunk(self, id, data, stats):
         if not self.txn_active:
             self.begin_txn()
         if self.seen_chunk(id):
-            return self.chunk_incref(id)
+            return self.chunk_incref(id, stats)
         size = len(data)
         data, hash = self.key.encrypt(data)
         csize = len(data)
         self.store.put(NS_CHUNK, id, data, callback=error_callback)
         self.chunks[id] = (1, size, csize)
+        stats.update(size, csize, True)
         return id, size, csize
 
     def seen_chunk(self, id):
         return self.chunks.get(id, (0, 0, 0))[0]
 
-    def chunk_incref(self, id):
+    def chunk_incref(self, id, stats):
         if not self.txn_active:
             self.begin_txn()
         count, size, csize = self.chunks[id]
         self.chunks[id] = (count + 1, size, csize)
+        stats.update(size, csize, False)
         return id, size, csize
 
     def chunk_decref(self, id):
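
The two Cache methods now share a simple contract with Statistics: add_chunk reports a chunk as unique the first time it is stored, while chunk_incref reports a deduplicated reference. A small mock of that contract (put_chunk is a made-up stand-in; the real Cache also encrypts and writes to the store):

    from darc.helpers import Statistics

    stats = Statistics()
    seen = {}  # chunk id -> (refcount, size, csize), like Cache.chunks

    def put_chunk(id, size, csize):
        if id in seen:
            count, _, _ = seen[id]
            seen[id] = (count + 1, size, csize)
            stats.update(size, csize, False)  # re-reference: shared data
        else:
            seen[id] = (1, size, csize)
            stats.update(size, csize, True)   # first store: unique data

    put_chunk('x', 100, 60)
    put_chunk('x', 100, 60)  # dedup hit: osize/csize grow, usize does not
    stats.print_()           # osize=200, csize=120, usize=60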

darc/helpers.py (+32, -0)

@@ -13,6 +13,22 @@ import time
 import urllib
 
 
+class Statistics(object):
+
+    def __init__(self):
+        self.osize = self.csize = self.usize = 0
+
+    def update(self, size, csize, unique):
+        self.osize += size
+        self.csize += csize
+        if unique:
+            self.usize += csize
+
+    def print_(self):
+        print 'Original size: %d (%s)' % (self.osize, format_file_size(self.osize))
+    print 'Compressed size: %d (%s)' % (self.csize, format_file_size(self.csize))
+        print 'Unique data: %d (%s)' % (self.usize, format_file_size(self.usize))
+
 def day_of_year(d):
     """Calculate the "day of year" from a date object"""
     return int(d.strftime('%j'))
@@ -194,6 +210,22 @@ def format_time(t):
         return t.strftime('%b %d  %Y')
 
 
+def format_timedelta(td):
+    """Format timedelta in a human friendly format"""
+    ts = td.total_seconds()
+    s = ts % 60
+    m = int(ts / 60) % 60
+    h = int(ts / 3600) % 24
+    txt = '%.2f seconds' % s
+    if m:
+        txt = '%d minutes %s' % (m, txt)
+    if h:
+        txt = '%d hours %s' % (h, txt)
+    if td.days:
+        txt = '%d days %s' % (td.days, txt)
+    return txt
+
+
 def format_file_mode(mod):
     """Format file mode bits for list output
     """