Pārlūkot izejas kodu

Include "all archives" size information in "--stats" output.

Closes #54
Jonas Borgström 11 gadi atpakaļ
vecāks
revīzija
e398d5f632
5 mainītis faili ar 54 papildinājumiem un 15 dzēšanām
  1. 8 0
      CHANGES
  2. 20 0
      attic/_hashindex.c
  3. 4 4
      attic/archiver.py
  4. 6 0
      attic/hashindex.pyx
  5. 16 11
      attic/helpers.py

+ 8 - 0
CHANGES

@@ -3,6 +3,14 @@ Attic Changelog
 
 
 Here you can see the full list of changes between each Attic release.
 Here you can see the full list of changes between each Attic release.
 
 
+Version 0.12
+------------
+
+(feature release, released on X)
+
+- Include "all archives" size information in "--stats" output. (#54)
+- Switch to SI units (powers of 1000 instead of 1024) when printing file sizes
+
 Version 0.11
 Version 0.11
 ------------
 ------------
 
 

+ 20 - 0
attic/_hashindex.c

@@ -389,3 +389,23 @@ hashindex_get_size(HashIndex *index)
     return index->num_entries;
     return index->num_entries;
 }
 }
 
 
+/*
+ * Walk every entry of `index` and accumulate four size totals.
+ *
+ * Each entry's value is read as an array of 32-bit integers located 32 bytes
+ * past the key pointer returned by hashindex_next_key():
+ *   values[0] - multiplier applied to the sizes (presumably the chunk's
+ *               reference count; confirm against the index writer)
+ *   values[1] - size, summed once per entry into *total_unique_size
+ *   values[2] - csize, summed once per entry into *total_unique_csize
+ *
+ * *total_size and *total_csize receive the sums of values[0] * values[1] and
+ * values[0] * values[2] respectively. All four output pointers must be
+ * non-NULL; they are written unconditionally, and an empty index yields zeros.
+ */
+static void
+hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, long long *total_unique_size, long long *total_unique_csize)
+{
+    int64_t size = 0, csize = 0, unique_size = 0, unique_csize = 0;
+    const int32_t *values;
+    void *key = NULL;
+
+    while((key = hashindex_next_key(index, key))) {
+        /* Arithmetic on void * is a GNU extension; step in bytes via char *. */
+        values = (const int32_t *)((const char *)key + 32);
+        unique_size += values[1];
+        unique_csize += values[2];
+        /* Widen before multiplying: int32 * int32 would overflow (undefined
+         * behaviour) as soon as the product exceeds 2^31 - 1. */
+        size += (int64_t)values[0] * values[1];
+        csize += (int64_t)values[0] * values[2];
+    }
+    *total_size = size;
+    *total_csize = csize;
+    *total_unique_size = unique_size;
+    *total_unique_csize = unique_csize;
+}
+

+ 4 - 4
attic/archiver.py

@@ -130,14 +130,14 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         if args.stats:
         if args.stats:
             t = datetime.now()
             t = datetime.now()
             diff = t - t0
             diff = t - t0
-            print('-' * 40)
+            print('-' * 70)
             print('Archive name: %s' % args.archive.archive)
             print('Archive name: %s' % args.archive.archive)
             print('Archive fingerprint: %s' % hexlify(archive.id).decode('ascii'))
             print('Archive fingerprint: %s' % hexlify(archive.id).decode('ascii'))
             print('Start time: %s' % t0.strftime('%c'))
             print('Start time: %s' % t0.strftime('%c'))
             print('End time: %s' % t.strftime('%c'))
             print('End time: %s' % t.strftime('%c'))
             print('Duration: %s' % format_timedelta(diff))
             print('Duration: %s' % format_timedelta(diff))
-            archive.stats.print_()
-            print('-' * 40)
+            archive.stats.print_(cache)
+            print('-' * 70)
         return self.exit_code
         return self.exit_code
 
 
     def _process(self, archive, cache, excludes, skip_inodes, path, restrict_dev):
     def _process(self, archive, cache, excludes, skip_inodes, path, restrict_dev):
@@ -297,7 +297,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         print('Username:', archive.metadata[b'username'])
         print('Username:', archive.metadata[b'username'])
         print('Time: %s' % to_localtime(archive.ts).strftime('%c'))
         print('Time: %s' % to_localtime(archive.ts).strftime('%c'))
         print('Command line:', remove_surrogates(' '.join(archive.metadata[b'cmdline'])))
         print('Command line:', remove_surrogates(' '.join(archive.metadata[b'cmdline'])))
-        stats.print_()
+        stats.print_(cache)
         return self.exit_code
         return self.exit_code
 
 
     def do_prune(self, args):
     def do_prune(self, args):

+ 6 - 0
attic/hashindex.pyx

@@ -8,6 +8,7 @@ cdef extern from "_hashindex.c":
 
 
     HashIndex *hashindex_open(char *path, int readonly)
     HashIndex *hashindex_open(char *path, int readonly)
     HashIndex *hashindex_create(char *path, int capacity, int key_size, int value_size)
     HashIndex *hashindex_create(char *path, int capacity, int key_size, int value_size)
+    void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, long long *unique_size, long long *unique_csize)
     int hashindex_get_size(HashIndex *index)
     int hashindex_get_size(HashIndex *index)
     int hashindex_clear(HashIndex *index)
     int hashindex_clear(HashIndex *index)
     int hashindex_close(HashIndex *index)
     int hashindex_close(HashIndex *index)
@@ -174,6 +175,11 @@ cdef class ChunkIndex(IndexBase):
             iter.key = key - 32
             iter.key = key - 32
         return iter
         return iter
 
 
+    def summarize(self):
+        """Return ``(total_size, total_csize, unique_size, unique_csize)`` for this index.
+
+        The four totals are filled in by the C helper ``hashindex_summarize``,
+        which is called on the underlying ``self.index``.
+        """
+        cdef long long total_size, total_csize, unique_size, unique_csize
+        hashindex_summarize(self.index, &total_size, &total_csize, &unique_size, &unique_csize)
+        return total_size, total_csize, unique_size, unique_csize
+
 
 
 cdef class ChunkKeyIterator:
 cdef class ChunkKeyIterator:
     cdef ChunkIndex idx
     cdef ChunkIndex idx

+ 16 - 11
attic/helpers.py

@@ -132,11 +132,14 @@ class Statistics:
         if unique:
         if unique:
             self.usize += csize
             self.usize += csize
 
 
-    def print_(self):
+    def print_(self, cache):
+        total_size, total_csize, unique_size, unique_csize = cache.chunks.summarize()
         print('Number of files: %d' % self.nfiles)
         print('Number of files: %d' % self.nfiles)
-        print('Original size: %d (%s)' % (self.osize, format_file_size(self.osize)))
-        print('Compressed size: %s (%s)' % (self.csize, format_file_size(self.csize)))
-        print('Unique data: %d (%s)' % (self.usize, format_file_size(self.usize)))
+        print()
+        print('                   Original size    Compressed size  Deduplicated size')
+        print('This archive: %18s %18s %18s' % (format_file_size(self.osize), format_file_size(self.csize), format_file_size(self.usize)))
+        print('All archives: %18s %18s %18s' % (format_file_size(total_size), format_file_size(total_csize), format_file_size(unique_csize)))
+
 
 
 
 
 def get_keys_dir():
 def get_keys_dir():
@@ -278,14 +281,16 @@ def format_file_mode(mod):
 def format_file_size(v):
 def format_file_size(v):
     """Format file size into a human friendly format
     """Format file size into a human friendly format
     """
     """
-    if v > 1024 * 1024 * 1024:
-        return '%.2f GB' % (v / 1024. / 1024. / 1024.)
-    elif v > 1024 * 1024:
-        return '%.2f MB' % (v / 1024. / 1024.)
-    elif v > 1024:
-        return '%.2f kB' % (v / 1024.)
+    if v > 10**12:
+        return '%.2f TB' % (v / 10**12)
+    elif v > 10**9:
+        return '%.2f GB' % (v / 10**9)
+    elif v > 10**6:
+        return '%.2f MB' % (v / 10**6)
+    elif v > 10**3:
+        return '%.2f kB' % (v / 10**3)
     else:
     else:
-        return '%d B' % v
+        return '%d B ' % v
 
 
 
 
 def format_archive(archive):
 def format_archive(archive):