Browse Source

determine and report chunk counts in chunks index

`borg info repo::archive` now reports the unique chunk count and the total chunk count.

Also: use index->key_size instead of the hardcoded key size (32) when locating an entry's values.
Thomas Waldmann 10 years ago
parent
commit
6d0a00496a
3 changed files with 21 additions and 8 deletions
  1. 9 3
      borg/_hashindex.c
  2. 8 4
      borg/hashindex.pyx
  3. 4 1
      borg/helpers.py

+ 9 - 3
borg/_hashindex.c

@@ -361,14 +361,18 @@ hashindex_get_size(HashIndex *index)
 }
 
 static void
-hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, long long *total_unique_size, long long *total_unique_csize)
+hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize,
+                    long long *total_unique_size, long long *total_unique_csize,
+                    long long *total_unique_chunks, long long *total_chunks)
 {
-    int64_t size = 0, csize = 0, unique_size = 0, unique_csize = 0;
+    int64_t size = 0, csize = 0, unique_size = 0, unique_csize = 0, chunks = 0, unique_chunks = 0;
     const int32_t *values;
     void *key = NULL;
 
     while((key = hashindex_next_key(index, key))) {
-        values = key + 32;
+        values = key + index->key_size;
+        unique_chunks++;
+        chunks += values[0];
         unique_size += values[1];
         unique_csize += values[2];
         size += values[0] * values[1];
@@ -378,4 +382,6 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_cs
     *total_csize = csize;
     *total_unique_size = unique_size;
     *total_unique_csize = unique_csize;
+    *total_unique_chunks = unique_chunks;
+    *total_chunks = chunks;
 }

+ 8 - 4
borg/hashindex.pyx

@@ -11,7 +11,9 @@ cdef extern from "_hashindex.c":
     HashIndex *hashindex_read(char *path)
     HashIndex *hashindex_init(int capacity, int key_size, int value_size)
     void hashindex_free(HashIndex *index)
-    void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, long long *unique_size, long long *unique_csize)
+    void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize,
+                             long long *unique_size, long long *unique_csize,
+                             long long *total_unique_chunks, long long *total_chunks)
     int hashindex_get_size(HashIndex *index)
     int hashindex_write(HashIndex *index, char *path)
     void *hashindex_get(HashIndex *index, void *key)
@@ -179,9 +181,11 @@ cdef class ChunkIndex(IndexBase):
         return iter
 
     def summarize(self):
-        cdef long long total_size, total_csize, unique_size, unique_csize
-        hashindex_summarize(self.index, &total_size, &total_csize, &unique_size, &unique_csize)
-        return total_size, total_csize, unique_size, unique_csize
+        cdef long long total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks
+        hashindex_summarize(self.index, &total_size, &total_csize,
+                            &unique_size, &unique_csize,
+                            &total_unique_chunks, &total_chunks)
+        return total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks
 
 
 cdef class ChunkKeyIterator:

+ 4 - 1
borg/helpers.py

@@ -174,11 +174,14 @@ class Statistics:
             self.usize += csize
 
     def print_(self, label, cache):
-        total_size, total_csize, unique_size, unique_csize = cache.chunks.summarize()
+        total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks = cache.chunks.summarize()
         print()
         print('                       Original size      Compressed size    Deduplicated size')
         print('%-15s %20s %20s %20s' % (label, format_file_size(self.osize), format_file_size(self.csize), format_file_size(self.usize)))
         print('All archives:   %20s %20s %20s' % (format_file_size(total_size), format_file_size(total_csize), format_file_size(unique_csize)))
+        print()
+        print('                       Unique chunks         Total chunks')
+        print('Chunk index:    %20d %20d' % (total_unique_chunks, total_chunks))
 
     def show_progress(self, item=None, final=False):
         if not final: