浏览代码

cache_sync: compute size/count stats, borg info: consider part files

fixes #3522
Thomas Waldmann 6 年之前
父节点
当前提交
e0480800d5
共有 4 个文件被更改,包括 121 次插入和 13 次删除
  1. 7 3
      src/borg/archive.py
  2. 38 3
      src/borg/cache_sync/cache_sync.c
  3. 48 4
      src/borg/cache_sync/unpack.h
  4. 28 3
      src/borg/hashindex.pyx

+ 7 - 3
src/borg/archive.py

@@ -500,10 +500,14 @@ Utilization of max. archive size: {csize_max:.0%}
             add(id)
             data = self.key.decrypt(id, chunk)
             sync.feed(data)
-        stats = Statistics()
-        stats.osize, stats.csize, unique_size, stats.usize, unique_chunks, chunks = archive_index.stats_against(cache.chunks)
-        stats.nfiles = sync.num_files
+        size, csize, unique_size, unique_csize, unique_chunks, chunks = archive_index.stats_against(cache.chunks)
         pi.finish()
+        stats = Statistics()
+        stats.nfiles = sync.num_files_totals if self.consider_part_files \
+                       else sync.num_files_totals - sync.num_files_parts
+        stats.osize = size if self.consider_part_files else size - sync.size_parts
+        stats.csize = csize if self.consider_part_files else csize - sync.csize_parts
+        stats.usize = unique_csize  # the part files use same chunks as the full file
         return stats
 
     @contextmanager

+ 38 - 3
src/borg/cache_sync/cache_sync.c

@@ -38,7 +38,12 @@ cache_sync_init(HashIndex *chunks)
     unpack_init(&ctx->ctx);
     /* needs to be set only once */
     ctx->ctx.user.chunks = chunks;
-    ctx->ctx.user.num_files = 0;
+    ctx->ctx.user.parts.size = 0;
+    ctx->ctx.user.parts.csize = 0;
+    ctx->ctx.user.parts.num_files = 0;
+    ctx->ctx.user.totals.size = 0;
+    ctx->ctx.user.totals.csize = 0;
+    ctx->ctx.user.totals.num_files = 0;
     ctx->buf = NULL;
     ctx->head = 0;
     ctx->tail = 0;
@@ -63,9 +68,39 @@ cache_sync_error(const CacheSyncCtx *ctx)
 }
 
 static uint64_t
-cache_sync_num_files(const CacheSyncCtx *ctx)
+cache_sync_num_files_totals(const CacheSyncCtx *ctx)
 {
-    return ctx->ctx.user.num_files;
+    return ctx->ctx.user.totals.num_files;
+}
+
+static uint64_t
+cache_sync_num_files_parts(const CacheSyncCtx *ctx)
+{
+    return ctx->ctx.user.parts.num_files;
+}
+
+static uint64_t
+cache_sync_size_totals(const CacheSyncCtx *ctx)
+{
+    return ctx->ctx.user.totals.size;
+}
+
+static uint64_t
+cache_sync_size_parts(const CacheSyncCtx *ctx)
+{
+    return ctx->ctx.user.parts.size;
+}
+
+static uint64_t
+cache_sync_csize_totals(const CacheSyncCtx *ctx)
+{
+    return ctx->ctx.user.totals.csize;
+}
+
+static uint64_t
+cache_sync_csize_parts(const CacheSyncCtx *ctx)
+{
+    return ctx->ctx.user.parts.csize;
 }
 
 /**

+ 48 - 4
src/borg/cache_sync/unpack.h

@@ -40,7 +40,7 @@
 #endif
 
 typedef struct unpack_user {
-    /* Item.chunks is at the top level; we don't care about anything else,
+    /* Item.chunks and Item.part are at the top level; we don't care about anything else,
      * only need to track the current level to navigate arbitrary and unknown structure.
      * To discern keys from everything else on the top level we use expect_map_item_end.
      */
@@ -50,8 +50,6 @@ typedef struct unpack_user {
 
     HashIndex *chunks;
 
-    uint64_t num_files;
-
     /*
      * We don't care about most stuff. This flag tells us whether we're at the chunks structure,
      * meaning:
@@ -59,6 +57,13 @@ typedef struct unpack_user {
      *                        ^-HERE-^
      */
     int inside_chunks;
+
+    /* is this item a .part file (created for checkpointing inside files)? */
+    int part;
+
+    /* does this item have a chunks list in it? */
+    int has_chunks;
+
     enum {
         /* the next thing is a map key at the Item root level,
          * and it might be the "chunks" key we're looking for */
@@ -95,11 +100,28 @@ typedef struct unpack_user {
         expect_item_begin
     } expect;
 
+    /* collect values here for current chunklist entry */
     struct {
         char key[32];
         uint32_t csize;
         uint32_t size;
     } current;
+
+    /* summing up chunks sizes here within a single item */
+    struct {
+        uint64_t size, csize;
+    } item;
+
+    /* total sizes and files count coming from all files */
+    struct {
+        uint64_t size, csize, num_files;
+    } totals;
+
+    /* total sizes and files count coming from part files */
+    struct {
+        uint64_t size, csize, num_files;
+    } parts;
+
 } unpack_user;
 
 struct unpack_context;
@@ -270,6 +292,8 @@ static inline int unpack_callback_array_end(unpack_user* u)
                 return -1;
             }
         }
+        u->item.size += u->current.size;
+        u->item.csize += u->current.csize;
 
         u->expect = expect_entry_begin_or_chunks_end;
         break;
@@ -303,6 +327,10 @@ static inline int unpack_callback_map(unpack_user* u, unsigned int n)
         }
         /* This begins a new Item */
         u->expect = expect_chunks_map_key;
+        u->part = 0;
+        u->has_chunks = 0;
+        u->item.size = 0;
+        u->item.csize = 0;
     }
 
     if(u->inside_chunks) {
@@ -338,6 +366,19 @@ static inline int unpack_callback_map_end(unpack_user* u)
         SET_LAST_ERROR("Unexpected map end");
         return -1;
     }
+    if(u->level == 0) {
+        /* This ends processing of an Item */
+        if(u->has_chunks) {
+            if(u->part) {
+                u->parts.num_files += 1;
+                u->parts.size += u->item.size;
+                u->parts.csize += u->item.csize;
+            }
+            u->totals.num_files += 1;
+            u->totals.size += u->item.size;
+            u->totals.csize += u->item.csize;
+        }
+    }
     return 0;
 }
 
@@ -360,7 +401,10 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char*
         if(length == 6 && !memcmp("chunks", p, 6)) {
             u->expect = expect_chunks_begin;
             u->inside_chunks = 1;
-            u->num_files++;
+            u->has_chunks = 1;
+        } else if(length == 4 && !memcmp("part", p, 4)) {
+            u->expect = expect_map_item_end;
+            u->part = 1;
         } else {
             u->expect = expect_map_item_end;
         }

+ 28 - 3
src/borg/hashindex.pyx

@@ -46,7 +46,12 @@ cdef extern from "cache_sync/cache_sync.c":
 
     CacheSyncCtx *cache_sync_init(HashIndex *chunks)
     const char *cache_sync_error(const CacheSyncCtx *ctx)
-    uint64_t cache_sync_num_files(const CacheSyncCtx *ctx)
+    uint64_t cache_sync_num_files_totals(const CacheSyncCtx *ctx)
+    uint64_t cache_sync_num_files_parts(const CacheSyncCtx *ctx)
+    uint64_t cache_sync_size_totals(const CacheSyncCtx *ctx)
+    uint64_t cache_sync_size_parts(const CacheSyncCtx *ctx)
+    uint64_t cache_sync_csize_totals(const CacheSyncCtx *ctx)
+    uint64_t cache_sync_csize_parts(const CacheSyncCtx *ctx)
     int cache_sync_feed(CacheSyncCtx *ctx, void *data, uint32_t length)
     void cache_sync_free(CacheSyncCtx *ctx)
 
@@ -525,5 +530,25 @@ cdef class CacheSynchronizer:
                 raise ValueError('cache_sync_feed failed: ' + error.decode('ascii'))
 
     @property
-    def num_files(self):
-        return cache_sync_num_files(self.sync)
+    def num_files_totals(self):
+        return cache_sync_num_files_totals(self.sync)
+
+    @property
+    def num_files_parts(self):
+        return cache_sync_num_files_parts(self.sync)
+
+    @property
+    def size_totals(self):
+        return cache_sync_size_totals(self.sync)
+
+    @property
+    def size_parts(self):
+        return cache_sync_size_parts(self.sync)
+
+    @property
+    def csize_totals(self):
+        return cache_sync_csize_totals(self.sync)
+
+    @property
+    def csize_parts(self):
+        return cache_sync_csize_parts(self.sync)