瀏覽代碼

cache_sync: compute size/count stats, borg info: consider part files (#4286)

cache_sync: compute size/count stats, borg info: consider part files

fixes #3522
TW 6 年之前
父節點
當前提交
c3f40de606
共有 4 個文件被更改,包括 132 次插入 和 22 次删除
  1. + 9 - 3
      src/borg/archive.py
  2. + 38 - 3
      src/borg/cache_sync/cache_sync.c
  3. + 57 - 13
      src/borg/cache_sync/unpack.h
  4. + 28 - 3
      src/borg/hashindex.pyx

+ 9 - 3
src/borg/archive.py

@@ -510,10 +510,16 @@ Utilization of max. archive size: {csize_max:.0%}
             add(id)
             data = self.key.decrypt(id, chunk)
             sync.feed(data)
-        stats = Statistics()
-        stats.osize, stats.csize, unique_size, stats.usize, unique_chunks, chunks = archive_index.stats_against(cache.chunks)
-        stats.nfiles = sync.num_files
+        unique_csize = archive_index.stats_against(cache.chunks)[3]
         pi.finish()
+        stats = Statistics()
+        stats.nfiles = sync.num_files_totals if self.consider_part_files \
+                       else sync.num_files_totals - sync.num_files_parts
+        stats.osize = sync.size_totals if self.consider_part_files \
+                      else sync.size_totals - sync.size_parts
+        stats.csize = sync.csize_totals if self.consider_part_files \
+                      else sync.csize_totals - sync.csize_parts
+        stats.usize = unique_csize  # the part files use same chunks as the full file
         return stats
 
     @contextmanager

+ 38 - 3
src/borg/cache_sync/cache_sync.c

@@ -38,7 +38,12 @@ cache_sync_init(HashIndex *chunks)
     unpack_init(&ctx->ctx);
     /* needs to be set only once */
     ctx->ctx.user.chunks = chunks;
-    ctx->ctx.user.num_files = 0;
+    ctx->ctx.user.parts.size = 0;
+    ctx->ctx.user.parts.csize = 0;
+    ctx->ctx.user.parts.num_files = 0;
+    ctx->ctx.user.totals.size = 0;
+    ctx->ctx.user.totals.csize = 0;
+    ctx->ctx.user.totals.num_files = 0;
     ctx->buf = NULL;
     ctx->head = 0;
     ctx->tail = 0;
@@ -63,9 +68,39 @@ cache_sync_error(const CacheSyncCtx *ctx)
 }
 
 static uint64_t
-cache_sync_num_files(const CacheSyncCtx *ctx)
+cache_sync_num_files_totals(const CacheSyncCtx *ctx)
 {
-    return ctx->ctx.user.num_files;
+    return ctx->ctx.user.totals.num_files;
+}
+
+static uint64_t
+cache_sync_num_files_parts(const CacheSyncCtx *ctx)
+{
+    return ctx->ctx.user.parts.num_files;
+}
+
+static uint64_t
+cache_sync_size_totals(const CacheSyncCtx *ctx)
+{
+    return ctx->ctx.user.totals.size;
+}
+
+static uint64_t
+cache_sync_size_parts(const CacheSyncCtx *ctx)
+{
+    return ctx->ctx.user.parts.size;
+}
+
+static uint64_t
+cache_sync_csize_totals(const CacheSyncCtx *ctx)
+{
+    return ctx->ctx.user.totals.csize;
+}
+
+static uint64_t
+cache_sync_csize_parts(const CacheSyncCtx *ctx)
+{
+    return ctx->ctx.user.parts.csize;
 }
 
 /**

+ 57 - 13
src/borg/cache_sync/unpack.h

@@ -40,7 +40,7 @@
 #endif
 
 typedef struct unpack_user {
-    /* Item.chunks is at the top level; we don't care about anything else,
+    /* Item.chunks and Item.part are at the top level; we don't care about anything else,
      * only need to track the current level to navigate arbitrary and unknown structure.
      * To discern keys from everything else on the top level we use expect_map_item_end.
      */
@@ -50,8 +50,6 @@ typedef struct unpack_user {
 
     HashIndex *chunks;
 
-    uint64_t num_files;
-
     /*
      * We don't care about most stuff. This flag tells us whether we're at the chunks structure,
      * meaning:
@@ -59,16 +57,23 @@ typedef struct unpack_user {
      *                        ^-HERE-^
      */
     int inside_chunks;
+
+    /* is this item a .part file (created for checkpointing inside files)? */
+    int part;
+
+    /* does this item have a chunks list in it? */
+    int has_chunks;
+
     enum {
         /* the next thing is a map key at the Item root level,
-         * and it might be the "chunks" key we're looking for */
-        expect_chunks_map_key,
+         * and it might be the "chunks" or "part" key we're looking for */
+        expect_map_key,
 
-        /* blocking state to expect_chunks_map_key
+        /* blocking state to expect_map_key
          * {     'stuff': <complex and arbitrary structure>,     'chunks': [
-         * ecmk    ->   emie    ->   ->       ->      ->   ecmk  ecb       eeboce
+         * emk     ->   emie    ->   ->       ->      ->   emk   ecb       eeboce
          *                (nested containers are tracked via level)
-         * ecmk=expect_chunks_map_key, emie=expect_map_item_end, ecb=expect_chunks_begin,
+         * emk=expect_map_key, emie=expect_map_item_end, ecb=expect_chunks_begin,
          * eeboce=expect_entry_begin_or_chunks_end
          */
         expect_map_item_end,
@@ -95,11 +100,28 @@ typedef struct unpack_user {
         expect_item_begin
     } expect;
 
+    /* collect values here for current chunklist entry */
     struct {
         char key[32];
         uint32_t csize;
         uint32_t size;
     } current;
+
+    /* summing up chunks sizes here within a single item */
+    struct {
+        uint64_t size, csize;
+    } item;
+
+    /* total sizes and files count coming from all files */
+    struct {
+        uint64_t size, csize, num_files;
+    } totals;
+
+    /* total sizes and files count coming from part files */
+    struct {
+        uint64_t size, csize, num_files;
+    } parts;
+
 } unpack_user;
 
 struct unpack_context;
@@ -107,7 +129,7 @@ typedef struct unpack_context unpack_context;
 typedef int (*execute_fn)(unpack_context *ctx, const char* data, size_t len, size_t* off);
 
 #define UNEXPECTED(what)                                            \
-    if(u->inside_chunks || u->expect == expect_chunks_map_key) { \
+    if(u->inside_chunks || u->expect == expect_map_key) { \
         SET_LAST_ERROR("Unexpected object: " what);                 \
         return -1;                                                  \
     }
@@ -270,6 +292,8 @@ static inline int unpack_callback_array_end(unpack_user* u)
                 return -1;
             }
         }
+        u->item.size += u->current.size;
+        u->item.csize += u->current.csize;
 
         u->expect = expect_entry_begin_or_chunks_end;
         break;
@@ -302,7 +326,11 @@ static inline int unpack_callback_map(unpack_user* u, unsigned int n)
             return -1;
         }
         /* This begins a new Item */
-        u->expect = expect_chunks_map_key;
+        u->expect = expect_map_key;
+        u->part = 0;
+        u->has_chunks = 0;
+        u->item.size = 0;
+        u->item.csize = 0;
     }
 
     if(u->inside_chunks) {
@@ -321,7 +349,7 @@ static inline int unpack_callback_map_item(unpack_user* u, unsigned int current)
     if(u->level == 1) {
         switch(u->expect) {
         case expect_map_item_end:
-            u->expect = expect_chunks_map_key;
+            u->expect = expect_map_key;
             break;
         default:
             SET_LAST_ERROR("Unexpected map item");
@@ -338,6 +366,19 @@ static inline int unpack_callback_map_end(unpack_user* u)
         SET_LAST_ERROR("Unexpected map end");
         return -1;
     }
+    if(u->level == 0) {
+        /* This ends processing of an Item */
+        if(u->has_chunks) {
+            if(u->part) {
+                u->parts.num_files += 1;
+                u->parts.size += u->item.size;
+                u->parts.csize += u->item.csize;
+            }
+            u->totals.num_files += 1;
+            u->totals.size += u->item.size;
+            u->totals.csize += u->item.csize;
+        }
+    }
     return 0;
 }
 
@@ -356,11 +397,14 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char*
         memcpy(u->current.key, p, 32);
         u->expect = expect_size;
         break;
-    case expect_chunks_map_key:
+    case expect_map_key:
         if(length == 6 && !memcmp("chunks", p, 6)) {
             u->expect = expect_chunks_begin;
             u->inside_chunks = 1;
-            u->num_files++;
+            u->has_chunks = 1;
+        } else if(length == 4 && !memcmp("part", p, 4)) {
+            u->expect = expect_map_item_end;
+            u->part = 1;
         } else {
             u->expect = expect_map_item_end;
         }

+ 28 - 3
src/borg/hashindex.pyx

@@ -45,7 +45,12 @@ cdef extern from "cache_sync/cache_sync.c":
 
     CacheSyncCtx *cache_sync_init(HashIndex *chunks)
     const char *cache_sync_error(const CacheSyncCtx *ctx)
-    uint64_t cache_sync_num_files(const CacheSyncCtx *ctx)
+    uint64_t cache_sync_num_files_totals(const CacheSyncCtx *ctx)
+    uint64_t cache_sync_num_files_parts(const CacheSyncCtx *ctx)
+    uint64_t cache_sync_size_totals(const CacheSyncCtx *ctx)
+    uint64_t cache_sync_size_parts(const CacheSyncCtx *ctx)
+    uint64_t cache_sync_csize_totals(const CacheSyncCtx *ctx)
+    uint64_t cache_sync_csize_parts(const CacheSyncCtx *ctx)
     int cache_sync_feed(CacheSyncCtx *ctx, void *data, uint32_t length)
     void cache_sync_free(CacheSyncCtx *ctx)
 
@@ -526,5 +531,25 @@ cdef class CacheSynchronizer:
                 raise ValueError('cache_sync_feed failed: ' + error.decode('ascii'))
 
     @property
-    def num_files(self):
-        return cache_sync_num_files(self.sync)
+    def num_files_totals(self):
+        return cache_sync_num_files_totals(self.sync)
+
+    @property
+    def num_files_parts(self):
+        return cache_sync_num_files_parts(self.sync)
+
+    @property
+    def size_totals(self):
+        return cache_sync_size_totals(self.sync)
+
+    @property
+    def size_parts(self):
+        return cache_sync_size_parts(self.sync)
+
+    @property
+    def csize_totals(self):
+        return cache_sync_csize_totals(self.sync)
+
+    @property
+    def csize_parts(self):
+        return cache_sync_csize_parts(self.sync)