Merge pull request #4391 from ThomasWaldmann/archive-stat-meta

include size/csize/nfiles[_parts] stats into archive, fixes #3241
TW, 6 years ago
parent commit dd2a3d42fb
6 files changed, 83 additions and 40 deletions

  1. src/borg/archive.py        (+68, -31)
  2. src/borg/archiver.py       (+2, -2)
  3. src/borg/cache.py          (+4, -4)
  4. src/borg/constants.py      (+1, -1)
  5. src/borg/helpers/checks.py (+1, -1)
  6. src/borg/item.pyx          (+7, -1)
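
With this change, `borg create` persists the size/compressed-size/file-count totals (and their part-file counterparts) directly in the archive metadata, so stats queries that do not need the deduplicated size can skip decrypting and replaying every item stream. A rough sketch of the resulting metadata, with invented values:

```python
# Illustration only (values invented): archive metadata after this commit
# carries the totals alongside the existing keys.
metadata = {
    'version': 1,
    'name': 'my-archive',
    'time': '2019-03-01T12:00:00.000000',
    # ... cmdline, hostname, username, chunker_params, ...
    'size': 10_000_000,   # original size of the archived file content
    'csize': 4_000_000,   # compressed (and encrypted) size as stored
    'nfiles': 1234,       # number of regular files
    'size_parts': 0,      # the same three counters, restricted to
    'csize_parts': 0,     # checkpoint part files (written when a backup
    'nfiles_parts': 0,    # is interrupted mid-file)
}
```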

src/borg/archive.py (+68, -31)

@@ -54,13 +54,20 @@ class Statistics:
     def __init__(self, output_json=False):
         self.output_json = output_json
         self.osize = self.csize = self.usize = self.nfiles = 0
+        self.osize_parts = self.csize_parts = self.usize_parts = self.nfiles_parts = 0
         self.last_progress = 0  # timestamp when last progress was shown
 
-    def update(self, size, csize, unique):
-        self.osize += size
-        self.csize += csize
-        if unique:
-            self.usize += csize
+    def update(self, size, csize, unique, part=False):
+        if not part:
+            self.osize += size
+            self.csize += csize
+            if unique:
+                self.usize += csize
+        else:
+            self.osize_parts += size
+            self.csize_parts += csize
+            if unique:
+                self.usize_parts += csize
 
     def __add__(self, other):
         if not isinstance(other, Statistics):
@@ -70,6 +77,10 @@ class Statistics:
         stats.csize = self.csize + other.csize
         stats.usize = self.usize + other.usize
         stats.nfiles = self.nfiles + other.nfiles
+        stats.osize_parts = self.osize_parts + other.osize_parts
+        stats.csize_parts = self.csize_parts + other.csize_parts
+        stats.usize_parts = self.usize_parts + other.usize_parts
+        stats.nfiles_parts = self.nfiles_parts + other.nfiles_parts
         return stats
 
     summary = "{label:15} {stats.osize_fmt:>20s} {stats.csize_fmt:>20s} {stats.usize_fmt:>20s}"
@@ -492,7 +503,7 @@ Utilization of max. archive size: {csize_max:.0%}
         del self.manifest.archives[self.checkpoint_name]
         self.cache.chunk_decref(self.id, self.stats)
 
-    def save(self, name=None, comment=None, timestamp=None, additional_metadata=None):
+    def save(self, name=None, comment=None, timestamp=None, stats=None, additional_metadata=None):
         name = name or self.name
         if name in self.manifest.archives:
             raise self.AlreadyExists(name)
@@ -518,6 +529,14 @@ Utilization of max. archive size: {csize_max:.0%}
             'time_end': end.strftime(ISO_FORMAT),
             'chunker_params': self.chunker_params,
         }
+        if stats is not None:
+            metadata.update({
+                'size': stats.osize,
+                'csize': stats.csize,
+                'nfiles': stats.nfiles,
+                'size_parts': stats.osize_parts,
+                'csize_parts': stats.csize_parts,
+                'nfiles_parts': stats.nfiles_parts})
         metadata.update(additional_metadata or {})
         metadata = ArchiveItem(metadata)
         data = self.key.pack_and_authenticate_metadata(metadata.as_dict(), context=b'archive')
@@ -530,30 +549,48 @@ Utilization of max. archive size: {csize_max:.0%}
         self.repository.commit(compact=False)
         self.cache.commit()
 
-    def calc_stats(self, cache):
-        def add(id):
-            entry = cache.chunks[id]
-            archive_index.add(id, 1, entry.size, entry.csize)
-
-        archive_index = ChunkIndex()
-        sync = CacheSynchronizer(archive_index)
-        add(self.id)
-        pi = ProgressIndicatorPercent(total=len(self.metadata.items), msg='Calculating statistics... %3d%%')
-        for id, chunk in zip(self.metadata.items, self.repository.get_many(self.metadata.items)):
-            pi.show(increase=1)
-            add(id)
-            data = self.key.decrypt(id, chunk)
-            sync.feed(data)
-        unique_csize = archive_index.stats_against(cache.chunks)[3]
-        pi.finish()
+    def calc_stats(self, cache, want_unique=True):
+        have_borg12_meta = self.metadata.get('nfiles') is not None
+
+        if have_borg12_meta and not want_unique:
+            unique_csize = 0
+        else:
+            def add(id):
+                entry = cache.chunks[id]
+                archive_index.add(id, 1, entry.size, entry.csize)
+
+            archive_index = ChunkIndex()
+            sync = CacheSynchronizer(archive_index)
+            add(self.id)
+            pi = ProgressIndicatorPercent(total=len(self.metadata.items), msg='Calculating statistics... %3d%%')
+            for id, chunk in zip(self.metadata.items, self.repository.get_many(self.metadata.items)):
+                pi.show(increase=1)
+                add(id)
+                data = self.key.decrypt(id, chunk)
+                sync.feed(data)
+            unique_csize = archive_index.stats_against(cache.chunks)[3]
+            pi.finish()
+
         stats = Statistics()
-        stats.nfiles = sync.num_files_totals if self.consider_part_files \
-                       else sync.num_files_totals - sync.num_files_parts
-        stats.osize = sync.size_totals if self.consider_part_files \
-                      else sync.size_totals - sync.size_parts
-        stats.csize = sync.csize_totals if self.consider_part_files \
-                      else sync.csize_totals - sync.csize_parts
         stats.usize = unique_csize  # the part files use same chunks as the full file
+        if not have_borg12_meta:
+            if self.consider_part_files:
+                stats.nfiles = sync.num_files_totals
+                stats.osize = sync.size_totals
+                stats.csize = sync.csize_totals
+            else:
+                stats.nfiles = sync.num_files_totals - sync.num_files_parts
+                stats.osize = sync.size_totals - sync.size_parts
+                stats.csize = sync.csize_totals - sync.csize_parts
+        else:
+            if self.consider_part_files:
+                stats.nfiles = self.metadata.nfiles_parts + self.metadata.nfiles
+                stats.osize = self.metadata.size_parts + self.metadata.size
+                stats.csize = self.metadata.csize_parts + self.metadata.csize
+            else:
+                stats.nfiles = self.metadata.nfiles
+                stats.osize = self.metadata.size
+                stats.csize = self.metadata.csize
         return stats
 
     @contextmanager
@@ -1057,9 +1094,9 @@ class ChunksProcessor:
 
                 # if we created part files, we have referenced all chunks from the part files,
                 # but we also will reference the same chunks also from the final, complete file:
-                dummy_stats = Statistics()  # do not count this data volume twice
                 for chunk in item.chunks:
-                    cache.chunk_incref(chunk.id, dummy_stats, size=chunk.size)
+                    cache.chunk_incref(chunk.id, stats, size=chunk.size, part=True)
+                stats.nfiles_parts += part_number - 1
 
 
 class FilesystemObjectProcessors:
@@ -1882,7 +1919,7 @@ class ArchiveRecreater:
             return
         if comment is None:
             comment = archive.metadata.get('comment', '')
-        target.save(comment=comment, additional_metadata={
+        target.save(comment=comment, stats=target.stats, additional_metadata={
             # keep some metadata as in original archive:
             'time': archive.metadata.time,
             'time_end': archive.metadata.get('time_end') or archive.metadata.time,
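
The `Statistics` changes above route part-file volume into separate `*_parts` counters instead of dropping it, and `calc_stats()` gains a fast path that trusts the stored totals when present (`have_borg12_meta`), only replaying the item stream when the deduplicated size is actually wanted. A minimal sketch of the new `update()` accounting, assuming a borg checkout with this patch applied:

```python
from borg.archive import Statistics  # assumes a patched borg 1.2 dev tree

stats = Statistics()
stats.update(1000, 400, unique=True)              # regular file data
stats.update(1000, 400, unique=False, part=True)  # checkpoint part file
# regular counters are untouched by part-file updates, and vice versa:
assert (stats.osize, stats.csize, stats.usize) == (1000, 400, 400)
assert (stats.osize_parts, stats.csize_parts, stats.usize_parts) == (1000, 400, 0)
```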

src/borg/archiver.py (+2, -2)

@@ -502,11 +502,11 @@ class Archiver:
                                   keep_exclude_tags=args.keep_exclude_tags, skip_inodes=skip_inodes,
                                   restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run)
             if not dry_run:
-                archive.save(comment=args.comment, timestamp=args.timestamp)
                 if args.progress:
                     archive.stats.show_progress(final=True)
-                args.stats |= args.json
                 archive.stats += fso.stats
+                archive.save(comment=args.comment, timestamp=args.timestamp, stats=archive.stats)
+                args.stats |= args.json
                 if args.stats:
                     if args.json:
                         json_print(basic_json_data(manifest, cache=cache, extra={
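
The reordering is the substance of this hunk: `fso.stats` must be folded into `archive.stats` before `save()` runs, since `save()` now serializes those totals into the archive metadata. Schematically:

```python
# Order matters now that save() persists the stats:
archive.stats += fso.stats          # complete the totals first ...
archive.save(comment=args.comment, timestamp=args.timestamp,
             stats=archive.stats)   # ... then write them into the archive
```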

src/borg/cache.py (+4, -4)

@@ -903,11 +903,11 @@ class LocalCache(CacheStatsMixin):
                             id, stored_size, size))
         return refcount
 
-    def chunk_incref(self, id, stats, size=None):
+    def chunk_incref(self, id, stats, size=None, part=False):
         if not self.txn_active:
             self.begin_txn()
         count, _size, csize = self.chunks.incref(id)
-        stats.update(_size, csize, False)
+        stats.update(_size, csize, False, part=part)
         return ChunkListEntry(id, _size, csize)
 
     def chunk_decref(self, id, stats, wait=True):
@@ -1047,7 +1047,7 @@ Chunk index:    {0.total_unique_chunks:20d}             unknown"""
             self.chunks[id] = entry._replace(size=size)
         return entry.refcount
 
-    def chunk_incref(self, id, stats, size=None):
+    def chunk_incref(self, id, stats, size=None, part=False):
         if not self._txn_active:
             self.begin_txn()
         count, _size, csize = self.chunks.incref(id)
@@ -1055,7 +1055,7 @@ Chunk index:    {0.total_unique_chunks:20d}             unknown"""
         # size or add_chunk); we can't add references to those (size=0 is invalid) and generally don't try to.
         size = _size or size
         assert size
-        stats.update(size, csize, False)
+        stats.update(size, csize, False, part=part)
         return ChunkListEntry(id, size, csize)
 
     def chunk_decref(self, id, stats, wait=True):
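
Both cache implementations (`LocalCache` and the ad-hoc cache below it) gain the same pass-through `part=` flag. Note that `chunk_incref()` keeps passing `unique=False`: re-referencing an already stored chunk never grows the deduplicated size, it only adds to the (possibly `*_parts`) running totals. A condensed view of the pattern, with names taken from the hunks above:

```python
# Condensed from the hunks above: the flag just rides along into Statistics.
def chunk_incref(self, id, stats, size=None, part=False):
    count, _size, csize = self.chunks.incref(id)  # bump the refcount
    stats.update(_size, csize, False, part=part)  # False: never counted as unique
    return ChunkListEntry(id, _size, csize)
```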

src/borg/constants.py (+1, -1)

@@ -12,7 +12,7 @@ ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'hostname', 'us
                           'comment', 'chunker_params',
                           'recreate_cmdline',
                           'recreate_source_id', 'recreate_args', 'recreate_partial_chunks',  # used in 1.1.0b1 .. b2
-                          ])
+                          'size', 'csize', 'nfiles', 'size_parts', 'csize_parts', 'nfiles_parts', ])
 
 # this is the set of keys that are always present in archives:
 REQUIRED_ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'time', ])
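
The six new keys are added to `ARCHIVE_KEYS` (accepted) but deliberately not to `REQUIRED_ARCHIVE_KEYS`, so archives written by older borg versions still validate. That is also why `calc_stats()` probes for the metadata instead of assuming it:

```python
# From the archive.py hunk above: detect pre-1.2 archives lacking the keys.
have_borg12_meta = self.metadata.get('nfiles') is not None
```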

src/borg/helpers/checks.py (+1, -1)

@@ -35,5 +35,5 @@ def check_extension_modules():
         raise ExtensionModuleError
     if platform.API_VERSION != platform.OS_API_VERSION or platform.API_VERSION != '1.2_03':
         raise ExtensionModuleError
-    if item.API_VERSION != '1.1_05':
+    if item.API_VERSION != '1.1_06':
         raise ExtensionModuleError

src/borg/item.pyx (+7, -1)

@@ -12,7 +12,7 @@ cdef extern from "_item.c":
     object _optr_to_object(object bytes)
 
 
-API_VERSION = '1.1_05'
+API_VERSION = '1.1_06'
 
 
 class PropDict:
@@ -368,6 +368,12 @@ class ArchiveItem(PropDict):
     recreate_source_id = PropDict._make_property('recreate_source_id', bytes)
     recreate_args = PropDict._make_property('recreate_args', list)  # list of s-e-str
     recreate_partial_chunks = PropDict._make_property('recreate_partial_chunks', list)  # list of tuples
+    size = PropDict._make_property('size', int)
+    csize = PropDict._make_property('csize', int)
+    nfiles = PropDict._make_property('nfiles', int)
+    size_parts = PropDict._make_property('size_parts', int)
+    csize_parts = PropDict._make_property('csize_parts', int)
+    nfiles_parts = PropDict._make_property('nfiles_parts', int)
 
 
 class ManifestItem(PropDict):
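
Finally, the new `ArchiveItem` properties change the interface of the compiled item module, so `API_VERSION` is bumped from `1.1_05` to `1.1_06` and the pin in `checks.py` is updated to match. The effect is a fail-fast at startup, roughly:

```python
# Schematic of the startup guard: a stale compiled item extension
# (still built as '1.1_05') is rejected immediately.
from borg import item
from borg.helpers import ExtensionModuleError  # assumed import path

if item.API_VERSION != '1.1_06':
    raise ExtensionModuleError
```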