Browse Source

Merge pull request #6296 from ThomasWaldmann/cache-pre12-archive-meta

info: use a pre12-meta cache to accelerate stats for borg < 1.2 archives
TW 3 years ago
parent
commit
4896fe1560
2 changed files with 47 additions and 1 deletion
  1. src/borg/archive.py (34 additions, 1 deletion)
  2. src/borg/cache.py (13 additions, 0 deletions)

+ 34 - 1
src/borg/archive.py

@@ -105,6 +105,27 @@ class Statistics:
             'nfiles': self.nfiles,
         }
 
+    def as_raw_dict(self):
+        return {
+            'size': self.osize,
+            'csize': self.csize,
+            'nfiles': self.nfiles,
+            'size_parts': self.osize_parts,
+            'csize_parts': self.csize_parts,
+            'nfiles_parts': self.nfiles_parts,
+        }
+
+    @classmethod
+    def from_raw_dict(cls, **kw):
+        self = cls()
+        self.osize = kw['size']
+        self.csize = kw['csize']
+        self.nfiles = kw['nfiles']
+        self.osize_parts = kw['size_parts']
+        self.csize_parts = kw['csize_parts']
+        self.nfiles_parts = kw['nfiles_parts']
+        return self
+
     @property
     def osize_fmt(self):
         return format_file_size(self.osize, iec=self.iec)
@@ -627,6 +648,17 @@ Utilization of max. archive size: {csize_max:.0%}
         self.cache.commit()
 
     def calc_stats(self, cache, want_unique=True):
+        # caching wrapper around _calc_stats which is rather slow for archives made with borg < 1.2
+        have_borg12_meta = self.metadata.get('nfiles') is not None
+        try:
+            stats = Statistics.from_raw_dict(**cache.pre12_meta[self.fpr])
+        except KeyError:  # not in pre12_meta cache
+            stats = self._calc_stats(cache, want_unique=want_unique)
+            if not have_borg12_meta:
+                cache.pre12_meta[self.fpr] = stats.as_raw_dict()
+        return stats
+
+    def _calc_stats(self, cache, want_unique=True):
         have_borg12_meta = self.metadata.get('nfiles') is not None
 
         if have_borg12_meta and not want_unique:
@@ -639,7 +671,8 @@ Utilization of max. archive size: {csize_max:.0%}
             archive_index = ChunkIndex()
             sync = CacheSynchronizer(archive_index)
             add(self.id)
-            pi = ProgressIndicatorPercent(total=len(self.metadata.items), msg='Calculating statistics... %3d%%',
+            pi = ProgressIndicatorPercent(total=len(self.metadata.items),
+                                          msg='Calculating statistics for archive %s ... %%3d%%%%' % self.name,
                                           msgid='archive.calc_stats')
             for id, chunk in zip(self.metadata.items, self.repository.get_many(self.metadata.items)):
                 pi.show(increase=1)

+ 13 - 0
src/borg/cache.py

@@ -1,4 +1,5 @@
 import configparser
+import json
 import os
 import shutil
 import stat
@@ -407,6 +408,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
 
     def __init__(self, iec=False):
         self.iec = iec
+        self.pre12_meta = {}  # here we cache archive metadata for borg < 1.2
 
     def __str__(self):
         return self.str_format.format(self.format_tuple())
@@ -511,6 +513,8 @@ class LocalCache(CacheStatsMixin):
         os.makedirs(os.path.join(self.path, 'chunks.archive.d'))
         with SaveFile(os.path.join(self.path, files_cache_name()), binary=True):
             pass  # empty file
+        with SaveFile(os.path.join(self.path, 'pre12-meta'), binary=False) as fd:
+            json.dump(self.pre12_meta, fd, indent=4)
 
     def _do_open(self):
         self.cache_config.load()
@@ -521,6 +525,11 @@ class LocalCache(CacheStatsMixin):
             self.files = None
         else:
             self._read_files()
+        try:
+            with open(os.path.join(self.path, 'pre12-meta')) as fd:
+                self.pre12_meta = json.load(fd)
+        except (FileNotFoundError, json.JSONDecodeError):
+            pass
 
     def open(self):
         if not os.path.isdir(self.path):
@@ -529,6 +538,9 @@ class LocalCache(CacheStatsMixin):
         self.rollback()
 
     def close(self):
+        # save the pre12_meta cache in any case
+        with open(os.path.join(self.path, 'pre12-meta'), 'w') as fd:
+            json.dump(self.pre12_meta, fd, indent=4)
         if self.cache_config is not None:
             self.cache_config.close()
             self.cache_config = None
@@ -1066,6 +1078,7 @@ Chunk index:    {0.total_unique_chunks:20d}             unknown"""
         self.security_manager = SecurityManager(repository)
         self.security_manager.assert_secure(manifest, key, lock_wait=lock_wait)
 
+        self.pre12_meta = {}
         logger.warning('Note: --no-cache-sync is an experimental feature.')
 
     # Public API