Переглянути джерело

calc_stats: deduplicated size now, was deduplicated csize

also: remove pre12_meta cache
Thomas Waldmann 3 роки тому
батько
коміт
49adb77157
2 змінені файли: 23 додано та 22 видалено
  1. 23 10
      src/borg/archive.py
  2. 0 12
      src/borg/cache.py

+ 23 - 10
src/borg/archive.py

@@ -629,18 +629,31 @@ Utilization of max. archive size: {csize_max:.0%}
         self.cache.commit()
 
     def calc_stats(self, cache, want_unique=True):
-        # caching wrapper around _calc_stats which is rather slow for archives made with borg < 1.2
-        have_borg12_meta = self.metadata.get('nfiles') is not None
-        try:
-            stats = Statistics.from_raw_dict(**cache.pre12_meta[self.fpr])
-        except KeyError:  # not in pre12_meta cache
-            stats = self._calc_stats(cache, want_unique=want_unique)
-            if not have_borg12_meta:
-                cache.pre12_meta[self.fpr] = stats.as_raw_dict()
-        return stats
+        if not want_unique:
+            unique_size = 0
+        else:
+            def add(id):
+                entry = cache.chunks[id]
+                archive_index.add(id, 1, entry.size)
+
+            archive_index = ChunkIndex()
+            sync = CacheSynchronizer(archive_index)
+            add(self.id)
+            # we must escape any % char in the archive name, because we use it in a format string, see #6500
+            arch_name_escd = self.name.replace('%', '%%')
+            pi = ProgressIndicatorPercent(total=len(self.metadata.items),
+                                          msg='Calculating statistics for archive %s ... %%3.0f%%%%' % arch_name_escd,
+                                          msgid='archive.calc_stats')
+            for id, chunk in zip(self.metadata.items, self.repository.get_many(self.metadata.items)):
+                pi.show(increase=1)
+                add(id)
+                data = self.key.decrypt(id, chunk)
+                sync.feed(data)
+            unique_size = archive_index.stats_against(cache.chunks)[1]
+            pi.finish()
 
-    def _calc_stats(self, cache, want_unique=True):
         stats = Statistics(iec=self.iec)
+        stats.usize = unique_size  # the part files use same chunks as the full file
         stats.nfiles = self.metadata.nfiles
         stats.osize = self.metadata.size
         if self.consider_part_files:

+ 0 - 12
src/borg/cache.py

@@ -413,7 +413,6 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
 
     def __init__(self, iec=False):
         self.iec = iec
-        self.pre12_meta = {}  # here we cache archive metadata for borg < 1.2
 
     def __str__(self):
         return self.str_format.format(self.format_tuple())
@@ -511,8 +510,6 @@ class LocalCache(CacheStatsMixin):
         os.makedirs(os.path.join(self.path, 'chunks.archive.d'))
         with SaveFile(os.path.join(self.path, files_cache_name()), binary=True):
             pass  # empty file
-        with SaveFile(os.path.join(self.path, 'pre12-meta'), binary=False) as fd:
-            json.dump(self.pre12_meta, fd, indent=4)
 
     def _do_open(self):
         self.cache_config.load()
@@ -523,11 +520,6 @@ class LocalCache(CacheStatsMixin):
             self.files = None
         else:
             self._read_files()
-        try:
-            with open(os.path.join(self.path, 'pre12-meta')) as fd:
-                self.pre12_meta = json.load(fd)
-        except (FileNotFoundError, json.JSONDecodeError):
-            pass
 
     def open(self):
         if not os.path.isdir(self.path):
@@ -536,9 +528,6 @@ class LocalCache(CacheStatsMixin):
         self.rollback()
 
     def close(self):
-        # save the pre12_meta cache in any case
-        with open(os.path.join(self.path, 'pre12-meta'), 'w') as fd:
-            json.dump(self.pre12_meta, fd, indent=4)
         if self.cache_config is not None:
             self.cache_config.close()
             self.cache_config = None
@@ -1037,7 +1026,6 @@ Chunk index:    {0.total_unique_chunks:20d}             unknown"""
         self.security_manager = SecurityManager(repository)
         self.security_manager.assert_secure(manifest, key, lock_wait=lock_wait)
 
-        self.pre12_meta = {}
         logger.warning('Note: --no-cache-sync is an experimental feature.')
 
     # Public API