@@ -58,60 +58,46 @@ class Statistics:

    def __init__(self, output_json=False, iec=False):
        self.output_json = output_json
        self.iec = iec
-        self.osize = self.csize = self.usize = self.nfiles = 0
-        self.osize_parts = self.csize_parts = self.usize_parts = self.nfiles_parts = 0
+        self.osize = self.nfiles = 0
+        self.osize_parts = self.nfiles_parts = 0
        self.last_progress = 0  # timestamp when last progress was shown

-    def update(self, size, csize, unique, part=False):
+    def update(self, size, part=False):
        if not part:
            self.osize += size
-            self.csize += csize
-            if unique:
-                self.usize += csize
        else:
            self.osize_parts += size
-            self.csize_parts += csize
-            if unique:
-                self.usize_parts += csize

    def __add__(self, other):
        if not isinstance(other, Statistics):
            raise TypeError('can only add Statistics objects')
        stats = Statistics(self.output_json, self.iec)
        stats.osize = self.osize + other.osize
-        stats.csize = self.csize + other.csize
-        stats.usize = self.usize + other.usize
        stats.nfiles = self.nfiles + other.nfiles
        stats.osize_parts = self.osize_parts + other.osize_parts
-        stats.csize_parts = self.csize_parts + other.csize_parts
-        stats.usize_parts = self.usize_parts + other.usize_parts
        stats.nfiles_parts = self.nfiles_parts + other.nfiles_parts
        return stats

-    summary = "{label:15} {stats.osize_fmt:>20s} {stats.csize_fmt:>20s} {stats.usize_fmt:>20s}"
+    summary = "{label:15} {stats.osize_fmt:>20s}"

    def __str__(self):
        return self.summary.format(stats=self, label='This archive:')

    def __repr__(self):
-        return "<{cls} object at {hash:#x} ({self.osize}, {self.csize}, {self.usize})>".format(
+        return "<{cls} object at {hash:#x} ({self.osize})>".format(
            cls=type(self).__name__, hash=id(self), self=self)

    def as_dict(self):
        return {
            'original_size': FileSize(self.osize, iec=self.iec),
-            'compressed_size': FileSize(self.csize, iec=self.iec),
-            'deduplicated_size': FileSize(self.usize, iec=self.iec),
            'nfiles': self.nfiles,
        }

    def as_raw_dict(self):
        return {
            'size': self.osize,
-            'csize': self.csize,
            'nfiles': self.nfiles,
            'size_parts': self.osize_parts,
-            'csize_parts': self.csize_parts,
            'nfiles_parts': self.nfiles_parts,
        }
@@ -119,10 +105,8 @@ class Statistics:
    def from_raw_dict(cls, **kw):
        self = cls()
        self.osize = kw['size']
-        self.csize = kw['csize']
        self.nfiles = kw['nfiles']
        self.osize_parts = kw['size_parts']
-        self.csize_parts = kw['csize_parts']
        self.nfiles_parts = kw['nfiles_parts']
        return self

@@ -130,14 +114,6 @@ class Statistics:
    def osize_fmt(self):
        return format_file_size(self.osize, iec=self.iec)

-    @property
-    def usize_fmt(self):
-        return format_file_size(self.usize, iec=self.iec)
-
-    @property
-    def csize_fmt(self):
-        return format_file_size(self.csize, iec=self.iec)
-
    def show_progress(self, item=None, final=False, stream=None, dt=None):
        now = time.monotonic()
        if dt is None or now - self.last_progress > dt:
@@ -158,7 +134,7 @@ class Statistics:
            else:
                columns, lines = get_terminal_size()
                if not final:
-                    msg = '{0.osize_fmt} O {0.csize_fmt} C {0.usize_fmt} D {0.nfiles} N '.format(self)
+                    msg = '{0.osize_fmt} O {0.nfiles} N '.format(self)
                    path = remove_surrogates(item.path) if item else ''
                    space = columns - swidth(msg)
                    if space < 12:
@@ -614,10 +590,8 @@ Utilization of max. archive size: {csize_max:.0%}
        if stats is not None:
            metadata.update({
                'size': stats.osize,
-                'csize': stats.csize,
                'nfiles': stats.nfiles,
                'size_parts': stats.osize_parts,
-                'csize_parts': stats.csize_parts,
                'nfiles_parts': stats.nfiles_parts})
        metadata.update(additional_metadata or {})
        metadata = ArchiveItem(metadata)
@@ -651,51 +625,12 @@ Utilization of max. archive size: {csize_max:.0%}
        return stats

    def _calc_stats(self, cache, want_unique=True):
-        have_borg12_meta = self.metadata.get('nfiles') is not None
-
-        if have_borg12_meta and not want_unique:
-            unique_csize = 0
-        else:
-            def add(id):
-                entry = cache.chunks[id]
-                archive_index.add(id, 1, entry.size, entry.csize)
-
-            archive_index = ChunkIndex()
-            sync = CacheSynchronizer(archive_index)
-            add(self.id)
-            # we must escape any % char in the archive name, because we use it in a format string, see #6500
-            arch_name_escd = self.name.replace('%', '%%')
-            pi = ProgressIndicatorPercent(total=len(self.metadata.items),
-                                          msg='Calculating statistics for archive %s ... %%3.0f%%%%' % arch_name_escd,
-                                          msgid='archive.calc_stats')
-            for id, chunk in zip(self.metadata.items, self.repository.get_many(self.metadata.items)):
-                pi.show(increase=1)
-                add(id)
-                data = self.key.decrypt(id, chunk)
-                sync.feed(data)
-            unique_csize = archive_index.stats_against(cache.chunks)[3]
-            pi.finish()
-
        stats = Statistics(iec=self.iec)
-        stats.usize = unique_csize  # the part files use same chunks as the full file
-        if not have_borg12_meta:
-            if self.consider_part_files:
-                stats.nfiles = sync.num_files_totals
-                stats.osize = sync.size_totals
-                stats.csize = sync.csize_totals
-            else:
-                stats.nfiles = sync.num_files_totals - sync.num_files_parts
-                stats.osize = sync.size_totals - sync.size_parts
-                stats.csize = sync.csize_totals - sync.csize_parts
-        else:
-            if self.consider_part_files:
-                stats.nfiles = self.metadata.nfiles_parts + self.metadata.nfiles
-                stats.osize = self.metadata.size_parts + self.metadata.size
-                stats.csize = self.metadata.csize_parts + self.metadata.csize
-            else:
-                stats.nfiles = self.metadata.nfiles
-                stats.osize = self.metadata.size
-                stats.csize = self.metadata.csize
+        stats.nfiles = self.metadata.nfiles
+        stats.osize = self.metadata.size
+        if self.consider_part_files:
+            stats.nfiles += self.metadata.nfiles_parts
+            stats.osize += self.metadata.size_parts
        return stats

    @contextmanager
@@ -986,7 +921,7 @@ Utilization of max. archive size: {csize_max:.0%}
                        item = Item(internal_dict=item)
                        if 'chunks' in item:
                            part = not self.consider_part_files and 'part' in item
-                            for chunk_id, size, csize in item.chunks:
+                            for chunk_id, size, _ in item.chunks:
                                chunk_decref(chunk_id, stats, part=part)
            except (TypeError, ValueError):
                # if items metadata spans multiple chunks and one chunk got dropped somehow,
@@ -1789,15 +1724,15 @@ class ArchiveChecker:
        def add_callback(chunk):
            id_ = self.key.id_hash(chunk)
            cdata = self.key.encrypt(id_, chunk)
-            add_reference(id_, len(chunk), len(cdata), cdata)
+            add_reference(id_, len(chunk), cdata)
            return id_

-        def add_reference(id_, size, csize, cdata=None):
+        def add_reference(id_, size, cdata=None):
            try:
                self.chunks.incref(id_)
            except KeyError:
                assert cdata is not None
-                self.chunks[id_] = ChunkIndexEntry(refcount=1, size=size, csize=csize)
+                self.chunks[id_] = ChunkIndexEntry(refcount=1, size=size, csize=0)  # was: csize=csize
                if self.repair:
                    self.repository.put(id_, cdata)

@@ -1811,8 +1746,7 @@ class ArchiveChecker:
            chunk = Chunk(None, allocation=CH_ALLOC, size=size)
            chunk_id, data = cached_hash(chunk, self.key.id_hash)
            cdata = self.key.encrypt(chunk_id, data)
-            csize = len(cdata)
-            return chunk_id, size, csize, cdata
+            return chunk_id, size, cdata

        offset = 0
        chunk_list = []
@@ -1835,30 +1769,30 @@ class ArchiveChecker:
                            'Replacing with all-zero chunk.'.format(
                            archive_name, item.path, offset, offset + size, bin_to_hex(chunk_id)))
                        self.error_found = chunks_replaced = True
-                        chunk_id, size, csize, cdata = replacement_chunk(size)
-                        add_reference(chunk_id, size, csize, cdata)
+                        chunk_id, size, cdata = replacement_chunk(size)
+                        add_reference(chunk_id, size, cdata)
                    else:
                        logger.info('{}: {}: Previously missing file chunk is still missing (Byte {}-{}, Chunk {}). '
                            'It has an all-zero replacement chunk already.'.format(
                            archive_name, item.path, offset, offset + size, bin_to_hex(chunk_id)))
                        chunk_id, size, csize = chunk_current
                        if chunk_id in self.chunks:
-                            add_reference(chunk_id, size, csize)
+                            add_reference(chunk_id, size)
                        else:
                            logger.warning('{}: {}: Missing all-zero replacement chunk detected (Byte {}-{}, Chunk {}). '
                                'Generating new replacement chunk.'.format(
                                archive_name, item.path, offset, offset + size, bin_to_hex(chunk_id)))
                            self.error_found = chunks_replaced = True
-                            chunk_id, size, csize, cdata = replacement_chunk(size)
-                            add_reference(chunk_id, size, csize, cdata)
+                            chunk_id, size, cdata = replacement_chunk(size)
+                            add_reference(chunk_id, size, cdata)
                else:
                    if chunk_current == chunk_healthy:
                        # normal case, all fine.
-                        add_reference(chunk_id, size, csize)
+                        add_reference(chunk_id, size)
                    else:
                        logger.info('{}: {}: Healed previously missing file chunk! (Byte {}-{}, Chunk {}).'.format(
                            archive_name, item.path, offset, offset + size, bin_to_hex(chunk_id)))
-                        add_reference(chunk_id, size, csize)
+                        add_reference(chunk_id, size)
                        mark_as_possibly_superseded(chunk_current[0])  # maybe orphaned the all-zero replacement chunk
                chunk_list.append([chunk_id, size, csize])  # list-typed element as chunks_healthy is list-of-lists
                offset += size
@@ -2005,7 +1939,7 @@ class ArchiveChecker:
            data = msgpack.packb(archive.as_dict())
            new_archive_id = self.key.id_hash(data)
            cdata = self.key.encrypt(new_archive_id, data)
-            add_reference(new_archive_id, len(data), len(cdata), cdata)
+            add_reference(new_archive_id, len(data), cdata)
            self.manifest.archives[info.name] = (new_archive_id, info.ts)
        pi.finish()
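
Hypothetical usage sketch (not part of the patch): it exercises the slimmed-down Statistics API after the csize/usize removal, using only names that appear in the diff above. The import path is an assumption based on the class living in borg/archive.py.

    # sketch only -- assumes borg with this patch applied is importable
    from borg.archive import Statistics

    stats = Statistics(iec=False)
    stats.update(4096)             # full-file data: only the original size is tracked now
    stats.update(1024, part=True)  # part-file data goes into the *_parts counters
    stats.nfiles += 1

    merged = stats + Statistics()  # __add__ still sums the remaining per-field counters
    print(merged.as_dict())        # original_size and nfiles only; no compressed/deduplicated sizes
    print(merged)                  # shortened summary: label plus osize_fmt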