@@ -58,60 +58,46 @@ class Statistics:

    def __init__(self, output_json=False, iec=False):
        self.output_json = output_json
        self.iec = iec
-        self.osize = self.csize = self.usize = self.nfiles = 0
-        self.osize_parts = self.csize_parts = self.usize_parts = self.nfiles_parts = 0
+        self.osize = self.nfiles = 0
+        self.osize_parts = self.nfiles_parts = 0
        self.last_progress = 0  # timestamp when last progress was shown

-    def update(self, size, csize, unique, part=False):
+    def update(self, size, part=False):
        if not part:
            self.osize += size
-            self.csize += csize
-            if unique:
-                self.usize += csize
        else:
            self.osize_parts += size
-            self.csize_parts += csize
-            if unique:
-                self.usize_parts += csize

    def __add__(self, other):
        if not isinstance(other, Statistics):
            raise TypeError('can only add Statistics objects')
        stats = Statistics(self.output_json, self.iec)
        stats.osize = self.osize + other.osize
-        stats.csize = self.csize + other.csize
-        stats.usize = self.usize + other.usize
        stats.nfiles = self.nfiles + other.nfiles
        stats.osize_parts = self.osize_parts + other.osize_parts
-        stats.csize_parts = self.csize_parts + other.csize_parts
-        stats.usize_parts = self.usize_parts + other.usize_parts
        stats.nfiles_parts = self.nfiles_parts + other.nfiles_parts
        return stats

-    summary = "{label:15} {stats.osize_fmt:>20s} {stats.csize_fmt:>20s} {stats.usize_fmt:>20s}"
+    summary = "{label:15} {stats.osize_fmt:>20s}"

    def __str__(self):
        return self.summary.format(stats=self, label='This archive:')

    def __repr__(self):
-        return "<{cls} object at {hash:#x} ({self.osize}, {self.csize}, {self.usize})>".format(
+        return "<{cls} object at {hash:#x} ({self.osize})>".format(
            cls=type(self).__name__, hash=id(self), self=self)

    def as_dict(self):
        return {
            'original_size': FileSize(self.osize, iec=self.iec),
-            'compressed_size': FileSize(self.csize, iec=self.iec),
-            'deduplicated_size': FileSize(self.usize, iec=self.iec),
            'nfiles': self.nfiles,
        }

    def as_raw_dict(self):
        return {
            'size': self.osize,
-            'csize': self.csize,
            'nfiles': self.nfiles,
            'size_parts': self.osize_parts,
-            'csize_parts': self.csize_parts,
            'nfiles_parts': self.nfiles_parts,
        }
@@ -119,10 +105,8 @@ class Statistics:
    def from_raw_dict(cls, **kw):
        self = cls()
        self.osize = kw['size']
-        self.csize = kw['csize']
        self.nfiles = kw['nfiles']
        self.osize_parts = kw['size_parts']
-        self.csize_parts = kw['csize_parts']
        self.nfiles_parts = kw['nfiles_parts']
        return self

@@ -130,14 +114,6 @@ class Statistics:
    def osize_fmt(self):
        return format_file_size(self.osize, iec=self.iec)

-    @property
-    def usize_fmt(self):
-        return format_file_size(self.usize, iec=self.iec)
-
-    @property
-    def csize_fmt(self):
-        return format_file_size(self.csize, iec=self.iec)
-
    def show_progress(self, item=None, final=False, stream=None, dt=None):
        now = time.monotonic()
        if dt is None or now - self.last_progress > dt:
@@ -158,7 +134,7 @@ class Statistics:
            else:
                columns, lines = get_terminal_size()
                if not final:
-                    msg = '{0.osize_fmt} O {0.csize_fmt} C {0.usize_fmt} D {0.nfiles} N '.format(self)
+                    msg = '{0.osize_fmt} O {0.nfiles} N '.format(self)
                    path = remove_surrogates(item.path) if item else ''
                    space = columns - swidth(msg)
                    if space < 12:
@@ -614,10 +590,8 @@ Utilization of max. archive size: {csize_max:.0%}
        if stats is not None:
            metadata.update({
                'size': stats.osize,
-                'csize': stats.csize,
                'nfiles': stats.nfiles,
                'size_parts': stats.osize_parts,
-                'csize_parts': stats.csize_parts,
                'nfiles_parts': stats.nfiles_parts})
        metadata.update(additional_metadata or {})
        metadata = ArchiveItem(metadata)
@@ -651,51 +625,12 @@ Utilization of max. archive size: {csize_max:.0%}
        return stats

    def _calc_stats(self, cache, want_unique=True):
-        have_borg12_meta = self.metadata.get('nfiles') is not None
-
-        if have_borg12_meta and not want_unique:
-            unique_csize = 0
-        else:
-            def add(id):
-                entry = cache.chunks[id]
-                archive_index.add(id, 1, entry.size, entry.csize)
-
-            archive_index = ChunkIndex()
-            sync = CacheSynchronizer(archive_index)
-            add(self.id)
-            # we must escape any % char in the archive name, because we use it in a format string, see #6500
-            arch_name_escd = self.name.replace('%', '%%')
-            pi = ProgressIndicatorPercent(total=len(self.metadata.items),
-                                          msg='Calculating statistics for archive %s ... %%3.0f%%%%' % arch_name_escd,
-                                          msgid='archive.calc_stats')
-            for id, chunk in zip(self.metadata.items, self.repository.get_many(self.metadata.items)):
-                pi.show(increase=1)
-                add(id)
-                data = self.key.decrypt(id, chunk)
-                sync.feed(data)
-            unique_csize = archive_index.stats_against(cache.chunks)[3]
-            pi.finish()
-
        stats = Statistics(iec=self.iec)
-        stats.usize = unique_csize  # the part files use same chunks as the full file
-        if not have_borg12_meta:
-            if self.consider_part_files:
-                stats.nfiles = sync.num_files_totals
-                stats.osize = sync.size_totals
-                stats.csize = sync.csize_totals
-            else:
-                stats.nfiles = sync.num_files_totals - sync.num_files_parts
-                stats.osize = sync.size_totals - sync.size_parts
-                stats.csize = sync.csize_totals - sync.csize_parts
-        else:
-            if self.consider_part_files:
-                stats.nfiles = self.metadata.nfiles_parts + self.metadata.nfiles
-                stats.osize = self.metadata.size_parts + self.metadata.size
-                stats.csize = self.metadata.csize_parts + self.metadata.csize
-            else:
-                stats.nfiles = self.metadata.nfiles
-                stats.osize = self.metadata.size
-                stats.csize = self.metadata.csize
+        stats.nfiles = self.metadata.nfiles
+        stats.osize = self.metadata.size
+        if self.consider_part_files:
+            stats.nfiles += self.metadata.nfiles_parts
+            stats.osize += self.metadata.size_parts
        return stats

    @contextmanager
@@ -986,7 +921,7 @@ Utilization of max. archive size: {csize_max:.0%}
                        item = Item(internal_dict=item)
                        if 'chunks' in item:
                            part = not self.consider_part_files and 'part' in item
-                            for chunk_id, size, csize in item.chunks:
+                            for chunk_id, size, _ in item.chunks:
                                chunk_decref(chunk_id, stats, part=part)
            except (TypeError, ValueError):
                # if items metadata spans multiple chunks and one chunk got dropped somehow,
@@ -1789,15 +1724,15 @@ class ArchiveChecker:
        def add_callback(chunk):
            id_ = self.key.id_hash(chunk)
            cdata = self.key.encrypt(id_, chunk)
-            add_reference(id_, len(chunk), len(cdata), cdata)
+            add_reference(id_, len(chunk), cdata)
            return id_

-        def add_reference(id_, size, csize, cdata=None):
+        def add_reference(id_, size, cdata=None):
            try:
                self.chunks.incref(id_)
            except KeyError:
                assert cdata is not None
-                self.chunks[id_] = ChunkIndexEntry(refcount=1, size=size, csize=csize)
+                self.chunks[id_] = ChunkIndexEntry(refcount=1, size=size, csize=0)  # was: csize=csize
                if self.repair:
                    self.repository.put(id_, cdata)

@@ -1811,8 +1746,7 @@ class ArchiveChecker:
            chunk = Chunk(None, allocation=CH_ALLOC, size=size)
            chunk_id, data = cached_hash(chunk, self.key.id_hash)
            cdata = self.key.encrypt(chunk_id, data)
-            csize = len(cdata)
-            return chunk_id, size, csize, cdata
+            return chunk_id, size, cdata

        offset = 0
        chunk_list = []
@@ -1835,30 +1769,30 @@ class ArchiveChecker:
                            'Replacing with all-zero chunk.'.format(
                            archive_name, item.path, offset, offset + size, bin_to_hex(chunk_id)))
                        self.error_found = chunks_replaced = True
-                        chunk_id, size, csize, cdata = replacement_chunk(size)
-                        add_reference(chunk_id, size, csize, cdata)
+                        chunk_id, size, cdata = replacement_chunk(size)
+                        add_reference(chunk_id, size, cdata)
                    else:
                        logger.info('{}: {}: Previously missing file chunk is still missing (Byte {}-{}, Chunk {}). '
                            'It has an all-zero replacement chunk already.'.format(
                            archive_name, item.path, offset, offset + size, bin_to_hex(chunk_id)))
                        chunk_id, size, csize = chunk_current
                        if chunk_id in self.chunks:
-                            add_reference(chunk_id, size, csize)
+                            add_reference(chunk_id, size)
                        else:
                            logger.warning('{}: {}: Missing all-zero replacement chunk detected (Byte {}-{}, Chunk {}). '
                                'Generating new replacement chunk.'.format(
                                archive_name, item.path, offset, offset + size, bin_to_hex(chunk_id)))
                            self.error_found = chunks_replaced = True
-                            chunk_id, size, csize, cdata = replacement_chunk(size)
-                            add_reference(chunk_id, size, csize, cdata)
+                            chunk_id, size, cdata = replacement_chunk(size)
+                            add_reference(chunk_id, size, cdata)
                else:
                    if chunk_current == chunk_healthy:
                        # normal case, all fine.
-                        add_reference(chunk_id, size, csize)
+                        add_reference(chunk_id, size)
                    else:
                        logger.info('{}: {}: Healed previously missing file chunk! (Byte {}-{}, Chunk {}).'.format(
                            archive_name, item.path, offset, offset + size, bin_to_hex(chunk_id)))
-                        add_reference(chunk_id, size, csize)
+                        add_reference(chunk_id, size)
                        mark_as_possibly_superseded(chunk_current[0])  # maybe orphaned the all-zero replacement chunk
                chunk_list.append([chunk_id, size, csize])  # list-typed element as chunks_healthy is list-of-lists
                offset += size
@@ -2005,7 +1939,7 @@ class ArchiveChecker:
            data = msgpack.packb(archive.as_dict())
            new_archive_id = self.key.id_hash(data)
            cdata = self.key.encrypt(new_archive_id, data)
-            add_reference(new_archive_id, len(data), len(cdata), cdata)
+            add_reference(new_archive_id, len(data), cdata)
            self.manifest.archives[info.name] = (new_archive_id, info.ts)
        pi.finish()
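
Hypothetical usage sketch (not part of the patch): it exercises the slimmed-down Statistics API after the csize/usize removal, using only names that appear in the diff above. The import path is an assumption based on the class living in borg/archive.py.

    # sketch only -- assumes borg with this patch applied is importable
    from borg.archive import Statistics

    stats = Statistics(iec=False)
    stats.update(4096)             # full-file data: only the original size is tracked now
    stats.update(1024, part=True)  # part-file data goes into the *_parts counters
    stats.nfiles += 1

    merged = stats + Statistics()  # __add__ still sums the remaining per-field counters
    print(merged.as_dict())        # original_size and nfiles only; no compressed/deduplicated sizes
    print(merged)                  # shortened summary: label plus osize_fmt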