Merge pull request #2157 from ThomasWaldmann/add-filesize

archived file items: add size metadata
enkore 8 years ago
commit 7c9c4b61d7

+ 23 - 1
src/borg/archive.py

@@ -519,13 +519,20 @@ Utilization of max. archive size: {csize_max:.0%}
         has_damaged_chunks = 'chunks_healthy' in item
         if dry_run or stdout:
             if 'chunks' in item:
+                item_chunks_size = 0
                 for _, data in self.pipeline.fetch_many([c.id for c in item.chunks], is_preloaded=True):
                     if pi:
                         pi.show(increase=len(data), info=[remove_surrogates(item.path)])
                     if stdout:
                         sys.stdout.buffer.write(data)
+                    item_chunks_size += len(data)
                 if stdout:
                     sys.stdout.buffer.flush()
+                if 'size' in item:
+                    item_size = item.size
+                    if item_size != item_chunks_size:
+                        logger.warning('{}: size inconsistency detected: size {}, chunks size {}'.format(
+                            item.path, item_size, item_chunks_size))
             if has_damaged_chunks:
                 logger.warning('File %s has damaged (all-zero) chunks. Try running borg check --repair.' %
                                remove_surrogates(item.path))
@@ -582,10 +589,15 @@ Utilization of max. archive size: {csize_max:.0%}
                             else:
                                 fd.write(data)
                 with backup_io('truncate'):
-                    pos = fd.tell()
+                    pos = item_chunks_size = fd.tell()
                     fd.truncate(pos)
                     fd.flush()
                     self.restore_attrs(path, item, fd=fd.fileno())
+            if 'size' in item:
+                item_size = item.size
+                if item_size != item_chunks_size:
+                    logger.warning('{}: size inconsistency detected: size {}, chunks size {}'.format(
+                        item.path, item_size, item_chunks_size))
             if has_damaged_chunks:
                 logger.warning('File %s has damaged (all-zero) chunks. Try running borg check --repair.' %
                                remove_surrogates(item.path))
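
Note: both hunks above add the same safeguard: after extraction, the byte count actually reassembled from chunks is compared against the size stored in the item metadata, and a mismatch is logged instead of silently ignored. A minimal sketch of the pattern (not part of the commit; fetched_chunks stands in for the data blocks yielded by self.pipeline.fetch_many()):

    # hedged sketch of the check above, assuming an iterable of chunk data blocks
    def warn_on_size_mismatch(item, fetched_chunks, logger):
        item_chunks_size = sum(len(data) for data in fetched_chunks)
        if 'size' in item and item.size != item_chunks_size:
            logger.warning('{}: size inconsistency detected: size {}, chunks size {}'.format(
                item.path, item.size, item_chunks_size))
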
@@ -829,6 +841,7 @@ Utilization of max. archive size: {csize_max:.0%}
         length = len(item.chunks)
         # the item should only have the *additional* chunks we processed after the last partial item:
         item.chunks = item.chunks[from_chunk:]
+        item.get_size(memorize=True)
         item.path += '.borg_part_%d' % number
         item.part = number
         number += 1
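
get_size(memorize=True) computes the size from the just-truncated chunk list and stores it on the item, so each part file carries its own correct size metadata; the same call is added to process_stdin and process_file below. A small usage sketch, assuming only Item and ChunkListEntry as defined by this commit:

    from borg.item import Item, ChunkListEntry

    item = Item(path='data.bin', chunks=[ChunkListEntry(id=None, size=1000, csize=10),
                                         ChunkListEntry(id=None, size=2000, csize=20)])
    assert 'size' not in item                    # nothing stored yet
    assert item.get_size(memorize=True) == 3000  # computed from the chunk list
    assert item.size == 3000                     # now persisted in the item metadata
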
@@ -877,6 +890,7 @@ Utilization of max. archive size: {csize_max:.0%}
         )
         fd = sys.stdin.buffer  # binary
         self.chunk_file(item, cache, self.stats, backup_io_iter(self.chunker.chunkify(fd)))
+        item.get_size(memorize=True)
         self.stats.nfiles += 1
         self.add_item(item)
         return 'i'  # stdin
@@ -937,6 +951,7 @@ Utilization of max. archive size: {csize_max:.0%}
                 cache.memorize_file(path_hash, st, [c.id for c in item.chunks])
             status = status or 'M'  # regular file, modified (if not 'A' already)
         item.update(self.stat_attrs(st, path))
+        item.get_size(memorize=True)
         if is_special_file:
             # we processed a special file like a regular file. reflect that in mode,
             # so it can be extracted / accessed in FUSE mount like a regular file:
@@ -1355,6 +1370,13 @@ class ArchiveChecker:
                 logger.info('{}: Completely healed previously damaged file!'.format(item.path))
                 del item.chunks_healthy
             item.chunks = chunk_list
+            if 'size' in item:
+                item_size = item.size
+                item_chunks_size = item.get_size(compressed=False, from_chunks=True)
+                if item_size != item_chunks_size:
+                    # just warn, but keep the inconsistency, so that borg extract can warn about it.
+                    logger.warning('{}: size inconsistency detected: size {}, chunks size {}'.format(
+                                   item.path, item_size, item_chunks_size))
 
         def robust_iterator(archive):
             """Iterates through all archive items

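In the checker, from_chunks=True deliberately bypasses a memorized size and recomputes it from the (possibly repaired) chunk list, so stale metadata is detected rather than trusted; the inconsistency is kept so that borg extract can warn about it later. A sketch of the difference (not part of the commit):

    from borg.item import Item, ChunkListEntry

    item = Item(path='f', chunks=[ChunkListEntry(id=None, size=1000, csize=1)])
    item.size = 4096                                 # stale, e.g. chunks were replaced
    assert item.get_size() == 4096                   # trusts the memorized value
    assert item.get_size(from_chunks=True) == 1000   # recomputed from the chunk list
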
+ 7 - 4
src/borg/archiver.py

@@ -557,7 +557,7 @@ class Archiver:
         if progress:
             pi = ProgressIndicatorPercent(msg='%5.1f%% Extracting: %s', step=0.1)
             pi.output('Calculating size')
-            extracted_size = sum(item.file_size(hardlink_masters) for item in archive.iter_items(filter))
+            extracted_size = sum(item.get_size(hardlink_masters) for item in archive.iter_items(filter))
             pi.total = extracted_size
         else:
             pi = None
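
get_size() replaces file_size() and keeps its hardlink handling: a hardlink slave has no chunk list of its own, so its size is resolved through the hardlink_masters mapping (master path to a (chunks, ...) pair) that the extract code builds. A sketch of that behavior, with hypothetical paths:

    from borg.item import Item, ChunkListEntry

    chunks = [ChunkListEntry(id=None, size=4096, csize=100)]
    slave = Item(path='a/link', source='a/file')   # hardlink slave, no own chunks
    masters = {'a/file': (chunks, None)}           # second element unused by get_size()

    assert slave.get_size() == 0                   # unknown without the mapping
    assert slave.get_size(hardlink_masters=masters) == 4096
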
@@ -616,10 +616,13 @@ class Archiver:
 
         def sum_chunk_size(item, consider_ids=None):
             if item.get('deleted'):
-                return None
+                size = None
             else:
-                return sum(c.size for c in item.chunks
-                           if consider_ids is None or c.id in consider_ids)
+                if consider_ids is not None:  # consider only specific chunks
+                    size = sum(chunk.size for chunk in item.chunks if chunk.id in consider_ids)
+                else:  # consider all chunks
+                    size = item.get_size()
+            return size
 
         def get_owner(item):
             if args.numeric_owner:

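The rewrite keeps the diff semantics but routes the all-chunks case through item.get_size(), so a memorized size is used when available; only the consider_ids filter still walks the chunk list. Usage sketch (hypothetical chunk ids, and assuming sum_chunk_size were reachable outside its enclosing function):

    from borg.item import Item, ChunkListEntry

    item = Item(path='f', chunks=[ChunkListEntry(id=b'id-1', size=100, csize=1),
                                  ChunkListEntry(id=b'id-2', size=200, csize=2)])
    assert sum_chunk_size(item) == 300                          # all chunks
    assert sum_chunk_size(item, consider_ids={b'id-2'}) == 200  # changed chunks only
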
+ 1 - 2
src/borg/cache.py

@@ -20,13 +20,12 @@ from .helpers import format_file_size
 from .helpers import yes
 from .helpers import remove_surrogates
 from .helpers import ProgressIndicatorPercent, ProgressIndicatorMessage
-from .item import Item, ArchiveItem
+from .item import Item, ArchiveItem, ChunkListEntry
 from .key import PlaintextKey
 from .locking import Lock
 from .platform import SaveFile
 from .remote import cache_if_remote
 
-ChunkListEntry = namedtuple('ChunkListEntry', 'id size csize')
 FileCacheEntry = namedtuple('FileCacheEntry', 'age inode size mtime chunk_ids')
 
 

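ChunkListEntry moves out of cache.py into item.pyx (see below) so that Item.get_size() can use it without a circular import; cache.py now simply imports it from there. For reference:

    from borg.item import ChunkListEntry

    entry = ChunkListEntry(id=b'\x00' * 32, size=4096, csize=512)
    assert (entry.size, entry.csize) == (4096, 512)
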
+ 1 - 1
src/borg/constants.py

@@ -1,6 +1,6 @@
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
 ITEM_KEYS = frozenset(['path', 'source', 'rdev', 'chunks', 'chunks_healthy', 'hardlink_master',
-                       'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime',
+                       'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime', 'size',
                        'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended',
                        'part'])
 

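As the comment in constants.py says, ITEM_KEYS must stay complete or the RobustUnpacker can misdetect item boundaries while resynchronizing, which is why the new 'size' key has to be registered here. Quick sanity check:

    from borg.constants import ITEM_KEYS

    assert 'size' in ITEM_KEYS   # the new metadata key is a known item key
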
+ 2 - 11
src/borg/fuse.py

@@ -72,7 +72,6 @@ class FuseOperations(llfuse.Operations):
         self.contents = defaultdict(dict)
         self.default_dir = Item(mode=0o40755, mtime=int(time.time() * 1e9), uid=os.getuid(), gid=os.getgid())
         self.pending_archives = {}
-        self.accounted_chunks = {}
         self.cache = ItemCache()
         data_cache_capacity = int(os.environ.get('BORG_MOUNT_DATA_CACHE_ENTRIES', os.cpu_count() or 1))
         logger.debug('mount data cache capacity: %d chunks', data_cache_capacity)
@@ -257,14 +256,6 @@ class FuseOperations(llfuse.Operations):
 
     def getattr(self, inode, ctx=None):
         item = self.get_item(inode)
-        size = 0
-        dsize = 0
-        if 'chunks' in item:
-            for key, chunksize, _ in item.chunks:
-                size += chunksize
-                if self.accounted_chunks.get(key, inode) == inode:
-                    self.accounted_chunks[key] = inode
-                    dsize += chunksize
         entry = llfuse.EntryAttributes()
         entry.st_ino = inode
         entry.generation = 0
@@ -275,9 +266,9 @@ class FuseOperations(llfuse.Operations):
         entry.st_uid = item.uid
         entry.st_gid = item.gid
         entry.st_rdev = item.get('rdev', 0)
-        entry.st_size = size
+        entry.st_size = item.get_size()
         entry.st_blksize = 512
-        entry.st_blocks = dsize / 512
+        entry.st_blocks = (entry.st_size + entry.st_blksize - 1) // entry.st_blksize
         # note: older archives only have mtime (not atime nor ctime)
         mtime_ns = item.mtime
         if have_fuse_xtime_ns:

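st_size now comes straight from item.get_size(), and st_blocks switches from float division over a per-inode deduplication estimate to integer ceiling division over the logical size. A worked example of the formula (not borg code):

    # ceiling division: a 1000-byte file occupies two 512-byte blocks
    st_size, st_blksize = 1000, 512
    st_blocks = (st_size + st_blksize - 1) // st_blksize
    assert st_blocks == 2
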
+ 5 - 3
src/borg/helpers.py

@@ -105,7 +105,7 @@ def check_extension_modules():
         raise ExtensionModuleError
     if platform.API_VERSION != platform.OS_API_VERSION != '1.1_01':
         raise ExtensionModuleError
-    if item.API_VERSION != '1.1_01':
+    if item.API_VERSION != '1.1_02':
         raise ExtensionModuleError
 
 
@@ -1759,10 +1759,12 @@ class ItemFormatter(BaseFormatter):
         return len(item.get('chunks', []))
 
     def calculate_size(self, item):
-        return sum(c.size for c in item.get('chunks', []))
+        # note: does not support hardlink slaves, they will be size 0
+        return item.get_size(compressed=False)
 
     def calculate_csize(self, item):
-        return sum(c.csize for c in item.get('chunks', []))
+        # note: does not support hardlink slaves, they will be csize 0
+        return item.get_size(compressed=True)
 
     def hash_item(self, hash_function, item):
         if 'chunks' not in item:

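The {size} and {csize} format keys now go through get_size(), which prefers a memorized value and falls back to summing the chunk list; as the added comments note, hardlink slaves render as 0 here because no hardlink_masters mapping is passed. Sketch:

    from borg.item import Item, ChunkListEntry

    item = Item(path='f', chunks=[ChunkListEntry(id=None, size=1000, csize=123)])
    assert item.get_size(compressed=False) == 1000   # what {size} renders
    assert item.get_size(compressed=True) == 123     # what {csize} renders
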
+ 51 - 8
src/borg/item.pyx

@@ -1,8 +1,10 @@
+from collections import namedtuple
+
 from .constants import ITEM_KEYS
 from .helpers import safe_encode, safe_decode
 from .helpers import StableDict
 
-API_VERSION = '1.1_01'
+API_VERSION = '1.1_02'
 
 
 class PropDict:
@@ -113,6 +115,8 @@ class PropDict:
         return property(_get, _set, _del, doc=doc)
 
 
+ChunkListEntry = namedtuple('ChunkListEntry', 'id size csize')
+
 class Item(PropDict):
     """
     Item abstraction that deals with validation and the low-level details internally:
@@ -156,6 +160,10 @@ class Item(PropDict):
     ctime = PropDict._make_property('ctime', int)
     mtime = PropDict._make_property('mtime', int)
 
+    # size is only present for items with a chunk list and then it is sum(chunk_sizes)
+    # compatibility note: this is a new feature, in old archives size will be missing.
+    size = PropDict._make_property('size', int)
+
     hardlink_master = PropDict._make_property('hardlink_master', bool)
 
     chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None')
@@ -168,13 +176,48 @@ class Item(PropDict):
 
     part = PropDict._make_property('part', int)
 
-    def file_size(self, hardlink_masters=None):
-        hardlink_masters = hardlink_masters or {}
-        chunks, _ = hardlink_masters.get(self.get('source'), (None, None))
-        chunks = self.get('chunks', chunks)
-        if chunks is None:
-            return 0
-        return sum(chunk.size for chunk in chunks)
+    def get_size(self, hardlink_masters=None, memorize=False, compressed=False, from_chunks=False):
+        """
+        Determine the (uncompressed or compressed) size of this item.
+
+        For hardlink slaves, the size is computed via the hardlink master's
+        chunk list, if available (otherwise size will be returned as 0).
+
+        If memorize is True, the computed size value will be stored into the item.
+        """
+        attr = 'csize' if compressed else 'size'
+        try:
+            if from_chunks:
+                raise AttributeError
+            size = getattr(self, attr)
+        except AttributeError:
+            # no precomputed (c)size value available, compute it:
+            try:
+                chunks = getattr(self, 'chunks')
+                having_chunks = True
+            except AttributeError:
+                having_chunks = False
+                # this item has no (own) chunks list, but if this is a hardlink slave
+                # and we know the master, we can still compute the size.
+                if hardlink_masters is None:
+                    chunks = None
+                else:
+                    try:
+                        master = getattr(self, 'source')
+                    except AttributeError:
+                        # not a hardlink slave, likely a directory or special file w/o chunks
+                        chunks = None
+                    else:
+                        # hardlink slave, try to fetch hardlink master's chunks list
+                        # todo: put precomputed size into hardlink_masters' values and use it, if present
+                        chunks, _ = hardlink_masters.get(master, (None, None))
+                if chunks is None:
+                    return 0
+            size = sum(getattr(ChunkListEntry(*chunk), attr) for chunk in chunks)
+            # if requested, memorize the precomputed (c)size for items that have an own chunks list:
+            if memorize and having_chunks:
+                setattr(self, attr, size)
+        return size
 
 
 class EncryptedKey(PropDict):

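One detail worth noting in get_size(): each chunk entry is normalized through ChunkListEntry(*chunk) before .size or .csize is read, so plain sequences (e.g. chunk lists freshly unpacked from msgpack) work the same as named tuples. Sketch (not part of the commit):

    from borg.item import Item

    # entries as plain lists, as they may arrive from msgpack:
    item = Item(path='f', chunks=[[None, 1000, 10], [None, 2000, 20]])
    assert item.get_size() == 3000
    assert item.get_size(compressed=True) == 30
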
+ 2 - 2
src/borg/testsuite/item.py

@@ -142,9 +142,9 @@ def test_item_file_size():
         ChunkListEntry(csize=1, size=1000, id=None),
         ChunkListEntry(csize=1, size=2000, id=None),
     ])
-    assert item.file_size() == 3000
+    assert item.get_size() == 3000
 
 
 def test_item_file_size_no_chunks():
     item = Item()
-    assert item.file_size() == 0
+    assert item.get_size() == 0
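
The updated tests only exercise items that own a chunk list; a hardlink-slave case (hypothetical, not part of this commit) could look like this:

    def test_item_file_size_hardlink_slave():
        chunks = [ChunkListEntry(csize=1, size=1000, id=None)]
        item = Item(source='path/to/master')   # slave: source set, no chunks
        assert item.get_size() == 0
        assert item.get_size(hardlink_masters={'path/to/master': (chunks, None)}) == 1000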