Browse Source

archived file items: add size metadata

if an item has a chunk list, pre-compute the total size and store it in the "size" metadata entry.

this speeds up access to item size (e.g. for regular files) and could also be used to verify the validity of the chunks list.

note about hardlinks: size is only stored for hardlink masters (only they have their own chunk list).
Thomas Waldmann 8 years ago
parent
commit
a52b54dc3c
6 changed files with 25 additions and 6 deletions
  1. 3 0
      src/borg/archive.py
  2. 8 3
      src/borg/archiver.py
  3. 1 1
      src/borg/constants.py
  4. 1 0
      src/borg/fuse.py
  5. 4 1
      src/borg/helpers.py
  6. 8 1
      src/borg/item.pyx

+ 3 - 0
src/borg/archive.py

@@ -777,6 +777,7 @@ Utilization of max. archive size: {csize_max:.0%}
         length = len(item.chunks)
         # the item should only have the *additional* chunks we processed after the last partial item:
         item.chunks = item.chunks[from_chunk:]
+        item.size = sum(chunk.size for chunk in item.chunks)
         item.path += '.borg_part_%d' % number
         item.part = number
         number += 1
@@ -825,6 +826,7 @@ Utilization of max. archive size: {csize_max:.0%}
         )
         fd = sys.stdin.buffer  # binary
         self.chunk_file(item, cache, self.stats, backup_io_iter(self.chunker.chunkify(fd)))
+        item.size = sum(chunk.size for chunk in item.chunks)
         self.stats.nfiles += 1
         self.add_item(item)
         return 'i'  # stdin
@@ -885,6 +887,7 @@ Utilization of max. archive size: {csize_max:.0%}
                 cache.memorize_file(path_hash, st, [c.id for c in item.chunks])
             status = status or 'M'  # regular file, modified (if not 'A' already)
         item.update(self.stat_attrs(st, path))
+        item.size = sum(chunk.size for chunk in item.chunks)
         if is_special_file:
             # we processed a special file like a regular file. reflect that in mode,
             # so it can be extracted / accessed in FUSE mount like a regular file:

+ 8 - 3
src/borg/archiver.py

@@ -600,10 +600,15 @@ class Archiver:
 
         def sum_chunk_size(item, consider_ids=None):
             if item.get('deleted'):
-                return None
+                size = None
             else:
-                return sum(c.size for c in item.chunks
-                           if consider_ids is None or c.id in consider_ids)
+                if consider_ids is not None:  # consider only specific chunks
+                    size = sum(chunk.size for chunk in item.chunks if chunk.id in consider_ids)
+                else:  # consider all chunks
+                    size = item.get('size')
+                    if size is None:
+                        size = sum(chunk.size for chunk in item.chunks)
+            return size
 
         def get_owner(item):
             if args.numeric_owner:

+ 1 - 1
src/borg/constants.py

@@ -1,6 +1,6 @@
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
 ITEM_KEYS = frozenset(['path', 'source', 'rdev', 'chunks', 'chunks_healthy', 'hardlink_master',
-                       'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime',
+                       'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime', 'size',
                        'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended',
                        'part'])
 

+ 1 - 0
src/borg/fuse.py

@@ -260,6 +260,7 @@ class FuseOperations(llfuse.Operations):
         size = 0
         dsize = 0
         if 'chunks' in item:
+            # if we did not need to compute dsize, we could get size quickly from item.size, if present.
             for key, chunksize, _ in item.chunks:
                 size += chunksize
                 if self.accounted_chunks.get(key, inode) == inode:

+ 4 - 1
src/borg/helpers.py

@@ -104,7 +104,7 @@ def check_extension_modules():
         raise ExtensionModuleError
     if platform.API_VERSION != platform.OS_API_VERSION != '1.1_01':
         raise ExtensionModuleError
-    if item.API_VERSION != '1.1_01':
+    if item.API_VERSION != '1.1_02':
         raise ExtensionModuleError
 
 
@@ -1701,6 +1701,9 @@ class ItemFormatter(BaseFormatter):
         return len(item.get('chunks', []))
 
     def calculate_size(self, item):
+        size = item.get('size')
+        if size is not None:
+            return size
         return sum(c.size for c in item.get('chunks', []))
 
     def calculate_csize(self, item):

+ 8 - 1
src/borg/item.pyx

@@ -2,7 +2,7 @@ from .constants import ITEM_KEYS
 from .helpers import safe_encode, safe_decode
 from .helpers import StableDict
 
-API_VERSION = '1.1_01'
+API_VERSION = '1.1_02'
 
 
 class PropDict:
@@ -156,6 +156,10 @@ class Item(PropDict):
     ctime = PropDict._make_property('ctime', int)
     mtime = PropDict._make_property('mtime', int)
 
+    # size is only present for items with a chunk list and then it is sum(chunk_sizes)
+    # compatibility note: this is a new feature, in old archives size will be missing.
+    size = PropDict._make_property('size', int)
+
     hardlink_master = PropDict._make_property('hardlink_master', bool)
 
     chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None')
@@ -169,6 +173,9 @@ class Item(PropDict):
     part = PropDict._make_property('part', int)
 
     def file_size(self, hardlink_masters=None):
+        size = self.get('size')
+        if size is not None:
+            return size
         hardlink_masters = hardlink_masters or {}
         chunks, _ = hardlink_masters.get(self.get('source'), (None, None))
         chunks = self.get('chunks', chunks)