Browse Source

Merge pull request #2172 from Abogical/master

Add dsize and dcsize keys, fixes #2164
enkore 8 years ago
parent
commit
63d4cf4c82
3 changed files with 27 additions and 9 deletions
  1. 1 0
      AUTHORS
  2. 21 7
      src/borg/helpers.py
  3. 5 2
      src/borg/testsuite/archiver.py

+ 1 - 0
AUTHORS

@@ -11,6 +11,7 @@ Borg authors ("The Borg Collective")
 - Martin Hostettler <textshell@uchuujin.de>
 - Daniel Reichelt <hacking@nachtgeist.net>
 - Lauri Niskanen <ape@ape3000.com>
+- Abdel-Rahman A. (Abogical)
 
 
 Borg is a fork of Attic.

+ 21 - 7
src/borg/helpers.py

@@ -20,7 +20,7 @@ import time
 import unicodedata
 import uuid
 from binascii import hexlify
-from collections import namedtuple, deque, abc
+from collections import namedtuple, deque, abc, Counter
 from datetime import datetime, timezone, timedelta
 from fnmatch import translate
 from functools import wraps, partial, lru_cache
@@ -1574,13 +1574,15 @@ class ItemFormatter(BaseFormatter):
         'source': 'link target for links (identical to linktarget)',
         'extra': 'prepends {source} with " -> " for soft links and " link to " for hard links',
         'csize': 'compressed size',
+        'dsize': 'deduplicated size',
+        'dcsize': 'deduplicated compressed size',
         'num_chunks': 'number of chunks in this file',
         'unique_chunks': 'number of unique chunks in this file',
         'health': 'either "healthy" (file ok) or "broken" (if file has all-zero replacement chunks)',
     }
     KEY_GROUPS = (
         ('type', 'mode', 'uid', 'gid', 'user', 'group', 'path', 'bpath', 'source', 'linktarget', 'flags'),
-        ('size', 'csize', 'num_chunks', 'unique_chunks'),
+        ('size', 'csize', 'dsize', 'dcsize', 'num_chunks', 'unique_chunks'),
         ('mtime', 'ctime', 'atime', 'isomtime', 'isoctime', 'isoatime'),
         tuple(sorted(hashlib.algorithms_guaranteed)),
         ('archiveid', 'archivename', 'extra'),
@@ -1630,8 +1632,10 @@ class ItemFormatter(BaseFormatter):
         self.call_keys = {
             'size': self.calculate_size,
             'csize': self.calculate_csize,
+            'dsize': partial(self.sum_unique_chunks_metadata, lambda chunk: chunk.size),
+            'dcsize': partial(self.sum_unique_chunks_metadata, lambda chunk: chunk.csize),
             'num_chunks': self.calculate_num_chunks,
-            'unique_chunks': self.calculate_unique_chunks,
+            'unique_chunks': partial(self.sum_unique_chunks_metadata, lambda chunk: 1),
             'isomtime': partial(self.format_time, 'mtime'),
             'isoctime': partial(self.format_time, 'ctime'),
             'isoatime': partial(self.format_time, 'atime'),
@@ -1679,12 +1683,22 @@ class ItemFormatter(BaseFormatter):
             item_data[key] = self.call_keys[key](item)
         return item_data

-    def calculate_num_chunks(self, item):
-        return len(item.get('chunks', []))
+    def sum_unique_chunks_metadata(self, metadata_func, item):
+        """
+        sum unique chunks metadata, a unique chunk is a chunk which is referenced globally as often as it is in the
+        item
-    def calculate_unique_chunks(self, item):
+        item: The item to sum its unique chunks' metadata
+        metadata_func: A function that takes a parameter of type ChunkIndexEntry and returns a number, used to return
+                       the metadata needed from the chunk
+        """
         chunk_index = self.archive.cache.chunks
-        return sum(1 for c in item.get('chunks', []) if chunk_index[c.id].refcount == 1)
+        chunks = item.get('chunks', [])
+        chunks_counter = Counter(c.id for c in chunks)
+        return sum(metadata_func(c) for c in chunks if chunk_index[c.id].refcount == chunks_counter[c.id])
+
+    def calculate_num_chunks(self, item):
+        return len(item.get('chunks', []))

     def calculate_size(self, item):
         return sum(c.size for c in item.get('chunks', []))

+ 5 - 2
src/borg/testsuite/archiver.py

@@ -1440,9 +1440,12 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('init', '--encryption=repokey', self.repository_location)
         test_archive = self.repository_location + '::test'
         self.cmd('create', '-C', 'lz4', test_archive, 'input')
-        output = self.cmd('list', '--format', '{size} {csize} {path}{NL}', test_archive)
-        size, csize, path = output.split("\n")[1].split(" ")
+        output = self.cmd('list', '--format', '{size} {csize} {dsize} {dcsize} {path}{NL}', test_archive)
+        size, csize, dsize, dcsize, path = output.split("\n")[1].split(" ")
         assert int(csize) < int(size)
+        assert int(dcsize) < int(dsize)
+        assert int(dsize) <= int(size)
+        assert int(dcsize) <= int(csize)

     def _get_sizes(self, compression, compressible, size=10000):
         if compressible: