浏览代码

Merge pull request #2172 from Abogical/master

Add dsize and dcsize keys, fixes #2164
enkore 8 年之前
父节点
当前提交
63d4cf4c82
共有 3 个文件被更改,包括 27 次插入和 9 次删除
  1. 1 0
      AUTHORS
  2. 21 7
      src/borg/helpers.py
  3. 5 2
      src/borg/testsuite/archiver.py

+ 1 - 0
AUTHORS

@@ -11,6 +11,7 @@ Borg authors ("The Borg Collective")
 - Martin Hostettler <textshell@uchuujin.de>
 - Daniel Reichelt <hacking@nachtgeist.net>
 - Lauri Niskanen <ape@ape3000.com>
+- Abdel-Rahman A. (Abogical)
 
 Borg is a fork of Attic.
 

+ 21 - 7
src/borg/helpers.py

@@ -20,7 +20,7 @@ import time
 import unicodedata
 import uuid
 from binascii import hexlify
-from collections import namedtuple, deque, abc
+from collections import namedtuple, deque, abc, Counter
 from datetime import datetime, timezone, timedelta
 from fnmatch import translate
 from functools import wraps, partial, lru_cache
@@ -1574,13 +1574,15 @@ class ItemFormatter(BaseFormatter):
         'source': 'link target for links (identical to linktarget)',
         'extra': 'prepends {source} with " -> " for soft links and " link to " for hard links',
         'csize': 'compressed size',
+        'dsize': 'deduplicated size',
+        'dcsize': 'deduplicated compressed size',
         'num_chunks': 'number of chunks in this file',
         'unique_chunks': 'number of unique chunks in this file',
         'health': 'either "healthy" (file ok) or "broken" (if file has all-zero replacement chunks)',
     }
     KEY_GROUPS = (
         ('type', 'mode', 'uid', 'gid', 'user', 'group', 'path', 'bpath', 'source', 'linktarget', 'flags'),
-        ('size', 'csize', 'num_chunks', 'unique_chunks'),
+        ('size', 'csize', 'dsize', 'dcsize', 'num_chunks', 'unique_chunks'),
         ('mtime', 'ctime', 'atime', 'isomtime', 'isoctime', 'isoatime'),
         tuple(sorted(hashlib.algorithms_guaranteed)),
         ('archiveid', 'archivename', 'extra'),
@@ -1630,8 +1632,10 @@ class ItemFormatter(BaseFormatter):
         self.call_keys = {
             'size': self.calculate_size,
             'csize': self.calculate_csize,
+            'dsize': partial(self.sum_unique_chunks_metadata, lambda chunk: chunk.size),
+            'dcsize': partial(self.sum_unique_chunks_metadata, lambda chunk: chunk.csize),
             'num_chunks': self.calculate_num_chunks,
-            'unique_chunks': self.calculate_unique_chunks,
+            'unique_chunks': partial(self.sum_unique_chunks_metadata, lambda chunk: 1),
             'isomtime': partial(self.format_time, 'mtime'),
             'isoctime': partial(self.format_time, 'ctime'),
             'isoatime': partial(self.format_time, 'atime'),
@@ -1679,12 +1683,22 @@ class ItemFormatter(BaseFormatter):
             item_data[key] = self.call_keys[key](item)
         return item_data
 
-    def calculate_num_chunks(self, item):
-        return len(item.get('chunks', []))
+    def sum_unique_chunks_metadata(self, metadata_func, item):
+        """
+        sum unique chunks metadata, a unique chunk is a chunk which is referenced globally as often as it is in the
+        item
 
-    def calculate_unique_chunks(self, item):
+        item: The item to sum its unique chunks' metadata
+        metadata_func: A function that takes a parameter of type ChunkIndexEntry and returns a number, used to return
+                       the metadata needed from the chunk
+        """
         chunk_index = self.archive.cache.chunks
-        return sum(1 for c in item.get('chunks', []) if chunk_index[c.id].refcount == 1)
+        chunks = item.get('chunks', [])
+        chunks_counter = Counter(c.id for c in chunks)
+        return sum(metadata_func(c) for c in chunks if chunk_index[c.id].refcount == chunks_counter[c.id])
+
+    def calculate_num_chunks(self, item):
+        return len(item.get('chunks', []))
 
     def calculate_size(self, item):
         return sum(c.size for c in item.get('chunks', []))

+ 5 - 2
src/borg/testsuite/archiver.py

@@ -1440,9 +1440,12 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('init', '--encryption=repokey', self.repository_location)
         test_archive = self.repository_location + '::test'
         self.cmd('create', '-C', 'lz4', test_archive, 'input')
-        output = self.cmd('list', '--format', '{size} {csize} {path}{NL}', test_archive)
-        size, csize, path = output.split("\n")[1].split(" ")
+        output = self.cmd('list', '--format', '{size} {csize} {dsize} {dcsize} {path}{NL}', test_archive)
+        size, csize, dsize, dcsize, path = output.split("\n")[1].split(" ")
         assert int(csize) < int(size)
+        assert int(dcsize) < int(dsize)
+        assert int(dsize) <= int(size)
+        assert int(dcsize) <= int(csize)
 
     def _get_sizes(self, compression, compressible, size=10000):
         if compressible:
         if compressible: