浏览代码

Merge pull request #1607 from ThomasWaldmann/tune-check-verify-data

borg check --verify-data tuning
TW 8 年之前
父节点
当前提交
23ac8af7fa
共有 3 个文件被更改,包括 58 次插入和 18 次删除
  1. 29 18
      src/borg/archive.py
  2. 11 0
      src/borg/helpers.py
  3. 18 0
      src/borg/testsuite/helpers.py

+ 29 - 18
src/borg/archive.py

@@ -33,7 +33,7 @@ from .helpers import decode_dict, StableDict
 from .helpers import int_to_bigint, bigint_to_int, bin_to_hex
 from .helpers import int_to_bigint, bigint_to_int, bin_to_hex
 from .helpers import ProgressIndicatorPercent, log_multi
 from .helpers import ProgressIndicatorPercent, log_multi
 from .helpers import PathPrefixPattern, FnmatchPattern
 from .helpers import PathPrefixPattern, FnmatchPattern
-from .helpers import consume
+from .helpers import consume, chunkit
 from .helpers import CompressionDecider1, CompressionDecider2, CompressionSpec
 from .helpers import CompressionDecider1, CompressionDecider2, CompressionSpec
 from .item import Item, ArchiveItem
 from .item import Item, ArchiveItem
 from .key import key_factory
 from .key import key_factory
@@ -1045,23 +1045,34 @@ class ArchiveChecker:
         errors = 0
         errors = 0
         defect_chunks = []
         defect_chunks = []
         pi = ProgressIndicatorPercent(total=count, msg="Verifying data %6.2f%%", step=0.01)
         pi = ProgressIndicatorPercent(total=count, msg="Verifying data %6.2f%%", step=0.01)
-        for chunk_id, (refcount, *_) in self.chunks.iteritems():
-            pi.show()
-            try:
-                encrypted_data = self.repository.get(chunk_id)
-            except Repository.ObjectNotFound:
-                self.error_found = True
-                errors += 1
-                logger.error('chunk %s not found', bin_to_hex(chunk_id))
-                continue
-            try:
-                _chunk_id = None if chunk_id == Manifest.MANIFEST_ID else chunk_id
-                _, data = self.key.decrypt(_chunk_id, encrypted_data)
-            except IntegrityError as integrity_error:
-                self.error_found = True
-                errors += 1
-                logger.error('chunk %s, integrity error: %s', bin_to_hex(chunk_id), integrity_error)
-                defect_chunks.append(chunk_id)
+        for chunk_infos in chunkit(self.chunks.iteritems(), 100):
+            chunk_ids = [chunk_id for chunk_id, _ in chunk_infos]
+            chunk_data_iter = self.repository.get_many(chunk_ids)
+            chunk_ids_revd = list(reversed(chunk_ids))
+            while chunk_ids_revd:
+                pi.show()
+                chunk_id = chunk_ids_revd.pop(-1)  # pop from the end of the reversed list: O(1), unlike pop(0)
+                try:
+                    encrypted_data = next(chunk_data_iter)
+                except (Repository.ObjectNotFound, IntegrityError) as err:
+                    self.error_found = True
+                    errors += 1
+                    logger.error('chunk %s: %s', bin_to_hex(chunk_id), err)
+                    if isinstance(err, IntegrityError):
+                        defect_chunks.append(chunk_id)
+                    # as the exception killed our generator, make a new one for remaining chunks:
+                    if chunk_ids_revd:
+                        chunk_ids = list(reversed(chunk_ids_revd))
+                        chunk_data_iter = self.repository.get_many(chunk_ids)
+                else:
+                    try:
+                        _chunk_id = None if chunk_id == Manifest.MANIFEST_ID else chunk_id
+                        _, data = self.key.decrypt(_chunk_id, encrypted_data)
+                    except IntegrityError as integrity_error:
+                        self.error_found = True
+                        errors += 1
+                        logger.error('chunk %s, integrity error: %s', bin_to_hex(chunk_id), integrity_error)
+                        defect_chunks.append(chunk_id)
         pi.finish()
         pi.finish()
         if defect_chunks:
         if defect_chunks:
             if self.repair:
             if self.repair:

+ 11 - 0
src/borg/helpers.py

@@ -1494,6 +1494,17 @@ def file_status(mode):
     return '?'
     return '?'
 
 
 
 
+def chunkit(it, size):
+    """
+    Chunk an iterator <it> into pieces of <size>.
+
+    >>> list(chunkit('ABCDEFG', 3))
+    [['A', 'B', 'C'], ['D', 'E', 'F'], ['G']]
+    """
+    iterable = iter(it)
+    return iter(lambda: list(islice(iterable, size)), [])
+
+
 def consume(iterator, n=None):
 def consume(iterator, n=None):
     """Advance the iterator n-steps ahead. If n is none, consume entirely."""
     """Advance the iterator n-steps ahead. If n is none, consume entirely."""
     # Use functions that consume iterators at C speed.
     # Use functions that consume iterators at C speed.

+ 18 - 0
src/borg/testsuite/helpers.py

@@ -25,6 +25,7 @@ from ..helpers import load_excludes
 from ..helpers import CompressionSpec, CompressionDecider1, CompressionDecider2
 from ..helpers import CompressionSpec, CompressionDecider1, CompressionDecider2
 from ..helpers import parse_pattern, PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern
 from ..helpers import parse_pattern, PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern
 from ..helpers import swidth_slice
 from ..helpers import swidth_slice
+from ..helpers import chunkit
 
 
 from . import BaseTestCase, environment_variable, FakeInputs
 from . import BaseTestCase, environment_variable, FakeInputs
 
 
@@ -977,6 +978,23 @@ def test_chunk_file_wrapper():
     assert cfw.exhausted
     assert cfw.exhausted
 
 
 
 
+def test_chunkit():
+    it = chunkit('abcdefg', 3)
+    assert next(it) == ['a', 'b', 'c']
+    assert next(it) == ['d', 'e', 'f']
+    assert next(it) == ['g']
+    with pytest.raises(StopIteration):
+        next(it)
+    with pytest.raises(StopIteration):
+        next(it)
+
+    it = chunkit('ab', 3)
+    assert list(it) == [['a', 'b']]
+
+    it = chunkit('', 3)
+    assert list(it) == []
+
+
 def test_clean_lines():
 def test_clean_lines():
     conf = """\
     conf = """\
 #comment
 #comment