Forráskód Böngészése

Merge pull request #1607 from ThomasWaldmann/tune-check-verify-data

borg check --verify-data tuning
TW 8 éve
szülő
commit
23ac8af7fa
3 módosított fájl, 58 hozzáadás és 18 törlés
  1. 29 18
      src/borg/archive.py
  2. 11 0
      src/borg/helpers.py
  3. 18 0
      src/borg/testsuite/helpers.py

+ 29 - 18
src/borg/archive.py

@@ -33,7 +33,7 @@ from .helpers import decode_dict, StableDict
 from .helpers import int_to_bigint, bigint_to_int, bin_to_hex
 from .helpers import ProgressIndicatorPercent, log_multi
 from .helpers import PathPrefixPattern, FnmatchPattern
-from .helpers import consume
+from .helpers import consume, chunkit
 from .helpers import CompressionDecider1, CompressionDecider2, CompressionSpec
 from .item import Item, ArchiveItem
 from .key import key_factory
@@ -1045,23 +1045,34 @@ class ArchiveChecker:
         errors = 0
         defect_chunks = []
         pi = ProgressIndicatorPercent(total=count, msg="Verifying data %6.2f%%", step=0.01)
-        for chunk_id, (refcount, *_) in self.chunks.iteritems():
-            pi.show()
-            try:
-                encrypted_data = self.repository.get(chunk_id)
-            except Repository.ObjectNotFound:
-                self.error_found = True
-                errors += 1
-                logger.error('chunk %s not found', bin_to_hex(chunk_id))
-                continue
-            try:
-                _chunk_id = None if chunk_id == Manifest.MANIFEST_ID else chunk_id
-                _, data = self.key.decrypt(_chunk_id, encrypted_data)
-            except IntegrityError as integrity_error:
-                self.error_found = True
-                errors += 1
-                logger.error('chunk %s, integrity error: %s', bin_to_hex(chunk_id), integrity_error)
-                defect_chunks.append(chunk_id)
+        for chunk_infos in chunkit(self.chunks.iteritems(), 100):
+            chunk_ids = [chunk_id for chunk_id, _ in chunk_infos]
+            chunk_data_iter = self.repository.get_many(chunk_ids)
+            chunk_ids_revd = list(reversed(chunk_ids))
+            while chunk_ids_revd:
+                pi.show()
+                chunk_id = chunk_ids_revd.pop(-1)  # pop from the end of the reversed list: O(1), unlike pop(0) on the original order
+                try:
+                    encrypted_data = next(chunk_data_iter)
+                except (Repository.ObjectNotFound, IntegrityError) as err:
+                    self.error_found = True
+                    errors += 1
+                    logger.error('chunk %s: %s', bin_to_hex(chunk_id), err)
+                    if isinstance(err, IntegrityError):
+                        defect_chunks.append(chunk_id)
+                    # as the exception killed our generator, make a new one for remaining chunks:
+                    if chunk_ids_revd:
+                        chunk_ids = list(reversed(chunk_ids_revd))
+                        chunk_data_iter = self.repository.get_many(chunk_ids)
+                else:
+                    try:
+                        _chunk_id = None if chunk_id == Manifest.MANIFEST_ID else chunk_id
+                        _, data = self.key.decrypt(_chunk_id, encrypted_data)
+                    except IntegrityError as integrity_error:
+                        self.error_found = True
+                        errors += 1
+                        logger.error('chunk %s, integrity error: %s', bin_to_hex(chunk_id), integrity_error)
+                        defect_chunks.append(chunk_id)
         pi.finish()
         if defect_chunks:
             if self.repair:

+ 11 - 0
src/borg/helpers.py

@@ -1494,6 +1494,17 @@ def file_status(mode):
     return '?'
 
 
+def chunkit(it, size):
+    """
+    Chunk an iterator <it> into pieces of <size>.
+
+    >>> list(chunkit('ABCDEFG', 3))
+    [['A', 'B', 'C'], ['D', 'E', 'F'], ['G']]
+    """
+    iterable = iter(it)
+    return iter(lambda: list(islice(iterable, size)), [])
+
+
 def consume(iterator, n=None):
     """Advance the iterator n-steps ahead. If n is none, consume entirely."""
     # Use functions that consume iterators at C speed.

+ 18 - 0
src/borg/testsuite/helpers.py

@@ -25,6 +25,7 @@ from ..helpers import load_excludes
 from ..helpers import CompressionSpec, CompressionDecider1, CompressionDecider2
 from ..helpers import parse_pattern, PatternMatcher, RegexPattern, PathPrefixPattern, FnmatchPattern, ShellPattern
 from ..helpers import swidth_slice
+from ..helpers import chunkit
 
 from . import BaseTestCase, environment_variable, FakeInputs
 
@@ -977,6 +978,23 @@ def test_chunk_file_wrapper():
     assert cfw.exhausted
 
 
+def test_chunkit():
+    it = chunkit('abcdefg', 3)
+    assert next(it) == ['a', 'b', 'c']
+    assert next(it) == ['d', 'e', 'f']
+    assert next(it) == ['g']
+    with pytest.raises(StopIteration):
+        next(it)
+    with pytest.raises(StopIteration):
+        next(it)
+
+    it = chunkit('ab', 3)
+    assert list(it) == [['a', 'b']]
+
+    it = chunkit('', 3)
+    assert list(it) == []
+
+
 def test_clean_lines():
     conf = """\
 #comment