|
@@ -920,31 +920,56 @@ class ArchiveChecker:
|
|
|
self.repository.put(id_, cdata)
|
|
|
|
|
|
def verify_file_chunks(item):
|
|
|
- """Verifies that all file chunks are present
|
|
|
+ """Verifies that all file chunks are present.
|
|
|
|
|
|
- Missing file chunks will be replaced with new chunks of the same
|
|
|
- length containing all zeros.
|
|
|
+ Missing file chunks will be replaced with new chunks of the same length containing all zeros.
|
|
|
+ If a previously missing file chunk re-appears, the replacement chunk is replaced by the correct one.
|
|
|
"""
|
|
|
offset = 0
|
|
|
chunk_list = []
|
|
|
chunks_replaced = False
|
|
|
- for chunk_id, size, csize in item[b'chunks']:
|
|
|
+ has_chunks_healthy = b'chunks_healthy' in item
|
|
|
+ chunks_current = item[b'chunks']
|
|
|
+ chunks_healthy = item[b'chunks_healthy'] if has_chunks_healthy else chunks_current
|
|
|
+ assert len(chunks_current) == len(chunks_healthy)
|
|
|
+ for chunk_current, chunk_healthy in zip(chunks_current, chunks_healthy):
|
|
|
+ chunk_id, size, csize = chunk_healthy
|
|
|
if chunk_id not in self.chunks:
|
|
|
- # If a file chunk is missing, create an all empty replacement chunk
|
|
|
- logger.error('{}: Missing file chunk detected (Byte {}-{})'.format(item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size))
|
|
|
- self.error_found = chunks_replaced = True
|
|
|
- data = bytes(size)
|
|
|
- chunk_id = self.key.id_hash(data)
|
|
|
- cdata = self.key.encrypt(data)
|
|
|
- csize = len(cdata)
|
|
|
- add_reference(chunk_id, size, csize, cdata)
|
|
|
+ # a chunk of the healthy list is missing
|
|
|
+ if chunk_current == chunk_healthy:
|
|
|
+ logger.error('{}: New missing file chunk detected (Byte {}-{}). '
|
|
|
+ 'Replacing with all-zero chunk.'.format(
|
|
|
+ item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size))
|
|
|
+ self.error_found = chunks_replaced = True
|
|
|
+ data = bytes(size)
|
|
|
+ chunk_id = self.key.id_hash(data)
|
|
|
+ cdata = self.key.encrypt(data)
|
|
|
+ csize = len(cdata)
|
|
|
+ add_reference(chunk_id, size, csize, cdata)
|
|
|
+ else:
|
|
|
+ logger.info('{}: Previously missing file chunk is still missing (Byte {}-{}). '
|
|
|
+ 'It has an all-zero replacement chunk already.'.format(
|
|
|
+ item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size))
|
|
|
+ chunk_id, size, csize = chunk_current
|
|
|
+ add_reference(chunk_id, size, csize)
|
|
|
else:
|
|
|
- add_reference(chunk_id, size, csize)
|
|
|
- chunk_list.append((chunk_id, size, csize))
|
|
|
+ if chunk_current == chunk_healthy:
|
|
|
+ # normal case, all fine.
|
|
|
+ add_reference(chunk_id, size, csize)
|
|
|
+ else:
|
|
|
+ logger.info('{}: Healed previously missing file chunk! (Byte {}-{}).'.format(
|
|
|
+ item[b'path'].decode('utf-8', 'surrogateescape'), offset, offset + size))
|
|
|
+ add_reference(chunk_id, size, csize)
|
|
|
+ mark_as_possibly_superseded(chunk_current[0])  # may have orphaned the all-zero replacement chunk
|
|
|
+ chunk_list.append([chunk_id, size, csize]) # list-typed element as chunks_healthy is list-of-lists
|
|
|
offset += size
|
|
|
- if chunks_replaced and b'chunks_healthy' not in item:
|
|
|
+ if chunks_replaced and not has_chunks_healthy:
|
|
|
# if this is first repair, remember the correct chunk IDs, so we can maybe heal the file later
|
|
|
item[b'chunks_healthy'] = item[b'chunks']
|
|
|
+ if has_chunks_healthy and chunk_list == chunks_healthy:
|
|
|
+ logger.info('{}: Completely healed previously damaged file!'.format(
|
|
|
+ item[b'path'].decode('utf-8', 'surrogateescape')))
|
|
|
+ del item[b'chunks_healthy']
|
|
|
item[b'chunks'] = chunk_list
|
|
|
|
|
|
def robust_iterator(archive):
|