Browse Source

Merge pull request #975 from enkore/feature/paranoid-check

check: support integrity verification
enkore, cách đây 9 năm
mục cha
commit
3bc22061f7
3 tập tin đã thay đổi với 81 bổ sung và 3 xóa
  1. 37 2
      borg/archive.py
  2. 21 1
      borg/archiver.py
  3. 23 0
      borg/testsuite/archiver.py

+ 37 - 2
borg/archive.py

@@ -22,7 +22,8 @@ from .helpers import Chunk, Error, uid2user, user2uid, gid2group, group2gid, \
     Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, bin_to_hex, \
     ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, \
     PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume, \
-    CompressionDecider1, CompressionDecider2, CompressionSpec
+    CompressionDecider1, CompressionDecider2, CompressionSpec, \
+    IntegrityError
 from .repository import Repository
 from .platform import acl_get, acl_set
 from .chunker import Chunker
@@ -698,7 +699,17 @@ class ArchiveChecker:
         self.error_found = False
         self.possibly_superseded = set()
 
-    def check(self, repository, repair=False, archive=None, last=None, prefix=None, save_space=False):
+    def check(self, repository, repair=False, archive=None, last=None, prefix=None, verify_data=False,
+              save_space=False):
+        """Perform a set of checks on 'repository'
+
+        :param repair: enable repair mode, write updated or corrected data into repository
+        :param archive: only check this archive
+        :param last: only check this number of recent archives
+        :param prefix: only check archives with this prefix
+        :param verify_data: integrity verification of data referenced by archives
+        :param save_space: Repository.commit(save_space)
+        """
         logger.info('Starting archive consistency check...')
         self.check_all = archive is None and last is None and prefix is None
         self.repair = repair
@@ -712,6 +723,8 @@ class ArchiveChecker:
         else:
             self.manifest, _ = Manifest.load(repository, key=self.key)
         self.rebuild_refcounts(archive=archive, last=last, prefix=prefix)
+        if verify_data:
+            self.verify_data()
         self.orphan_chunks_check()
         self.finish(save_space=save_space)
         if self.error_found:
@@ -741,6 +754,26 @@ class ArchiveChecker:
         cdata = repository.get(next(self.chunks.iteritems())[0])
         return key_factory(repository, cdata)
 
+    def verify_data(self):
+        logger.info('Starting cryptographic data integrity verification...')
+        pi = ProgressIndicatorPercent(total=len(self.chunks), msg="Verifying data %6.2f%%", step=0.01, same_line=True)
+        count = errors = 0
+        for chunk_id, (refcount, *_) in self.chunks.iteritems():
+            pi.show()
+            if not refcount:
+                continue
+            encrypted_data = self.repository.get(chunk_id)
+            try:
+                _, data = self.key.decrypt(chunk_id, encrypted_data)
+            except IntegrityError as integrity_error:
+                self.error_found = True
+                errors += 1
+                logger.error('chunk %s, integrity error: %s', bin_to_hex(chunk_id), integrity_error)
+            count += 1
+        pi.finish()
+        log = logger.error if errors else logger.info
+        log('Finished cryptographic data integrity verification, verified %d chunks with %d integrity errors.', count, errors)
+
     def rebuild_manifest(self):
         """Rebuild the manifest object if it is missing
 
@@ -874,6 +907,8 @@ class ArchiveChecker:
         else:
             # we only want one specific archive
             archive_items = [item for item in self.manifest.archives.items() if item[0] == archive]
+            if not archive_items:
+                logger.error("Archive '%s' not found.", archive)
             num_archives = 1
             end = 1
 

+ 21 - 1
borg/archiver.py

@@ -185,12 +185,16 @@ class Archiver:
             if not yes(msg, false_msg="Aborting.", truish=('YES', ),
                        env_var_override='BORG_CHECK_I_KNOW_WHAT_I_AM_DOING'):
                 return EXIT_ERROR
+        if args.repo_only and args.verify_data:
+            self.print_error("--repository-only and --verify-data contradict each other. Please select one.")
+            return EXIT_ERROR
         if not args.archives_only:
             if not repository.check(repair=args.repair, save_space=args.save_space):
                 return EXIT_WARNING
         if not args.repo_only and not ArchiveChecker().check(
                 repository, repair=args.repair, archive=args.location.archive,
-                last=args.last, prefix=args.prefix, save_space=args.save_space):
+                last=args.last, prefix=args.prefix, verify_data=args.verify_data,
+                save_space=args.save_space):
             return EXIT_WARNING
         return EXIT_SUCCESS
 
@@ -1212,6 +1216,18 @@ class Archiver:
           required).
         - The archive checks can be time consuming, they can be skipped using the
           --repository-only option.
+
+        The --verify-data option will perform a full integrity verification (as opposed to
+        checking the CRC32 of the segment) of data, which means reading the data from the
+        repository, decrypting and decompressing it. This is a cryptographic verification,
+        which will detect (accidental) corruption. For encrypted repositories it is
+        tamper-resistant as well, unless the attacker has access to the keys.
+
+        It is also very slow.
+
+        --verify-data only verifies data used by the archives specified with --last,
+        --prefix or an explicitly named archive. If none of these are passed,
+        all data in the repository is verified.
         """)
         subparser = subparsers.add_parser('check', parents=[common_parser], add_help=False,
                                           description=self.do_check.__doc__,
@@ -1228,6 +1244,10 @@ class Archiver:
         subparser.add_argument('--archives-only', dest='archives_only', action='store_true',
                                default=False,
                                help='only perform archives checks')
+        subparser.add_argument('--verify-data', dest='verify_data', action='store_true',
+                               default=False,
+                               help='perform cryptographic archive data integrity verification '
+                                    '(conflicts with --repository-only)')
         subparser.add_argument('--repair', dest='repair', action='store_true',
                                default=False,
                                help='attempt to repair any inconsistencies found')

+ 23 - 0
borg/testsuite/archiver.py

@@ -1598,6 +1598,29 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
         self.cmd('check', self.repository_location, exit_code=0)
         self.cmd('extract', '--dry-run', self.repository_location + '::archive1', exit_code=0)
 
+    def _test_verify_data(self, *init_args):
+        shutil.rmtree(self.repository_path)
+        self.cmd('init', self.repository_location, *init_args)
+        self.create_src_archive('archive1')
+        archive, repository = self.open_archive('archive1')
+        with repository:
+            for item in archive.iter_items():
+                if item[b'path'].endswith('testsuite/archiver.py'):
+                    chunk = item[b'chunks'][-1]
+                    data = repository.get(chunk.id) + b'1234'
+                    repository.put(chunk.id, data)
+                    break
+            repository.commit()
+        self.cmd('check', self.repository_location, exit_code=0)
+        output = self.cmd('check', '--verify-data', self.repository_location, exit_code=1)
+        assert bin_to_hex(chunk.id) + ', integrity error' in output
+
+    def test_verify_data(self):
+        self._test_verify_data('--encryption', 'repokey')
+
+    def test_verify_data_unencrypted(self):
+        self._test_verify_data('--encryption', 'none')
+
 
 class RemoteArchiverTestCase(ArchiverTestCase):
     prefix = '__testsuite__:'