
Merge pull request #2568 from enkore/issue/1101.integration.cache

1101.integration.cache
enkore 8 years ago
Parent
Current commit
349a4ade7c

src/borg/_hashindex.c  +27 -0

@@ -291,6 +291,20 @@ hashindex_read(PyObject *file_py)
         goto fail_decref_header;
     }
 
+    /*
+     * Hash the header
+     * If the header is corrupted this bails before doing something stupid (like allocating 3.8 TB of memory)
+     */
+    Py_XDECREF(PyObject_CallMethod(file_py, "hash_part", "s", "HashHeader"));
+    if(PyErr_Occurred()) {
+        if(PyErr_ExceptionMatches(PyExc_AttributeError)) {
+            /* Be able to work with regular file objects which do not have a hash_part method. */
+            PyErr_Clear();
+        } else {
+            goto fail_decref_header;
+        }
+    }
+
     /* Find length of file */
     length_object = PyObject_CallMethod(file_py, "seek", "ni", (Py_ssize_t)0, SEEK_END);
     if(PyErr_Occurred()) {
@@ -473,6 +487,19 @@ hashindex_write(HashIndex *index, PyObject *file_py)
         return;
     }
 
+    /*
+     * Hash the header
+     */
+    Py_XDECREF(PyObject_CallMethod(file_py, "hash_part", "s", "HashHeader"));
+    if(PyErr_Occurred()) {
+        if(PyErr_ExceptionMatches(PyExc_AttributeError)) {
+            /* Be able to work with regular file objects which do not have a hash_part method. */
+            PyErr_Clear();
+        } else {
+            return;
+        }
+    }
+
     /* Note: explicitly construct view; BuildValue can convert (pointer, length) to Python objects, but copies them for doing so */
     buckets_view = PyMemoryView_FromMemory((char*)index->buckets, buckets_length, PyBUF_READ);
     if(!buckets_view) {
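
The C reader and writer duck-type the file object: right after the header is processed they call hash_part("HashHeader") on it, and a plain file object that lacks the method simply raises AttributeError, which is cleared. A minimal sketch (not part of Borg; the class name is made up) of the interface the C code expects:

import io

class HeaderHashingFile(io.BytesIO):
    """Stand-in for IntegrityCheckedFile: records hash_part() calls."""
    def __init__(self, data=b''):
        super().__init__(data)
        self.parts = []

    def hash_part(self, partname):
        # IntegrityCheckedFile updates a running digest under this name and,
        # when reading with known digests, raises FileIntegrityError on a
        # mismatch - so a corrupted header is caught before the (possibly
        # huge) bucket array is allocated or read.
        self.parts.append(partname)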

src/borg/cache.py  +66 -17

@@ -22,8 +22,10 @@ from .helpers import safe_ns
 from .helpers import yes, hostname_is_unique
 from .helpers import remove_surrogates
 from .helpers import ProgressIndicatorPercent, ProgressIndicatorMessage
+from .helpers import set_ec, EXIT_WARNING
 from .item import ArchiveItem, ChunkListEntry
 from .crypto.key import PlaintextKey
+from .crypto.file_integrity import IntegrityCheckedFile, DetachedIntegrityCheckedFile, FileIntegrityError
 from .locking import Lock
 from .platform import SaveFile
 from .remote import cache_if_remote
@@ -237,6 +239,8 @@ class CacheConfig:
         config.set('cache', 'version', '1')
         config.set('cache', 'repository', self.repository.id_str)
         config.set('cache', 'manifest', '')
+        config.add_section('integrity')
+        config.set('integrity', 'manifest', '')
         with SaveFile(self.config_path) as fd:
             config.write(fd)
 
@@ -253,6 +257,20 @@ class CacheConfig:
         self.manifest_id = unhexlify(self._config.get('cache', 'manifest'))
         self.timestamp = self._config.get('cache', 'timestamp', fallback=None)
         self.key_type = self._config.get('cache', 'key_type', fallback=None)
+        try:
+            self.integrity = dict(self._config.items('integrity'))
+            if self._config.get('cache', 'manifest') != self.integrity.pop('manifest'):
+                # The cache config file is updated in place (parsed with ConfigParser, the ConfigParser
+                # state is modified and then written out), not re-created.
+                # Thus, older versions will leave our [integrity] section alone, making the section's data invalid.
+                # Therefore, we also add the manifest ID to this section and
+                # can discern whether an older version interfered by comparing the manifest IDs of this section
+                # and the main [cache] section.
+                self.integrity = {}
+                logger.warning('Cache integrity data not available: old Borg version modified the cache.')
+        except configparser.NoSectionError:
+            logger.debug('Cache integrity: No integrity data found (files, chunks). Cache is from old version.')
+            self.integrity = {}
         previous_location = self._config.get('cache', 'previous_location', fallback=None)
         if previous_location:
             self.previous_location = recanonicalize_relative_location(previous_location, self.repository)
@@ -263,6 +281,11 @@ class CacheConfig:
         if manifest:
             self._config.set('cache', 'manifest', manifest.id_str)
             self._config.set('cache', 'timestamp', manifest.timestamp)
+            if not self._config.has_section('integrity'):
+                self._config.add_section('integrity')
+            for file, integrity_data in self.integrity.items():
+                self._config.set('integrity', file, integrity_data)
+            self._config.set('integrity', 'manifest', manifest.id_str)
         if key:
             self._config.set('cache', 'key_type', str(key.TYPE))
         self._config.set('cache', 'previous_location', self.repository._location.canonical_path())
@@ -392,14 +415,16 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
         with open(os.path.join(self.path, 'README'), 'w') as fd:
             fd.write(CACHE_README)
         self.cache_config.create()
-        ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8'))
+        ChunkIndex().write(os.path.join(self.path, 'chunks'))
         os.makedirs(os.path.join(self.path, 'chunks.archive.d'))
         with SaveFile(os.path.join(self.path, 'files'), binary=True) as fd:
             pass  # empty file
 
     def _do_open(self):
         self.cache_config.load()
-        self.chunks = ChunkIndex.read(os.path.join(self.path, 'chunks').encode('utf-8'))
+        with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=False,
+                                  integrity_data=self.cache_config.integrity.get('chunks')) as fd:
+            self.chunks = ChunkIndex.read(fd)
         self.files = None
 
     def open(self):
@@ -417,7 +442,9 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
         self.files = {}
         self._newest_mtime = None
         logger.debug('Reading files cache ...')
-        with open(os.path.join(self.path, 'files'), 'rb') as fd:
+
+        with IntegrityCheckedFile(path=os.path.join(self.path, 'files'), write=False,
+                                  integrity_data=self.cache_config.integrity.get('files')) as fd:
             u = msgpack.Unpacker(use_list=True)
             while True:
                 data = fd.read(64 * 1024)
@@ -458,7 +485,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                 self._newest_mtime = 2 ** 63 - 1  # nanoseconds, good until y2262
             ttl = int(os.environ.get('BORG_FILES_CACHE_TTL', 20))
             pi.output('Saving files cache')
-            with SaveFile(os.path.join(self.path, 'files'), binary=True) as fd:
+            with IntegrityCheckedFile(path=os.path.join(self.path, 'files'), write=True) as fd:
                 for path_hash, item in self.files.items():
                     # Only keep files seen in this backup that are older than newest mtime seen in this backup -
                     # this is to avoid issues with filesystem snapshots and mtime granularity.
@@ -467,10 +494,13 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                     if entry.age == 0 and bigint_to_int(entry.mtime) < self._newest_mtime or \
                        entry.age > 0 and entry.age < ttl:
                         msgpack.pack((path_hash, entry), fd)
+            self.cache_config.integrity['files'] = fd.integrity_data
+        pi.output('Saving chunks cache')
+        with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=True) as fd:
+            self.chunks.write(fd)
+        self.cache_config.integrity['chunks'] = fd.integrity_data
         pi.output('Saving cache config')
         self.cache_config.save(self.manifest, self.key)
-        pi.output('Saving chunks cache')
-        self.chunks.write(os.path.join(self.path, 'chunks').encode('utf-8'))
         os.rename(os.path.join(self.path, 'txn.active'),
                   os.path.join(self.path, 'txn.tmp'))
         shutil.rmtree(os.path.join(self.path, 'txn.tmp'))
@@ -510,7 +540,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
         def mkpath(id, suffix=''):
             id_hex = bin_to_hex(id)
             path = os.path.join(archive_path, id_hex + suffix)
-            return path.encode('utf-8')
+            return path
 
         def cached_archives():
             if self.do_cache:
@@ -525,7 +555,14 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
 
         def cleanup_outdated(ids):
             for id in ids:
-                os.unlink(mkpath(id))
+                cleanup_cached_archive(id)
+
+        def cleanup_cached_archive(id):
+            os.unlink(mkpath(id))
+            try:
+                os.unlink(mkpath(id) + '.integrity')
+            except FileNotFoundError:
+                pass
 
         def fetch_and_build_idx(archive_id, repository, key, chunk_idx):
             cdata = repository.get(archive_id)
@@ -542,14 +579,16 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                 for item in unpacker:
                     if not isinstance(item, dict):
                         logger.error('Error: Did not get expected metadata dict - archive corrupted!')
-                        continue
+                        continue   # XXX: continue?!
                     for chunk_id, size, csize in item.get(b'chunks', []):
                         chunk_idx.add(chunk_id, 1, size, csize)
             if self.do_cache:
                 fn = mkpath(archive_id)
                 fn_tmp = mkpath(archive_id, suffix='.tmp')
                 try:
-                    chunk_idx.write(fn_tmp)
+                    with DetachedIntegrityCheckedFile(path=fn_tmp, write=True,
+                                                      filename=bin_to_hex(archive_id)) as fd:
+                        chunk_idx.write(fd)
                 except Exception:
                     os.unlink(fn_tmp)
                 else:
@@ -564,9 +603,9 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
             logger.info('Synchronizing chunks cache...')
             cached_ids = cached_archives()
             archive_ids = repo_archives()
-            logger.info('Archives: %d, w/ cached Idx: %d, w/ outdated Idx: %d, w/o cached Idx: %d.' % (
+            logger.info('Archives: %d, w/ cached Idx: %d, w/ outdated Idx: %d, w/o cached Idx: %d.',
                 len(archive_ids), len(cached_ids),
-                len(cached_ids - archive_ids), len(archive_ids - cached_ids), ))
+                len(cached_ids - archive_ids), len(archive_ids - cached_ids))
             # deallocates old hashindex, creates empty hashindex:
             chunk_idx.clear()
             cleanup_outdated(cached_ids - archive_ids)
@@ -583,10 +622,20 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                     if self.do_cache:
                         if archive_id in cached_ids:
                             archive_chunk_idx_path = mkpath(archive_id)
-                            logger.info("Reading cached archive chunk index for %s ..." % archive_name)
-                            archive_chunk_idx = ChunkIndex.read(archive_chunk_idx_path)
-                        else:
-                            logger.info('Fetching and building archive index for %s ...' % archive_name)
+                            logger.info("Reading cached archive chunk index for %s ...", archive_name)
+                            try:
+                                with DetachedIntegrityCheckedFile(path=archive_chunk_idx_path, write=False) as fd:
+                                    archive_chunk_idx = ChunkIndex.read(fd)
+                            except FileIntegrityError as fie:
+                                logger.error('Cached archive chunk index of %s is corrupted: %s', archive_name, fie)
+                                # Delete it and fetch a new index
+                                cleanup_cached_archive(archive_id)
+                                cached_ids.remove(archive_id)
+                                set_ec(EXIT_WARNING)
+                        if archive_id not in cached_ids:
+                            # Do not make this an else branch; the FileIntegrityError exception handler
+                            # above can remove *archive_id* from *cached_ids*.
+                            logger.info('Fetching and building archive index for %s ...', archive_name)
                             archive_chunk_idx = ChunkIndex()
                             fetch_and_build_idx(archive_id, repository, self.key, archive_chunk_idx)
                         logger.info("Merging into master chunks index ...")
@@ -599,7 +648,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                             chunk_idx.merge(archive_chunk_idx)
                     else:
                         chunk_idx = chunk_idx or ChunkIndex()
-                        logger.info('Fetching archive index for %s ...' % archive_name)
+                        logger.info('Fetching archive index for %s ...', archive_name)
                         fetch_and_build_idx(archive_id, repository, self.key, chunk_idx)
                 if self.progress:
                     pi.finish()
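
With this change the cache wraps the 'chunks' and 'files' caches in IntegrityCheckedFile and stores the resulting JSON blobs in a new [integrity] section of the cache config, next to [cache]. The manifest ID is duplicated into that section so a write by an older Borg (which only rewrites [cache]) can be detected and the stale integrity data discarded. A rough sketch of the resulting layout; the hex IDs and digests are placeholders, not real values:

import configparser

config = configparser.ConfigParser(interpolation=None)
config['cache'] = {
    'version': '1',
    'repository': 'f3b5...',   # repository ID (hex), illustrative
    'manifest': '9a1c...',     # manifest ID (hex), illustrative
}
config['integrity'] = {
    # Must match [cache] manifest; if an older Borg rewrote only [cache],
    # the IDs diverge and load() drops this section with a warning.
    'manifest': '9a1c...',
    # JSON produced by IntegrityCheckedFile for each protected cache file.
    'chunks': '{"algorithm": "...", "digests": {"HashHeader": "...", "final": "..."}}',
    'files': '{"algorithm": "...", "digests": {"final": "..."}}',
}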

src/borg/crypto/file_integrity.py  +55 -28

@@ -104,7 +104,7 @@ class FileIntegrityError(IntegrityError):
 
 
 class IntegrityCheckedFile(FileLikeWrapper):
-    def __init__(self, path, write, filename=None, override_fd=None):
+    def __init__(self, path, write, filename=None, override_fd=None, integrity_data=None):
         self.path = path
         self.writing = write
         mode = 'wb' if write else 'rb'
@@ -114,10 +114,10 @@ class IntegrityCheckedFile(FileLikeWrapper):
 
         self.hash_filename(filename)
 
-        if write:
+        if write or not integrity_data:
             self.digests = {}
         else:
-            self.digests = self.read_integrity_file(path, self.hasher)
+            self.digests = self.parse_integrity_data(path, integrity_data, self.hasher)
             # TODO: When we're reading but don't have any digests, i.e. no integrity file existed,
             # TODO: then we could just short-circuit.
 
@@ -126,37 +126,33 @@ class IntegrityCheckedFile(FileLikeWrapper):
         # In Borg the name itself encodes the context (eg. index.N, cache, files),
         # while the path doesn't matter, and moving e.g. a repository or cache directory is supported.
         # Changing the name however imbues a change of context that is not permissible.
+        # While Borg does not use anything except ASCII in these file names, it's important to use
+        # the same encoding everywhere for portability. Using os.fsencode() would be wrong.
         filename = os.path.basename(filename or self.path)
         self.hasher.update(('%10d' % len(filename)).encode())
         self.hasher.update(filename.encode())
 
-    @staticmethod
-    def integrity_file_path(path):
-        return path + '.integrity'
-
     @classmethod
-    def read_integrity_file(cls, path, hasher):
+    def parse_integrity_data(cls, path: str, data: str, hasher: SHA512FileHashingWrapper):
         try:
-            with open(cls.integrity_file_path(path), 'r') as fd:
-                integrity_file = json.load(fd)
-                # Provisions for agility now, implementation later, but make sure the on-disk joint is oiled.
-                algorithm = integrity_file['algorithm']
-                if algorithm != hasher.ALGORITHM:
-                    logger.warning('Cannot verify integrity of %s: Unknown algorithm %r', path, algorithm)
-                    return
-                digests = integrity_file['digests']
-                # Require at least presence of the final digest
-                digests['final']
-                return digests
-        except FileNotFoundError:
-            logger.info('No integrity file found for %s', path)
-        except (OSError, ValueError, TypeError, KeyError) as e:
-            logger.warning('Could not read integrity file for %s: %s', path, e)
+            integrity_data = json.loads(data)
+            # Provisions for agility now, implementation later, but make sure the on-disk joint is oiled.
+            algorithm = integrity_data['algorithm']
+            if algorithm != hasher.ALGORITHM:
+                logger.warning('Cannot verify integrity of %s: Unknown algorithm %r', path, algorithm)
+                return
+            digests = integrity_data['digests']
+            # Require at least presence of the final digest
+            digests['final']
+            return digests
+        except (ValueError, TypeError, KeyError) as e:
+            logger.warning('Could not parse integrity data for %s: %s', path, e)
             raise FileIntegrityError(path)
 
     def hash_part(self, partname, is_final=False):
         if not self.writing and not self.digests:
             return
+        self.hasher.update(('%10d' % len(partname)).encode())
         self.hasher.update(partname.encode())
         self.hasher.hash_length(seek_to_end=is_final)
         digest = self.hasher.hexdigest()
@@ -173,10 +169,41 @@ class IntegrityCheckedFile(FileLikeWrapper):
         if exception:
             return
         if self.writing:
-            with open(self.integrity_file_path(self.path), 'w') as fd:
-                json.dump({
-                    'algorithm': self.hasher.ALGORITHM,
-                    'digests': self.digests,
-                }, fd)
+            self.store_integrity_data(json.dumps({
+                'algorithm': self.hasher.ALGORITHM,
+                'digests': self.digests,
+            }))
         elif self.digests:
             logger.debug('Verified integrity of %s', self.path)
+
+    def store_integrity_data(self, data: str):
+        self.integrity_data = data
+
+
+class DetachedIntegrityCheckedFile(IntegrityCheckedFile):
+    def __init__(self, path, write, filename=None, override_fd=None):
+        super().__init__(path, write, filename, override_fd)
+        filename = filename or os.path.basename(path)
+        output_dir = os.path.dirname(path)
+        self.output_integrity_file = self.integrity_file_path(os.path.join(output_dir, filename))
+        if not write:
+            self.digests = self.read_integrity_file(self.path, self.hasher)
+
+    @staticmethod
+    def integrity_file_path(path):
+        return path + '.integrity'
+
+    @classmethod
+    def read_integrity_file(cls, path, hasher):
+        try:
+            with open(cls.integrity_file_path(path), 'r') as fd:
+                return cls.parse_integrity_data(path, fd.read(), hasher)
+        except FileNotFoundError:
+            logger.info('No integrity file found for %s', path)
+        except OSError as e:
+            logger.warning('Could not read integrity file for %s: %s', path, e)
+            raise FileIntegrityError(path)
+
+    def store_integrity_data(self, data: str):
+        with open(self.output_integrity_file, 'w') as fd:
+            fd.write(data)
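
After this split, IntegrityCheckedFile no longer touches a side-car file itself: on write it exposes the JSON blob as fd.integrity_data for the caller to persist (the cache keeps it in the config, see above), and on read the caller passes that blob back in via integrity_data=. DetachedIntegrityCheckedFile keeps the previous behaviour of a sibling '<name>.integrity' file, which is what chunks.archive.d uses. A usage sketch; the '/tmp/...' paths are only illustrative:

from borg.crypto.file_integrity import (
    IntegrityCheckedFile, DetachedIntegrityCheckedFile, FileIntegrityError,
)

# In-band variant: the caller keeps the integrity data itself.
with IntegrityCheckedFile(path='/tmp/chunks', write=True) as fd:
    fd.write(b'index data')
blob = fd.integrity_data          # JSON string, persisted by the caller

try:
    with IntegrityCheckedFile(path='/tmp/chunks', write=False, integrity_data=blob) as fd:
        data = fd.read()
except FileIntegrityError:
    pass                          # raised if the file was tampered with

# Detached variant: digests live in '/tmp/archive-idx.integrity' next to the file.
with DetachedIntegrityCheckedFile(path='/tmp/archive-idx', write=True) as fd:
    fd.write(b'index data')
with DetachedIntegrityCheckedFile(path='/tmp/archive-idx', write=False) as fd:
    fd.read()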

src/borg/hashindex.pyx  +10 -4

@@ -67,8 +67,11 @@ cdef class IndexBase:
     def __cinit__(self, capacity=0, path=None, key_size=32):
         self.key_size = key_size
         if path:
-            with open(path, 'rb') as fd:
-                self.index = hashindex_read(fd)
+            if isinstance(path, (str, bytes)):
+                with open(path, 'rb') as fd:
+                    self.index = hashindex_read(fd)
+            else:
+                self.index = hashindex_read(path)
             assert self.index, 'hashindex_read() returned NULL with no exception set'
         else:
             self.index = hashindex_init(capacity, self.key_size, self.value_size)
@@ -84,8 +87,11 @@ cdef class IndexBase:
         return cls(path=path)
 
     def write(self, path):
-        with open(path, 'wb') as fd:
-            hashindex_write(self.index, fd)
+        if isinstance(path, (str, bytes)):
+            with open(path, 'wb') as fd:
+                hashindex_write(self.index, fd)
+        else:
+            hashindex_write(self.index, path)
 
     def clear(self):
         hashindex_free(self.index)
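
IndexBase (and thus ChunkIndex/NSIndex) now accepts either a path, which it opens itself, or an already-open binary file-like object, which is what lets cache.py hand an IntegrityCheckedFile straight to ChunkIndex.read()/write(). A short sketch of both call styles; '/tmp/chunks' is only an illustrative path:

from borg.hashindex import ChunkIndex
from borg.crypto.file_integrity import IntegrityCheckedFile

idx = ChunkIndex()

idx.write('/tmp/chunks')          # str/bytes path: opened internally with open()

with IntegrityCheckedFile(path='/tmp/chunks', write=True) as fd:
    idx.write(fd)                 # file object: passed through to hashindex_write()

with IntegrityCheckedFile(path='/tmp/chunks', write=False,
                          integrity_data=fd.integrity_data) as fd:
    idx = ChunkIndex.read(fd)     # same duck typing on the read path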

src/borg/testsuite/archiver.py  +78 -0

@@ -1,5 +1,6 @@
 import argparse
 import errno
+import io
 import json
 import logging
 import os
@@ -37,6 +38,7 @@ from ..constants import *  # NOQA
 from ..crypto.low_level import bytes_to_long, num_aes_blocks
 from ..crypto.key import KeyfileKeyBase, RepoKey, KeyfileKey, Passphrase, TAMRequiredError
 from ..crypto.keymanager import RepoIdMismatch, NotABorgKeyFile
+from ..crypto.file_integrity import FileIntegrityError
 from ..helpers import Location, get_security_dir
 from ..helpers import Manifest
 from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
@@ -2886,6 +2888,82 @@ class RemoteArchiverTestCase(ArchiverTestCase):
                 self.assert_true(marker not in res)
 
 
+class ArchiverCorruptionTestCase(ArchiverTestCaseBase):
+    def setUp(self):
+        super().setUp()
+        self.create_test_files()
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        self.cache_path = json.loads(self.cmd('info', self.repository_location, '--json'))['cache']['path']
+
+    def corrupt(self, file):
+        with open(file, 'r+b') as fd:
+            fd.seek(-1, io.SEEK_END)
+            fd.write(b'1')
+
+    def test_cache_chunks(self):
+        self.corrupt(os.path.join(self.cache_path, 'chunks'))
+
+        if self.FORK_DEFAULT:
+            out = self.cmd('info', self.repository_location, exit_code=2)
+            assert 'failed integrity check' in out
+        else:
+            with pytest.raises(FileIntegrityError):
+                self.cmd('info', self.repository_location)
+
+    def test_cache_files(self):
+        self.cmd('create', self.repository_location + '::test', 'input')
+        self.corrupt(os.path.join(self.cache_path, 'files'))
+
+        if self.FORK_DEFAULT:
+            out = self.cmd('create', self.repository_location + '::test1', 'input', exit_code=2)
+            assert 'failed integrity check' in out
+        else:
+            with pytest.raises(FileIntegrityError):
+                self.cmd('create', self.repository_location + '::test1', 'input')
+
+    def test_chunks_archive(self):
+        self.cmd('create', self.repository_location + '::test1', 'input')
+        # Find ID of test1 so we can corrupt it later :)
+        target_id = self.cmd('list', self.repository_location, '--format={id}{LF}').strip()
+        self.cmd('create', self.repository_location + '::test2', 'input')
+
+        # Force cache sync, creating archive chunks of test1 and test2 in chunks.archive.d
+        self.cmd('delete', '--cache-only', self.repository_location)
+        self.cmd('info', self.repository_location, '--json')
+
+        chunks_archive = os.path.join(self.cache_path, 'chunks.archive.d')
+        assert len(os.listdir(chunks_archive)) == 4  # two archives, one chunks cache and one .integrity file each
+
+        self.corrupt(os.path.join(chunks_archive, target_id))
+
+        # Trigger cache sync by changing the manifest ID in the cache config
+        config_path = os.path.join(self.cache_path, 'config')
+        config = ConfigParser(interpolation=None)
+        config.read(config_path)
+        config.set('cache', 'manifest', bin_to_hex(bytes(32)))
+        with open(config_path, 'w') as fd:
+            config.write(fd)
+
+        # Cache sync notices corrupted archive chunks, but automatically recovers.
+        out = self.cmd('create', '-v', self.repository_location + '::test3', 'input', exit_code=1)
+        assert 'Reading cached archive chunk index for test1' in out
+        assert 'Cached archive chunk index of test1 is corrupted' in out
+        assert 'Fetching and building archive index for test1' in out
+
+    def test_old_version_interfered(self):
+        # Modify the main manifest ID without touching the manifest ID in the integrity section.
+        # This happens if a version without integrity checking modifies the cache.
+        config_path = os.path.join(self.cache_path, 'config')
+        config = ConfigParser(interpolation=None)
+        config.read(config_path)
+        config.set('cache', 'manifest', bin_to_hex(bytes(32)))
+        with open(config_path, 'w') as fd:
+            config.write(fd)
+
+        out = self.cmd('info', self.repository_location)
+        assert 'Cache integrity data not available: old Borg version modified the cache.' in out
+
+
 class DiffArchiverTestCase(ArchiverTestCaseBase):
     def test_basic_functionality(self):
         # Initialize test folder

src/borg/testsuite/file_integrity.py  +19 -19

@@ -1,21 +1,21 @@
 
 import pytest
 
-from ..crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError
+from ..crypto.file_integrity import IntegrityCheckedFile, DetachedIntegrityCheckedFile, FileIntegrityError
 
 
 class TestReadIntegrityFile:
     def test_no_integrity(self, tmpdir):
         protected_file = tmpdir.join('file')
         protected_file.write('1234')
-        assert IntegrityCheckedFile.read_integrity_file(str(protected_file), None) is None
+        assert DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), None) is None
 
     def test_truncated_integrity(self, tmpdir):
         protected_file = tmpdir.join('file')
         protected_file.write('1234')
         tmpdir.join('file.integrity').write('')
         with pytest.raises(FileIntegrityError):
-            IntegrityCheckedFile.read_integrity_file(str(protected_file), None)
+            DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), None)
 
     def test_unknown_algorithm(self, tmpdir):
         class SomeHasher:
@@ -24,7 +24,7 @@ class TestReadIntegrityFile:
         protected_file = tmpdir.join('file')
         protected_file.write('1234')
         tmpdir.join('file.integrity').write('{"algorithm": "HMAC_SERIOUSHASH", "digests": "1234"}')
-        assert IntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) is None
+        assert DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) is None
 
     @pytest.mark.parametrize('json', (
         '{"ALGORITHM": "HMAC_SERIOUSHASH", "digests": "1234"}',
@@ -38,7 +38,7 @@ class TestReadIntegrityFile:
         protected_file.write('1234')
         tmpdir.join('file.integrity').write(json)
         with pytest.raises(FileIntegrityError):
-            IntegrityCheckedFile.read_integrity_file(str(protected_file), None)
+            DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), None)
 
     def test_valid(self, tmpdir):
         class SomeHasher:
@@ -47,35 +47,35 @@ class TestReadIntegrityFile:
         protected_file = tmpdir.join('file')
         protected_file.write('1234')
         tmpdir.join('file.integrity').write('{"algorithm": "HMAC_FOO1", "digests": {"final": "1234"}}')
-        assert IntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) == {'final': '1234'}
+        assert DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) == {'final': '1234'}
 
 
-class TestIntegrityCheckedFile:
+class TestDetachedIntegrityCheckedFile:
     @pytest.fixture
     def integrity_protected_file(self, tmpdir):
         path = str(tmpdir.join('file'))
-        with IntegrityCheckedFile(path, write=True) as fd:
+        with DetachedIntegrityCheckedFile(path, write=True) as fd:
             fd.write(b'foo and bar')
         return path
 
     def test_simple(self, tmpdir, integrity_protected_file):
         assert tmpdir.join('file').check(file=True)
         assert tmpdir.join('file.integrity').check(file=True)
-        with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
+        with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
             assert fd.read() == b'foo and bar'
 
     def test_corrupted_file(self, integrity_protected_file):
         with open(integrity_protected_file, 'ab') as fd:
             fd.write(b' extra data')
         with pytest.raises(FileIntegrityError):
-            with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
+            with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
                 assert fd.read() == b'foo and bar extra data'
 
     def test_corrupted_file_partial_read(self, integrity_protected_file):
         with open(integrity_protected_file, 'ab') as fd:
             fd.write(b' extra data')
         with pytest.raises(FileIntegrityError):
-            with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
+            with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
                 data = b'foo and bar'
                 assert fd.read(len(data)) == data
 
@@ -88,7 +88,7 @@ class TestIntegrityCheckedFile:
         tmpdir.join('file').move(new_path)
         tmpdir.join('file.integrity').move(new_path + '.integrity')
         with pytest.raises(FileIntegrityError):
-            with IntegrityCheckedFile(str(new_path), write=False) as fd:
+            with DetachedIntegrityCheckedFile(str(new_path), write=False) as fd:
                 assert fd.read() == b'foo and bar'
 
     def test_moved_file(self, tmpdir, integrity_protected_file):
@@ -96,27 +96,27 @@ class TestIntegrityCheckedFile:
         tmpdir.join('file').move(new_dir.join('file'))
         tmpdir.join('file.integrity').move(new_dir.join('file.integrity'))
         new_path = str(new_dir.join('file'))
-        with IntegrityCheckedFile(new_path, write=False) as fd:
+        with DetachedIntegrityCheckedFile(new_path, write=False) as fd:
             assert fd.read() == b'foo and bar'
 
     def test_no_integrity(self, tmpdir, integrity_protected_file):
         tmpdir.join('file.integrity').remove()
-        with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
+        with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
             assert fd.read() == b'foo and bar'
 
 
-class TestIntegrityCheckedFileParts:
+class TestDetachedIntegrityCheckedFileParts:
     @pytest.fixture
     def integrity_protected_file(self, tmpdir):
         path = str(tmpdir.join('file'))
-        with IntegrityCheckedFile(path, write=True) as fd:
+        with DetachedIntegrityCheckedFile(path, write=True) as fd:
             fd.write(b'foo and bar')
             fd.hash_part('foopart')
             fd.write(b' other data')
         return path
 
     def test_simple(self, integrity_protected_file):
-        with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
+        with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
             data1 = b'foo and bar'
             assert fd.read(len(data1)) == data1
             fd.hash_part('foopart')
@@ -127,7 +127,7 @@ class TestIntegrityCheckedFileParts:
             # Because some hash_part failed, the final digest will fail as well - again - even if we catch
             # the failing hash_part. This is intentional: (1) it makes the code simpler (2) it's a good fail-safe
             # against overly broad exception handling.
-            with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
+            with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
                 data1 = b'foo and bar'
                 assert fd.read(len(data1)) == data1
                 with pytest.raises(FileIntegrityError):
@@ -140,7 +140,7 @@ class TestIntegrityCheckedFileParts:
         with open(integrity_protected_file, 'ab') as fd:
             fd.write(b'some extra stuff that does not belong')
         with pytest.raises(FileIntegrityError):
-            with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
+            with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
                 data1 = b'foo and bar'
                 try:
                     assert fd.read(len(data1)) == data1

src/borg/testsuite/hashindex.py  +22 -0

@@ -6,6 +6,7 @@ import zlib
 
 from ..hashindex import NSIndex, ChunkIndex
 from .. import hashindex
+from ..crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError
 from . import BaseTestCase
 
 # Note: these tests are part of the self test, do not use or import py.test functionality here.
@@ -319,6 +320,27 @@ class HashIndexDataTestCase(BaseTestCase):
         assert idx1[H(3)] == (ChunkIndex.MAX_VALUE, 6, 7)
 
 
+class HashIndexIntegrityTestCase(HashIndexDataTestCase):
+    def write_integrity_checked_index(self, tempdir):
+        idx = self._deserialize_hashindex(self.HASHINDEX)
+        file = os.path.join(tempdir, 'idx')
+        with IntegrityCheckedFile(path=file, write=True) as fd:
+            idx.write(fd)
+        integrity_data = fd.integrity_data
+        assert 'final' in integrity_data
+        assert 'HashHeader' in integrity_data
+        return file, integrity_data
+
+    def test_integrity_checked_file(self):
+        with tempfile.TemporaryDirectory() as tempdir:
+            file, integrity_data = self.write_integrity_checked_index(tempdir)
+            with open(file, 'r+b') as fd:
+                fd.write(b'Foo')
+            with self.assert_raises(FileIntegrityError):
+                with IntegrityCheckedFile(path=file, write=False, integrity_data=integrity_data) as fd:
+                    ChunkIndex.read(fd)
+
+
 class NSIndexTestCase(BaseTestCase):
     def test_nsindex_segment_limit(self):
         idx = NSIndex()