
Merge pull request #2568 from enkore/issue/1101.integration.cache

1101.integration.cache
enkore 8 years ago
Parent
Current commit
349a4ade7c

src/borg/_hashindex.c  +27 -0

@@ -291,6 +291,20 @@ hashindex_read(PyObject *file_py)
         goto fail_decref_header;
     }
 
+    /*
+     * Hash the header
+     * If the header is corrupted this bails before doing something stupid (like allocating 3.8 TB of memory)
+     */
+    Py_XDECREF(PyObject_CallMethod(file_py, "hash_part", "s", "HashHeader"));
+    if(PyErr_Occurred()) {
+        if(PyErr_ExceptionMatches(PyExc_AttributeError)) {
+            /* Be able to work with regular file objects which do not have a hash_part method. */
+            PyErr_Clear();
+        } else {
+            goto fail_decref_header;
+        }
+    }
+
     /* Find length of file */
     length_object = PyObject_CallMethod(file_py, "seek", "ni", (Py_ssize_t)0, SEEK_END);
     if(PyErr_Occurred()) {
@@ -473,6 +487,19 @@ hashindex_write(HashIndex *index, PyObject *file_py)
         return;
     }
 
+    /*
+     * Hash the header
+     */
+    Py_XDECREF(PyObject_CallMethod(file_py, "hash_part", "s", "HashHeader"));
+    if(PyErr_Occurred()) {
+        if(PyErr_ExceptionMatches(PyExc_AttributeError)) {
+            /* Be able to work with regular file objects which do not have a hash_part method. */
+            PyErr_Clear();
+        } else {
+            return;
+        }
+    }
+
     /* Note: explicitly construct view; BuildValue can convert (pointer, length) to Python objects, but copies them for doing so */
     buckets_view = PyMemoryView_FromMemory((char*)index->buckets, buckets_length, PyBUF_READ);
     if(!buckets_view) {
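
The C reader and writer duck-type the file object: right after the header is processed they call hash_part("HashHeader") on it, and a plain file object that lacks the method simply raises AttributeError, which is cleared. A minimal sketch (not part of Borg; the class name is made up) of the interface the C code expects:

import io

class HeaderHashingFile(io.BytesIO):
    """Stand-in for IntegrityCheckedFile: records hash_part() calls."""
    def __init__(self, data=b''):
        super().__init__(data)
        self.parts = []

    def hash_part(self, partname):
        # IntegrityCheckedFile updates a running digest under this name and,
        # when reading with known digests, raises FileIntegrityError on a
        # mismatch - so a corrupted header is caught before the (possibly
        # huge) bucket array is allocated or read.
        self.parts.append(partname)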

src/borg/cache.py  +66 -17

@@ -22,8 +22,10 @@ from .helpers import safe_ns
 from .helpers import yes, hostname_is_unique
 from .helpers import remove_surrogates
 from .helpers import ProgressIndicatorPercent, ProgressIndicatorMessage
+from .helpers import set_ec, EXIT_WARNING
 from .item import ArchiveItem, ChunkListEntry
 from .crypto.key import PlaintextKey
+from .crypto.file_integrity import IntegrityCheckedFile, DetachedIntegrityCheckedFile, FileIntegrityError
 from .locking import Lock
 from .platform import SaveFile
 from .remote import cache_if_remote
@@ -237,6 +239,8 @@ class CacheConfig:
         config.set('cache', 'version', '1')
         config.set('cache', 'repository', self.repository.id_str)
         config.set('cache', 'manifest', '')
+        config.add_section('integrity')
+        config.set('integrity', 'manifest', '')
         with SaveFile(self.config_path) as fd:
             config.write(fd)
 
@@ -253,6 +257,20 @@ class CacheConfig:
         self.manifest_id = unhexlify(self._config.get('cache', 'manifest'))
         self.timestamp = self._config.get('cache', 'timestamp', fallback=None)
         self.key_type = self._config.get('cache', 'key_type', fallback=None)
+        try:
+            self.integrity = dict(self._config.items('integrity'))
+            if self._config.get('cache', 'manifest') != self.integrity.pop('manifest'):
+                # The cache config file is updated in place (parsed with ConfigParser, the ConfigParser
+                # state is modified and then written out), not re-created.
+                # Thus, older versions will leave our [integrity] section alone, making the section's data invalid.
+                # Therefore, we also add the manifest ID to this section and
+                # can discern whether an older version interfered by comparing the manifest IDs of this section
+                # and the main [cache] section.
+                self.integrity = {}
+                logger.warning('Cache integrity data not available: old Borg version modified the cache.')
+        except configparser.NoSectionError:
+            logger.debug('Cache integrity: No integrity data found (files, chunks). Cache is from old version.')
+            self.integrity = {}
         previous_location = self._config.get('cache', 'previous_location', fallback=None)
         if previous_location:
             self.previous_location = recanonicalize_relative_location(previous_location, self.repository)
@@ -263,6 +281,11 @@ class CacheConfig:
         if manifest:
             self._config.set('cache', 'manifest', manifest.id_str)
             self._config.set('cache', 'timestamp', manifest.timestamp)
+            if not self._config.has_section('integrity'):
+                self._config.add_section('integrity')
+            for file, integrity_data in self.integrity.items():
+                self._config.set('integrity', file, integrity_data)
+            self._config.set('integrity', 'manifest', manifest.id_str)
         if key:
             self._config.set('cache', 'key_type', str(key.TYPE))
         self._config.set('cache', 'previous_location', self.repository._location.canonical_path())
@@ -392,14 +415,16 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
         with open(os.path.join(self.path, 'README'), 'w') as fd:
             fd.write(CACHE_README)
         self.cache_config.create()
-        ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8'))
+        ChunkIndex().write(os.path.join(self.path, 'chunks'))
         os.makedirs(os.path.join(self.path, 'chunks.archive.d'))
         with SaveFile(os.path.join(self.path, 'files'), binary=True) as fd:
             pass  # empty file
 
     def _do_open(self):
         self.cache_config.load()
-        self.chunks = ChunkIndex.read(os.path.join(self.path, 'chunks').encode('utf-8'))
+        with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=False,
+                                  integrity_data=self.cache_config.integrity.get('chunks')) as fd:
+            self.chunks = ChunkIndex.read(fd)
         self.files = None
 
     def open(self):
@@ -417,7 +442,9 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
         self.files = {}
         self._newest_mtime = None
         logger.debug('Reading files cache ...')
-        with open(os.path.join(self.path, 'files'), 'rb') as fd:
+
+        with IntegrityCheckedFile(path=os.path.join(self.path, 'files'), write=False,
+                                  integrity_data=self.cache_config.integrity.get('files')) as fd:
             u = msgpack.Unpacker(use_list=True)
             while True:
                 data = fd.read(64 * 1024)
@@ -458,7 +485,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                 self._newest_mtime = 2 ** 63 - 1  # nanoseconds, good until y2262
             ttl = int(os.environ.get('BORG_FILES_CACHE_TTL', 20))
             pi.output('Saving files cache')
-            with SaveFile(os.path.join(self.path, 'files'), binary=True) as fd:
+            with IntegrityCheckedFile(path=os.path.join(self.path, 'files'), write=True) as fd:
                 for path_hash, item in self.files.items():
                     # Only keep files seen in this backup that are older than newest mtime seen in this backup -
                     # this is to avoid issues with filesystem snapshots and mtime granularity.
@@ -467,10 +494,13 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                     if entry.age == 0 and bigint_to_int(entry.mtime) < self._newest_mtime or \
                        entry.age > 0 and entry.age < ttl:
                         msgpack.pack((path_hash, entry), fd)
+            self.cache_config.integrity['files'] = fd.integrity_data
+        pi.output('Saving chunks cache')
+        with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=True) as fd:
+            self.chunks.write(fd)
+        self.cache_config.integrity['chunks'] = fd.integrity_data
         pi.output('Saving cache config')
         self.cache_config.save(self.manifest, self.key)
-        pi.output('Saving chunks cache')
-        self.chunks.write(os.path.join(self.path, 'chunks').encode('utf-8'))
         os.rename(os.path.join(self.path, 'txn.active'),
                   os.path.join(self.path, 'txn.tmp'))
         shutil.rmtree(os.path.join(self.path, 'txn.tmp'))
@@ -510,7 +540,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
         def mkpath(id, suffix=''):
             id_hex = bin_to_hex(id)
             path = os.path.join(archive_path, id_hex + suffix)
-            return path.encode('utf-8')
+            return path
 
         def cached_archives():
             if self.do_cache:
@@ -525,7 +555,14 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
 
         def cleanup_outdated(ids):
             for id in ids:
-                os.unlink(mkpath(id))
+                cleanup_cached_archive(id)
+
+        def cleanup_cached_archive(id):
+            os.unlink(mkpath(id))
+            try:
+                os.unlink(mkpath(id) + '.integrity')
+            except FileNotFoundError:
+                pass
 
         def fetch_and_build_idx(archive_id, repository, key, chunk_idx):
             cdata = repository.get(archive_id)
@@ -542,14 +579,16 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                 for item in unpacker:
                     if not isinstance(item, dict):
                         logger.error('Error: Did not get expected metadata dict - archive corrupted!')
-                        continue
+                        continue   # XXX: continue?!
                     for chunk_id, size, csize in item.get(b'chunks', []):
                         chunk_idx.add(chunk_id, 1, size, csize)
             if self.do_cache:
                 fn = mkpath(archive_id)
                 fn_tmp = mkpath(archive_id, suffix='.tmp')
                 try:
-                    chunk_idx.write(fn_tmp)
+                    with DetachedIntegrityCheckedFile(path=fn_tmp, write=True,
+                                                      filename=bin_to_hex(archive_id)) as fd:
+                        chunk_idx.write(fd)
                 except Exception:
                     os.unlink(fn_tmp)
                 else:
@@ -564,9 +603,9 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
             logger.info('Synchronizing chunks cache...')
             cached_ids = cached_archives()
             archive_ids = repo_archives()
-            logger.info('Archives: %d, w/ cached Idx: %d, w/ outdated Idx: %d, w/o cached Idx: %d.' % (
+            logger.info('Archives: %d, w/ cached Idx: %d, w/ outdated Idx: %d, w/o cached Idx: %d.',
                 len(archive_ids), len(cached_ids),
-                len(cached_ids - archive_ids), len(archive_ids - cached_ids), ))
+                len(cached_ids - archive_ids), len(archive_ids - cached_ids))
             # deallocates old hashindex, creates empty hashindex:
             chunk_idx.clear()
             cleanup_outdated(cached_ids - archive_ids)
@@ -583,10 +622,20 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                     if self.do_cache:
                         if archive_id in cached_ids:
                             archive_chunk_idx_path = mkpath(archive_id)
-                            logger.info("Reading cached archive chunk index for %s ..." % archive_name)
-                            archive_chunk_idx = ChunkIndex.read(archive_chunk_idx_path)
-                        else:
-                            logger.info('Fetching and building archive index for %s ...' % archive_name)
+                            logger.info("Reading cached archive chunk index for %s ...", archive_name)
+                            try:
+                                with DetachedIntegrityCheckedFile(path=archive_chunk_idx_path, write=False) as fd:
+                                    archive_chunk_idx = ChunkIndex.read(fd)
+                            except FileIntegrityError as fie:
+                                logger.error('Cached archive chunk index of %s is corrupted: %s', archive_name, fie)
+                                # Delete it and fetch a new index
+                                cleanup_cached_archive(archive_id)
+                                cached_ids.remove(archive_id)
+                                set_ec(EXIT_WARNING)
+                        if archive_id not in cached_ids:
+                            # Do not make this an else branch; the FileIntegrityError exception handler
+                            # above can remove *archive_id* from *cached_ids*.
+                            logger.info('Fetching and building archive index for %s ...', archive_name)
                             archive_chunk_idx = ChunkIndex()
                             fetch_and_build_idx(archive_id, repository, self.key, archive_chunk_idx)
                         logger.info("Merging into master chunks index ...")
@@ -599,7 +648,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                             chunk_idx.merge(archive_chunk_idx)
                     else:
                         chunk_idx = chunk_idx or ChunkIndex()
-                        logger.info('Fetching archive index for %s ...' % archive_name)
+                        logger.info('Fetching archive index for %s ...', archive_name)
                         fetch_and_build_idx(archive_id, repository, self.key, chunk_idx)
                 if self.progress:
                     pi.finish()
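
With this change the cache wraps the 'chunks' and 'files' caches in IntegrityCheckedFile and stores the resulting JSON blobs in a new [integrity] section of the cache config, next to [cache]. The manifest ID is duplicated into that section so a write by an older Borg (which only rewrites [cache]) can be detected and the stale integrity data discarded. A rough sketch of the resulting layout; the hex IDs and digests are placeholders, not real values:

import configparser

config = configparser.ConfigParser(interpolation=None)
config['cache'] = {
    'version': '1',
    'repository': 'f3b5...',   # repository ID (hex), illustrative
    'manifest': '9a1c...',     # manifest ID (hex), illustrative
}
config['integrity'] = {
    # Must match [cache] manifest; if an older Borg rewrote only [cache],
    # the IDs diverge and load() drops this section with a warning.
    'manifest': '9a1c...',
    # JSON produced by IntegrityCheckedFile for each protected cache file.
    'chunks': '{"algorithm": "...", "digests": {"HashHeader": "...", "final": "..."}}',
    'files': '{"algorithm": "...", "digests": {"final": "..."}}',
}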

src/borg/crypto/file_integrity.py  +55 -28

@@ -104,7 +104,7 @@ class FileIntegrityError(IntegrityError):
 
 
 class IntegrityCheckedFile(FileLikeWrapper):
-    def __init__(self, path, write, filename=None, override_fd=None):
+    def __init__(self, path, write, filename=None, override_fd=None, integrity_data=None):
         self.path = path
         self.writing = write
         mode = 'wb' if write else 'rb'
@@ -114,10 +114,10 @@ class IntegrityCheckedFile(FileLikeWrapper):
 
         self.hash_filename(filename)
 
-        if write:
+        if write or not integrity_data:
             self.digests = {}
         else:
-            self.digests = self.read_integrity_file(path, self.hasher)
+            self.digests = self.parse_integrity_data(path, integrity_data, self.hasher)
             # TODO: When we're reading but don't have any digests, i.e. no integrity file existed,
             # TODO: then we could just short-circuit.
 
@@ -126,37 +126,33 @@ class IntegrityCheckedFile(FileLikeWrapper):
         # In Borg the name itself encodes the context (eg. index.N, cache, files),
         # while the path doesn't matter, and moving e.g. a repository or cache directory is supported.
         # Changing the name however imbues a change of context that is not permissible.
+        # While Borg does not use anything except ASCII in these file names, it's important to use
+        # the same encoding everywhere for portability. Using os.fsencode() would be wrong.
         filename = os.path.basename(filename or self.path)
         self.hasher.update(('%10d' % len(filename)).encode())
         self.hasher.update(filename.encode())
 
-    @staticmethod
-    def integrity_file_path(path):
-        return path + '.integrity'
-
     @classmethod
-    def read_integrity_file(cls, path, hasher):
+    def parse_integrity_data(cls, path: str, data: str, hasher: SHA512FileHashingWrapper):
         try:
-            with open(cls.integrity_file_path(path), 'r') as fd:
-                integrity_file = json.load(fd)
-                # Provisions for agility now, implementation later, but make sure the on-disk joint is oiled.
-                algorithm = integrity_file['algorithm']
-                if algorithm != hasher.ALGORITHM:
-                    logger.warning('Cannot verify integrity of %s: Unknown algorithm %r', path, algorithm)
-                    return
-                digests = integrity_file['digests']
-                # Require at least presence of the final digest
-                digests['final']
-                return digests
-        except FileNotFoundError:
-            logger.info('No integrity file found for %s', path)
-        except (OSError, ValueError, TypeError, KeyError) as e:
-            logger.warning('Could not read integrity file for %s: %s', path, e)
+            integrity_data = json.loads(data)
+            # Provisions for agility now, implementation later, but make sure the on-disk joint is oiled.
+            algorithm = integrity_data['algorithm']
+            if algorithm != hasher.ALGORITHM:
+                logger.warning('Cannot verify integrity of %s: Unknown algorithm %r', path, algorithm)
+                return
+            digests = integrity_data['digests']
+            # Require at least presence of the final digest
+            digests['final']
+            return digests
+        except (ValueError, TypeError, KeyError) as e:
+            logger.warning('Could not parse integrity data for %s: %s', path, e)
             raise FileIntegrityError(path)
 
     def hash_part(self, partname, is_final=False):
         if not self.writing and not self.digests:
             return
+        self.hasher.update(('%10d' % len(partname)).encode())
         self.hasher.update(partname.encode())
         self.hasher.hash_length(seek_to_end=is_final)
         digest = self.hasher.hexdigest()
@@ -173,10 +169,41 @@ class IntegrityCheckedFile(FileLikeWrapper):
         if exception:
             return
         if self.writing:
-            with open(self.integrity_file_path(self.path), 'w') as fd:
-                json.dump({
-                    'algorithm': self.hasher.ALGORITHM,
-                    'digests': self.digests,
-                }, fd)
+            self.store_integrity_data(json.dumps({
+                'algorithm': self.hasher.ALGORITHM,
+                'digests': self.digests,
+            }))
         elif self.digests:
             logger.debug('Verified integrity of %s', self.path)
+
+    def store_integrity_data(self, data: str):
+        self.integrity_data = data
+
+
+class DetachedIntegrityCheckedFile(IntegrityCheckedFile):
+    def __init__(self, path, write, filename=None, override_fd=None):
+        super().__init__(path, write, filename, override_fd)
+        filename = filename or os.path.basename(path)
+        output_dir = os.path.dirname(path)
+        self.output_integrity_file = self.integrity_file_path(os.path.join(output_dir, filename))
+        if not write:
+            self.digests = self.read_integrity_file(self.path, self.hasher)
+
+    @staticmethod
+    def integrity_file_path(path):
+        return path + '.integrity'
+
+    @classmethod
+    def read_integrity_file(cls, path, hasher):
+        try:
+            with open(cls.integrity_file_path(path), 'r') as fd:
+                return cls.parse_integrity_data(path, fd.read(), hasher)
+        except FileNotFoundError:
+            logger.info('No integrity file found for %s', path)
+        except OSError as e:
+            logger.warning('Could not read integrity file for %s: %s', path, e)
+            raise FileIntegrityError(path)
+
+    def store_integrity_data(self, data: str):
+        with open(self.output_integrity_file, 'w') as fd:
+            fd.write(data)
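
After this split, IntegrityCheckedFile no longer touches a side-car file itself: on write it exposes the JSON blob as fd.integrity_data for the caller to persist (the cache keeps it in the config, see above), and on read the caller passes that blob back in via integrity_data=. DetachedIntegrityCheckedFile keeps the previous behaviour of a sibling '<name>.integrity' file, which is what chunks.archive.d uses. A usage sketch; the '/tmp/...' paths are only illustrative:

from borg.crypto.file_integrity import (
    IntegrityCheckedFile, DetachedIntegrityCheckedFile, FileIntegrityError,
)

# In-band variant: the caller keeps the integrity data itself.
with IntegrityCheckedFile(path='/tmp/chunks', write=True) as fd:
    fd.write(b'index data')
blob = fd.integrity_data          # JSON string, persisted by the caller

try:
    with IntegrityCheckedFile(path='/tmp/chunks', write=False, integrity_data=blob) as fd:
        data = fd.read()
except FileIntegrityError:
    pass                          # raised if the file was tampered with

# Detached variant: digests live in '/tmp/archive-idx.integrity' next to the file.
with DetachedIntegrityCheckedFile(path='/tmp/archive-idx', write=True) as fd:
    fd.write(b'index data')
with DetachedIntegrityCheckedFile(path='/tmp/archive-idx', write=False) as fd:
    fd.read()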

src/borg/hashindex.pyx  +10 -4

@@ -67,8 +67,11 @@ cdef class IndexBase:
     def __cinit__(self, capacity=0, path=None, key_size=32):
         self.key_size = key_size
         if path:
-            with open(path, 'rb') as fd:
-                self.index = hashindex_read(fd)
+            if isinstance(path, (str, bytes)):
+                with open(path, 'rb') as fd:
+                    self.index = hashindex_read(fd)
+            else:
+                self.index = hashindex_read(path)
             assert self.index, 'hashindex_read() returned NULL with no exception set'
         else:
             self.index = hashindex_init(capacity, self.key_size, self.value_size)
@@ -84,8 +87,11 @@ cdef class IndexBase:
         return cls(path=path)
 
     def write(self, path):
-        with open(path, 'wb') as fd:
-            hashindex_write(self.index, fd)
+        if isinstance(path, (str, bytes)):
+            with open(path, 'wb') as fd:
+                hashindex_write(self.index, fd)
+        else:
+            hashindex_write(self.index, path)
 
     def clear(self):
         hashindex_free(self.index)
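
IndexBase (and thus ChunkIndex/NSIndex) now accepts either a path, which it opens itself, or an already-open binary file-like object, which is what lets cache.py hand an IntegrityCheckedFile straight to ChunkIndex.read()/write(). A short sketch of both call styles; '/tmp/chunks' is only an illustrative path:

from borg.hashindex import ChunkIndex
from borg.crypto.file_integrity import IntegrityCheckedFile

idx = ChunkIndex()

idx.write('/tmp/chunks')          # str/bytes path: opened internally with open()

with IntegrityCheckedFile(path='/tmp/chunks', write=True) as fd:
    idx.write(fd)                 # file object: passed through to hashindex_write()

with IntegrityCheckedFile(path='/tmp/chunks', write=False,
                          integrity_data=fd.integrity_data) as fd:
    idx = ChunkIndex.read(fd)     # same duck typing on the read path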

src/borg/testsuite/archiver.py  +78 -0

@@ -1,5 +1,6 @@
 import argparse
 import errno
+import io
 import json
 import logging
 import os
@@ -37,6 +38,7 @@ from ..constants import *  # NOQA
 from ..crypto.low_level import bytes_to_long, num_aes_blocks
 from ..crypto.key import KeyfileKeyBase, RepoKey, KeyfileKey, Passphrase, TAMRequiredError
 from ..crypto.keymanager import RepoIdMismatch, NotABorgKeyFile
+from ..crypto.file_integrity import FileIntegrityError
 from ..helpers import Location, get_security_dir
 from ..helpers import Manifest
 from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
@@ -2886,6 +2888,82 @@ class RemoteArchiverTestCase(ArchiverTestCase):
                 self.assert_true(marker not in res)
 
 
+class ArchiverCorruptionTestCase(ArchiverTestCaseBase):
+    def setUp(self):
+        super().setUp()
+        self.create_test_files()
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        self.cache_path = json.loads(self.cmd('info', self.repository_location, '--json'))['cache']['path']
+
+    def corrupt(self, file):
+        with open(file, 'r+b') as fd:
+            fd.seek(-1, io.SEEK_END)
+            fd.write(b'1')
+
+    def test_cache_chunks(self):
+        self.corrupt(os.path.join(self.cache_path, 'chunks'))
+
+        if self.FORK_DEFAULT:
+            out = self.cmd('info', self.repository_location, exit_code=2)
+            assert 'failed integrity check' in out
+        else:
+            with pytest.raises(FileIntegrityError):
+                self.cmd('info', self.repository_location)
+
+    def test_cache_files(self):
+        self.cmd('create', self.repository_location + '::test', 'input')
+        self.corrupt(os.path.join(self.cache_path, 'files'))
+
+        if self.FORK_DEFAULT:
+            out = self.cmd('create', self.repository_location + '::test1', 'input', exit_code=2)
+            assert 'failed integrity check' in out
+        else:
+            with pytest.raises(FileIntegrityError):
+                self.cmd('create', self.repository_location + '::test1', 'input')
+
+    def test_chunks_archive(self):
+        self.cmd('create', self.repository_location + '::test1', 'input')
+        # Find ID of test1 so we can corrupt it later :)
+        target_id = self.cmd('list', self.repository_location, '--format={id}{LF}').strip()
+        self.cmd('create', self.repository_location + '::test2', 'input')
+
+        # Force cache sync, creating archive chunks of test1 and test2 in chunks.archive.d
+        self.cmd('delete', '--cache-only', self.repository_location)
+        self.cmd('info', self.repository_location, '--json')
+
+        chunks_archive = os.path.join(self.cache_path, 'chunks.archive.d')
+        assert len(os.listdir(chunks_archive)) == 4  # two archives, one chunks cache and one .integrity file each
+
+        self.corrupt(os.path.join(chunks_archive, target_id))
+
+        # Trigger cache sync by changing the manifest ID in the cache config
+        config_path = os.path.join(self.cache_path, 'config')
+        config = ConfigParser(interpolation=None)
+        config.read(config_path)
+        config.set('cache', 'manifest', bin_to_hex(bytes(32)))
+        with open(config_path, 'w') as fd:
+            config.write(fd)
+
+        # Cache sync notices corrupted archive chunks, but automatically recovers.
+        out = self.cmd('create', '-v', self.repository_location + '::test3', 'input', exit_code=1)
+        assert 'Reading cached archive chunk index for test1' in out
+        assert 'Cached archive chunk index of test1 is corrupted' in out
+        assert 'Fetching and building archive index for test1' in out
+
+    def test_old_version_interfered(self):
+        # Modify the main manifest ID without touching the manifest ID in the integrity section.
+        # This happens if a version without integrity checking modifies the cache.
+        config_path = os.path.join(self.cache_path, 'config')
+        config = ConfigParser(interpolation=None)
+        config.read(config_path)
+        config.set('cache', 'manifest', bin_to_hex(bytes(32)))
+        with open(config_path, 'w') as fd:
+            config.write(fd)
+
+        out = self.cmd('info', self.repository_location)
+        assert 'Cache integrity data not available: old Borg version modified the cache.' in out
+
+
 class DiffArchiverTestCase(ArchiverTestCaseBase):
     def test_basic_functionality(self):
         # Initialize test folder

src/borg/testsuite/file_integrity.py  +19 -19

@@ -1,21 +1,21 @@
 
 import pytest
 
-from ..crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError
+from ..crypto.file_integrity import IntegrityCheckedFile, DetachedIntegrityCheckedFile, FileIntegrityError
 
 
 class TestReadIntegrityFile:
     def test_no_integrity(self, tmpdir):
         protected_file = tmpdir.join('file')
         protected_file.write('1234')
-        assert IntegrityCheckedFile.read_integrity_file(str(protected_file), None) is None
+        assert DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), None) is None
 
     def test_truncated_integrity(self, tmpdir):
         protected_file = tmpdir.join('file')
         protected_file.write('1234')
         tmpdir.join('file.integrity').write('')
         with pytest.raises(FileIntegrityError):
-            IntegrityCheckedFile.read_integrity_file(str(protected_file), None)
+            DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), None)
 
     def test_unknown_algorithm(self, tmpdir):
         class SomeHasher:
@@ -24,7 +24,7 @@ class TestReadIntegrityFile:
         protected_file = tmpdir.join('file')
         protected_file.write('1234')
         tmpdir.join('file.integrity').write('{"algorithm": "HMAC_SERIOUSHASH", "digests": "1234"}')
-        assert IntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) is None
+        assert DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) is None
 
     @pytest.mark.parametrize('json', (
         '{"ALGORITHM": "HMAC_SERIOUSHASH", "digests": "1234"}',
@@ -38,7 +38,7 @@ class TestReadIntegrityFile:
         protected_file.write('1234')
         tmpdir.join('file.integrity').write(json)
         with pytest.raises(FileIntegrityError):
-            IntegrityCheckedFile.read_integrity_file(str(protected_file), None)
+            DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), None)
 
     def test_valid(self, tmpdir):
         class SomeHasher:
@@ -47,35 +47,35 @@ class TestReadIntegrityFile:
         protected_file = tmpdir.join('file')
         protected_file.write('1234')
         tmpdir.join('file.integrity').write('{"algorithm": "HMAC_FOO1", "digests": {"final": "1234"}}')
-        assert IntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) == {'final': '1234'}
+        assert DetachedIntegrityCheckedFile.read_integrity_file(str(protected_file), SomeHasher()) == {'final': '1234'}
 
 
-class TestIntegrityCheckedFile:
+class TestDetachedIntegrityCheckedFile:
     @pytest.fixture
     def integrity_protected_file(self, tmpdir):
         path = str(tmpdir.join('file'))
-        with IntegrityCheckedFile(path, write=True) as fd:
+        with DetachedIntegrityCheckedFile(path, write=True) as fd:
             fd.write(b'foo and bar')
         return path
 
     def test_simple(self, tmpdir, integrity_protected_file):
         assert tmpdir.join('file').check(file=True)
         assert tmpdir.join('file.integrity').check(file=True)
-        with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
+        with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
             assert fd.read() == b'foo and bar'
 
     def test_corrupted_file(self, integrity_protected_file):
         with open(integrity_protected_file, 'ab') as fd:
             fd.write(b' extra data')
         with pytest.raises(FileIntegrityError):
-            with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
+            with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
                 assert fd.read() == b'foo and bar extra data'
 
     def test_corrupted_file_partial_read(self, integrity_protected_file):
         with open(integrity_protected_file, 'ab') as fd:
             fd.write(b' extra data')
         with pytest.raises(FileIntegrityError):
-            with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
+            with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
                 data = b'foo and bar'
                 assert fd.read(len(data)) == data
 
@@ -88,7 +88,7 @@ class TestIntegrityCheckedFile:
         tmpdir.join('file').move(new_path)
         tmpdir.join('file.integrity').move(new_path + '.integrity')
         with pytest.raises(FileIntegrityError):
-            with IntegrityCheckedFile(str(new_path), write=False) as fd:
+            with DetachedIntegrityCheckedFile(str(new_path), write=False) as fd:
                 assert fd.read() == b'foo and bar'
 
     def test_moved_file(self, tmpdir, integrity_protected_file):
@@ -96,27 +96,27 @@ class TestIntegrityCheckedFile:
         tmpdir.join('file').move(new_dir.join('file'))
         tmpdir.join('file.integrity').move(new_dir.join('file.integrity'))
         new_path = str(new_dir.join('file'))
-        with IntegrityCheckedFile(new_path, write=False) as fd:
+        with DetachedIntegrityCheckedFile(new_path, write=False) as fd:
             assert fd.read() == b'foo and bar'
 
     def test_no_integrity(self, tmpdir, integrity_protected_file):
         tmpdir.join('file.integrity').remove()
-        with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
+        with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
             assert fd.read() == b'foo and bar'
 
 
-class TestIntegrityCheckedFileParts:
+class TestDetachedIntegrityCheckedFileParts:
     @pytest.fixture
     def integrity_protected_file(self, tmpdir):
         path = str(tmpdir.join('file'))
-        with IntegrityCheckedFile(path, write=True) as fd:
+        with DetachedIntegrityCheckedFile(path, write=True) as fd:
             fd.write(b'foo and bar')
             fd.hash_part('foopart')
             fd.write(b' other data')
         return path
 
     def test_simple(self, integrity_protected_file):
-        with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
+        with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
             data1 = b'foo and bar'
             assert fd.read(len(data1)) == data1
             fd.hash_part('foopart')
@@ -127,7 +127,7 @@ class TestIntegrityCheckedFileParts:
             # Because some hash_part failed, the final digest will fail as well - again - even if we catch
             # the failing hash_part. This is intentional: (1) it makes the code simpler (2) it's a good fail-safe
             # against overly broad exception handling.
-            with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
+            with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
                 data1 = b'foo and bar'
                 assert fd.read(len(data1)) == data1
                 with pytest.raises(FileIntegrityError):
@@ -140,7 +140,7 @@ class TestIntegrityCheckedFileParts:
         with open(integrity_protected_file, 'ab') as fd:
             fd.write(b'some extra stuff that does not belong')
         with pytest.raises(FileIntegrityError):
-            with IntegrityCheckedFile(integrity_protected_file, write=False) as fd:
+            with DetachedIntegrityCheckedFile(integrity_protected_file, write=False) as fd:
                 data1 = b'foo and bar'
                 try:
                     assert fd.read(len(data1)) == data1

src/borg/testsuite/hashindex.py  +22 -0

@@ -6,6 +6,7 @@ import zlib
 
 from ..hashindex import NSIndex, ChunkIndex
 from .. import hashindex
+from ..crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError
 from . import BaseTestCase
 
 # Note: these tests are part of the self test, do not use or import py.test functionality here.
@@ -319,6 +320,27 @@ class HashIndexDataTestCase(BaseTestCase):
         assert idx1[H(3)] == (ChunkIndex.MAX_VALUE, 6, 7)
 
 
+class HashIndexIntegrityTestCase(HashIndexDataTestCase):
+    def write_integrity_checked_index(self, tempdir):
+        idx = self._deserialize_hashindex(self.HASHINDEX)
+        file = os.path.join(tempdir, 'idx')
+        with IntegrityCheckedFile(path=file, write=True) as fd:
+            idx.write(fd)
+        integrity_data = fd.integrity_data
+        assert 'final' in integrity_data
+        assert 'HashHeader' in integrity_data
+        return file, integrity_data
+
+    def test_integrity_checked_file(self):
+        with tempfile.TemporaryDirectory() as tempdir:
+            file, integrity_data = self.write_integrity_checked_index(tempdir)
+            with open(file, 'r+b') as fd:
+                fd.write(b'Foo')
+            with self.assert_raises(FileIntegrityError):
+                with IntegrityCheckedFile(path=file, write=False, integrity_data=integrity_data) as fd:
+                    ChunkIndex.read(fd)
+
+
 class NSIndexTestCase(BaseTestCase):
     def test_nsindex_segment_limit(self):
         idx = NSIndex()