Browse Source

fix race condition with data loss potential, fixes #3536

we discard all files cache entries referring to files
with timestamps AFTER we started the backup.

so, even in the case that we back up an inconsistent file
that has been changed while we backed it up, we would
not have a files cache entry for it and would fully
read/chunk/hash it again in the next backup.
Thomas Waldmann 8 months ago
parent
commit
ec9d412756

+ 1 - 0
src/borg/archiver/__init__.py

@@ -125,6 +125,7 @@ class Archiver(
     def __init__(self, lock_wait=None, prog=None):
     def __init__(self, lock_wait=None, prog=None):
         self.lock_wait = lock_wait
         self.lock_wait = lock_wait
         self.prog = prog
         self.prog = prog
+        self.start_backup = None
 
 
     def print_warning(self, msg, *args, **kw):
     def print_warning(self, msg, *args, **kw):
         warning_code = kw.get("wc", EXIT_WARNING)  # note: wc=None can be used to not influence exit code
         warning_code = kw.get("wc", EXIT_WARNING)  # note: wc=None can be used to not influence exit code

+ 1 - 0
src/borg/archiver/_common.py

@@ -168,6 +168,7 @@ def with_repository(
                         manifest_,
                         manifest_,
                         progress=getattr(args, "progress", False),
                         progress=getattr(args, "progress", False),
                         cache_mode=getattr(args, "files_cache_mode", FILES_CACHE_MODE_DISABLED),
                         cache_mode=getattr(args, "files_cache_mode", FILES_CACHE_MODE_DISABLED),
+                        start_backup=getattr(self, "start_backup", None),
                         iec=getattr(args, "iec", False),
                         iec=getattr(args, "iec", False),
                     ) as cache_:
                     ) as cache_:
                         return method(self, args, repository=repository, cache=cache_, **kwargs)
                         return method(self, args, repository=repository, cache=cache_, **kwargs)

+ 1 - 0
src/borg/archiver/create_cmd.py

@@ -214,6 +214,7 @@ class CreateMixIn:
         self.noxattrs = args.noxattrs
         self.noxattrs = args.noxattrs
         self.exclude_nodump = args.exclude_nodump
         self.exclude_nodump = args.exclude_nodump
         dry_run = args.dry_run
         dry_run = args.dry_run
+        self.start_backup = time.time_ns()
         t0 = archive_ts_now()
         t0 = archive_ts_now()
         t0_monotonic = time.monotonic()
         t0_monotonic = time.monotonic()
         logger.info('Creating archive at "%s"' % args.location.processed)
         logger.info('Creating archive at "%s"' % args.location.processed)

+ 38 - 22
src/borg/cache.py

@@ -3,6 +3,7 @@ import os
 import shutil
 import shutil
 import stat
 import stat
 from collections import namedtuple
 from collections import namedtuple
+from datetime import datetime, timezone
 from time import perf_counter
 from time import perf_counter
 
 
 from .logger import create_logger
 from .logger import create_logger
@@ -11,7 +12,7 @@ logger = create_logger()
 
 
 files_cache_logger = create_logger("borg.debug.files_cache")
 files_cache_logger = create_logger("borg.debug.files_cache")
 
 
-from .constants import CACHE_README, FILES_CACHE_MODE_DISABLED, ROBJ_FILE_STREAM
+from .constants import CACHE_README, FILES_CACHE_MODE_DISABLED, ROBJ_FILE_STREAM, TIME_DIFFERS2_NS
 from .checksums import xxh64
 from .checksums import xxh64
 from .hashindex import ChunkIndex, ChunkIndexEntry
 from .hashindex import ChunkIndex, ChunkIndexEntry
 from .helpers import Error
 from .helpers import Error
@@ -343,6 +344,7 @@ class Cache:
         cache_mode=FILES_CACHE_MODE_DISABLED,
         cache_mode=FILES_CACHE_MODE_DISABLED,
         iec=False,
         iec=False,
         archive_name=None,
         archive_name=None,
+        start_backup=None,
     ):
     ):
         return AdHocWithFilesCache(
         return AdHocWithFilesCache(
             manifest=manifest,
             manifest=manifest,
@@ -352,6 +354,7 @@ class Cache:
             iec=iec,
             iec=iec,
             cache_mode=cache_mode,
             cache_mode=cache_mode,
             archive_name=archive_name,
             archive_name=archive_name,
+            start_backup=start_backup,
         )
         )
 
 
 
 
@@ -367,7 +370,7 @@ class FilesCacheMixin:
 
 
     FILES_CACHE_NAME = "files"
     FILES_CACHE_NAME = "files"
 
 
-    def __init__(self, cache_mode, archive_name=None):
+    def __init__(self, cache_mode, archive_name=None, start_backup=None):
         self.archive_name = archive_name  # ideally a SERIES name
         self.archive_name = archive_name  # ideally a SERIES name
         assert not ("c" in cache_mode and "m" in cache_mode)
         assert not ("c" in cache_mode and "m" in cache_mode)
         assert "d" in cache_mode or "c" in cache_mode or "m" in cache_mode
         assert "d" in cache_mode or "c" in cache_mode or "m" in cache_mode
@@ -375,6 +378,7 @@ class FilesCacheMixin:
         self._files = None
         self._files = None
         self._newest_cmtime = 0
         self._newest_cmtime = 0
         self._newest_path_hashes = set()
         self._newest_path_hashes = set()
+        self.start_backup = start_backup
 
 
     @property
     @property
     def files(self):
     def files(self):
@@ -490,32 +494,43 @@ class FilesCacheMixin:
 
 
     def _write_files_cache(self, files):
     def _write_files_cache(self, files):
         """write files cache to cache directory"""
         """write files cache to cache directory"""
-        if self._newest_cmtime is None:
-            # was never set because no files were modified/added
-            self._newest_cmtime = 2**63 - 1  # nanoseconds, good until y2262
+        max_time_ns = 2**63 - 1  # nanoseconds, good until y2262
+        # self._newest_cmtime might be None if it was never set because no files were modified/added.
+        newest_cmtime = self._newest_cmtime if self._newest_cmtime is not None else max_time_ns
+        start_backup_time = self.start_backup - TIME_DIFFERS2_NS if self.start_backup is not None else max_time_ns
+        # we don't want to persist files cache entries of potentially problematic files:
+        discard_after = min(newest_cmtime, start_backup_time)
         ttl = int(os.environ.get("BORG_FILES_CACHE_TTL", 2))
         ttl = int(os.environ.get("BORG_FILES_CACHE_TTL", 2))
         files_cache_logger.debug("FILES-CACHE-SAVE: starting...")
         files_cache_logger.debug("FILES-CACHE-SAVE: starting...")
         # TODO: use something like SaveFile here, but that didn't work due to SyncFile missing .seek().
         # TODO: use something like SaveFile here, but that didn't work due to SyncFile missing .seek().
         with IntegrityCheckedFile(path=os.path.join(self.path, self.files_cache_name()), write=True) as fd:
         with IntegrityCheckedFile(path=os.path.join(self.path, self.files_cache_name()), write=True) as fd:
-            entry_count = 0
+            entries = 0
+            age_discarded = 0
+            race_discarded = 0
             for path_hash, item in files.items():
             for path_hash, item in files.items():
-                # Only keep files seen in this backup that are older than newest cmtime seen in this backup -
-                # this is to avoid issues with filesystem snapshots and cmtime granularity.
-                # Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet.
                 entry = FileCacheEntry(*msgpack.unpackb(item))
                 entry = FileCacheEntry(*msgpack.unpackb(item))
-                if (
-                    entry.age == 0
-                    and max(timestamp_to_int(entry.ctime), timestamp_to_int(entry.mtime)) < self._newest_cmtime
-                    or entry.age > 0
-                    and entry.age < ttl
-                ):
+                if entry.age == 0:  # current entries
+                    if max(timestamp_to_int(entry.ctime), timestamp_to_int(entry.mtime)) < discard_after:
+                        # Only keep files seen in this backup that are old enough not to suffer race conditions relating
+                        # to filesystem snapshots and ctime/mtime granularity or being modified while we read them.
+                        keep = True
+                    else:
+                        keep = False
+                        race_discarded += 1
+                else:  # old entries
+                    if entry.age < ttl:
+                        # Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet.
+                        keep = True
+                    else:
+                        keep = False
+                        age_discarded += 1
+                if keep:
                     msgpack.pack((path_hash, entry), fd)
                     msgpack.pack((path_hash, entry), fd)
-                    entry_count += 1
-        files_cache_logger.debug("FILES-CACHE-KILL: removed all old entries with age >= TTL [%d]", ttl)
-        files_cache_logger.debug(
-            "FILES-CACHE-KILL: removed all current entries with newest cmtime %d", self._newest_cmtime
-        )
-        files_cache_logger.debug("FILES-CACHE-SAVE: finished, %d remaining entries saved.", entry_count)
+                    entries += 1
+        files_cache_logger.debug(f"FILES-CACHE-KILL: removed {age_discarded} entries with age >= TTL [{ttl}]")
+        t_str = datetime.fromtimestamp(discard_after / 1e9, timezone.utc).isoformat()
+        files_cache_logger.debug(f"FILES-CACHE-KILL: removed {race_discarded} entries with ctime/mtime >= {t_str}")
+        files_cache_logger.debug(f"FILES-CACHE-SAVE: finished, {entries} remaining entries saved.")
         return fd.integrity_data
         return fd.integrity_data
 
 
     def file_known_and_unchanged(self, hashed_path, path_hash, st):
     def file_known_and_unchanged(self, hashed_path, path_hash, st):
@@ -710,12 +725,13 @@ class AdHocWithFilesCache(FilesCacheMixin, ChunksMixin):
         cache_mode=FILES_CACHE_MODE_DISABLED,
         cache_mode=FILES_CACHE_MODE_DISABLED,
         iec=False,
         iec=False,
         archive_name=None,
         archive_name=None,
+        start_backup=None,
     ):
     ):
         """
         """
         :param warn_if_unencrypted: print warning if accessing unknown unencrypted repository
         :param warn_if_unencrypted: print warning if accessing unknown unencrypted repository
         :param cache_mode: what shall be compared in the file stat infos vs. cached stat infos comparison
         :param cache_mode: what shall be compared in the file stat infos vs. cached stat infos comparison
         """
         """
-        FilesCacheMixin.__init__(self, cache_mode, archive_name)
+        FilesCacheMixin.__init__(self, cache_mode, archive_name, start_backup)
         ChunksMixin.__init__(self)
         ChunksMixin.__init__(self)
         assert isinstance(manifest, Manifest)
         assert isinstance(manifest, Manifest)
         self.manifest = manifest
         self.manifest = manifest

+ 3 - 0
src/borg/constants.py

@@ -117,6 +117,9 @@ FILES_CACHE_MODE_DISABLED = "d"  # most borg commands do not use the files cache
 # we can't go much higher here (like e.g. to 2s) without causing issues.
 # we can't go much higher here (like e.g. to 2s) without causing issues.
 TIME_DIFFERS1_NS = 20000000
 TIME_DIFFERS1_NS = 20000000
 
 
+# similar to above, but for bigger granularity / clock differences
+TIME_DIFFERS2_NS = 3000000000
+
 # return codes returned by borg command
 # return codes returned by borg command
 EXIT_SUCCESS = 0  # everything done, no problems
 EXIT_SUCCESS = 0  # everything done, no problems
 EXIT_WARNING = 1  # reached normal end of operation, but there were issues (generic warning)
 EXIT_WARNING = 1  # reached normal end of operation, but there were issues (generic warning)