
fix race condition with data loss potential, fixes #3536

we discard all files cache entries referring to files
with timestamps AFTER we started the backup.

so, even if we backed up an inconsistent file
that was changed while we backed it up, we would
not have a files cache entry for it and would fully
read/chunk/hash it again in the next backup.
Thomas Waldmann, 8 months ago
parent
commit
ec9d412756
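
To make the new discard rule concrete, here is a minimal, standalone sketch of the decision this commit adds to _write_files_cache. The should_persist() helper and the SAFETY_MARGIN_NS name are illustrative only (borg uses TIME_DIFFERS2_NS and inlines the check): an entry from the current backup is persisted only if both its ctime and mtime are strictly older than the newest cmtime seen during the backup and also older than the backup start time minus a safety margin.

    import time

    # stand-in for borg's TIME_DIFFERS2_NS (3 s, in nanoseconds) -- assumption for this sketch
    SAFETY_MARGIN_NS = 3_000_000_000
    MAX_TIME_NS = 2**63 - 1  # "never discard" sentinel, good until y2262

    def should_persist(ctime_ns, mtime_ns, newest_cmtime_ns, start_backup_ns):
        """Return True if a files cache entry seen in the current backup may be kept."""
        newest = newest_cmtime_ns if newest_cmtime_ns is not None else MAX_TIME_NS
        start = start_backup_ns - SAFETY_MARGIN_NS if start_backup_ns is not None else MAX_TIME_NS
        discard_after = min(newest, start)
        # anything changed at or after discard_after might have been modified while
        # we read it, so it is dropped and will be fully re-read in the next backup
        return max(ctime_ns, mtime_ns) < discard_after

    # usage: a file whose mtime is 1 s after backup start is not persisted
    start = time.time_ns()
    print(should_persist(start + 1_000_000_000, start, start + 2_000_000_000, start))  # False
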

+ 1 - 0
src/borg/archiver/__init__.py

@@ -125,6 +125,7 @@ class Archiver(
     def __init__(self, lock_wait=None, prog=None):
         self.lock_wait = lock_wait
         self.prog = prog
+        self.start_backup = None
 
     def print_warning(self, msg, *args, **kw):
         warning_code = kw.get("wc", EXIT_WARNING)  # note: wc=None can be used to not influence exit code

+ 1 - 0
src/borg/archiver/_common.py

@@ -168,6 +168,7 @@ def with_repository(
                         manifest_,
                         progress=getattr(args, "progress", False),
                         cache_mode=getattr(args, "files_cache_mode", FILES_CACHE_MODE_DISABLED),
+                        start_backup=getattr(self, "start_backup", None),
                         iec=getattr(args, "iec", False),
                     ) as cache_:
                         return method(self, args, repository=repository, cache=cache_, **kwargs)

+ 1 - 0
src/borg/archiver/create_cmd.py

@@ -214,6 +214,7 @@ class CreateMixIn:
         self.noxattrs = args.noxattrs
         self.exclude_nodump = args.exclude_nodump
         dry_run = args.dry_run
+        self.start_backup = time.time_ns()
         t0 = archive_ts_now()
         t0_monotonic = time.monotonic()
         logger.info('Creating archive at "%s"' % args.location.processed)

+ 38 - 22
src/borg/cache.py

@@ -3,6 +3,7 @@ import os
 import shutil
 import stat
 from collections import namedtuple
+from datetime import datetime, timezone
 from time import perf_counter
 
 from .logger import create_logger
@@ -11,7 +12,7 @@ logger = create_logger()
 
 files_cache_logger = create_logger("borg.debug.files_cache")
 
-from .constants import CACHE_README, FILES_CACHE_MODE_DISABLED, ROBJ_FILE_STREAM
+from .constants import CACHE_README, FILES_CACHE_MODE_DISABLED, ROBJ_FILE_STREAM, TIME_DIFFERS2_NS
 from .checksums import xxh64
 from .hashindex import ChunkIndex, ChunkIndexEntry
 from .helpers import Error
@@ -343,6 +344,7 @@ class Cache:
         cache_mode=FILES_CACHE_MODE_DISABLED,
         iec=False,
         archive_name=None,
+        start_backup=None,
     ):
         return AdHocWithFilesCache(
             manifest=manifest,
@@ -352,6 +354,7 @@ class Cache:
             iec=iec,
             cache_mode=cache_mode,
             archive_name=archive_name,
+            start_backup=start_backup,
         )
 
 
@@ -367,7 +370,7 @@ class FilesCacheMixin:
 
     FILES_CACHE_NAME = "files"
 
-    def __init__(self, cache_mode, archive_name=None):
+    def __init__(self, cache_mode, archive_name=None, start_backup=None):
         self.archive_name = archive_name  # ideally a SERIES name
         assert not ("c" in cache_mode and "m" in cache_mode)
         assert "d" in cache_mode or "c" in cache_mode or "m" in cache_mode
@@ -375,6 +378,7 @@ class FilesCacheMixin:
         self._files = None
         self._newest_cmtime = 0
         self._newest_path_hashes = set()
+        self.start_backup = start_backup
 
     @property
     def files(self):
@@ -490,32 +494,43 @@ class FilesCacheMixin:
 
     def _write_files_cache(self, files):
         """write files cache to cache directory"""
-        if self._newest_cmtime is None:
-            # was never set because no files were modified/added
-            self._newest_cmtime = 2**63 - 1  # nanoseconds, good until y2262
+        max_time_ns = 2**63 - 1  # nanoseconds, good until y2262
+        # self._newest_cmtime might be None if it was never set because no files were modified/added.
+        newest_cmtime = self._newest_cmtime if self._newest_cmtime is not None else max_time_ns
+        start_backup_time = self.start_backup - TIME_DIFFERS2_NS if self.start_backup is not None else max_time_ns
+        # we don't want to persist files cache entries of potentially problematic files:
+        discard_after = min(newest_cmtime, start_backup_time)
         ttl = int(os.environ.get("BORG_FILES_CACHE_TTL", 2))
         files_cache_logger.debug("FILES-CACHE-SAVE: starting...")
         # TODO: use something like SaveFile here, but that didn't work due to SyncFile missing .seek().
         with IntegrityCheckedFile(path=os.path.join(self.path, self.files_cache_name()), write=True) as fd:
-            entry_count = 0
+            entries = 0
+            age_discarded = 0
+            race_discarded = 0
             for path_hash, item in files.items():
-                # Only keep files seen in this backup that are older than newest cmtime seen in this backup -
-                # this is to avoid issues with filesystem snapshots and cmtime granularity.
-                # Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet.
                 entry = FileCacheEntry(*msgpack.unpackb(item))
-                if (
-                    entry.age == 0
-                    and max(timestamp_to_int(entry.ctime), timestamp_to_int(entry.mtime)) < self._newest_cmtime
-                    or entry.age > 0
-                    and entry.age < ttl
-                ):
+                if entry.age == 0:  # current entries
+                    if max(timestamp_to_int(entry.ctime), timestamp_to_int(entry.mtime)) < discard_after:
+                        # Only keep files seen in this backup that are old enough not to suffer race conditions
+                        # relating to filesystem snapshots and ctime/mtime granularity or being modified while we read them.
+                        keep = True
+                    else:
+                        keep = False
+                        race_discarded += 1
+                else:  # old entries
+                    if entry.age < ttl:
+                        # Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet.
+                        keep = True
+                    else:
+                        keep = False
+                        age_discarded += 1
+                if keep:
                     msgpack.pack((path_hash, entry), fd)
-                    entry_count += 1
-        files_cache_logger.debug("FILES-CACHE-KILL: removed all old entries with age >= TTL [%d]", ttl)
-        files_cache_logger.debug(
-            "FILES-CACHE-KILL: removed all current entries with newest cmtime %d", self._newest_cmtime
-        )
-        files_cache_logger.debug("FILES-CACHE-SAVE: finished, %d remaining entries saved.", entry_count)
+                    entries += 1
+        files_cache_logger.debug(f"FILES-CACHE-KILL: removed {age_discarded} entries with age >= TTL [{ttl}]")
+        t_str = datetime.fromtimestamp(discard_after / 1e9, timezone.utc).isoformat()
+        files_cache_logger.debug(f"FILES-CACHE-KILL: removed {race_discarded} entries with ctime/mtime >= {t_str}")
+        files_cache_logger.debug(f"FILES-CACHE-SAVE: finished, {entries} remaining entries saved.")
         return fd.integrity_data
 
     def file_known_and_unchanged(self, hashed_path, path_hash, st):
@@ -710,12 +725,13 @@ class AdHocWithFilesCache(FilesCacheMixin, ChunksMixin):
         cache_mode=FILES_CACHE_MODE_DISABLED,
         iec=False,
         archive_name=None,
+        start_backup=None,
     ):
         """
         :param warn_if_unencrypted: print warning if accessing unknown unencrypted repository
         :param cache_mode: what shall be compared in the file stat infos vs. cached stat infos comparison
         """
-        FilesCacheMixin.__init__(self, cache_mode, archive_name)
+        FilesCacheMixin.__init__(self, cache_mode, archive_name, start_backup)
         ChunksMixin.__init__(self)
         assert isinstance(manifest, Manifest)
         self.manifest = manifest

+ 3 - 0
src/borg/constants.py

@@ -117,6 +117,9 @@ FILES_CACHE_MODE_DISABLED = "d"  # most borg commands do not use the files cache
 # we can't go much higher here (like e.g. to 2s) without causing issues.
 TIME_DIFFERS1_NS = 20000000
 
+# similar to above, but for bigger granularity / clock differences
+TIME_DIFFERS2_NS = 3000000000
+
 # return codes returned by borg command
 EXIT_SUCCESS = 0  # everything done, no problems
 EXIT_WARNING = 1  # reached normal end of operation, but there were issues (generic warning)