
implement files cache mode control, fixes #911

You can now control the files cache mode using this option:

--files-cache={ctime,mtime,size,inode,rechunk,disabled}*

(only some combinations are supported)

Previously, only these modes were supported:
- mtime,size,inode (default of borg < 1.1.0rc4)
- mtime,size (by using --ignore-inode)
- disabled (by using --no-files-cache)

Now, you additionally get:
- ctime as an alternative to mtime (safer), e.g.:
  ctime,size,inode (this is the new default of borg >= 1.1.0rc4)
- rechunk (consider all files as changed, rechunk them)

Deprecated:
- --ignore-inode (use modes without "inode")
- --no-files-cache (use "disabled" mode)
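
As a quick sketch (assuming a borg >= 1.1.0rc4 checkout is importable), the UI
mode strings parse down to the short internal form used throughout the code:

    from borg.helpers import FilesCacheMode

    FilesCacheMode('ctime,size,inode')  # -> 'cis', the new default
    FilesCacheMode('mtime,size')        # -> 'ms', what --ignore-inode used to give you
    FilesCacheMode('disabled')          # -> 'd', what --no-files-cache used to give you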

The tests needed some changes:
- previously, we used os.utime() to set a file's mtime (and atime) to specific
  values, but that does not work for ctime.
- now we use time.sleep() to create the "latest file", which usually does
  not end up in the files cache (see FAQ)
Thomas Waldmann, 7 years ago
commit 5e2de8ba67

+ 1 - 1
docs/internals/data-structures.rst

@@ -738,7 +738,7 @@ b) with ``create --chunker-params 19,23,21,4095`` (default):
 
   mem_usage  =  0.31GiB
 
-.. note:: There is also the ``--no-files-cache`` option to switch off the files cache.
+.. note:: There is also the ``--files-cache=disabled`` option to disable the files cache.
    You'll save some memory, but it will need to read / chunk all the files as
    it can not skip unmodified files then.
 

+ 3 - 3
src/borg/archive.py

@@ -1087,13 +1087,13 @@ class FilesystemObjectProcessors:
         self.add_item(item)
         return 'i'  # stdin
 
-    def process_file(self, path, st, cache, ignore_inode=False):
+    def process_file(self, path, st, cache, ignore_inode=False, files_cache_mode=DEFAULT_FILES_CACHE_MODE):
         with self.create_helper(path, st, None) as (item, status, hardlinked, hardlink_master):  # no status yet
             is_special_file = is_special(st.st_mode)
             if not hardlinked or hardlink_master:
                 if not is_special_file:
                     path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path)))
-                    ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode)
+                    ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode, files_cache_mode)
                 else:
                     # in --read-special mode, we may be called for special files.
                     # there should be no information in the cache about special files processed in
@@ -1126,7 +1126,7 @@
                     if not is_special_file:
                         # we must not memorize special files, because the contents of e.g. a
                         # block or char device will change without its mtime/size/inode changing.
-                        cache.memorize_file(path_hash, st, [c.id for c in item.chunks])
+                        cache.memorize_file(path_hash, st, [c.id for c in item.chunks], files_cache_mode)
                     status = status or 'M'  # regular file, modified (if not 'A' already)
                 self.stats.nfiles += 1
             item.update(self.metadata_collector.stat_attrs(st, path))

+ 46 - 13
src/borg/archiver.py

@@ -45,7 +45,7 @@ from .crypto.keymanager import KeyManager
 from .helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
 from .helpers import Error, NoManifestError, set_ec
 from .helpers import positive_int_validator, location_validator, archivename_validator, ChunkerParams
-from .helpers import PrefixSpec, SortBySpec
+from .helpers import PrefixSpec, SortBySpec, FilesCacheMode
 from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter
 from .helpers import format_timedelta, format_file_size, parse_file_size, format_archive
 from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict
@@ -346,7 +346,7 @@ class Archiver:
             compression = '--compression=none'
             # measure create perf (without files cache to always have it chunking)
             t_start = time.monotonic()
-            rc = self.do_create(self.parse_args(['create', compression, '--no-files-cache', archive + '1', path]))
+            rc = self.do_create(self.parse_args(['create', compression, '--files-cache=disabled', archive + '1', path]))
             t_end = time.monotonic()
             dt_create = t_end - t_start
             assert rc == 0
@@ -485,6 +485,7 @@
         self.output_filter = args.output_filter
         self.output_list = args.output_list
         self.ignore_inode = args.ignore_inode
+        self.files_cache_mode = args.files_cache_mode
        dry_run = args.dry_run
        t0 = datetime.utcnow()
        t0_monotonic = time.monotonic()
@@ -548,7 +549,7 @@
                     return
             if stat.S_ISREG(st.st_mode):
                 if not dry_run:
-                    status = fso.process_file(path, st, cache, self.ignore_inode)
+                    status = fso.process_file(path, st, cache, self.ignore_inode, self.files_cache_mode)
             elif stat.S_ISDIR(st.st_mode):
                 if recurse:
                     tag_paths = dir_is_tagged(path, exclude_caches, exclude_if_present)
@@ -1960,14 +1961,17 @@
 
     def preprocess_args(self, args):
         deprecations = [
-            # ('--old', '--new', 'Warning: "--old" has been deprecated. Use "--new" instead.'),
+            # ('--old', '--new' or None, 'Warning: "--old" has been deprecated. Use "--new" instead.'),
             ('--list-format', '--format', 'Warning: "--list-format" has been deprecated. Use "--format" instead.'),
             ('--keep-tag-files', '--keep-exclude-tags', 'Warning: "--keep-tag-files" has been deprecated. Use "--keep-exclude-tags" instead.'),
+            ('--ignore-inode', None, 'Warning: "--ignore-inode" has been deprecated. Use "--files-cache=ctime,size" or "...=mtime,size" instead.'),
+            ('--no-files-cache', None, 'Warning: "--no-files-cache" has been deprecated. Use "--files-cache=disabled" instead.'),
         ]
         for i, arg in enumerate(args[:]):
             for old_name, new_name, warning in deprecations:
                 if arg.startswith(old_name):
-                    args[i] = arg.replace(old_name, new_name)
+                    if new_name is not None:
+                        args[i] = arg.replace(old_name, new_name)
                     print(warning, file=sys.stderr)
         return args
 
@@ -2595,13 +2599,39 @@
         {now}, {utcnow}, {fqdn}, {hostname}, {user} and some others.
 
         Backup speed is increased by not reprocessing files that are already part of
-        existing archives and weren't modified. Normally, detecting file modifications
-        will take inode information into consideration. This is problematic for files
-        located on sshfs and similar network file systems which do not provide stable
-        inode numbers, such files will always be considered modified. The
-        ``--ignore-inode`` flag can be used to prevent this and improve performance.
-        This flag will reduce reliability of change detection however, with files
-        considered unmodified as long as their size and modification time are unchanged.
+        existing archives and weren't modified. The detection of unmodified files is
+        done by comparing multiple file metadata values with previous values kept in
+        the files cache.
+
+        This comparison can operate in different modes as given by ``--files-cache``:
+
+        - ctime,size,inode (default)
+        - mtime,size,inode (default behaviour of borg versions older than 1.1.0rc4)
+        - ctime,size (ignore the inode number)
+        - mtime,size (ignore the inode number)
+        - rechunk,ctime (all files are considered modified - rechunk, cache ctime)
+        - rechunk,mtime (all files are considered modified - rechunk, cache mtime)
+        - disabled (disable the files cache, all files considered modified - rechunk)
+
+        inode number: better safety, but often unstable on network filesystems
+
+        Normally, detecting file modifications will take inode information into
+        consideration to improve the reliability of file change detection.
+        This is problematic for files located on sshfs and similar network file
+        systems which do not provide stable inode numbers, such files will always
+        be considered modified. You can use modes without `inode` in this case to
+        improve performance, but reliability of change detection might be reduced.
+
+        ctime vs. mtime: safety vs. speed
+
+        - ctime is a rather safe way to detect changes to a file (metadata and contents)
+          as it can not be set from userspace. But, a metadata-only change will already
+          update the ctime, so there might be some unnecessary chunking/hashing even
+          without content changes. Some filesystems do not support ctime (change time).
+        - mtime usually works and only updates if file contents were changed. But mtime
+          can be arbitrarily set from userspace, e.g. to set mtime back to the same value
+          it had before a content change happened. This can be used maliciously as well as
+          well-meant, but in both cases mtime based cache modes can be problematic.
 
         The mount points of filesystems or filesystem snapshots should be the same for every
         creation of a new archive to ensure fast operation. This is because the file cache that
@@ -2692,7 +2722,7 @@
         subparser.add_argument('--json', action='store_true',
                                help='output stats as JSON. Implies ``--stats``.')
         subparser.add_argument('--no-cache-sync', dest='no_cache_sync', action='store_true',
-                               help='experimental: do not synchronize the cache. Implies ``--no-files-cache``.')
+                               help='experimental: do not synchronize the cache. Implies not using the files cache.')
 
         define_exclusion_group(subparser, tag_files=True)
 
@@ -2707,6 +2737,9 @@
                               help='do not store ctime into archive')
         fs_group.add_argument('--ignore-inode', dest='ignore_inode', action='store_true',
                               help='ignore inode data in the file metadata cache used to detect unchanged files.')
+        fs_group.add_argument('--files-cache', metavar='MODE', dest='files_cache_mode',
+                              type=FilesCacheMode, default=DEFAULT_FILES_CACHE_MODE_UI,
+                              help='operate files cache in MODE. default: %s' % DEFAULT_FILES_CACHE_MODE_UI)
         fs_group.add_argument('--read-special', dest='read_special', action='store_true',
                               help='open and read block and char device files as well as FIFOs as if they were '
                                    'regular files. Also follows symlinks pointing to these kinds of files.')
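
A tiny standalone demo of the ctime vs. mtime point made in the help text above
(a sketch; assumes a POSIX filesystem with fine-grained timestamps):

    import os

    with open('demo.txt', 'wb') as f:   # create a file
        f.write(b'123')
    st1 = os.stat('demo.txt')

    with open('demo.txt', 'wb') as f:   # change contents, keep the same size
        f.write(b'321')
    os.utime('demo.txt', ns=(st1.st_atime_ns, st1.st_mtime_ns))  # forge the old mtime back

    st2 = os.stat('demo.txt')
    assert st2.st_mtime_ns == st1.st_mtime_ns  # mtime,size modes would skip this file
    assert st2.st_ctime_ns != st1.st_ctime_ns  # ctime,size modes still detect the change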

+ 42 - 31
src/borg/cache.py

@@ -12,7 +12,7 @@ from .logger import create_logger
 
 logger = create_logger()
 
-from .constants import CACHE_README
+from .constants import CACHE_README, DEFAULT_FILES_CACHE_MODE
 from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer
 from .helpers import Location
 from .helpers import Error
@@ -34,7 +34,8 @@ from .platform import SaveFile
 from .remote import cache_if_remote
 from .repository import LIST_SCAN_LIMIT
 
-FileCacheEntry = namedtuple('FileCacheEntry', 'age inode size mtime chunk_ids')
+# note: cmtime might be either a ctime or a mtime timestamp
+FileCacheEntry = namedtuple('FileCacheEntry', 'age inode size cmtime chunk_ids')
 
 
 class SecurityManager:
@@ -492,7 +493,7 @@ class LocalCache(CacheStatsMixin):
 
     def _read_files(self):
         self.files = {}
-        self._newest_mtime = None
+        self._newest_cmtime = None
         logger.debug('Reading files cache ...')
 
         with IntegrityCheckedFile(path=os.path.join(self.path, 'files'), write=False,
@@ -538,18 +539,18 @@
         self.security_manager.save(self.manifest, self.key)
         pi = ProgressIndicatorMessage(msgid='cache.commit')
         if self.files is not None:
-            if self._newest_mtime is None:
+            if self._newest_cmtime is None:
                 # was never set because no files were modified/added
-                self._newest_mtime = 2 ** 63 - 1  # nanoseconds, good until y2262
+                self._newest_cmtime = 2 ** 63 - 1  # nanoseconds, good until y2262
             ttl = int(os.environ.get('BORG_FILES_CACHE_TTL', 20))
             pi.output('Saving files cache')
             with IntegrityCheckedFile(path=os.path.join(self.path, 'files'), write=True) as fd:
                 for path_hash, item in self.files.items():
-                    # Only keep files seen in this backup that are older than newest mtime seen in this backup -
-                    # this is to avoid issues with filesystem snapshots and mtime granularity.
+                    # Only keep files seen in this backup that are older than newest cmtime seen in this backup -
+                    # this is to avoid issues with filesystem snapshots and cmtime granularity.
                     # Also keep files from older backups that have not reached BORG_FILES_CACHE_TTL yet.
                     entry = FileCacheEntry(*msgpack.unpackb(item))
-                    if entry.age == 0 and bigint_to_int(entry.mtime) < self._newest_mtime or \
+                    if entry.age == 0 and bigint_to_int(entry.cmtime) < self._newest_cmtime or \
                        entry.age > 0 and entry.age < ttl:
                         msgpack.pack((path_hash, entry), fd)
             self.cache_config.integrity['files'] = fd.integrity_data
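
The pruning rule in the loop above can be read as a small predicate: an entry
survives the rewrite if it was seen in this backup (age == 0) and its cmtime is
safely below the newest cmtime seen (a guard against timestamp granularity and
snapshot issues), or if it is from an earlier backup but has not yet reached
BORG_FILES_CACHE_TTL. A simplified sketch (plain ints instead of msgpack/bigint
values):

    def keep_entry(age, cmtime, newest_cmtime, ttl=20):
        return age == 0 and cmtime < newest_cmtime or 0 < age < ttl

    assert keep_entry(age=0, cmtime=100, newest_cmtime=200)       # seen now, old enough
    assert not keep_entry(age=0, cmtime=200, newest_cmtime=200)   # too new, dropped
    assert keep_entry(age=5, cmtime=100, newest_cmtime=200)       # unseen, still below TTL
    assert not keep_entry(age=20, cmtime=100, newest_cmtime=200)  # TTL reached, dropped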
@@ -902,37 +903,47 @@
         else:
             stats.update(-size, -csize, False)
 
-    def file_known_and_unchanged(self, path_hash, st, ignore_inode=False):
-        if not (self.do_files and stat.S_ISREG(st.st_mode)):
+    def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE):
+        if 'd' in cache_mode or not self.do_files or not stat.S_ISREG(st.st_mode):  # d(isabled)
             return None
         if self.files is None:
             self._read_files()
+        if 'r' in cache_mode:  # r(echunk)
+            return None
         entry = self.files.get(path_hash)
         if not entry:
             return None
         entry = FileCacheEntry(*msgpack.unpackb(entry))
-        if (entry.size == st.st_size and bigint_to_int(entry.mtime) == st.st_mtime_ns and
-                (ignore_inode or entry.inode == st.st_ino)):
-            # we ignored the inode number in the comparison above or it is still same.
-            # if it is still the same, replacing it in the tuple doesn't change it.
-            # if we ignored it, a reason for doing that is that files were moved to a new
-            # disk / new fs (so a one-time change of inode number is expected) and we wanted
-            # to avoid everything getting chunked again. to be able to re-enable the inode
-            # number comparison in a future backup run (and avoid chunking everything
-            # again at that time), we need to update the inode number in the cache with what
-            # we see in the filesystem.
-            self.files[path_hash] = msgpack.packb(entry._replace(inode=st.st_ino, age=0))
-            return entry.chunk_ids
-        else:
+        if 's' in cache_mode and entry.size != st.st_size:
             return None
-
-    def memorize_file(self, path_hash, st, ids):
-        if not (self.do_files and stat.S_ISREG(st.st_mode)):
+        if 'i' in cache_mode and not ignore_inode and entry.inode != st.st_ino:
+            return None
+        if 'c' in cache_mode and bigint_to_int(entry.cmtime) != st.st_ctime_ns:
+            return None
+        elif 'm' in cache_mode and bigint_to_int(entry.cmtime) != st.st_mtime_ns:
+            return None
+        # we ignored the inode number in the comparison above or it is still same.
+        # if it is still the same, replacing it in the tuple doesn't change it.
+        # if we ignored it, a reason for doing that is that files were moved to a new
+        # disk / new fs (so a one-time change of inode number is expected) and we wanted
+        # to avoid everything getting chunked again. to be able to re-enable the inode
+        # number comparison in a future backup run (and avoid chunking everything
+        # again at that time), we need to update the inode number in the cache with what
+        # we see in the filesystem.
+        self.files[path_hash] = msgpack.packb(entry._replace(inode=st.st_ino, age=0))
+        return entry.chunk_ids
+
+    def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE):
+        # note: r(echunk) modes will update the files cache, d(isabled) mode won't
+        if 'd' in cache_mode or not self.do_files or not stat.S_ISREG(st.st_mode):
             return
-        mtime_ns = safe_ns(st.st_mtime_ns)
-        entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, mtime=int_to_bigint(mtime_ns), chunk_ids=ids)
+        if 'c' in cache_mode:
+            cmtime_ns = safe_ns(st.st_ctime_ns)
+        elif 'm' in cache_mode:
+            cmtime_ns = safe_ns(st.st_mtime_ns)
+        entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, cmtime=int_to_bigint(cmtime_ns), chunk_ids=ids)
         self.files[path_hash] = msgpack.packb(entry)
-        self._newest_mtime = max(self._newest_mtime or 0, mtime_ns)
+        self._newest_cmtime = max(self._newest_cmtime or 0, cmtime_ns)
 
 
 class AdHocCache(CacheStatsMixin):
@@ -973,10 +984,10 @@ Chunk index:    {0.total_unique_chunks:20d}             unknown"""
     files = None
     do_files = False
 
-    def file_known_and_unchanged(self, path_hash, st, ignore_inode=False):
+    def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE):
         return None
 
-    def memorize_file(self, path_hash, st, ids):
+    def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE):
         pass
 
     def add_chunk(self, id, chunk, stats, overwrite=False, wait=True):
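
The mode letters drive file_known_and_unchanged() as a chain of early returns;
a runnable simplification (no msgpack/bigint packing, no age/inode rewriting):

    import os
    from collections import namedtuple

    Entry = namedtuple('Entry', 'inode size cmtime')  # cmtime: ctime or mtime, in ns

    def unchanged(entry, st, cache_mode='cis'):
        if 'd' in cache_mode or 'r' in cache_mode:  # disabled / rechunk: never skip
            return False
        if 's' in cache_mode and entry.size != st.st_size:
            return False
        if 'i' in cache_mode and entry.inode != st.st_ino:
            return False
        if 'c' in cache_mode and entry.cmtime != st.st_ctime_ns:
            return False
        elif 'm' in cache_mode and entry.cmtime != st.st_mtime_ns:
            return False
        return True

    st = os.stat(__file__)
    entry = Entry(st.st_ino, st.st_size, st.st_ctime_ns)
    assert unchanged(entry, st, 'cis')     # all values match -> file can be skipped
    assert not unchanged(entry, st, 'cr')  # rechunk mode -> always reprocessed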

+ 4 - 0
src/borg/constants.py

@@ -60,6 +60,10 @@ CHUNKER_PARAMS = (CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE
 # chunker params for the items metadata stream, finer granularity
 ITEMS_CHUNKER_PARAMS = (15, 19, 17, HASH_WINDOW_SIZE)
 
+# operating mode of the files cache (for fast skipping of unchanged files)
+DEFAULT_FILES_CACHE_MODE_UI = 'ctime,size,inode'
+DEFAULT_FILES_CACHE_MODE = 'cis'  # == FilesCacheMode(DEFAULT_FILES_CACHE_MODE_UI)
+
 # return codes returned by borg command
 # when borg is killed by signal N, rc = 128 + N
 EXIT_SUCCESS = 0  # everything done, no problems

+ 16 - 0
src/borg/helpers/parseformat.py

@@ -117,6 +117,22 @@ def ChunkerParams(s):
     return int(chunk_min), int(chunk_max), int(chunk_mask), int(window_size)
 
 
+def FilesCacheMode(s):
+    ENTRIES_MAP = dict(ctime='c', mtime='m', size='s', inode='i', rechunk='r', disabled='d')
+    VALID_MODES = ('cis', 'ims', 'cs', 'ms', 'cr', 'mr', 'd')  # letters in alpha order
+    entries = set(s.strip().split(','))
+    if not entries <= set(ENTRIES_MAP):
+        raise ValueError('cache mode must be a comma-separated list of: %s' % ','.join(sorted(ENTRIES_MAP)))
+    short_entries = {ENTRIES_MAP[entry] for entry in entries}
+    mode = ''.join(sorted(short_entries))
+    if mode not in VALID_MODES:
+        raise ValueError('cache mode short must be one of: %s' % ','.join(VALID_MODES))
+    return mode
+
+
+assert FilesCacheMode(DEFAULT_FILES_CACHE_MODE_UI) == DEFAULT_FILES_CACHE_MODE  # keep these 2 values in sync!
+
+
 def partial_format(format, mapping):
     """
     Apply format.format_map(mapping) while preserving unknown keys
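
Usage sketch for the parser above, including the combination check (only the
listed VALID_MODES pass, regardless of the order the words are given in):

    from borg.helpers import FilesCacheMode

    FilesCacheMode('rechunk,ctime')  # -> 'cr'
    FilesCacheMode('size,ctime')     # -> 'cs' (input order does not matter)
    try:
        FilesCacheMode('ctime,mtime,size')  # 'cms' is not a valid combination
    except ValueError as e:
        print(e)  # cache mode short must be one of: cis,ims,cs,ms,cr,mr,d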

+ 46 - 15
src/borg/testsuite/archiver.py

@@ -318,8 +318,6 @@ class ArchiverTestCaseBase(BaseTestCase):
         """Create a minimal test case including all supported file types
         """
         # File
-        self.create_regular_file('empty', size=0)
-        os.utime('input/empty', (MAX_S, MAX_S))
         self.create_regular_file('file1', size=1024 * 80)
         self.create_regular_file('flagfile', size=1024)
         # Directory
@@ -370,6 +368,8 @@ class ArchiverTestCaseBase(BaseTestCase):
             if e.errno not in (errno.EINVAL, errno.ENOSYS):
                 raise
             have_root = False
+        time.sleep(1)  # "empty" must have newer timestamp than other files
+        self.create_regular_file('empty', size=0)
         return have_root
 
 
@@ -1591,9 +1591,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         """test that various file status show expected results
 
         clearly incomplete: only tests for the weird "unchanged" status for now"""
-        now = time.time()
         self.create_regular_file('file1', size=1024 * 80)
-        os.utime('input/file1', (now - 5, now - 5))  # 5 seconds ago
+        time.sleep(1)  # file2 must have newer timestamps than file1
         self.create_regular_file('file2', size=1024 * 80)
         self.cmd('init', '--encryption=repokey', self.repository_location)
         output = self.cmd('create', '--list', self.repository_location + '::test', 'input')
@@ -1606,12 +1605,51 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         # https://borgbackup.readthedocs.org/en/latest/faq.html#i-am-seeing-a-added-status-for-a-unchanged-file
         self.assert_in("A input/file2", output)
 
+    def test_file_status_cs_cache_mode(self):
+        """test that a changed file with faked "previous" mtime still gets backed up in ctime,size cache_mode"""
+        self.create_regular_file('file1', contents=b'123')
+        time.sleep(1)  # file2 must have newer timestamps than file1
+        self.create_regular_file('file2', size=10)
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        output = self.cmd('create', '--list', '--files-cache=ctime,size', self.repository_location + '::test1', 'input')
+        # modify file1, but cheat with the mtime (and atime) and also keep same size:
+        st = os.stat('input/file1')
+        self.create_regular_file('file1', contents=b'321')
+        os.utime('input/file1', ns=(st.st_atime_ns, st.st_mtime_ns))
+        # this mode uses ctime for change detection, so it should find file1 as modified
+        output = self.cmd('create', '--list', '--files-cache=ctime,size', self.repository_location + '::test2', 'input')
+        self.assert_in("A input/file1", output)
+
+    def test_file_status_ms_cache_mode(self):
+        """test that a chmod'ed file with no content changes does not get chunked again in mtime,size cache_mode"""
+        self.create_regular_file('file1', size=10)
+        time.sleep(1)  # file2 must have newer timestamps than file1
+        self.create_regular_file('file2', size=10)
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        output = self.cmd('create', '--list', '--files-cache=mtime,size', self.repository_location + '::test1', 'input')
+        # change mode of file1, no content change:
+        st = os.stat('input/file1')
+        os.chmod('input/file1', st.st_mode ^ stat.S_IRWXO)  # this triggers a ctime change, but mtime is unchanged
+        # this mode uses mtime for change detection, so it should find file1 as unmodified
+        output = self.cmd('create', '--list', '--files-cache=mtime,size', self.repository_location + '::test2', 'input')
+        self.assert_in("U input/file1", output)
+
+    def test_file_status_rc_cache_mode(self):
+        """test that files get rechunked unconditionally in rechunk,ctime cache mode"""
+        self.create_regular_file('file1', size=10)
+        time.sleep(1)  # file2 must have newer timestamps than file1
+        self.create_regular_file('file2', size=10)
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        output = self.cmd('create', '--list', '--files-cache=rechunk,ctime', self.repository_location + '::test1', 'input')
+        # no changes here, but this mode rechunks unconditionally
+        output = self.cmd('create', '--list', '--files-cache=rechunk,ctime', self.repository_location + '::test2', 'input')
+        self.assert_in("A input/file1", output)
+
     def test_file_status_excluded(self):
         """test that excluded paths are listed"""
 
-        now = time.time()
         self.create_regular_file('file1', size=1024 * 80)
-        os.utime('input/file1', (now - 5, now - 5))  # 5 seconds ago
+        time.sleep(1)  # file2 must have newer timestamps than file1
         self.create_regular_file('file2', size=1024 * 80)
         if has_lchflags:
             self.create_regular_file('file3', size=1024 * 80)
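
A companion sketch for the ms test above: a metadata-only change (chmod) bumps
ctime but leaves mtime untouched (assumes a POSIX filesystem with fine-grained
timestamps):

    import os
    import stat

    with open('demo.txt', 'wb') as f:
        f.write(b'x' * 10)
    st1 = os.stat('demo.txt')

    os.chmod('demo.txt', st1.st_mode ^ stat.S_IRWXO)  # permissions change only

    st2 = os.stat('demo.txt')
    assert st2.st_mtime_ns == st1.st_mtime_ns  # mtime,size: counts as Unchanged
    assert st2.st_ctime_ns != st1.st_ctime_ns  # ctime,size: would be chunked again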
@@ -1647,9 +1685,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         assert 'stats' in archive
 
     def test_create_topical(self):
-        now = time.time()
         self.create_regular_file('file1', size=1024 * 80)
-        os.utime('input/file1', (now-5, now-5))
+        time.sleep(1)  # file2 must have newer timestamps than file1
         self.create_regular_file('file2', size=1024 * 80)
         self.cmd('init', '--encryption=repokey', self.repository_location)
         # no listing by default
@@ -2363,7 +2400,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             fd.write(b'b' * 280)
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.cmd('create', '--chunker-params', '7,9,8,128', self.repository_location + '::test1', 'input')
-        self.cmd('create', self.repository_location + '::test2', 'input', '--no-files-cache')
+        self.cmd('create', self.repository_location + '::test2', 'input', '--files-cache=disabled')
         list = self.cmd('list', self.repository_location + '::test1', 'input/large_file',
                         '--format', '{num_chunks} {unique_chunks}')
         num_chunks, unique_chunks = map(int, list.split(' '))
@@ -3513,7 +3550,6 @@
         add_common_option('-p', '--progress', dest='progress', action='store_true', help='foo')
         add_common_option('--lock-wait', dest='lock_wait', type=int, metavar='N', default=1,
                           help='(default: %(default)d).')
-        add_common_option('--no-files-cache', dest='no_files_cache', action='store_false', help='foo')
 
     @pytest.fixture
     def basic_parser(self):
@@ -3555,7 +3591,6 @@
 
     def test_simple(self, parse_vars_from_line):
         assert parse_vars_from_line('--error') == {
-            'no_files_cache': True,
             'append': [],
             'lock_wait': 1,
             'log_level': 'error',
@@ -3563,7 +3598,6 @@
         }
 
         assert parse_vars_from_line('--error', 'subcommand', '--critical') == {
-            'no_files_cache': True,
             'append': [],
             'lock_wait': 1,
             'log_level': 'critical',
@@ -3576,7 +3610,6 @@
             parse_vars_from_line('--append-only', 'subcommand')
 
         assert parse_vars_from_line('--append=foo', '--append', 'bar', 'subcommand', '--append', 'baz') == {
-            'no_files_cache': True,
             'append': ['foo', 'bar', 'baz'],
             'lock_wait': 1,
             'log_level': 'warning',
@@ -3589,7 +3622,6 @@
     @pytest.mark.parametrize('flag,args_key,args_value', (
         ('-p', 'progress', True),
         ('--lock-wait=3', 'lock_wait', 3),
-        ('--no-files-cache', 'no_files_cache', False),
     ))
     def test_flag_position_independence(self, parse_vars_from_line, position, flag, args_key, args_value):
         line = []
@@ -3600,7 +3632,6 @@
             line.append(flag)
 
         result = {
-            'no_files_cache': True,
             'append': [],
             'lock_wait': 1,
             'log_level': 'warning',