Browse Source

Merge pull request #3633 from ThomasWaldmann/fix-modified-status

fix borg create never showing M status
TW 7 years ago
parent
commit
935a768ff8
4 changed files with 32 additions and 15 deletions
  1. 5 4
      src/borg/archive.py
  2. 25 9
      src/borg/cache.py
  3. 1 1
      src/borg/testsuite/archiver.py
  4. 1 1
      src/borg/testsuite/cache.py

+ 5 - 4
src/borg/archive.py

@@ -1132,12 +1132,13 @@ class FilesystemObjectProcessors:
             if not hardlinked or hardlink_master:
                 if not is_special_file:
                     path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path)))
-                    ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode, files_cache_mode)
+                    known, ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode, files_cache_mode)
                 else:
                     # in --read-special mode, we may be called for special files.
                     # there should be no information in the cache about special files processed in
                     # read-special mode, but we better play safe as this was wrong in the past:
-                    path_hash = ids = None
+                    path_hash = None
+                    known, ids = False, None
                 first_run = not cache.files and cache.do_files
                 if first_run:
                     logger.debug('Processing files ...')
@@ -1146,12 +1147,13 @@ class FilesystemObjectProcessors:
                     # Make sure all ids are available
                     for id_ in ids:
                         if not cache.seen_chunk(id_):
+                            status = 'M'  # cache said it is unmodified, but we lost a chunk: process file like modified
                             break
                     else:
                         chunks = [cache.chunk_incref(id_, self.stats) for id_ in ids]
                         status = 'U'  # regular file, unchanged
                 else:
-                    status = 'A'  # regular file, added
+                    status = 'M' if known else 'A'  # regular file, modified or added
                 item.hardlink_master = hardlinked
                 item.update(self.metadata_collector.stat_simple_attrs(st))
                 # Only chunkify the file if needed
@@ -1166,7 +1168,6 @@ class FilesystemObjectProcessors:
                         # we must not memorize special files, because the contents of e.g. a
                         # block or char device will change without its mtime/size/inode changing.
                         cache.memorize_file(path_hash, st, [c.id for c in item.chunks], files_cache_mode)
-                    status = status or 'M'  # regular file, modified (if not 'A' already)
                 self.stats.nfiles += 1
             item.update(self.metadata_collector.stat_attrs(st, path))
             item.get_size(memorize=True)

+ 25 - 9
src/borg/cache.py

@@ -918,24 +918,40 @@ class LocalCache(CacheStatsMixin):
             stats.update(-size, -csize, False)
 
     def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE):
+        """
+        Check if we know the file that has this path_hash (know == it is in our files cache) and
+        whether it is unchanged (the size/inode number/cmtime is same for stuff we check in this cache_mode).
+
+        :param path_hash: hash(file_path), to save some memory in the files cache
+        :param st: the file's stat() result
+        :param ignore_inode: whether the inode number shall be ignored
+        :param cache_mode: what shall be compared in the file stat infos vs. cached stat infos comparison
+        :return: known, ids (known is True if we have infos about this file in the cache,
+                             ids is the list of chunk ids IF the file has not changed, otherwise None).
+        """
         if 'd' in cache_mode or not self.do_files or not stat.S_ISREG(st.st_mode):  # d(isabled)
-            return None
+            return False, None
         if self.files is None:
             self._read_files()
+        # note: r(echunk) does not need the files cache in this method, but the files cache will
+        # be updated and saved to disk to memorize the files. To preserve previous generations in
+        # the cache, this means that it also needs to get loaded from disk first, so keep
+        # _read_files() above here.
         if 'r' in cache_mode:  # r(echunk)
-            return None
+            return False, None
         entry = self.files.get(path_hash)
         if not entry:
-            return None
+            return False, None
+        # we know the file!
         entry = FileCacheEntry(*msgpack.unpackb(entry))
         if 's' in cache_mode and entry.size != st.st_size:
-            return None
+            return True, None
         if 'i' in cache_mode and not ignore_inode and entry.inode != st.st_ino:
-            return None
+            return True, None
         if 'c' in cache_mode and bigint_to_int(entry.cmtime) != st.st_ctime_ns:
-            return None
+            return True, None
         elif 'm' in cache_mode and bigint_to_int(entry.cmtime) != st.st_mtime_ns:
-            return None
+            return True, None
         # we ignored the inode number in the comparison above or it is still same.
         # if it is still the same, replacing it in the tuple doesn't change it.
         # if we ignored it, a reason for doing that is that files were moved to a new
@@ -945,7 +961,7 @@ class LocalCache(CacheStatsMixin):
         # again at that time), we need to update the inode number in the cache with what
         # we see in the filesystem.
         self.files[path_hash] = msgpack.packb(entry._replace(inode=st.st_ino, age=0))
-        return entry.chunk_ids
+        return True, entry.chunk_ids
 
     def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE):
         # note: r(echunk) modes will update the files cache, d(isabled) mode won't
@@ -999,7 +1015,7 @@ Chunk index:    {0.total_unique_chunks:20d}             unknown"""
     do_files = False
 
     def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE):
-        return None
+        return False, None
 
     def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE):
         pass

+ 1 - 1
src/borg/testsuite/archiver.py

@@ -1678,7 +1678,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         os.utime('input/file1', ns=(st.st_atime_ns, st.st_mtime_ns))
         # this mode uses ctime for change detection, so it should find file1 as modified
         output = self.cmd('create', '--list', '--files-cache=ctime,size', self.repository_location + '::test2', 'input')
-        self.assert_in("A input/file1", output)
+        self.assert_in("M input/file1", output)
 
     def test_file_status_ms_cache_mode(self):
         """test that a chmod'ed file with no content changes does not get chunked again in mtime,size cache_mode"""

+ 1 - 1
src/borg/testsuite/cache.py

@@ -256,7 +256,7 @@ class TestAdHocCache:
             repository.get(H(5))
 
     def test_files_cache(self, cache):
-        assert cache.file_known_and_unchanged(bytes(32), None) is None
+        assert cache.file_known_and_unchanged(bytes(32), None) == (False, None)
         assert not cache.do_files
         assert cache.files is None