files cache: add chunk size information

the files cache used to store only the chunk ids, so it had to rely on
the chunks index for the size information - which is problematic with
e.g. the AdhocCache (it has size == 0 for all chunks that are not new)
and blocked using the files cache there.
Thomas Waldmann
commit 4488c077a7

2 changed files with 20 additions and 17 deletions:
    src/borg/archive.py  (+10 -8)
    src/borg/cache.py    (+10 -9)
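As an aside on the problem described above (a minimal illustrative sketch, not borg's actual code; all names below are made up): with only ids in the files cache, the sizes for an unchanged file's chunks had to come from the chunks index, and an AdhocCache-style index records size == 0 for chunks it did not just add.

    # sketch: the old files cache entry stored chunk ids only
    old_entry_chunks = [b"chunk-id-1", b"chunk-id-2"]

    # an AdhocCache-style chunks index knows refcounts, but records
    # size == 0 for chunks that were already in the repository:
    adhoc_index = {b"chunk-id-1": (1, 0), b"chunk-id-2": (1, 0)}  # (refcount, size)

    # old scheme: item.chunks for an unchanged file needed sizes from
    # the index -> all zero here, so the files cache could not be used:
    old_chunks = [(id_, adhoc_index[id_][1]) for id_ in old_entry_chunks]
    assert old_chunks == [(b"chunk-id-1", 0), (b"chunk-id-2", 0)]  # wrong sizes

    # new scheme: the files cache entry itself stores (id, size) pairs,
    # independent of what the chunks index knows:
    new_entry_chunks = [(b"chunk-id-1", 4096), (b"chunk-id-2", 1024)]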

src/borg/archive.py  (+10 -8)

@@ -1552,25 +1552,27 @@ class FilesystemObjectProcessors:
                             started_hashing = time.monotonic()
                             path_hash = self.key.id_hash(hashed_path)
                             self.stats.hashing_time += time.monotonic() - started_hashing
-                            known, ids = cache.file_known_and_unchanged(hashed_path, path_hash, st)
+                            known, chunks = cache.file_known_and_unchanged(hashed_path, path_hash, st)
                         else:
                             # in --read-special mode, we may be called for special files.
                             # there should be no information in the cache about special files processed in
                             # read-special mode, but we better play safe as this was wrong in the past:
                             hashed_path = path_hash = None
-                            known, ids = False, None
-                        if ids is not None:
+                            known, chunks = False, None
+                        if chunks is not None:
                             # Make sure all ids are available
-                            for id_ in ids:
-                                if not cache.seen_chunk(id_):
+                            for chunk in chunks:
+                                if not cache.seen_chunk(chunk.id):
                                     # cache said it is unmodified, but we lost a chunk: process file like modified
                                     status = "M"
                                     break
                             else:
                                 item.chunks = []
-                                for chunk_id in ids:
+                                for chunk in chunks:
                                     # process one-by-one, so we will know in item.chunks how far we got
-                                    chunk_entry = cache.chunk_incref(chunk_id, self.stats)
+                                    chunk_entry = cache.chunk_incref(chunk.id, self.stats)
+                                    # chunk.size is from files cache, chunk_entry.size from index:
+                                    assert chunk == chunk_entry
                                     item.chunks.append(chunk_entry)
                                 status = "U"  # regular file, unchanged
                         else:
@@ -1606,7 +1608,7 @@ class FilesystemObjectProcessors:
                                 # block or char device will change without its mtime/size/inode changing.
                                 # also, we must not memorize a potentially inconsistent/corrupt file that
                                 # changed while we backed it up.
-                                cache.memorize_file(hashed_path, path_hash, st, [c.id for c in item.chunks])
+                                cache.memorize_file(hashed_path, path_hash, st, item.chunks)
                         self.stats.files_stats[status] += 1  # must be done late
                         if not changed_while_backup:
                             status = None  # we already called print_file_status

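A note on the new assert in the hunk above: ChunkListEntry is (assumed here to be) a namedtuple of chunk id and size, and namedtuples compare element-wise like plain tuples, so a single equality check cross-checks the size stored in the files cache against the size from the chunks index. A minimal sketch:

    from collections import namedtuple

    # assumed shape of borg's ChunkListEntry (id + size):
    ChunkListEntry = namedtuple("ChunkListEntry", "id size")

    from_files_cache = ChunkListEntry(id=b"\x01" * 32, size=4096)
    from_chunks_index = ChunkListEntry(id=b"\x01" * 32, size=4096)

    # tuple equality checks both fields, so this verifies that the
    # files cache and the chunks index agree on the chunk's size:
    assert from_files_cache == from_chunks_index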
src/borg/cache.py  (+10 -9)

@@ -35,8 +35,8 @@ from .platform import SaveFile
 from .remote import cache_if_remote
 from .repository import LIST_SCAN_LIMIT
 
-# note: cmtime might me either a ctime or a mtime timestamp
-FileCacheEntry = namedtuple("FileCacheEntry", "age inode size cmtime chunk_ids")
+# note: cmtime might be either a ctime or a mtime timestamp, chunks is a list of ChunkListEntry
+FileCacheEntry = namedtuple("FileCacheEntry", "age inode size cmtime chunks")
 
 
 class SecurityManager:
@@ -1030,8 +1030,8 @@ class LocalCache(CacheStatsMixin):
         :param hashed_path: the file's path as we gave it to hash(hashed_path)
         :param path_hash: hash(hashed_path), to save some memory in the files cache
         :param st: the file's stat() result
-        :return: known, ids (known is True if we have infos about this file in the cache,
-                             ids is the list of chunk ids IF the file has not changed, otherwise None).
+        :return: known, chunks (known is True if we have infos about this file in the cache,
+                               chunks is a list[ChunkListEntry] IF the file has not changed, otherwise None).
         """
         if not stat.S_ISREG(st.st_mode):
             return False, None
@@ -1072,9 +1072,10 @@ class LocalCache(CacheStatsMixin):
         # again at that time), we need to update the inode number in the cache with what
         # we see in the filesystem.
         self.files[path_hash] = msgpack.packb(entry._replace(inode=st.st_ino, age=0))
-        return True, entry.chunk_ids
+        chunks = [ChunkListEntry(*chunk) for chunk in entry.chunks]  # convert to list of namedtuple
+        return True, chunks
 
-    def memorize_file(self, hashed_path, path_hash, st, ids):
+    def memorize_file(self, hashed_path, path_hash, st, chunks):
         if not stat.S_ISREG(st.st_mode):
             return
         cache_mode = self.cache_mode
@@ -1092,13 +1093,13 @@ class LocalCache(CacheStatsMixin):
             cmtime_type = "ctime"
             cmtime_ns = safe_ns(st.st_ctime_ns)
         entry = FileCacheEntry(
-            age=0, inode=st.st_ino, size=st.st_size, cmtime=int_to_timestamp(cmtime_ns), chunk_ids=ids
+            age=0, inode=st.st_ino, size=st.st_size, cmtime=int_to_timestamp(cmtime_ns), chunks=chunks
         )
         self.files[path_hash] = msgpack.packb(entry)
         self._newest_cmtime = max(self._newest_cmtime or 0, cmtime_ns)
         files_cache_logger.debug(
             "FILES-CACHE-UPDATE: put %r [has %s] <- %r",
-            entry._replace(chunk_ids="[%d entries]" % len(entry.chunk_ids)),
+            entry._replace(chunks="[%d entries]" % len(entry.chunks)),
             cmtime_type,
             hashed_path,
         )
@@ -1149,7 +1150,7 @@ Chunk index:    {0.total_unique_chunks:20d}             unknown"""
         files_cache_logger.debug("UNKNOWN: files cache not implemented")
         return False, None
 
-    def memorize_file(self, hashed_path, path_hash, st, ids):
+    def memorize_file(self, hashed_path, path_hash, st, chunks):
         pass
 
     def add_chunk(self, id, meta, data, *, stats, wait=True, compress=True, size=None, ro_type=ROBJ_FILE_STREAM):
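
Why file_known_and_unchanged has to rebuild namedtuples ("convert to list of namedtuple" above): msgpack serializes a (named)tuple as a plain array, so the tuple type and its field names are lost on the round trip and must be restored after unpacking. A sketch using plain msgpack-python (borg goes through its own msgpack wrapper, but the behavior sketched here is the same):

    import msgpack
    from collections import namedtuple

    ChunkListEntry = namedtuple("ChunkListEntry", "id size")
    FileCacheEntry = namedtuple("FileCacheEntry", "age inode size cmtime chunks")

    entry = FileCacheEntry(
        age=0, inode=1234, size=4096, cmtime=b"\x00" * 8,  # dummy cmtime
        chunks=[ChunkListEntry(b"\x01" * 32, 4096)],
    )
    packed = msgpack.packb(entry)     # namedtuples pack as msgpack arrays
    fields = msgpack.unpackb(packed)  # ...and unpack as plain lists
    unpacked = FileCacheEntry(*fields)
    assert unpacked.chunks == [[b"\x01" * 32, 4096]]  # namedtuple type lost
    chunks = [ChunkListEntry(*c) for c in unpacked.chunks]  # restore it
    assert chunks[0].size == 4096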