
Merge pull request #5706 from ThomasWaldmann/files-cache-debug-logging-1.1

verbose files cache logging via --debug-topic=files_cache, fixes #5659
TW, 4 years ago
commit abe2b6b315
3 files changed, 42 insertions, 12 deletions
  1. src/borg/archive.py (+5 -4)
  2. src/borg/cache.py (+36 -7)
  3. src/borg/testsuite/cache.py (+1 -1)

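The commit message enables this output via --debug-topic=files_cache; inside the code, the change simply creates a child logger named 'borg.debug.files_cache' (see the cache.py hunk below). The following is a minimal stand-alone sketch, using only Python's standard logging module rather than borg's own logger setup, of how one such topic logger can be switched to DEBUG without making the rest of the output verbose:

    import logging

    # Root/handler setup here stands in for borg's own logging configuration (assumption).
    logging.basicConfig(format='%(name)s: %(message)s')

    # Enabling one topic just raises the verbosity of one child logger;
    # sibling loggers under 'borg.debug' keep the default (WARNING) level.
    logging.getLogger('borg.debug.files_cache').setLevel(logging.DEBUG)

    files_cache_logger = logging.getLogger('borg.debug.files_cache')
    files_cache_logger.debug('FILES-CACHE-LOAD: starting...')          # printed
    logging.getLogger('borg.debug.other').debug('some other topic')    # suppressed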
src/borg/archive.py (+5 -4)

@@ -1059,13 +1059,14 @@ Utilization of max. archive size: {csize_max:.0%}
                 item.mode = stat.S_IFREG | stat.S_IMODE(item.mode)
             if not hardlinked or hardlink_master:
                 if not is_special_file:
-                    path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path)))
-                    known, ids = cache.file_known_and_unchanged(path_hash, st)
+                    hashed_path = safe_encode(os.path.join(self.cwd, path))
+                    path_hash = self.key.id_hash(hashed_path)
+                    known, ids = cache.file_known_and_unchanged(hashed_path, path_hash, st)
                 else:
                     # in --read-special mode, we may be called for special files.
                     # there should be no information in the cache about special files processed in
                     # read-special mode, but we better play safe as this was wrong in the past:
-                    path_hash = None
+                    hashed_path = path_hash = None
                     known, ids = False, None
                 chunks = None
                 if ids is not None:
@@ -1091,7 +1092,7 @@ Utilization of max. archive size: {csize_max:.0%}
                     if not is_special_file:
                         # we must not memorize special files, because the contents of e.g. a
                         # block or char device will change without its mtime/size/inode changing.
-                        cache.memorize_file(path_hash, st, [c.id for c in item.chunks])
+                        cache.memorize_file(hashed_path, path_hash, st, [c.id for c in item.chunks])
                 self.stats.nfiles += 1
             item.update(self.stat_ext_attrs(st, path))
             item.get_size(memorize=True)

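To see the new call pattern in one place rather than spread across hunks: the caller now keeps the encoded path (hashed_path) next to its hash (path_hash), so the cache can log a readable path while still indexing entries by the hash. The snippet below is a self-contained toy; DummyKey and DummyFilesCache only mimic the new signatures and are not borg classes, and the checks they perform are deliberately simplified:

    import hashlib
    import os
    import stat

    class DummyKey:
        """Stand-in for borg's key object; only id_hash() is mimicked."""
        def id_hash(self, data):
            return hashlib.sha256(data).digest()

    class DummyFilesCache:
        """Stand-in mimicking the signatures introduced by this PR."""
        def __init__(self):
            self.files = {}

        def file_known_and_unchanged(self, hashed_path, path_hash, st):
            entry = self.files.get(path_hash)
            if entry is None:
                print('UNKNOWN: no file metadata in cache for: %r' % hashed_path)
                return False, None
            size, mtime_ns, chunk_ids = entry
            if size != st.st_size or mtime_ns != st.st_mtime_ns:
                print('KNOWN-CHANGED: size or mtime has changed: %r' % hashed_path)
                return True, None
            return True, chunk_ids

        def memorize_file(self, hashed_path, path_hash, st, chunk_ids):
            print('FILES-CACHE-UPDATE: put %r' % hashed_path)
            self.files[path_hash] = (st.st_size, st.st_mtime_ns, chunk_ids)

    key, cache = DummyKey(), DummyFilesCache()
    path = __file__
    st = os.stat(path)
    if stat.S_ISREG(st.st_mode):
        hashed_path = os.fsencode(os.path.abspath(path))   # the path as bytes
        path_hash = key.id_hash(hashed_path)                # hash used as the cache key
        known, ids = cache.file_known_and_unchanged(hashed_path, path_hash, st)
        if ids is None:
            cache.memorize_file(hashed_path, path_hash, st, [b'fake-chunk-id'])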
src/borg/cache.py (+36 -7)

@@ -10,6 +10,8 @@ from .logger import create_logger
 
 logger = create_logger()
 
+files_cache_logger = create_logger('borg.debug.files_cache')
+
 from .constants import CACHE_README, DEFAULT_FILES_CACHE_MODE
 from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer
 from .helpers import Location
@@ -518,6 +520,7 @@ class LocalCache(CacheStatsMixin):
         self.files = {}
         self._newest_cmtime = None
         logger.debug('Reading files cache ...')
+        files_cache_logger.debug("FILES-CACHE-LOAD: starting...")
         msg = None
         try:
             with IntegrityCheckedFile(path=os.path.join(self.path, files_cache_name()), write=False,
@@ -544,6 +547,7 @@ class LocalCache(CacheStatsMixin):
             logger.warning(msg)
             logger.warning('Continuing without files cache - expect lower performance.')
             self.files = {}
+        files_cache_logger.debug("FILES-CACHE-LOAD: finished, %d entries loaded.", len(self.files))
 
     def begin_txn(self):
         # Initialize transaction snapshot
@@ -578,7 +582,9 @@ class LocalCache(CacheStatsMixin):
                 self._newest_cmtime = 2 ** 63 - 1  # nanoseconds, good until y2262
             ttl = int(os.environ.get('BORG_FILES_CACHE_TTL', 20))
             pi.output('Saving files cache')
+            files_cache_logger.debug("FILES-CACHE-SAVE: starting...")
             with IntegrityCheckedFile(path=os.path.join(self.path, files_cache_name()), write=True) as fd:
+                entry_count = 0
                 for path_hash, item in self.files.items():
                     # Only keep files seen in this backup that are older than newest cmtime seen in this backup -
                     # this is to avoid issues with filesystem snapshots and cmtime granularity.
@@ -587,6 +593,10 @@ class LocalCache(CacheStatsMixin):
                     if entry.age == 0 and bigint_to_int(entry.cmtime) < self._newest_cmtime or \
                        entry.age > 0 and entry.age < ttl:
                         msgpack.pack((path_hash, entry), fd)
+                        entry_count += 1
+            files_cache_logger.debug("FILES-CACHE-KILL: removed all old entries with age >= TTL [%d]", ttl)
+            files_cache_logger.debug("FILES-CACHE-KILL: removed all current entries with newest cmtime %d", self._newest_cmtime)
+            files_cache_logger.debug("FILES-CACHE-SAVE: finished, %d remaining entries saved.", entry_count)
             self.cache_config.integrity[files_cache_name()] = fd.integrity_data
         pi.output('Saving chunks cache')
         with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=True) as fd:
@@ -937,36 +947,46 @@ class LocalCache(CacheStatsMixin):
         else:
             stats.update(-size, -csize, False)
 
-    def file_known_and_unchanged(self, path_hash, st):
+    def file_known_and_unchanged(self, hashed_path, path_hash, st):
         """
         Check if we know the file that has this path_hash (know == it is in our files cache) and
         whether it is unchanged (the size/inode number/cmtime is same for stuff we check in this cache_mode).
 
-        :param path_hash: hash(file_path), to save some memory in the files cache
+        :param hashed_path: the file's path as we gave it to hash(hashed_path)
+        :param path_hash: hash(hashed_path), to save some memory in the files cache
         :param st: the file's stat() result
         :return: known, ids (known is True if we have infos about this file in the cache,
                              ids is the list of chunk ids IF the file has not changed, otherwise None).
         """
+        if not stat.S_ISREG(st.st_mode):
+            return False, None
         cache_mode = self.cache_mode
-        if 'd' in cache_mode or not stat.S_ISREG(st.st_mode):  # d(isabled)
+        if 'd' in cache_mode:  # d(isabled)
+            files_cache_logger.debug('UNKNOWN: files cache disabled')
             return False, None
         # note: r(echunk) does not need the files cache in this method, but the files cache will
         # be updated and saved to disk to memorize the files. To preserve previous generations in
         # the cache, this means that it also needs to get loaded from disk first.
         if 'r' in cache_mode:  # r(echunk)
+            files_cache_logger.debug('UNKNOWN: rechunking enforced')
             return False, None
         entry = self.files.get(path_hash)
         if not entry:
+            files_cache_logger.debug('UNKNOWN: no file metadata in cache for: %r', hashed_path)
             return False, None
         # we know the file!
         entry = FileCacheEntry(*msgpack.unpackb(entry))
         if 's' in cache_mode and entry.size != st.st_size:
+            files_cache_logger.debug('KNOWN-CHANGED: file size has changed: %r', hashed_path)
             return True, None
         if 'i' in cache_mode and entry.inode != st.st_ino:
+            files_cache_logger.debug('KNOWN-CHANGED: file inode number has changed: %r', hashed_path)
             return True, None
         if 'c' in cache_mode and bigint_to_int(entry.cmtime) != st.st_ctime_ns:
+            files_cache_logger.debug('KNOWN-CHANGED: file ctime has changed: %r', hashed_path)
             return True, None
         elif 'm' in cache_mode and bigint_to_int(entry.cmtime) != st.st_mtime_ns:
+            files_cache_logger.debug('KNOWN-CHANGED: file mtime has changed: %r', hashed_path)
             return True, None
         # we ignored the inode number in the comparison above or it is still same.
         # if it is still the same, replacing it in the tuple doesn't change it.
@@ -979,18 +999,26 @@ class LocalCache(CacheStatsMixin):
         self.files[path_hash] = msgpack.packb(entry._replace(inode=st.st_ino, age=0))
         return True, entry.chunk_ids
 
-    def memorize_file(self, path_hash, st, ids):
+    def memorize_file(self, hashed_path, path_hash, st, ids):
+        if not stat.S_ISREG(st.st_mode):
+            return
         cache_mode = self.cache_mode
         # note: r(echunk) modes will update the files cache, d(isabled) mode won't
-        if 'd' in cache_mode or not stat.S_ISREG(st.st_mode):
+        if 'd' in cache_mode:
+            files_cache_logger.debug('FILES-CACHE-NOUPDATE: files cache disabled')
             return
         if 'c' in cache_mode:
+            cmtime_type = 'ctime'
             cmtime_ns = safe_ns(st.st_ctime_ns)
         elif 'm' in cache_mode:
+            cmtime_type = 'mtime'
             cmtime_ns = safe_ns(st.st_mtime_ns)
         entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, cmtime=int_to_bigint(cmtime_ns), chunk_ids=ids)
         self.files[path_hash] = msgpack.packb(entry)
         self._newest_cmtime = max(self._newest_cmtime or 0, cmtime_ns)
+        files_cache_logger.debug('FILES-CACHE-UPDATE: put %r [has %s] <- %r',
+                                 entry._replace(chunk_ids='[%d entries]' % len(entry.chunk_ids)),
+                                 cmtime_type, hashed_path)
 
 
 class AdHocCache(CacheStatsMixin):
@@ -1031,10 +1059,11 @@ Chunk index:    {0.total_unique_chunks:20d}             unknown"""
     files = None
     cache_mode = 'd'
 
-    def file_known_and_unchanged(self, path_hash, st):
+    def file_known_and_unchanged(self, hashed_path, path_hash, st):
+        files_cache_logger.debug("UNKNOWN: files cache not implemented")
         return False, None
 
-    def memorize_file(self, path_hash, st, ids):
+    def memorize_file(self, hashed_path, path_hash, st, ids):
         pass
 
     def add_chunk(self, id, chunk, stats, overwrite=False, wait=True):

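One way to make use of the new output (a hypothetical helper, not part of the PR): capture a run with the files_cache topic at DEBUG level and tally the decision keywords, which quickly shows whether files are being re-read because of cache misses, size/inode changes, or ctime/mtime changes. Only the keywords below come from this patch; everything else about the log-line format and invocation is an assumption:

    import collections
    import sys

    # Decision keywords emitted by the patched files cache code.
    KEYWORDS = ('UNKNOWN', 'KNOWN-CHANGED', 'FILES-CACHE-LOAD', 'FILES-CACHE-SAVE',
                'FILES-CACHE-KILL', 'FILES-CACHE-UPDATE', 'FILES-CACHE-NOUPDATE')

    def summarize(lines):
        """Count log lines per decision keyword (e.g. how many KNOWN-CHANGED)."""
        counts = collections.Counter()
        for line in lines:
            for kw in KEYWORDS:
                if kw + ':' in line:
                    counts[kw] += 1
                    break
        return counts

    if __name__ == '__main__':
        # usage sketch: pipe captured debug output into this script on stdin
        for kw, n in summarize(sys.stdin).most_common():
            print('%8d  %s' % (n, kw))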
src/borg/testsuite/cache.py (+1 -1)

@@ -257,7 +257,7 @@ class TestAdHocCache:
             repository.get(H(5))
 
     def test_files_cache(self, cache):
-        assert cache.file_known_and_unchanged(bytes(32), None) == (False, None)
+        assert cache.file_known_and_unchanged(b'foo', bytes(32), None) == (False, None)
         assert cache.cache_mode == 'd'
         assert cache.files is None