2
0
Эх сурвалжийг харах

Merge pull request #5706 from ThomasWaldmann/files-cache-debug-logging-1.1

verbose files cache logging via --debug-topic=files_cache, fixes #5659
TW 4 жил өмнө
parent
commit
abe2b6b315

+ 5 - 4
src/borg/archive.py

@@ -1059,13 +1059,14 @@ Utilization of max. archive size: {csize_max:.0%}
                 item.mode = stat.S_IFREG | stat.S_IMODE(item.mode)
                 item.mode = stat.S_IFREG | stat.S_IMODE(item.mode)
             if not hardlinked or hardlink_master:
             if not hardlinked or hardlink_master:
                 if not is_special_file:
                 if not is_special_file:
-                    path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path)))
-                    known, ids = cache.file_known_and_unchanged(path_hash, st)
+                    hashed_path = safe_encode(os.path.join(self.cwd, path))
+                    path_hash = self.key.id_hash(hashed_path)
+                    known, ids = cache.file_known_and_unchanged(hashed_path, path_hash, st)
                 else:
                 else:
                     # in --read-special mode, we may be called for special files.
                     # in --read-special mode, we may be called for special files.
                     # there should be no information in the cache about special files processed in
                     # there should be no information in the cache about special files processed in
                     # read-special mode, but we better play safe as this was wrong in the past:
                     # read-special mode, but we better play safe as this was wrong in the past:
-                    path_hash = None
+                    hashed_path = path_hash = None
                     known, ids = False, None
                     known, ids = False, None
                 chunks = None
                 chunks = None
                 if ids is not None:
                 if ids is not None:
@@ -1091,7 +1092,7 @@ Utilization of max. archive size: {csize_max:.0%}
                     if not is_special_file:
                     if not is_special_file:
                         # we must not memorize special files, because the contents of e.g. a
                         # we must not memorize special files, because the contents of e.g. a
                         # block or char device will change without its mtime/size/inode changing.
                         # block or char device will change without its mtime/size/inode changing.
-                        cache.memorize_file(path_hash, st, [c.id for c in item.chunks])
+                        cache.memorize_file(hashed_path, path_hash, st, [c.id for c in item.chunks])
                 self.stats.nfiles += 1
                 self.stats.nfiles += 1
             item.update(self.stat_ext_attrs(st, path))
             item.update(self.stat_ext_attrs(st, path))
             item.get_size(memorize=True)
             item.get_size(memorize=True)

+ 36 - 7
src/borg/cache.py

@@ -10,6 +10,8 @@ from .logger import create_logger
 
 
 logger = create_logger()
 logger = create_logger()
 
 
+files_cache_logger = create_logger('borg.debug.files_cache')
+
 from .constants import CACHE_README, DEFAULT_FILES_CACHE_MODE
 from .constants import CACHE_README, DEFAULT_FILES_CACHE_MODE
 from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer
 from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer
 from .helpers import Location
 from .helpers import Location
@@ -518,6 +520,7 @@ class LocalCache(CacheStatsMixin):
         self.files = {}
         self.files = {}
         self._newest_cmtime = None
         self._newest_cmtime = None
         logger.debug('Reading files cache ...')
         logger.debug('Reading files cache ...')
+        files_cache_logger.debug("FILES-CACHE-LOAD: starting...")
         msg = None
         msg = None
         try:
         try:
             with IntegrityCheckedFile(path=os.path.join(self.path, files_cache_name()), write=False,
             with IntegrityCheckedFile(path=os.path.join(self.path, files_cache_name()), write=False,
@@ -544,6 +547,7 @@ class LocalCache(CacheStatsMixin):
             logger.warning(msg)
             logger.warning(msg)
             logger.warning('Continuing without files cache - expect lower performance.')
             logger.warning('Continuing without files cache - expect lower performance.')
             self.files = {}
             self.files = {}
+        files_cache_logger.debug("FILES-CACHE-LOAD: finished, %d entries loaded.", len(self.files))
 
 
     def begin_txn(self):
     def begin_txn(self):
         # Initialize transaction snapshot
         # Initialize transaction snapshot
@@ -578,7 +582,9 @@ class LocalCache(CacheStatsMixin):
                 self._newest_cmtime = 2 ** 63 - 1  # nanoseconds, good until y2262
                 self._newest_cmtime = 2 ** 63 - 1  # nanoseconds, good until y2262
             ttl = int(os.environ.get('BORG_FILES_CACHE_TTL', 20))
             ttl = int(os.environ.get('BORG_FILES_CACHE_TTL', 20))
             pi.output('Saving files cache')
             pi.output('Saving files cache')
+            files_cache_logger.debug("FILES-CACHE-SAVE: starting...")
             with IntegrityCheckedFile(path=os.path.join(self.path, files_cache_name()), write=True) as fd:
             with IntegrityCheckedFile(path=os.path.join(self.path, files_cache_name()), write=True) as fd:
+                entry_count = 0
                 for path_hash, item in self.files.items():
                 for path_hash, item in self.files.items():
                     # Only keep files seen in this backup that are older than newest cmtime seen in this backup -
                     # Only keep files seen in this backup that are older than newest cmtime seen in this backup -
                     # this is to avoid issues with filesystem snapshots and cmtime granularity.
                     # this is to avoid issues with filesystem snapshots and cmtime granularity.
@@ -587,6 +593,10 @@ class LocalCache(CacheStatsMixin):
                     if entry.age == 0 and bigint_to_int(entry.cmtime) < self._newest_cmtime or \
                     if entry.age == 0 and bigint_to_int(entry.cmtime) < self._newest_cmtime or \
                        entry.age > 0 and entry.age < ttl:
                        entry.age > 0 and entry.age < ttl:
                         msgpack.pack((path_hash, entry), fd)
                         msgpack.pack((path_hash, entry), fd)
+                        entry_count += 1
+            files_cache_logger.debug("FILES-CACHE-KILL: removed all old entries with age >= TTL [%d]", ttl)
+            files_cache_logger.debug("FILES-CACHE-KILL: removed all current entries with newest cmtime %d", self._newest_cmtime)
+            files_cache_logger.debug("FILES-CACHE-SAVE: finished, %d remaining entries saved.", entry_count)
             self.cache_config.integrity[files_cache_name()] = fd.integrity_data
             self.cache_config.integrity[files_cache_name()] = fd.integrity_data
         pi.output('Saving chunks cache')
         pi.output('Saving chunks cache')
         with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=True) as fd:
         with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=True) as fd:
@@ -937,36 +947,46 @@ class LocalCache(CacheStatsMixin):
         else:
         else:
             stats.update(-size, -csize, False)
             stats.update(-size, -csize, False)
 
 
-    def file_known_and_unchanged(self, path_hash, st):
+    def file_known_and_unchanged(self, hashed_path, path_hash, st):
         """
         """
         Check if we know the file that has this path_hash (know == it is in our files cache) and
         Check if we know the file that has this path_hash (know == it is in our files cache) and
         whether it is unchanged (the size/inode number/cmtime is same for stuff we check in this cache_mode).
         whether it is unchanged (the size/inode number/cmtime is same for stuff we check in this cache_mode).
 
 
-        :param path_hash: hash(file_path), to save some memory in the files cache
+        :param hashed_path: the file's path as we gave it to hash(hashed_path)
+        :param path_hash: hash(hashed_path), to save some memory in the files cache
         :param st: the file's stat() result
         :param st: the file's stat() result
         :return: known, ids (known is True if we have infos about this file in the cache,
         :return: known, ids (known is True if we have infos about this file in the cache,
                              ids is the list of chunk ids IF the file has not changed, otherwise None).
                              ids is the list of chunk ids IF the file has not changed, otherwise None).
         """
         """
+        if not stat.S_ISREG(st.st_mode):
+            return False, None
         cache_mode = self.cache_mode
         cache_mode = self.cache_mode
-        if 'd' in cache_mode or not stat.S_ISREG(st.st_mode):  # d(isabled)
+        if 'd' in cache_mode:  # d(isabled)
+            files_cache_logger.debug('UNKNOWN: files cache disabled')
             return False, None
             return False, None
         # note: r(echunk) does not need the files cache in this method, but the files cache will
         # note: r(echunk) does not need the files cache in this method, but the files cache will
         # be updated and saved to disk to memorize the files. To preserve previous generations in
         # be updated and saved to disk to memorize the files. To preserve previous generations in
         # the cache, this means that it also needs to get loaded from disk first.
         # the cache, this means that it also needs to get loaded from disk first.
         if 'r' in cache_mode:  # r(echunk)
         if 'r' in cache_mode:  # r(echunk)
+            files_cache_logger.debug('UNKNOWN: rechunking enforced')
             return False, None
             return False, None
         entry = self.files.get(path_hash)
         entry = self.files.get(path_hash)
         if not entry:
         if not entry:
+            files_cache_logger.debug('UNKNOWN: no file metadata in cache for: %r', hashed_path)
             return False, None
             return False, None
         # we know the file!
         # we know the file!
         entry = FileCacheEntry(*msgpack.unpackb(entry))
         entry = FileCacheEntry(*msgpack.unpackb(entry))
         if 's' in cache_mode and entry.size != st.st_size:
         if 's' in cache_mode and entry.size != st.st_size:
+            files_cache_logger.debug('KNOWN-CHANGED: file size has changed: %r', hashed_path)
             return True, None
             return True, None
         if 'i' in cache_mode and entry.inode != st.st_ino:
         if 'i' in cache_mode and entry.inode != st.st_ino:
+            files_cache_logger.debug('KNOWN-CHANGED: file inode number has changed: %r', hashed_path)
             return True, None
             return True, None
         if 'c' in cache_mode and bigint_to_int(entry.cmtime) != st.st_ctime_ns:
         if 'c' in cache_mode and bigint_to_int(entry.cmtime) != st.st_ctime_ns:
+            files_cache_logger.debug('KNOWN-CHANGED: file ctime has changed: %r', hashed_path)
             return True, None
             return True, None
         elif 'm' in cache_mode and bigint_to_int(entry.cmtime) != st.st_mtime_ns:
         elif 'm' in cache_mode and bigint_to_int(entry.cmtime) != st.st_mtime_ns:
+            files_cache_logger.debug('KNOWN-CHANGED: file mtime has changed: %r', hashed_path)
             return True, None
             return True, None
         # we ignored the inode number in the comparison above or it is still same.
         # we ignored the inode number in the comparison above or it is still same.
         # if it is still the same, replacing it in the tuple doesn't change it.
         # if it is still the same, replacing it in the tuple doesn't change it.
@@ -979,18 +999,26 @@ class LocalCache(CacheStatsMixin):
         self.files[path_hash] = msgpack.packb(entry._replace(inode=st.st_ino, age=0))
         self.files[path_hash] = msgpack.packb(entry._replace(inode=st.st_ino, age=0))
         return True, entry.chunk_ids
         return True, entry.chunk_ids
 
 
-    def memorize_file(self, path_hash, st, ids):
+    def memorize_file(self, hashed_path, path_hash, st, ids):
+        if not stat.S_ISREG(st.st_mode):
+            return
         cache_mode = self.cache_mode
         cache_mode = self.cache_mode
         # note: r(echunk) modes will update the files cache, d(isabled) mode won't
         # note: r(echunk) modes will update the files cache, d(isabled) mode won't
-        if 'd' in cache_mode or not stat.S_ISREG(st.st_mode):
+        if 'd' in cache_mode:
+            files_cache_logger.debug('FILES-CACHE-NOUPDATE: files cache disabled')
             return
             return
         if 'c' in cache_mode:
         if 'c' in cache_mode:
+            cmtime_type = 'ctime'
             cmtime_ns = safe_ns(st.st_ctime_ns)
             cmtime_ns = safe_ns(st.st_ctime_ns)
         elif 'm' in cache_mode:
         elif 'm' in cache_mode:
+            cmtime_type = 'mtime'
             cmtime_ns = safe_ns(st.st_mtime_ns)
             cmtime_ns = safe_ns(st.st_mtime_ns)
         entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, cmtime=int_to_bigint(cmtime_ns), chunk_ids=ids)
         entry = FileCacheEntry(age=0, inode=st.st_ino, size=st.st_size, cmtime=int_to_bigint(cmtime_ns), chunk_ids=ids)
         self.files[path_hash] = msgpack.packb(entry)
         self.files[path_hash] = msgpack.packb(entry)
         self._newest_cmtime = max(self._newest_cmtime or 0, cmtime_ns)
         self._newest_cmtime = max(self._newest_cmtime or 0, cmtime_ns)
+        files_cache_logger.debug('FILES-CACHE-UPDATE: put %r [has %s] <- %r',
+                                 entry._replace(chunk_ids='[%d entries]' % len(entry.chunk_ids)),
+                                 cmtime_type, hashed_path)
 
 
 
 
 class AdHocCache(CacheStatsMixin):
 class AdHocCache(CacheStatsMixin):
@@ -1031,10 +1059,11 @@ Chunk index:    {0.total_unique_chunks:20d}             unknown"""
     files = None
     files = None
     cache_mode = 'd'
     cache_mode = 'd'
 
 
-    def file_known_and_unchanged(self, path_hash, st):
+    def file_known_and_unchanged(self, hashed_path, path_hash, st):
+        files_cache_logger.debug("UNKNOWN: files cache not implemented")
         return False, None
         return False, None
 
 
-    def memorize_file(self, path_hash, st, ids):
+    def memorize_file(self, hashed_path, path_hash, st, ids):
         pass
         pass
 
 
     def add_chunk(self, id, chunk, stats, overwrite=False, wait=True):
     def add_chunk(self, id, chunk, stats, overwrite=False, wait=True):

+ 1 - 1
src/borg/testsuite/cache.py

@@ -257,7 +257,7 @@ class TestAdHocCache:
             repository.get(H(5))
             repository.get(H(5))
 
 
     def test_files_cache(self, cache):
     def test_files_cache(self, cache):
-        assert cache.file_known_and_unchanged(bytes(32), None) == (False, None)
+        assert cache.file_known_and_unchanged(b'foo', bytes(32), None) == (False, None)
         assert cache.cache_mode == 'd'
         assert cache.cache_mode == 'd'
         assert cache.files is None
         assert cache.files is None