Explorar o código

Merge pull request #3726 from milkey-mouse/files-cache-non-lazy-fp1.2

non-lazy files cache (1.2 forward-port)
TW hai 7 anos
pai
achega
e6abb0804b
Modificáronse 4 ficheiros con 41 adicións e 35 borrados
  1. 3 6
      src/borg/archive.py
  2. 8 6
      src/borg/archiver.py
  3. 29 22
      src/borg/cache.py
  4. 1 1
      src/borg/testsuite/cache.py

+ 3 - 6
src/borg/archive.py

@@ -1131,22 +1131,19 @@ class FilesystemObjectProcessors:
         self.add_item(item, stats=self.stats)
         return 'i'  # stdin
 
-    def process_file(self, path, st, cache, ignore_inode=False, files_cache_mode=DEFAULT_FILES_CACHE_MODE):
+    def process_file(self, path, st, cache):
         with self.create_helper(path, st, None) as (item, status, hardlinked, hardlink_master):  # no status yet
             is_special_file = is_special(st.st_mode)
             if not hardlinked or hardlink_master:
                 if not is_special_file:
                     path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path)))
-                    known, ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode, files_cache_mode)
+                    known, ids = cache.file_known_and_unchanged(path_hash, st)
                 else:
                     # in --read-special mode, we may be called for special files.
                     # there should be no information in the cache about special files processed in
                     # read-special mode, but we better play safe as this was wrong in the past:
                     path_hash = None
                     known, ids = False, None
-                first_run = not cache.files and cache.do_files
-                if first_run:
-                    logger.debug('Processing files ...')
                 chunks = None
                 if ids is not None:
                     # Make sure all ids are available
@@ -1172,7 +1169,7 @@ class FilesystemObjectProcessors:
                     if not is_special_file:
                         # we must not memorize special files, because the contents of e.g. a
                         # block or char device will change without its mtime/size/inode changing.
-                        cache.memorize_file(path_hash, st, [c.id for c in item.chunks], files_cache_mode)
+                        cache.memorize_file(path_hash, st, [c.id for c in item.chunks])
                 self.stats.nfiles += 1
             item.update(self.metadata_collector.stat_attrs(st, path))
             item.get_size(memorize=True)

+ 8 - 6
src/borg/archiver.py

@@ -144,7 +144,9 @@ def with_repository(fake=False, invert_fake=False, create=False, lock=True,
                 if cache:
                     with Cache(repository, kwargs['key'], kwargs['manifest'],
                                do_files=getattr(args, 'cache_files', False),
-                               progress=getattr(args, 'progress', False), lock_wait=self.lock_wait) as cache_:
+                               ignore_inode=getattr(args, 'ignore_inode', False),
+                               progress=getattr(args, 'progress', False), lock_wait=self.lock_wait,
+                               cache_mode=getattr(args, 'files_cache_mode', DEFAULT_FILES_CACHE_MODE)) as cache_:
                         return method(self, args, repository=repository, cache=cache_, **kwargs)
                 else:
                     return method(self, args, repository=repository, **kwargs)
@@ -453,6 +455,7 @@ class Archiver:
                     skip_inodes.add((st.st_ino, st.st_dev))
                 except OSError:
                     pass
+            logger.debug('Processing files ...')
             for path in args.paths:
                 if path == '-':  # stdin
                     path = args.stdin_name
@@ -501,16 +504,15 @@ class Archiver:
 
         self.output_filter = args.output_filter
         self.output_list = args.output_list
-        self.ignore_inode = args.ignore_inode
         self.nobsdflags = args.nobsdflags
         self.exclude_nodump = args.exclude_nodump
-        self.files_cache_mode = args.files_cache_mode
         dry_run = args.dry_run
         t0 = datetime.utcnow()
         t0_monotonic = time.monotonic()
         if not dry_run:
             with Cache(repository, key, manifest, do_files=args.cache_files, progress=args.progress,
-                       lock_wait=self.lock_wait, permit_adhoc_cache=args.no_cache_sync) as cache:
+                       lock_wait=self.lock_wait, permit_adhoc_cache=args.no_cache_sync,
+                       cache_mode=args.files_cache_mode, ignore_inode=args.ignore_inode) as cache:
                 archive = Archive(repository, key, manifest, args.location.archive, cache=cache,
                                   create=True, checkpoint_interval=args.checkpoint_interval,
                                   numeric_owner=args.numeric_owner, noatime=args.noatime, noctime=args.noctime,
@@ -576,7 +578,7 @@ class Archiver:
                         return
             if stat.S_ISREG(st.st_mode):
                 if not dry_run:
-                    status = fso.process_file(path, st, cache, self.ignore_inode, self.files_cache_mode)
+                    status = fso.process_file(path, st, cache)
             elif stat.S_ISDIR(st.st_mode):
                 if recurse:
                     tag_paths = dir_is_tagged(path, exclude_caches, exclude_if_present)
@@ -1346,7 +1348,7 @@ class Archiver:
 
         to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints))
         stats = Statistics()
-        with Cache(repository, key, manifest, do_files=False, lock_wait=self.lock_wait) as cache:
+        with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache:
             list_logger = logging.getLogger('borg.output.list')
             # set up counters for the progress display
             to_delete_len = len(to_delete)

+ 29 - 22
src/borg/cache.py

@@ -359,11 +359,18 @@ class Cache:
             shutil.rmtree(path)
 
     def __new__(cls, repository, key, manifest, path=None, sync=True, do_files=False, warn_if_unencrypted=True,
-                progress=False, lock_wait=None, permit_adhoc_cache=False):
+                progress=False, lock_wait=None, permit_adhoc_cache=False, cache_mode=DEFAULT_FILES_CACHE_MODE,
+                ignore_inode=False):
+
+        if not do_files and 'd' not in cache_mode:
+            cache_mode = 'd'
+        elif ignore_inode and 'i' in cache_mode:
+            cache_mode = ''.join(set(cache_mode) - set('i'))
+
         def local():
             return LocalCache(repository=repository, key=key, manifest=manifest, path=path, sync=sync,
-                              do_files=do_files, warn_if_unencrypted=warn_if_unencrypted, progress=progress,
-                              lock_wait=lock_wait)
+                              warn_if_unencrypted=warn_if_unencrypted, progress=progress,
+                              lock_wait=lock_wait, cache_mode=cache_mode)
 
         def adhoc():
             return AdHocCache(repository=repository, key=key, manifest=manifest)
@@ -421,19 +428,19 @@ class LocalCache(CacheStatsMixin):
     Persistent, local (client-side) cache.
     """
 
-    def __init__(self, repository, key, manifest, path=None, sync=True, do_files=False, warn_if_unencrypted=True,
-                 progress=False, lock_wait=None):
+    def __init__(self, repository, key, manifest, path=None, sync=True, warn_if_unencrypted=True,
+                 progress=False, lock_wait=None, cache_mode=DEFAULT_FILES_CACHE_MODE):
         """
-        :param do_files: use file metadata cache
         :param warn_if_unencrypted: print warning if accessing unknown unencrypted repository
         :param lock_wait: timeout for lock acquisition (None: return immediately if lock unavailable)
         :param sync: do :meth:`.sync`
+        :param cache_mode: what shall be compared in the file stat infos vs. cached stat infos comparison
         """
         self.repository = repository
         self.key = key
         self.manifest = manifest
         self.progress = progress
-        self.do_files = do_files
+        self.cache_mode = cache_mode
         self.timestamp = None
         self.txn_active = False
 
@@ -485,7 +492,10 @@ class LocalCache(CacheStatsMixin):
         with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=False,
                                   integrity_data=self.cache_config.integrity.get('chunks')) as fd:
             self.chunks = ChunkIndex.read(fd)
-        self.files = None
+        if 'd' in self.cache_mode:  # d(isabled)
+            self.files = None
+        else:
+            self._read_files()
 
     def open(self):
         if not os.path.isdir(self.path):
@@ -917,26 +927,22 @@ class LocalCache(CacheStatsMixin):
         else:
             stats.update(-size, -csize, False)
 
-    def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE):
+    def file_known_and_unchanged(self, path_hash, st):
         """
         Check if we know the file that has this path_hash (know == it is in our files cache) and
         whether it is unchanged (the size/inode number/cmtime is same for stuff we check in this cache_mode).
 
         :param path_hash: hash(file_path), to save some memory in the files cache
         :param st: the file's stat() result
-        :param ignore_inode: whether the inode number shall be ignored
-        :param cache_mode: what shall be compared in the file stat infos vs. cached stat infos comparison
         :return: known, ids (known is True if we have infos about this file in the cache,
                              ids is the list of chunk ids IF the file has not changed, otherwise None).
         """
-        if 'd' in cache_mode or not self.do_files or not stat.S_ISREG(st.st_mode):  # d(isabled)
+        cache_mode = self.cache_mode
+        if 'd' in cache_mode or not stat.S_ISREG(st.st_mode):  # d(isabled)
             return False, None
-        if self.files is None:
-            self._read_files()
         # note: r(echunk) does not need the files cache in this method, but the files cache will
         # be updated and saved to disk to memorize the files. To preserve previous generations in
-        # the cache, this means that it also needs to get loaded from disk first, so keep
-        # _read_files() above here.
+        # the cache, this means that it also needs to get loaded from disk first.
         if 'r' in cache_mode:  # r(echunk)
             return False, None
         entry = self.files.get(path_hash)
@@ -946,7 +952,7 @@ class LocalCache(CacheStatsMixin):
         entry = FileCacheEntry(*msgpack.unpackb(entry))
         if 's' in cache_mode and entry.size != st.st_size:
             return True, None
-        if 'i' in cache_mode and not ignore_inode and entry.inode != st.st_ino:
+        if 'i' in cache_mode and entry.inode != st.st_ino:
             return True, None
         if 'c' in cache_mode and bigint_to_int(entry.cmtime) != st.st_ctime_ns:
             return True, None
@@ -963,9 +969,10 @@ class LocalCache(CacheStatsMixin):
         self.files[path_hash] = msgpack.packb(entry._replace(inode=st.st_ino, age=0))
         return True, entry.chunk_ids
 
-    def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE):
+    def memorize_file(self, path_hash, st, ids):
+        cache_mode = self.cache_mode
         # note: r(echunk) modes will update the files cache, d(isabled) mode won't
-        if 'd' in cache_mode or not self.do_files or not stat.S_ISREG(st.st_mode):
+        if 'd' in cache_mode or not stat.S_ISREG(st.st_mode):
             return
         if 'c' in cache_mode:
             cmtime_ns = safe_ns(st.st_ctime_ns)
@@ -1012,12 +1019,12 @@ Chunk index:    {0.total_unique_chunks:20d}             unknown"""
         pass
 
     files = None
-    do_files = False
+    cache_mode = 'd'
 
-    def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE):
+    def file_known_and_unchanged(self, path_hash, st):
         return False, None
 
-    def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE):
+    def memorize_file(self, path_hash, st, ids):
         pass
 
     def add_chunk(self, id, chunk, stats, overwrite=False, wait=True):

+ 1 - 1
src/borg/testsuite/cache.py

@@ -257,7 +257,7 @@ class TestAdHocCache:
 
     def test_files_cache(self, cache):
         assert cache.file_known_and_unchanged(bytes(32), None) == (False, None)
-        assert not cache.do_files
+        assert cache.cache_mode == 'd'
         assert cache.files is None
 
     def test_txn(self, cache):