Browse Source

Merge pull request #3726 from milkey-mouse/files-cache-non-lazy-fp1.2

non-lazy files cache (1.2 forward-port)
TW 7 years ago
parent
commit
e6abb0804b
4 changed files with 41 additions and 35 deletions
  1. +3 -6
      src/borg/archive.py
  2. +8 -6
      src/borg/archiver.py
  3. +29 -22
      src/borg/cache.py
  4. +1 -1
      src/borg/testsuite/cache.py

+ 3 - 6
src/borg/archive.py

@@ -1131,22 +1131,19 @@ class FilesystemObjectProcessors:
         self.add_item(item, stats=self.stats)
         self.add_item(item, stats=self.stats)
         return 'i'  # stdin
         return 'i'  # stdin
 
 
-    def process_file(self, path, st, cache, ignore_inode=False, files_cache_mode=DEFAULT_FILES_CACHE_MODE):
+    def process_file(self, path, st, cache):
         with self.create_helper(path, st, None) as (item, status, hardlinked, hardlink_master):  # no status yet
         with self.create_helper(path, st, None) as (item, status, hardlinked, hardlink_master):  # no status yet
             is_special_file = is_special(st.st_mode)
             is_special_file = is_special(st.st_mode)
             if not hardlinked or hardlink_master:
             if not hardlinked or hardlink_master:
                 if not is_special_file:
                 if not is_special_file:
                     path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path)))
                     path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path)))
-                    known, ids = cache.file_known_and_unchanged(path_hash, st, ignore_inode, files_cache_mode)
+                    known, ids = cache.file_known_and_unchanged(path_hash, st)
                 else:
                 else:
                     # in --read-special mode, we may be called for special files.
                     # in --read-special mode, we may be called for special files.
                     # there should be no information in the cache about special files processed in
                     # there should be no information in the cache about special files processed in
                     # read-special mode, but we better play safe as this was wrong in the past:
                     # read-special mode, but we better play safe as this was wrong in the past:
                     path_hash = None
                     path_hash = None
                     known, ids = False, None
                     known, ids = False, None
-                first_run = not cache.files and cache.do_files
-                if first_run:
-                    logger.debug('Processing files ...')
                 chunks = None
                 chunks = None
                 if ids is not None:
                 if ids is not None:
                     # Make sure all ids are available
                     # Make sure all ids are available
@@ -1172,7 +1169,7 @@ class FilesystemObjectProcessors:
                     if not is_special_file:
                     if not is_special_file:
                         # we must not memorize special files, because the contents of e.g. a
                         # we must not memorize special files, because the contents of e.g. a
                         # block or char device will change without its mtime/size/inode changing.
                         # block or char device will change without its mtime/size/inode changing.
-                        cache.memorize_file(path_hash, st, [c.id for c in item.chunks], files_cache_mode)
+                        cache.memorize_file(path_hash, st, [c.id for c in item.chunks])
                 self.stats.nfiles += 1
                 self.stats.nfiles += 1
             item.update(self.metadata_collector.stat_attrs(st, path))
             item.update(self.metadata_collector.stat_attrs(st, path))
             item.get_size(memorize=True)
             item.get_size(memorize=True)

+ 8 - 6
src/borg/archiver.py

@@ -144,7 +144,9 @@ def with_repository(fake=False, invert_fake=False, create=False, lock=True,
                 if cache:
                 if cache:
                     with Cache(repository, kwargs['key'], kwargs['manifest'],
                     with Cache(repository, kwargs['key'], kwargs['manifest'],
                                do_files=getattr(args, 'cache_files', False),
                                do_files=getattr(args, 'cache_files', False),
-                               progress=getattr(args, 'progress', False), lock_wait=self.lock_wait) as cache_:
+                               ignore_inode=getattr(args, 'ignore_inode', False),
+                               progress=getattr(args, 'progress', False), lock_wait=self.lock_wait,
+                               cache_mode=getattr(args, 'files_cache_mode', DEFAULT_FILES_CACHE_MODE)) as cache_:
                         return method(self, args, repository=repository, cache=cache_, **kwargs)
                         return method(self, args, repository=repository, cache=cache_, **kwargs)
                 else:
                 else:
                     return method(self, args, repository=repository, **kwargs)
                     return method(self, args, repository=repository, **kwargs)
@@ -453,6 +455,7 @@ class Archiver:
                     skip_inodes.add((st.st_ino, st.st_dev))
                     skip_inodes.add((st.st_ino, st.st_dev))
                 except OSError:
                 except OSError:
                     pass
                     pass
+            logger.debug('Processing files ...')
             for path in args.paths:
             for path in args.paths:
                 if path == '-':  # stdin
                 if path == '-':  # stdin
                     path = args.stdin_name
                     path = args.stdin_name
@@ -501,16 +504,15 @@ class Archiver:
 
 
         self.output_filter = args.output_filter
         self.output_filter = args.output_filter
         self.output_list = args.output_list
         self.output_list = args.output_list
-        self.ignore_inode = args.ignore_inode
         self.nobsdflags = args.nobsdflags
         self.nobsdflags = args.nobsdflags
         self.exclude_nodump = args.exclude_nodump
         self.exclude_nodump = args.exclude_nodump
-        self.files_cache_mode = args.files_cache_mode
         dry_run = args.dry_run
         dry_run = args.dry_run
         t0 = datetime.utcnow()
         t0 = datetime.utcnow()
         t0_monotonic = time.monotonic()
         t0_monotonic = time.monotonic()
         if not dry_run:
         if not dry_run:
             with Cache(repository, key, manifest, do_files=args.cache_files, progress=args.progress,
             with Cache(repository, key, manifest, do_files=args.cache_files, progress=args.progress,
-                       lock_wait=self.lock_wait, permit_adhoc_cache=args.no_cache_sync) as cache:
+                       lock_wait=self.lock_wait, permit_adhoc_cache=args.no_cache_sync,
+                       cache_mode=args.files_cache_mode, ignore_inode=args.ignore_inode) as cache:
                 archive = Archive(repository, key, manifest, args.location.archive, cache=cache,
                 archive = Archive(repository, key, manifest, args.location.archive, cache=cache,
                                   create=True, checkpoint_interval=args.checkpoint_interval,
                                   create=True, checkpoint_interval=args.checkpoint_interval,
                                   numeric_owner=args.numeric_owner, noatime=args.noatime, noctime=args.noctime,
                                   numeric_owner=args.numeric_owner, noatime=args.noatime, noctime=args.noctime,
@@ -576,7 +578,7 @@ class Archiver:
                         return
                         return
             if stat.S_ISREG(st.st_mode):
             if stat.S_ISREG(st.st_mode):
                 if not dry_run:
                 if not dry_run:
-                    status = fso.process_file(path, st, cache, self.ignore_inode, self.files_cache_mode)
+                    status = fso.process_file(path, st, cache)
             elif stat.S_ISDIR(st.st_mode):
             elif stat.S_ISDIR(st.st_mode):
                 if recurse:
                 if recurse:
                     tag_paths = dir_is_tagged(path, exclude_caches, exclude_if_present)
                     tag_paths = dir_is_tagged(path, exclude_caches, exclude_if_present)
@@ -1346,7 +1348,7 @@ class Archiver:
 
 
         to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints))
         to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints))
         stats = Statistics()
         stats = Statistics()
-        with Cache(repository, key, manifest, do_files=False, lock_wait=self.lock_wait) as cache:
+        with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache:
             list_logger = logging.getLogger('borg.output.list')
             list_logger = logging.getLogger('borg.output.list')
             # set up counters for the progress display
             # set up counters for the progress display
             to_delete_len = len(to_delete)
             to_delete_len = len(to_delete)

+ 29 - 22
src/borg/cache.py

@@ -359,11 +359,18 @@ class Cache:
             shutil.rmtree(path)
             shutil.rmtree(path)
 
 
     def __new__(cls, repository, key, manifest, path=None, sync=True, do_files=False, warn_if_unencrypted=True,
     def __new__(cls, repository, key, manifest, path=None, sync=True, do_files=False, warn_if_unencrypted=True,
-                progress=False, lock_wait=None, permit_adhoc_cache=False):
+                progress=False, lock_wait=None, permit_adhoc_cache=False, cache_mode=DEFAULT_FILES_CACHE_MODE,
+                ignore_inode=False):
+
+        if not do_files and 'd' not in cache_mode:
+            cache_mode = 'd'
+        elif ignore_inode and 'i' in cache_mode:
+            cache_mode = ''.join(set(cache_mode) - set('i'))
+
         def local():
         def local():
             return LocalCache(repository=repository, key=key, manifest=manifest, path=path, sync=sync,
             return LocalCache(repository=repository, key=key, manifest=manifest, path=path, sync=sync,
-                              do_files=do_files, warn_if_unencrypted=warn_if_unencrypted, progress=progress,
-                              lock_wait=lock_wait)
+                              warn_if_unencrypted=warn_if_unencrypted, progress=progress,
+                              lock_wait=lock_wait, cache_mode=cache_mode)
 
 
         def adhoc():
         def adhoc():
             return AdHocCache(repository=repository, key=key, manifest=manifest)
             return AdHocCache(repository=repository, key=key, manifest=manifest)
@@ -421,19 +428,19 @@ class LocalCache(CacheStatsMixin):
     Persistent, local (client-side) cache.
     Persistent, local (client-side) cache.
     """
     """
 
 
-    def __init__(self, repository, key, manifest, path=None, sync=True, do_files=False, warn_if_unencrypted=True,
-                 progress=False, lock_wait=None):
+    def __init__(self, repository, key, manifest, path=None, sync=True, warn_if_unencrypted=True,
+                 progress=False, lock_wait=None, cache_mode=DEFAULT_FILES_CACHE_MODE):
         """
         """
-        :param do_files: use file metadata cache
         :param warn_if_unencrypted: print warning if accessing unknown unencrypted repository
         :param warn_if_unencrypted: print warning if accessing unknown unencrypted repository
         :param lock_wait: timeout for lock acquisition (None: return immediately if lock unavailable)
         :param lock_wait: timeout for lock acquisition (None: return immediately if lock unavailable)
         :param sync: do :meth:`.sync`
         :param sync: do :meth:`.sync`
+        :param cache_mode: what shall be compared in the file stat infos vs. cached stat infos comparison
         """
         """
         self.repository = repository
         self.repository = repository
         self.key = key
         self.key = key
         self.manifest = manifest
         self.manifest = manifest
         self.progress = progress
         self.progress = progress
-        self.do_files = do_files
+        self.cache_mode = cache_mode
         self.timestamp = None
         self.timestamp = None
         self.txn_active = False
         self.txn_active = False
 
 
@@ -485,7 +492,10 @@ class LocalCache(CacheStatsMixin):
         with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=False,
         with IntegrityCheckedFile(path=os.path.join(self.path, 'chunks'), write=False,
                                   integrity_data=self.cache_config.integrity.get('chunks')) as fd:
                                   integrity_data=self.cache_config.integrity.get('chunks')) as fd:
             self.chunks = ChunkIndex.read(fd)
             self.chunks = ChunkIndex.read(fd)
-        self.files = None
+        if 'd' in self.cache_mode:  # d(isabled)
+            self.files = None
+        else:
+            self._read_files()
 
 
     def open(self):
     def open(self):
         if not os.path.isdir(self.path):
         if not os.path.isdir(self.path):
@@ -917,26 +927,22 @@ class LocalCache(CacheStatsMixin):
         else:
         else:
             stats.update(-size, -csize, False)
             stats.update(-size, -csize, False)
 
 
-    def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE):
+    def file_known_and_unchanged(self, path_hash, st):
         """
         """
         Check if we know the file that has this path_hash (know == it is in our files cache) and
         Check if we know the file that has this path_hash (know == it is in our files cache) and
         whether it is unchanged (the size/inode number/cmtime is same for stuff we check in this cache_mode).
         whether it is unchanged (the size/inode number/cmtime is same for stuff we check in this cache_mode).
 
 
         :param path_hash: hash(file_path), to save some memory in the files cache
         :param path_hash: hash(file_path), to save some memory in the files cache
         :param st: the file's stat() result
         :param st: the file's stat() result
-        :param ignore_inode: whether the inode number shall be ignored
-        :param cache_mode: what shall be compared in the file stat infos vs. cached stat infos comparison
         :return: known, ids (known is True if we have infos about this file in the cache,
         :return: known, ids (known is True if we have infos about this file in the cache,
                              ids is the list of chunk ids IF the file has not changed, otherwise None).
                              ids is the list of chunk ids IF the file has not changed, otherwise None).
         """
         """
-        if 'd' in cache_mode or not self.do_files or not stat.S_ISREG(st.st_mode):  # d(isabled)
+        cache_mode = self.cache_mode
+        if 'd' in cache_mode or not stat.S_ISREG(st.st_mode):  # d(isabled)
             return False, None
             return False, None
-        if self.files is None:
-            self._read_files()
         # note: r(echunk) does not need the files cache in this method, but the files cache will
         # note: r(echunk) does not need the files cache in this method, but the files cache will
         # be updated and saved to disk to memorize the files. To preserve previous generations in
         # be updated and saved to disk to memorize the files. To preserve previous generations in
-        # the cache, this means that it also needs to get loaded from disk first, so keep
-        # _read_files() above here.
+        # the cache, this means that it also needs to get loaded from disk first.
         if 'r' in cache_mode:  # r(echunk)
         if 'r' in cache_mode:  # r(echunk)
             return False, None
             return False, None
         entry = self.files.get(path_hash)
         entry = self.files.get(path_hash)
@@ -946,7 +952,7 @@ class LocalCache(CacheStatsMixin):
         entry = FileCacheEntry(*msgpack.unpackb(entry))
         entry = FileCacheEntry(*msgpack.unpackb(entry))
         if 's' in cache_mode and entry.size != st.st_size:
         if 's' in cache_mode and entry.size != st.st_size:
             return True, None
             return True, None
-        if 'i' in cache_mode and not ignore_inode and entry.inode != st.st_ino:
+        if 'i' in cache_mode and entry.inode != st.st_ino:
             return True, None
             return True, None
         if 'c' in cache_mode and bigint_to_int(entry.cmtime) != st.st_ctime_ns:
         if 'c' in cache_mode and bigint_to_int(entry.cmtime) != st.st_ctime_ns:
             return True, None
             return True, None
@@ -963,9 +969,10 @@ class LocalCache(CacheStatsMixin):
         self.files[path_hash] = msgpack.packb(entry._replace(inode=st.st_ino, age=0))
         self.files[path_hash] = msgpack.packb(entry._replace(inode=st.st_ino, age=0))
         return True, entry.chunk_ids
         return True, entry.chunk_ids
 
 
-    def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE):
+    def memorize_file(self, path_hash, st, ids):
+        cache_mode = self.cache_mode
         # note: r(echunk) modes will update the files cache, d(isabled) mode won't
         # note: r(echunk) modes will update the files cache, d(isabled) mode won't
-        if 'd' in cache_mode or not self.do_files or not stat.S_ISREG(st.st_mode):
+        if 'd' in cache_mode or not stat.S_ISREG(st.st_mode):
             return
             return
         if 'c' in cache_mode:
         if 'c' in cache_mode:
             cmtime_ns = safe_ns(st.st_ctime_ns)
             cmtime_ns = safe_ns(st.st_ctime_ns)
@@ -1012,12 +1019,12 @@ Chunk index:    {0.total_unique_chunks:20d}             unknown"""
         pass
         pass
 
 
     files = None
     files = None
-    do_files = False
+    cache_mode = 'd'
 
 
-    def file_known_and_unchanged(self, path_hash, st, ignore_inode=False, cache_mode=DEFAULT_FILES_CACHE_MODE):
+    def file_known_and_unchanged(self, path_hash, st):
         return False, None
         return False, None
 
 
-    def memorize_file(self, path_hash, st, ids, cache_mode=DEFAULT_FILES_CACHE_MODE):
+    def memorize_file(self, path_hash, st, ids):
         pass
         pass
 
 
     def add_chunk(self, id, chunk, stats, overwrite=False, wait=True):
     def add_chunk(self, id, chunk, stats, overwrite=False, wait=True):

+ 1 - 1
src/borg/testsuite/cache.py

@@ -257,7 +257,7 @@ class TestAdHocCache:
 
 
     def test_files_cache(self, cache):
     def test_files_cache(self, cache):
         assert cache.file_known_and_unchanged(bytes(32), None) == (False, None)
         assert cache.file_known_and_unchanged(bytes(32), None) == (False, None)
-        assert not cache.do_files
+        assert cache.cache_mode == 'd'
         assert cache.files is None
         assert cache.files is None
 
 
     def test_txn(self, cache):
     def test_txn(self, cache):