Merge pull request #5078 from ThomasWaldmann/fuse-linear-read-large-file

Fix FUSE low linear read speed on large files, fixes #5067
TW 5 years ago
parent
commit
f7729e6a6b
1 file changed, 36 insertions(+), 2 deletions(-)
  1. src/borg/fuse.py  +36 -2

src/borg/fuse.py  +36 -2

@@ -41,6 +41,12 @@ else:
         llfuse.main(single=True)
         return None
 
+# size of some LRUCaches (1 element per simultaneously open file)
+# note: _inode_cache entries can be rather large - Item.chunks can be a long list!
+#       also, reading many files simultaneously should be avoided anyway.
+#       thus, do not set FILES to a high value.
+FILES = 4
+
 
 class ItemCache:
     """
@@ -234,6 +240,8 @@ class FuseOperations(llfuse.Operations):
         # in the archives. For example archive directories or intermediate directories
         # not contained in archives.
         self.items = {}
+        # cache up to <FILES> Items
+        self._inode_cache = LRUCache(capacity=FILES, dispose=lambda _: None)
         # _inode_count is the current count of synthetic inodes, i.e. those in self.items
         self._inode_count = 0
         # Maps inode numbers to the inode number of the parent
@@ -249,6 +257,7 @@ class FuseOperations(llfuse.Operations):
         data_cache_capacity = int(os.environ.get('BORG_MOUNT_DATA_CACHE_ENTRIES', os.cpu_count() or 1))
         logger.debug('mount data cache capacity: %d chunks', data_cache_capacity)
         self.data_cache = LRUCache(capacity=data_cache_capacity, dispose=lambda _: None)
+        self._last_pos = LRUCache(capacity=FILES, dispose=lambda _: None)
 
     def _create_filesystem(self):
         self._create_dir(parent=1)  # first call, create root dir (inode == 1)
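
Both _inode_cache and _last_pos are borg LRUCaches with a dispose callback (a no-op lambda here, since evicted entries need no cleanup). Here is a minimal sketch of the interface this diff relies on; the real implementation lives in src/borg/lrucache.py and may differ in detail. The point that matters for the read() hunk below is that __setitem__ is insert-only, so existing entries are updated via upd():

    from collections import OrderedDict

    class LRUCache:
        """minimal sketch of the assumed interface, not the real src/borg/lrucache.py"""
        def __init__(self, capacity, dispose):
            self._cache = OrderedDict()
            self._capacity = capacity
            self._dispose = dispose  # called on entries evicted to make room

        def __setitem__(self, key, value):
            assert key not in self._cache, "use upd() to update an existing key"
            while len(self._cache) >= self._capacity:
                _, evicted = self._cache.popitem(last=False)  # drop the oldest entry
                self._dispose(evicted)
            self._cache[key] = value

        def upd(self, key, value):
            self._cache[key] = value  # update in place, no eviction, no dispose

        def get(self, key, default=None):
            return self._cache.get(key, default)

        def __contains__(self, key):
            return key in self._cache
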
@@ -512,10 +521,17 @@ class FuseOperations(llfuse.Operations):
         return stat_
 
     def get_item(self, inode):
+        item = self._inode_cache.get(inode)
+        if item is not None:
+            return item
         try:
+            # this is a cheap get-from-dictionary operation, no need to cache the result.
             return self.items[inode]
         except KeyError:
-            return self.cache.get(inode)
+            # while self.cache does some internal caching, it still has quite some overhead, so we cache the result here.
+            item = self.cache.get(inode)
+            self._inode_cache[inode] = item
+            return item
 
     def _find_inode(self, path, prefix=[]):
         segments = prefix + path.split(b'/')
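
The caching matters because get_item() sits on the hot path: the kernel splits a big sequential read into many small FUSE requests, and read() runs once per request, looking up the Item each time. Illustrative numbers (the 128 KiB request size is a common FUSE default, not something stated in this PR):

    GiB = 1024 ** 3
    KiB = 1024
    request_size = 128 * KiB    # assumed; the actual size depends on kernel and mount options
    print(GiB // request_size)  # -> 8192 read() calls, hence 8192 get_item() lookups, per GiB
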
@@ -603,9 +619,23 @@ class FuseOperations(llfuse.Operations):
     def read(self, fh, offset, size):
         parts = []
         item = self.get_item(fh)
-        for id, s, csize in item.chunks:
+
+        # optimize for linear reads:
+        # we cache the chunk number and the in-file offset of the chunk in _last_pos[fh]
+        chunk_no, chunk_offset = self._last_pos.get(fh, (0, 0))
+        if chunk_offset > offset:
+            # this is not a linear read, so we lost track and need to start from the beginning again...
+            chunk_no, chunk_offset = (0, 0)
+
+        offset -= chunk_offset
+        chunks = item.chunks
+        # note: using index iteration to avoid frequently copying big (sub)lists by slicing
+        for idx in range(chunk_no, len(chunks)):
+            id, s, csize = chunks[idx]
             if s < offset:
                 offset -= s
+                chunk_offset += s
+                chunk_no += 1
                 continue
             n = min(size, s - offset)
             if id in self.data_cache:
@@ -622,6 +652,10 @@ class FuseOperations(llfuse.Operations):
             offset = 0
             size -= n
             if not size:
+                if fh in self._last_pos:
+                    self._last_pos.upd(fh, (chunk_no, chunk_offset))
+                else:
+                    self._last_pos[fh] = (chunk_no, chunk_offset)
                 break
         return b''.join(parts)
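
Before this change, every read() call scanned item.chunks from index 0 until it reached the requested offset, so a full sequential read of an n-chunk file did O(n²) work just walking the chunk list; that is the slowdown reported in #5067. The cached (chunk_no, chunk_offset) lets each call resume where the previous one stopped. A stand-alone toy model of that resume logic (illustration only, not borg code; a plain dict stands in for the _last_pos LRUCache):

    def linear_read(chunk_sizes, last_pos, fh, offset, size):
        """return (chunk_index, offset_in_chunk, length) pieces covering the read,
        resuming from last_pos[fh] like read() above does"""
        chunk_no, chunk_offset = last_pos.get(fh, (0, 0))
        if chunk_offset > offset:
            chunk_no, chunk_offset = 0, 0  # backwards seek: lost track, rescan from the start
        offset -= chunk_offset
        pieces = []
        for idx in range(chunk_no, len(chunk_sizes)):
            s = chunk_sizes[idx]
            if s < offset:
                offset -= s
                chunk_offset += s
                chunk_no += 1
                continue
            n = min(size, s - offset)
            pieces.append((idx, offset, n))
            offset = 0
            size -= n
            if not size:
                last_pos[fh] = (chunk_no, chunk_offset)
                break
        return pieces

    # two sequential 4-byte reads over three 10-byte chunks; the second resumes at chunk 1
    last_pos = {}
    assert linear_read([10, 10, 10], last_pos, fh=1, offset=0, size=4) == [(0, 0, 4)]
    assert linear_read([10, 10, 10], last_pos, fh=1, offset=12, size=4) == [(1, 2, 4)]
    assert last_pos[1] == (1, 10)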