@@ -2,6 +2,7 @@ import errno
 import io
 import os
 import stat
+import struct
 import sys
 import tempfile
 import time
@@ -35,22 +36,168 @@ else:


 class ItemCache:
-    def __init__(self):
-        self.fd = tempfile.TemporaryFile(prefix='borg-tmp')
+    """
+    This is the "meat" of the file system's metadata storage.
+
+    This class generates inode numbers that efficiently index items in archives,
+    and retrieves items from these inode numbers.
+    """
+
+    # 2 MiB holds approximately 230000 items (depending on the average number of items per metadata chunk).
+    #
+    # Since growing a bytearray has to copy it, repeated growth converges to O(n^2); however,
+    # this is not yet relevant due to the swiftness of copying memory. If it becomes an issue,
+    # use an anonymous mmap and just resize that (or, on 64 bit, make it so big that you never need
+    # to resize it in the first place; that's free).
+    GROW_META_BY = 2 * 1024 * 1024
+
+    indirect_entry_struct = struct.Struct('=cII')
+    assert indirect_entry_struct.size == 9
+
+    def __init__(self, decrypted_repository):
+        self.decrypted_repository = decrypted_repository
+        # self.meta, the "meta-array", is a densely packed array of metadata about where items can be found.
+        # It is indexed by the inode number minus self.offset. (This is, in a way, eerily similar to how
+        # the first unices did this.)
+        # The meta-array contains chunk IDs and item entries (described in iter_archive_items).
+        # The chunk IDs are referenced by item entries through relative offsets,
+        # which are bounded by the metadata chunk size.
+        self.meta = bytearray()
+        # The current write offset in self.meta
+        self.write_offset = 0
+
+        # Offset added to meta-indices, resulting in inodes,
+        # or subtracted from inodes, resulting in meta-indices.
+        # XXX: Merge FuseOperations.items and ItemCache to avoid
+        # this implicit limitation / hack (on the number of synthetic inodes; degenerate
+        # cases can inflate their number far beyond the number of archives).
         self.offset = 1000000

-    def add(self, item):
-        pos = self.fd.seek(0, io.SEEK_END)
-        self.fd.write(msgpack.packb(item.as_dict()))
-        return pos + self.offset
+        # A temporary file that contains direct items, i.e. items cached directly in this layer.
+        # These are items that span more than one chunk and thus cannot be efficiently cached
+        # by the object cache (self.decrypted_repository), since that would require variable-length
+        # structures; possible, but not worth the effort, see iter_archive_items.
+        self.fd = tempfile.TemporaryFile(prefix='borg-tmp')
+
+        # A small LRU cache for chunks requested by ItemCache.get() from the object cache.
+        # It significantly speeds up directory traversal and similar operations, which
+        # tend to re-read the same chunks over and over.
+        # The capacity is kept low because increasing it does not provide any significant advantage,
+        # but makes LRUCache's quadratic behaviour noticeable and consumes more memory.
+        self.chunks = LRUCache(capacity=10, dispose=lambda _: None)
+
+        # Instrumentation
+        # Count of indirect items, i.e. items whose data is cached in the object cache, not directly in this cache
+        self.indirect_items = 0
+        # Count of direct items, i.e. items whose data is in self.fd
+        self.direct_items = 0

     def get(self, inode):
         offset = inode - self.offset
         if offset < 0:
             raise ValueError('ItemCache.get() called with an invalid inode number')
-        self.fd.seek(offset, io.SEEK_SET)
-        item = next(msgpack.Unpacker(self.fd, read_size=1024))
-        return Item(internal_dict=item)
+        if self.meta[offset] == ord(b'I'):
+            _, chunk_id_relative_offset, chunk_offset = self.indirect_entry_struct.unpack_from(self.meta, offset)
+            chunk_id_offset = offset - chunk_id_relative_offset
+            # bytearray slices are bytearrays as well, so explicitly convert to bytes()
+            chunk_id = bytes(self.meta[chunk_id_offset:chunk_id_offset + 32])
+            chunk = self.chunks.get(chunk_id)
+            if not chunk:
+                csize, chunk = next(self.decrypted_repository.get_many([chunk_id]))
+                self.chunks[chunk_id] = chunk
+            data = memoryview(chunk)[chunk_offset:]
+            unpacker = msgpack.Unpacker()
+            unpacker.feed(data)
+            return Item(internal_dict=next(unpacker))
+        elif self.meta[offset] == ord(b'S'):
+            fd_offset = int.from_bytes(self.meta[offset + 1:offset + 9], 'little')
+            self.fd.seek(fd_offset, io.SEEK_SET)
+            return Item(internal_dict=next(msgpack.Unpacker(self.fd, read_size=1024)))
+        else:
+            raise ValueError('Invalid entry type in self.meta')
+
+    def iter_archive_items(self, archive_item_ids):
+        unpacker = msgpack.Unpacker()
+
+        # Current offset in the metadata stream, which consists of all metadata chunks glued together
+        stream_offset = 0
+        # Offset of the current chunk in the metadata stream
+        chunk_begin = 0
+        # Length of the chunk preceding the current chunk
+        last_chunk_length = 0
+        msgpacked_bytes = b''
+
+        write_offset = self.write_offset
+        meta = self.meta
+        pack_indirect_into = self.indirect_entry_struct.pack_into
+
+        def write_bytes(append_msgpacked_bytes):
+            # XXX: Future versions of msgpack include an Unpacker.tell() method that provides this for free.
+            nonlocal msgpacked_bytes
+            nonlocal stream_offset
+            msgpacked_bytes += append_msgpacked_bytes
+            stream_offset += len(append_msgpacked_bytes)
+
+        for key, (csize, data) in zip(archive_item_ids, self.decrypted_repository.get_many(archive_item_ids)):
+            # Store the chunk ID in the meta-array
+            if write_offset + 32 >= len(meta):
+                self.meta = meta = meta + bytes(self.GROW_META_BY)
+            meta[write_offset:write_offset + 32] = key
+            current_id_offset = write_offset
+            write_offset += 32
+
+            # The chunk boundaries cannot be tracked through write_bytes, because the unpack state machine
+            # *can* and *will* consume partial items, so calls to write_bytes are unrelated to chunk boundaries.
+            chunk_begin += last_chunk_length
+            last_chunk_length = len(data)
+
+            unpacker.feed(data)
+            while True:
+                try:
+                    item = unpacker.unpack(write_bytes)
+                except msgpack.OutOfData:
+                    # Need more data, feed the next chunk
+                    break
+
+                current_item = msgpacked_bytes
+                current_item_length = len(current_item)
+                current_spans_chunks = stream_offset - current_item_length < chunk_begin
+                msgpacked_bytes = b''
+
+                if write_offset + 9 >= len(meta):
+                    self.meta = meta = meta + bytes(self.GROW_META_BY)
+
+                # Item entries in the meta-array come in two different flavours, both nine bytes long.
+                # (1) For items that span chunks:
+                #
+                #     'S' + 8 byte offset into the self.fd file, where the msgpacked item starts.
+                #
+                # (2) For items that are completely contained in one chunk, which usually is the great
+                #     majority (about 700:1 for system backups):
+                #
+                #     'I' + 4 byte offset where the chunk ID is + 4 byte offset in the chunk
+                #     where the msgpacked item starts.
+                #
+                # The chunk ID offset is the number of bytes _back_ from the start of the entry, i.e.:
+                #
+                #     |Chunk ID| ....          |S1234abcd|
+                #      ^------ offset ----------^
+
+                if current_spans_chunks:
+                    pos = self.fd.seek(0, io.SEEK_END)
+                    self.fd.write(current_item)
+                    meta[write_offset:write_offset + 9] = b'S' + pos.to_bytes(8, 'little')
+                    self.direct_items += 1
+                else:
+                    item_offset = stream_offset - current_item_length - chunk_begin
+                    pack_indirect_into(meta, write_offset, b'I', write_offset - current_id_offset, item_offset)
+                    self.indirect_items += 1
+                inode = write_offset + self.offset
+                write_offset += 9
+
+                yield inode, Item(internal_dict=item)
+
+        self.write_offset = write_offset


 class FuseOperations(llfuse.Operations):
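
To make the meta-array layout concrete, here is a small standalone sketch of the 9-byte `'I'` entry encoding and its decoding. Only `indirect_entry_struct` mirrors the patch; the buffer, chunk ID, and offsets are invented for illustration.

```python
import struct

# Mirrors ItemCache.indirect_entry_struct: 1-byte tag + two 32-bit offsets = 9 bytes.
indirect_entry_struct = struct.Struct('=cII')
assert indirect_entry_struct.size == 9

meta = bytearray(64)                  # stand-in for ItemCache.meta
chunk_id = bytes(range(32))           # stand-in for a 32-byte chunk ID
current_id_offset = 0
meta[current_id_offset:current_id_offset + 32] = chunk_id   # chunk ID is written first ...
write_offset = 32                                           # ... and item entries follow it

# Indirect entry: tag 'I', bytes *back* to the chunk ID, offset of the item in the chunk.
indirect_entry_struct.pack_into(meta, write_offset, b'I',
                                write_offset - current_id_offset, 1234)

# Decoding, as ItemCache.get() does it for an inode -> item lookup:
tag, id_back, chunk_offset = indirect_entry_struct.unpack_from(meta, write_offset)
assert tag == b'I'
recovered_id = bytes(meta[write_offset - id_back:write_offset - id_back + 32])
assert recovered_id == chunk_id and chunk_offset == 1234
```

Because the relative offset is bounded by the metadata chunk size, a 4-byte field suffices, which is what keeps the entries this compact.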
@@ -60,22 +207,30 @@ class FuseOperations(llfuse.Operations):
     allow_damaged_files = False
     versions = False

-    def __init__(self, key, repository, manifest, args, cached_repo):
+    def __init__(self, key, repository, manifest, args, decrypted_repository):
         super().__init__()
         self.repository_uncached = repository
-        self.repository = cached_repo
+        self.decrypted_repository = decrypted_repository
         self.args = args
         self.manifest = manifest
         self.key = key
-        self._inode_count = 0
+        # Maps inode numbers to Item instances. This is used for synthetic inodes, i.e.
+        # file-system objects that are made up by FuseOperations and are not contained in
+        # the archives, for example archive directories or intermediate directories.
         self.items = {}
+        # _inode_count is the current count of synthetic inodes, i.e. those in self.items
+        self._inode_count = 0
+        # Maps inode numbers to the inode number of the parent
         self.parent = {}
+        # Maps inode numbers to a dictionary mapping byte directory entry names to their inode numbers,
+        # i.e. this contains all dirents of everything that is mounted. (It becomes really big.)
         self.contents = defaultdict(dict)
         self.default_uid = os.getuid()
         self.default_gid = os.getgid()
         self.default_dir = Item(mode=0o40755, mtime=int(time.time() * 1e9), uid=self.default_uid, gid=self.default_gid)
         self.pending_archives = {}
-        self.cache = ItemCache()
+        self.cache = ItemCache(decrypted_repository)
         data_cache_capacity = int(os.environ.get('BORG_MOUNT_DATA_CACHE_ENTRIES', os.cpu_count() or 1))
         logger.debug('mount data cache capacity: %d chunks', data_cache_capacity)
         self.data_cache = LRUCache(capacity=data_cache_capacity, dispose=lambda _: None)
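
The `parent` and `contents` maps above are the entire directory structure of the mount. A toy sketch (inode numbers and names invented; not part of the patch) of how lookups and reverse path walks operate on them:

```python
# contents: parent inode -> {dirent name: inode};  parent: inode -> parent inode.
contents = {1: {b'archive': 2}, 2: {b'home': 3}, 3: {b'file.txt': 1000042}}
parent = {2: 1, 3: 2, 1000042: 3}
names = {2: b'archive', 3: b'home', 1000042: b'file.txt'}

def lookup(parent_inode, name):
    # A FUSE lookup boils down to one dict access, like self.contents[parent][name].
    return contents[parent_inode][name]

def rebuild_path(inode):
    # Walk the parent map back to the root (inode 1) to rebuild a path.
    parts = []
    while inode != 1:
        parts.append(names[inode])
        inode = parent[inode]
    return b'/'.join(reversed(parts))

assert lookup(1, b'archive') == 2
assert rebuild_path(1000042) == b'archive/home/file.txt'
```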
@@ -97,18 +252,19 @@ class FuseOperations(llfuse.Operations):
         self.pending_archives[archive_inode] = archive_name

     def sig_info_handler(self, sig_no, stack):
-        logger.debug('fuse: %d inodes, %d synth inodes, %d edges (%s)',
-                     self._inode_count, len(self.items), len(self.parent),
+        logger.debug('fuse: %d synth inodes, %d edges (%s)',
+                     self._inode_count, len(self.parent),
                      # getsizeof is the size of the dict itself; key and value are two small-ish integers,
                      # which are shared due to code structure (this has been verified).
                      format_file_size(sys.getsizeof(self.parent) + len(self.parent) * sys.getsizeof(self._inode_count)))
         logger.debug('fuse: %d pending archives', len(self.pending_archives))
-        logger.debug('fuse: ItemCache %d entries, %s',
-                     self._inode_count - len(self.items),
+        logger.debug('fuse: ItemCache %d entries (%d direct, %d indirect), meta-array size %s, direct items size %s',
+                     self.cache.direct_items + self.cache.indirect_items, self.cache.direct_items, self.cache.indirect_items,
+                     format_file_size(sys.getsizeof(self.cache.meta)),
                      format_file_size(os.stat(self.cache.fd.fileno()).st_size))
         logger.debug('fuse: data cache: %d/%d entries, %s', len(self.data_cache.items()), self.data_cache._capacity,
                      format_file_size(sum(len(chunk) for key, chunk in self.data_cache.items())))
-        self.repository.log_instrumentation()
+        self.decrypted_repository.log_instrumentation()

     def mount(self, mountpoint, mount_options, foreground=False):
         """Mount filesystem on *mountpoint* with *mount_options*."""
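
The edge-map figure logged above is only an estimate: dict overhead plus one small integer object per edge, since keys and values are shared. A quick sanity check of that formula with made-up data (assuming CPython's `getsizeof` semantics):

```python
import sys

# 100000 fake edges: inode -> parent inode, all small-ish integers.
parent = {inode: inode - 1 for inode in range(2, 100002)}
# Dict overhead plus one integer object per edge; keys/values are assumed
# shared, so they are deliberately not counted once per entry.
estimate = sys.getsizeof(parent) + len(parent) * sys.getsizeof(2 ** 20)
print('~%.1f MiB for %d edges' % (estimate / 2 ** 20, len(parent)))
```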
@@ -158,36 +314,31 @@ class FuseOperations(llfuse.Operations):
         """
         self.file_versions = {}  # for versions mode: original path -> version
         t0 = time.perf_counter()
-        unpacker = msgpack.Unpacker()
         archive = Archive(self.repository_uncached, self.key, self.manifest, archive_name,
                           consider_part_files=self.args.consider_part_files)
-        for key, chunk in zip(archive.metadata.items, self.repository.get_many(archive.metadata.items)):
-            data = self.key.decrypt(key, chunk)
-            unpacker.feed(data)
-            for item in unpacker:
-                item = Item(internal_dict=item)
-                path = os.fsencode(os.path.normpath(item.path))
-                is_dir = stat.S_ISDIR(item.mode)
-                if is_dir:
-                    try:
-                        # This can happen if an archive was created with a command line like
-                        # $ borg create ... dir1/file dir1
-                        # In this case the code below will have created a default_dir inode for dir1 already.
-                        inode = self._find_inode(path, prefix)
-                    except KeyError:
-                        pass
-                    else:
-                        self.items[inode] = item
-                        continue
-                segments = prefix + path.split(b'/')
-                parent = 1
-                for segment in segments[:-1]:
-                    parent = self.process_inner(segment, parent)
-                self.process_leaf(segments[-1], item, parent, prefix, is_dir)
+        for item_inode, item in self.cache.iter_archive_items(archive.metadata.items):
+            path = os.fsencode(item.path)
+            is_dir = stat.S_ISDIR(item.mode)
+            if is_dir:
+                try:
+                    # This can happen if an archive was created with a command line like
+                    # $ borg create ... dir1/file dir1
+                    # In this case the code below will have created a default_dir inode for dir1 already.
+                    inode = self._find_inode(path, prefix)
+                except KeyError:
+                    pass
+                else:
+                    self.items[inode] = item
+                    continue
+            segments = prefix + path.split(b'/')
+            parent = 1
+            for segment in segments[:-1]:
+                parent = self.process_inner(segment, parent)
+            self.process_leaf(segments[-1], item, parent, prefix, is_dir, item_inode)
         duration = time.perf_counter() - t0
         logger.debug('fuse: process_archive completed in %.1f s for archive %s', duration, archive.name)

-    def process_leaf(self, name, item, parent, prefix, is_dir):
+    def process_leaf(self, name, item, parent, prefix, is_dir, item_inode):
         def file_version(item):
             if 'chunks' in item:
                 ident = 0
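
The `write_bytes` callback is the crux of `iter_archive_items`: it lets the loop learn exactly how many stream bytes each unpacked item consumed, even when an item straddles two chunks. A reduced sketch of that control flow, assuming msgpack-python < 0.6, where `Unpacker.unpack()` still accepts this callback (later versions replace it with `Unpacker.tell()`):

```python
import msgpack

# Two "metadata chunks"; the second item deliberately spans the chunk boundary.
packed = msgpack.packb({'path': 'a'}) + msgpack.packb({'path': 'bb'})
chunks = [packed[:-3], packed[-3:]]

unpacker = msgpack.Unpacker()
msgpacked_bytes = b''

def write_bytes(data):
    # Receives exactly the bytes the unpack state machine consumed; this may
    # happen in several calls, and across chunk boundaries for spanning items.
    global msgpacked_bytes
    msgpacked_bytes += data

for data in chunks:
    unpacker.feed(data)
    while True:
        try:
            item = unpacker.unpack(write_bytes)
        except msgpack.OutOfData:
            break                        # item incomplete, feed the next chunk
        print(item, '->', len(msgpacked_bytes), 'bytes of msgpack')
        msgpacked_bytes = b''            # reset per completed item
```

Note that `msgpacked_bytes` is intentionally not cleared on `OutOfData`: the partial bytes already consumed belong to the item that completes after the next chunk is fed, which is how spanning items end up intact in `self.fd`.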
@@ -208,14 +359,14 @@ class FuseOperations(llfuse.Operations):
             if version is not None:
                 # regular file, with contents - maybe a hardlink master
                 name = make_versioned_name(name, version)
-                path = os.fsencode(os.path.normpath(item.path))
+                path = os.fsencode(item.path)
                 self.file_versions[path] = version

         path = item.path
         del item.path  # save some space
         if 'source' in item and hardlinkable(item.mode):
             # a hardlink, no contents, <source> is the hardlink master
-            source = os.fsencode(os.path.normpath(item.source))
+            source = os.fsencode(item.source)
             if self.versions:
                 # adjust source name with version
                 version = self.file_versions[source]
@@ -230,7 +381,7 @@ class FuseOperations(llfuse.Operations):
             item.nlink = item.get('nlink', 1) + 1
             self.items[inode] = item
         else:
-            inode = self.cache.add(item)
+            inode = item_inode
         self.parent[inode] = parent
         if name:
             self.contents[parent][name] = inode
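
Taken together, the two inode namespaces meet at `ItemCache.offset`: synthetic inodes handed out by `FuseOperations` count up from 1, while archive items live at meta-index plus offset. A worked example of the arithmetic, with invented numbers:

```python
offset = 1000000                      # ItemCache.offset

# iter_archive_items(): an item entry written at meta-index 41
# is announced to the file system as inode 1000041.
write_offset = 41
inode = write_offset + offset
assert inode == 1000041

# get(): the same inode maps straight back to the meta-array index.
assert inode - offset == write_offset

# Everything below offset (e.g. an archive directory) is a synthetic
# inode owned by FuseOperations.items instead.
synthetic_inode = 42
assert synthetic_inode < offset
```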