瀏覽代碼

Merge pull request #2778 from enkore/f/fuse-versions-numbering

fuse: versions view, linear numbering by archive time
TW 8 年之前
父節點
當前提交
9bd522819e
共有 6 個文件被更改,包括 93 次插入和 20 次刪除
  1. 8 0
      src/borg/_hashindex.c
  2. 20 1
      src/borg/crypto/low_level.pyx
  3. 19 10
      src/borg/fuse.py
  4. 41 4
      src/borg/hashindex.pyx
  5. 2 2
      src/borg/helpers.py
  6. 3 3
      src/borg/testsuite/archiver.py

+ 8 - 0
src/borg/_hashindex.c

@@ -695,3 +695,11 @@ hashindex_size(HashIndex *index)
 {
     return sizeof(HashHeader) + index->num_buckets * index->bucket_size;
 }
+
+/*
+ * Used by the FuseVersionsIndex.
+ */
+typedef struct {
+    uint32_t version;
+    char hash[16];
+} __attribute__((__packed__)) FuseVersionsElement;

+ 20 - 1
src/borg/crypto/low_level.pyx

@@ -8,7 +8,7 @@ from libc.stdlib cimport malloc, free
 from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release
 from cpython.bytes cimport PyBytes_FromStringAndSize
 
-API_VERSION = '1.1_01'
+API_VERSION = '1.1_02'
 
 
 cdef extern from "../algorithms/blake2-libselect.h":
@@ -252,6 +252,25 @@ def blake2b_256(key, data):
     return PyBytes_FromStringAndSize(<char*> &md[0], 32)
 
 
+def blake2b_128(data):
+    cdef blake2b_state state
+    cdef unsigned char md[16]
+    cdef unsigned char *data_ptr = data
+
+    if blake2b_init(&state, 16) == -1:
+        raise Exception('blake2b_init() failed')
+
+    rc = blake2b_update(&state, data_ptr, len(data))
+    if rc == -1:
+        raise Exception('blake2b_update() failed')
+
+    rc = blake2b_final(&state, &md[0], 16)
+    if rc == -1:
+        raise Exception('blake2b_final() failed')
+
+    return PyBytes_FromStringAndSize(<char*> &md[0], 16)
+
+
 def hkdf_hmac_sha512(ikm, salt, info, output_length):
     """
     Compute HKDF-HMAC-SHA512 with input key material *ikm*, *salt* and *info* to produce *output_length* bytes.

+ 19 - 10
src/borg/fuse.py

@@ -9,7 +9,6 @@ import time
 from collections import defaultdict
 from signal import SIGINT
 from distutils.version import LooseVersion
-from zlib import adler32
 
 import llfuse
 import msgpack
@@ -17,7 +16,9 @@ import msgpack
 from .logger import create_logger
 logger = create_logger()
 
+from .crypto.low_level import blake2b_128
 from .archive import Archive
+from .hashindex import FuseVersionsIndex
 from .helpers import daemonize, hardlinkable, signal_handler, format_file_size
 from .item import Item
 from .lrucache import LRUCache
@@ -240,13 +241,14 @@ class FuseOperations(llfuse.Operations):
         if self.args.location.archive:
             self.process_archive(self.args.location.archive)
         else:
+            self.versions_index = FuseVersionsIndex()
             archive_names = (x.name for x in self.manifest.archives.list_considering(self.args))
             for archive_name in archive_names:
                 if self.versions:
                     # process archives immediately
                     self.process_archive(archive_name)
                 else:
-                    # lazy load archives, create archive placeholder inode
+                    # lazily load archives, create archive placeholder inode
                     archive_inode = self._create_dir(parent=1)
                     self.contents[1][os.fsencode(archive_name)] = archive_inode
                     self.pending_archives[archive_inode] = archive_name
@@ -339,12 +341,19 @@ class FuseOperations(llfuse.Operations):
         logger.debug('fuse: process_archive completed in %.1f s for archive %s', duration, archive.name)
 
     def process_leaf(self, name, item, parent, prefix, is_dir, item_inode):
-        def file_version(item):
+        def file_version(item, path):
             if 'chunks' in item:
-                ident = 0
-                for chunkid, _, _ in item.chunks:
-                    ident = adler32(chunkid, ident)
-                return ident
+                file_id = blake2b_128(path)
+                current_version, previous_id = self.versions_index.get(file_id, (0, None))
+
+                chunk_ids = [chunk_id for chunk_id, _, _ in item.chunks]
+                contents_id = blake2b_128(b''.join(chunk_ids))
+
+                if contents_id != previous_id:
+                    current_version += 1
+                    self.versions_index[file_id] = current_version, contents_id
+
+                return current_version
 
         def make_versioned_name(name, version, add_dir=False):
             if add_dir:
@@ -353,16 +362,16 @@ class FuseOperations(llfuse.Operations):
                 name += b'/' + path_fname[-1]
             # keep original extension at end to avoid confusing tools
             name, ext = os.path.splitext(name)
-            version_enc = os.fsencode('.%08x' % version)
+            version_enc = os.fsencode('.%05d' % version)
             return name + version_enc + ext
 
         if self.versions and not is_dir:
             parent = self.process_inner(name, parent)
-            version = file_version(item)
+            path = os.fsencode(item.path)
+            version = file_version(item, path)
             if version is not None:
                 # regular file, with contents - maybe a hardlink master
                 name = make_versioned_name(name, version)
-                path = os.fsencode(item.path)
                 self.file_versions[path] = version
 
         path = item.path

+ 41 - 4
src/borg/hashindex.pyx

@@ -6,17 +6,22 @@ import os
 cimport cython
 from libc.stdint cimport uint32_t, UINT32_MAX, uint64_t
 from libc.errno cimport errno
+from libc.string cimport memcpy
 from cpython.exc cimport PyErr_SetFromErrnoWithFilename
 from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release
-from cpython.bytes cimport PyBytes_FromStringAndSize
+from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_CheckExact, PyBytes_GET_SIZE, PyBytes_AS_STRING
 
-API_VERSION = '1.1_06'
+API_VERSION = '1.1_07'
 
 
 cdef extern from "_hashindex.c":
     ctypedef struct HashIndex:
         pass
 
+    ctypedef struct FuseVersionsElement:
+        uint32_t version
+        char hash[16]
+
     HashIndex *hashindex_read(object file_py, int permit_compact) except *
     HashIndex *hashindex_init(int capacity, int key_size, int value_size)
     void hashindex_free(HashIndex *index)
@@ -74,11 +79,13 @@ cdef class IndexBase:
     cdef HashIndex *index
     cdef int key_size
 
+    _key_size = 32
+
     MAX_LOAD_FACTOR = HASH_MAX_LOAD
     MAX_VALUE = _MAX_VALUE
 
-    def __cinit__(self, capacity=0, path=None, key_size=32, permit_compact=False):
-        self.key_size = key_size
+    def __cinit__(self, capacity=0, path=None, permit_compact=False):
+        self.key_size = self._key_size
         if path:
             if isinstance(path, (str, bytes)):
                 with open(path, 'rb') as fd:
@@ -153,6 +160,36 @@ cdef class IndexBase:
         return hashindex_compact(self.index)
 
 
+cdef class FuseVersionsIndex(IndexBase):
+    # 4 byte version + 16 byte file contents hash
+    value_size = 20
+    _key_size = 16
+
+    def __getitem__(self, key):
+        cdef FuseVersionsElement *data
+        assert len(key) == self.key_size
+        data = <FuseVersionsElement *>hashindex_get(self.index, <char *>key)
+        if data == NULL:
+            raise KeyError(key)
+        return _le32toh(data.version), PyBytes_FromStringAndSize(data.hash, 16)
+
+    def __setitem__(self, key, value):
+        cdef FuseVersionsElement data
+        assert len(key) == self.key_size
+        data.version = value[0]
+        assert data.version <= _MAX_VALUE, "maximum number of versions reached"
+        if not PyBytes_CheckExact(value[1]) or PyBytes_GET_SIZE(value[1]) != 16:
+            raise TypeError("Expected bytes of length 16 for second value")
+        memcpy(data.hash, PyBytes_AS_STRING(value[1]), 16)
+        data.version = _htole32(data.version)
+        if not hashindex_set(self.index, <char *>key, <void *> &data):
+            raise Exception('hashindex_set failed')
+
+    def __contains__(self, key):
+        assert len(key) == self.key_size
+        return hashindex_get(self.index, <char *>key) != NULL
+
+
 cdef class NSIndex(IndexBase):
 
     value_size = 8

+ 2 - 2
src/borg/helpers.py

@@ -131,13 +131,13 @@ class MandatoryFeatureUnsupported(Error):
 
 def check_extension_modules():
     from . import platform, compress, item
-    if hashindex.API_VERSION != '1.1_06':
+    if hashindex.API_VERSION != '1.1_07':
         raise ExtensionModuleError
     if chunker.API_VERSION != '1.1_01':
         raise ExtensionModuleError
     if compress.API_VERSION != '1.1_03':
         raise ExtensionModuleError
-    if borg.crypto.low_level.API_VERSION != '1.1_01':
+    if borg.crypto.low_level.API_VERSION != '1.1_02':
         raise ExtensionModuleError
     if platform.API_VERSION != platform.OS_API_VERSION != '1.1_01':
         raise ExtensionModuleError

+ 3 - 3
src/borg/testsuite/archiver.py

@@ -2101,11 +2101,11 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         with self.fuse_mount(self.repository_location, mountpoint, '-o', 'versions'):
             path = os.path.join(mountpoint, 'input', 'test')  # filename shows up as directory ...
             files = os.listdir(path)
-            assert all(f.startswith('test.') for f in files)  # ... with files test.xxxxxxxx in there
+            assert all(f.startswith('test.') for f in files)  # ... with files test.xxxxx in there
             assert {b'first', b'second'} == {open(os.path.join(path, f), 'rb').read() for f in files}
             if are_hardlinks_supported():
-                st1 = os.stat(os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00000000'))
-                st2 = os.stat(os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00000000'))
+                st1 = os.stat(os.path.join(mountpoint, 'input', 'hardlink1', 'hardlink1.00001'))
+                st2 = os.stat(os.path.join(mountpoint, 'input', 'hardlink2', 'hardlink2.00001'))
                 assert st1.st_ino == st2.st_ino
 
     @unittest.skipUnless(has_llfuse, 'llfuse not installed')