
hashindex_add C implementation

This was previously the loop body of hashindex_merge, but we also need it callable from Cython/Python code.

This saves some cycles, especially if the key is already present in the index.
Thomas Waldmann 9 years ago
parent
commit
720fc49498
3 changed files with 24 additions and 19 deletions
  1. borg/_hashindex.c (+12 -9)
  2. borg/cache.py (+3 -10)
  3. borg/hashindex.pyx (+9 -0)

+ 12 - 9
borg/_hashindex.c

@@ -390,21 +390,24 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_cs
     *total_chunks = chunks;
 }
 
+static void
+hashindex_add(HashIndex *index, const void *key, int32_t *other_values)
+{
+    int32_t *my_values = (int32_t *)hashindex_get(index, key);
+    if(my_values == NULL) {
+        hashindex_set(index, key, other_values);
+    } else {
+        *my_values += *other_values;
+    }
+}
+
 static void
 hashindex_merge(HashIndex *index, HashIndex *other)
 {
     int32_t key_size = index->key_size;
-    const int32_t *other_values;
-    int32_t *my_values;
     void *key = NULL;
 
     while((key = hashindex_next_key(other, key))) {
-        other_values = key + key_size;
-        my_values = (int32_t *)hashindex_get(index, key);
-        if(my_values == NULL) {
-            hashindex_set(index, key, other_values);
-        } else {
-            *my_values += *other_values;
-        }
+        hashindex_add(index, key, key + key_size);
     }
 }
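
For reference, a minimal pure-Python sketch of the add semantics, assuming each entry stores three int32 values (refcount, size, csize) as ChunkIndex does, with a dict standing in for the C hash table:

    def hashindex_add_sketch(index, key, other_values):
        # index: dict mapping key -> [refcount, size, csize]
        # (a stand-in for the C hash table)
        my_values = index.get(key)
        if my_values is None:
            # key not present yet: insert the incoming values
            index[key] = list(other_values)
        else:
            # key already present: only the first field, the reference
            # count, is summed; size and csize stay unchanged, since
            # they are identical for the same chunk anyway
            my_values[0] += other_values[0]

This mirrors why the C code only dereferences the first int32 of each value array.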

+ 3 - 10
borg/cache.py

@@ -255,18 +255,11 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
             for id in ids:
                 os.unlink(mkpath(id))
 
-        def add(chunk_idx, id, size, csize, incr=1):
-            try:
-                count, size, csize = chunk_idx[id]
-                chunk_idx[id] = count + incr, size, csize
-            except KeyError:
-                chunk_idx[id] = incr, size, csize
-
         def fetch_and_build_idx(archive_id, repository, key):
             chunk_idx = ChunkIndex()
             cdata = repository.get(archive_id)
             data = key.decrypt(archive_id, cdata)
-            add(chunk_idx, archive_id, len(data), len(cdata))
+            chunk_idx.add(archive_id, 1, len(data), len(cdata))
             archive = msgpack.unpackb(data)
             if archive[b'version'] != 1:
                 raise Exception('Unknown archive metadata version')
@@ -274,7 +267,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
             unpacker = msgpack.Unpacker()
             for item_id, chunk in zip(archive[b'items'], repository.get_many(archive[b'items'])):
                 data = key.decrypt(item_id, chunk)
-                add(chunk_idx, item_id, len(data), len(chunk))
+                chunk_idx.add(item_id, 1, len(data), len(chunk))
                 unpacker.feed(data)
                 for item in unpacker:
                     if not isinstance(item, dict):
@@ -282,7 +275,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                         continue
                     if b'chunks' in item:
                         for chunk_id, size, csize in item[b'chunks']:
-                            add(chunk_idx, chunk_id, size, csize)
+                            chunk_idx.add(chunk_id, 1, size, csize)
             if self.do_cache:
                 fn = mkpath(archive_id)
                 fn_tmp = mkpath(archive_id, suffix='.tmp')
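
For comparison, a sketch of how the removed helper maps onto the new call (variable names as in the surrounding code; the equivalence assumes the entry layout sketched above):

    # old pure-Python helper, as called with the default incr=1:
    #     count, size, csize = chunk_idx[id]
    #     chunk_idx[id] = count + 1, size, csize
    # new C-backed equivalent: size/csize are only stored for new keys,
    # otherwise just the reference count is bumped
    chunk_idx.add(id, 1, size, csize)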

+ 9 - 0
borg/hashindex.pyx

@@ -15,6 +15,7 @@ cdef extern from "_hashindex.c":
                              long long *unique_size, long long *unique_csize,
                              long long *total_unique_chunks, long long *total_chunks)
     void hashindex_merge(HashIndex *index, HashIndex *other)
+    void hashindex_add(HashIndex *index, void *key, void *value)
     int hashindex_get_size(HashIndex *index)
     int hashindex_write(HashIndex *index, char *path)
     void *hashindex_get(HashIndex *index, void *key)
@@ -196,6 +197,14 @@ cdef class ChunkIndex(IndexBase):
                             &total_unique_chunks, &total_chunks)
         return total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks
 
+    def add(self, key, refs, size, csize):
+        assert len(key) == self.key_size
+        cdef int[3] data
+        data[0] = _htole32(refs)
+        data[1] = _htole32(size)
+        data[2] = _htole32(csize)
+        hashindex_add(self.index, <char *>key, data)
+
     def merge(self, ChunkIndex other):
         hashindex_merge(self.index, other.index)
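
A short usage sketch of the new API (assuming 32-byte keys, as borg's ChunkIndex uses; the concrete numbers are illustrative):

    from borg.hashindex import ChunkIndex

    idx = ChunkIndex()
    key = b'\x00' * 32            # key length must equal self.key_size
    idx.add(key, 1, 1024, 512)    # new key: stores (refs=1, size=1024, csize=512)
    idx.add(key, 1, 1024, 512)    # existing key: refcount bumped to 2 in C
    refs, size, csize = idx[key]
    assert (refs, size, csize) == (2, 1024, 512)

    other = ChunkIndex()
    other.add(key, 3, 1024, 512)
    idx.merge(other)              # hashindex_merge: per-key add over all entries
    assert idx[key][0] == 5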