فهرست منبع

hashindex_add C implementation

this was also the loop contents of hashindex_merge, but we also need it callable from Cython/Python code.

this saves some cycles, esp. if the key is already present in the index.
Thomas Waldmann 10 سال پیش
والد
کامیت
720fc49498
3 فایل‌های تغییر یافته به همراه 24 افزوده شده و 19 حذف شده
  1. +12 -9
      borg/_hashindex.c
  2. +3 -10
      borg/cache.py
  3. +9 -0
      borg/hashindex.pyx

+ 12 - 9
borg/_hashindex.c

@@ -390,21 +390,24 @@ hashindex_summarize(HashIndex *index, long long *total_size, long long *total_cs
     *total_chunks = chunks;
 }
 
+static void
+hashindex_add(HashIndex *index, const void *key, int32_t *other_values)
+{
+    int32_t *my_values = (int32_t *)hashindex_get(index, key);
+    if(my_values == NULL) {
+        hashindex_set(index, key, other_values);
+    } else {
+        *my_values += *other_values;
+    }
+}
+
 static void
 hashindex_merge(HashIndex *index, HashIndex *other)
 {
     int32_t key_size = index->key_size;
-    const int32_t *other_values;
-    int32_t *my_values;
     void *key = NULL;
 
     while((key = hashindex_next_key(other, key))) {
-        other_values = key + key_size;
-        my_values = (int32_t *)hashindex_get(index, key);
-        if(my_values == NULL) {
-            hashindex_set(index, key, other_values);
-        } else {
-            *my_values += *other_values;
-        }
+        hashindex_add(index, key, key + key_size);
     }
 }

+ 3 - 10
borg/cache.py

@@ -255,18 +255,11 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
             for id in ids:
                 os.unlink(mkpath(id))
 
-        def add(chunk_idx, id, size, csize, incr=1):
-            try:
-                count, size, csize = chunk_idx[id]
-                chunk_idx[id] = count + incr, size, csize
-            except KeyError:
-                chunk_idx[id] = incr, size, csize
-
         def fetch_and_build_idx(archive_id, repository, key):
             chunk_idx = ChunkIndex()
             cdata = repository.get(archive_id)
             data = key.decrypt(archive_id, cdata)
-            add(chunk_idx, archive_id, len(data), len(cdata))
+            chunk_idx.add(archive_id, 1, len(data), len(cdata))
             archive = msgpack.unpackb(data)
             if archive[b'version'] != 1:
                 raise Exception('Unknown archive metadata version')
@@ -274,7 +267,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
             unpacker = msgpack.Unpacker()
             for item_id, chunk in zip(archive[b'items'], repository.get_many(archive[b'items'])):
                 data = key.decrypt(item_id, chunk)
-                add(chunk_idx, item_id, len(data), len(chunk))
+                chunk_idx.add(item_id, 1, len(data), len(chunk))
                 unpacker.feed(data)
                 for item in unpacker:
                     if not isinstance(item, dict):
@@ -282,7 +275,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                         continue
                     if b'chunks' in item:
                         for chunk_id, size, csize in item[b'chunks']:
-                            add(chunk_idx, chunk_id, size, csize)
+                            chunk_idx.add(chunk_id, 1, size, csize)
             if self.do_cache:
                 fn = mkpath(archive_id)
                 fn_tmp = mkpath(archive_id, suffix='.tmp')

+ 9 - 0
borg/hashindex.pyx

@@ -15,6 +15,7 @@ cdef extern from "_hashindex.c":
                              long long *unique_size, long long *unique_csize,
                              long long *total_unique_chunks, long long *total_chunks)
     void hashindex_merge(HashIndex *index, HashIndex *other)
+    void hashindex_add(HashIndex *index, void *key, void *value)
     int hashindex_get_size(HashIndex *index)
     int hashindex_write(HashIndex *index, char *path)
     void *hashindex_get(HashIndex *index, void *key)
@@ -196,6 +197,14 @@ cdef class ChunkIndex(IndexBase):
                             &total_unique_chunks, &total_chunks)
         return total_size, total_csize, unique_size, unique_csize, total_unique_chunks, total_chunks
 
+    def add(self, key, refs, size, csize):
+        assert len(key) == self.key_size
+        cdef int[3] data
+        data[0] = _htole32(refs)
+        data[1] = _htole32(size)
+        data[2] = _htole32(csize)
+        hashindex_add(self.index, <char *>key, data)
+
     def merge(self, ChunkIndex other):
         hashindex_merge(self.index, other.index)