Merge pull request #7328 from ThomasWaldmann/hashindex-compact

hashindex minor fixes, tweaks, tests
TW, 2 years ago
commit 16f4bf5d7b
3 changed files with 81 additions and 45 deletions:

  1. src/borg/_hashindex.c (+13 -8)
  2. src/borg/testsuite/hashindex_pytest.py (+68 -0)
  3. src/borg/testsuite/hashindex_stress.py (+0 -37)

src/borg/_hashindex.c (+13 -8)

@@ -191,11 +191,14 @@ hashindex_lookup(HashIndex *index, const unsigned char *key, int *start_idx)
        /* When idx == start, we have done a full pass over all buckets.
         * - We did not find a bucket with the key we searched for.
         * - We did not find an empty bucket either.
-         * So all buckets are either full or deleted/tombstones.
-         * This is an invalid state we never should get into, see
-         * upper_limit and min_empty.
+         * - We may have found a deleted/tombstone bucket, though.
+         * This can easily happen if we have a compact hashtable.
         */
-        assert(idx != start);
+        if(idx == start) {
+            if(didx != -1)
+                break;  /* we have found a deleted/tombstone bucket at least */
+            return -2;  /* HT is completely full, no empty or deleted buckets. */
+        }
    }
    /* we get here if we did not find a bucket with the key we searched for. */
    if (start_idx != NULL) {
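To make the new control flow easier to follow, here is a minimal Python sketch of the probe loop this hunk changes (a toy model with hypothetical names, not borg's actual C layout): it remembers the first deleted/tombstone bucket it passes (the didx of the hunk above) and, after a full pass over all buckets, either falls back to that tombstone or returns the new -2 "completely full" code instead of asserting.

    # Toy model of the probe loop. EMPTY/TOMBSTONE are sentinel objects,
    # used buckets are (key, value) tuples; linear probing with wrap-around.
    EMPTY, TOMBSTONE = object(), object()

    def lookup(buckets, key, hash_fn=hash):
        """return (idx, insert_at): idx >= 0 found, -1 absent, -2 table full"""
        n = len(buckets)
        start = idx = hash_fn(key) % n
        didx = -1  # first deleted/tombstone bucket seen, if any
        while True:
            slot = buckets[idx]
            if slot is EMPTY:
                # key absent; prefer reusing an earlier tombstone for inserting
                return -1, (didx if didx != -1 else idx)
            if slot is TOMBSTONE:
                if didx == -1:
                    didx = idx
            elif slot[0] == key:
                return idx, idx  # found the key
            idx = (idx + 1) % n
            if idx == start:  # full pass over all buckets
                if didx != -1:
                    return -1, didx  # only a tombstone is left to reuse
                return -2, -1  # completely full: caller must grow/rebuild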
@@ -745,8 +748,8 @@ hashindex_set(HashIndex *index, const unsigned char *key, const void *value)
     uint8_t *ptr;
     if(idx < 0)
     {
-        if(index->num_entries > index->upper_limit) {
-            /* hashtable too full, grow it! */
+        if(index->num_entries >= index->upper_limit || idx == -2) {
+            /* hashtable too full or even a compact hashtable, grow/rebuild it! */
             if(!hashindex_resize(index, grow_size(index->num_buckets))) {
                 return 0;
             }
@@ -754,7 +757,7 @@ hashindex_set(HashIndex *index, const unsigned char *key, const void *value)
              * so we only have EMPTY or USED buckets, but no DELETED ones any more.
              */
             idx = hashindex_lookup(index, key, &start_idx);
-            assert(idx < 0);
+            assert(idx == -1);
             assert(BUCKET_IS_EMPTY(index, start_idx));
         }
         idx = start_idx;
@@ -768,7 +771,7 @@ hashindex_set(HashIndex *index, const unsigned char *key, const void *value)
                  * so we only have EMPTY or USED buckets, but no DELETED ones any more.
                  */
                 idx = hashindex_lookup(index, key, &start_idx);
-                assert(idx < 0);
+                assert(idx == -1);
                 assert(BUCKET_IS_EMPTY(index, start_idx));
                 idx = start_idx;
             }
@@ -879,6 +882,8 @@ hashindex_compact(HashIndex *index)

     index->num_buckets = index->num_entries;
     index->num_empty = 0;
+    index->min_empty = 0;
+    index->upper_limit = index->num_entries;  /* triggers a resize/rebuild when a new entry is added */
     return saved_size;
 }
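Continuing the sketch from above (same assumptions, same toy names, including the lookup(), EMPTY and TOMBSTONE defined there), the interplay between the set() and compact() hunks looks roughly like this: compact() leaves the table with no EMPTY buckets at all, so pinning upper_limit to num_entries guarantees the very next insert of a new key takes the grow/rebuild path, and the rebuild also eliminates all tombstones, which is what makes the tightened assert(idx == -1) valid.

    class ToyTable:
        def __init__(self, n=8):
            self.buckets = [EMPTY] * n
            self.num_entries = 0
            self.upper_limit = int(n * 0.75)  # analogue of HASH_MAX_LOAD

        def _rebuild(self, n):
            live = [b for b in self.buckets if b is not EMPTY and b is not TOMBSTONE]
            self.buckets = [EMPTY] * n
            self.upper_limit = int(n * 0.75)
            for k, v in live:  # re-inserting drops all tombstones
                _, at = lookup(self.buckets, k)
                self.buckets[at] = (k, v)

        def set(self, key, value):
            idx, at = lookup(self.buckets, key)
            if idx >= 0:
                self.buckets[idx] = (key, value)  # overwrite existing entry
                return
            if self.num_entries >= self.upper_limit or idx == -2:
                self._rebuild(2 * len(self.buckets))  # grow/rebuild
                idx, at = lookup(self.buckets, key)
                assert idx == -1  # no tombstones now, an EMPTY bucket was found
            self.buckets[at] = (key, value)
            self.num_entries += 1

        def pop(self, key):
            idx, _ = lookup(self.buckets, key)
            assert idx >= 0
            _, value = self.buckets[idx]
            self.buckets[idx] = TOMBSTONE  # leave a tombstone, not EMPTY
            self.num_entries -= 1
            return value

        def compact(self):  # assumes a non-empty table
            live = [b for b in self.buckets if b is not EMPTY and b is not TOMBSTONE]
            self.buckets = [EMPTY] * len(live)
            for k, v in live:
                _, at = lookup(self.buckets, k)
                self.buckets[at] = (k, v)
            # key point of the hunk above: with zero EMPTY buckets left,
            # the next insert of a new key must trigger a resize/rebuild
            self.upper_limit = self.num_entries

(The real hashindex_compact() compacts buckets in place rather than rehashing as this toy does, but the invariant it must maintain is the same.)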

src/borg/testsuite/hashindex_pytest.py (+68 -0)

@@ -0,0 +1,68 @@
+# more hashindex tests. kept separate so we can use pytest here.
+
+import os
+import random
+
+import pytest
+
+from ..hashindex import NSIndex
+
+
+def make_hashtables(*, entries, loops):
+    idx = NSIndex()
+    kv = {}
+    for i in range(loops):
+        # put some entries
+        for j in range(entries):
+            k = random.randbytes(32)
+            v = random.randint(0, NSIndex.MAX_VALUE - 1)
+            idx[k] = (v, v, v)
+            kv[k] = v
+        # check and delete a random amount of entries
+        delete_keys = random.sample(list(kv), k=random.randint(0, len(kv)))
+        for k in delete_keys:
+            v = kv.pop(k)
+            assert idx.pop(k) == (v, v, v)
+        # check if remaining entries are as expected
+        for k, v in kv.items():
+            assert idx[k] == (v, v, v)
+        # check entry count
+        assert len(kv) == len(idx)
+    return idx, kv
+
+
+@pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1")
+def test_hashindex_stress():
+    """checks if the hashtable behaves as expected
+
+    This can be used in _hashindex.c before running this test to provoke more collisions (don't forget to compile):
+    #define HASH_MAX_LOAD .99
+    #define HASH_MAX_EFF_LOAD .999
+    """
+    make_hashtables(entries=10000, loops=1000)  # we do quite some assertions while making them
+
+
+def test_hashindex_compact():
+    """test that we do not lose or corrupt data by the compaction nor by expanding/rebuilding"""
+    idx, kv = make_hashtables(entries=5000, loops=5)
+    size_noncompact = idx.size()
+    # compact the hashtable (remove empty/tombstone buckets)
+    saved_space = idx.compact()
+    # did we actually compact (reduce space usage)?
+    size_compact = idx.size()
+    assert saved_space > 0
+    assert size_noncompact - size_compact == saved_space
+    # did we lose anything?
+    for k, v in kv.items():
+        assert k in idx and idx[k] == (v, v, v)
+    assert len(idx) == len(kv)
+    # now expand the hashtable again. trigger a resize/rebuild by adding an entry.
+    k = b"x" * 32
+    idx[k] = (0, 0, 0)
+    kv[k] = 0
+    size_rebuilt = idx.size()
+    assert size_rebuilt > size_compact + 1
+    # did we lose anything?
+    for k, v in kv.items():
+        assert k in idx and idx[k] == (v, v, v)
+    assert len(idx) == len(kv)
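To try these locally, something like the following should work from a source checkout (the stress test is opt-in via the environment variable checked by the skipif marker above; the compaction test always runs):

    BORG_TESTS_SLOW=1 pytest src/borg/testsuite/hashindex_pytest.py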

src/borg/testsuite/hashindex_stress.py (+0 -37)

@@ -1,37 +0,0 @@
-import os
-import random
-
-import pytest
-
-from ..hashindex import NSIndex
-
-
-@pytest.mark.skipif("BORG_TESTS_SLOW" not in os.environ, reason="slow tests not enabled, use BORG_TESTS_SLOW=1")
-def test_hashindex_stress():
-    """checks if the hashtable behaves as expected
-
-    This can be used in _hashindex.c before running this test to provoke more collisions (don't forget to compile):
-    #define HASH_MAX_LOAD .99
-    #define HASH_MAX_EFF_LOAD .999
-    """
-    ENTRIES = 10000
-    LOOPS = 1000
-    idx = NSIndex()
-    kv = {}
-    for i in range(LOOPS):
-        # put some entries
-        for j in range(ENTRIES):
-            k = random.randbytes(32)
-            v = random.randint(0, NSIndex.MAX_VALUE - 1)
-            idx[k] = (v, v, v)
-            kv[k] = v
-        # check and delete a random amount of entries
-        delete_keys = random.sample(list(kv), k=random.randint(0, len(kv)))
-        for k in delete_keys:
-            v = kv.pop(k)
-            assert idx.pop(k) == (v, v, v)
-        # check if remaining entries are as expected
-        for k, v in kv.items():
-            assert idx[k] == (v, v, v)
-        # check entry count
-        assert len(kv) == len(idx)