
ChunkIndex: use borghash.HashTableNT

Also:
- remove most hashindex tests; borghash already has such tests.
- add a small wrapper class ChunkIndex around HashTableNT to
  adapt API differences and add some special methods (see the
  usage sketch below).
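
As a quick illustration of the wrapper API added in this commit (a minimal
sketch; the key bytes, sizes and temporary path are made up for the example,
everything else follows the new ChunkIndex code in the diff below):

    import hashlib, os, tempfile
    from borg.hashindex import ChunkIndex, ChunkIndexEntry

    idx = ChunkIndex()                                  # default capacity=1000
    key = hashlib.sha256(b"some chunk data").digest()   # 32-byte chunk id
    idx[key] = ChunkIndexEntry(refcount=1, size=4096)   # plain tuples work too
    idx.add(key, 2, 4096)                               # refcount += 2, saturates at MAX_VALUE
    assert idx[key] == (3, 4096)
    assert key in idx and len(idx) == 1

    with tempfile.TemporaryDirectory() as tmp:
        path = os.path.join(tmp, "chunkindex")
        idx.write(path)                                 # persisted via HashTableNT.write()
        idx2 = ChunkIndex.read(path)
        assert dict(idx2.iteritems()) == dict(idx.iteritems())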

Note: I needed to manually copy the .pxd files from borghash
into cwd, because they were not found:
- ./borghash.pxd
- borghash/_borghash.pxd
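
A possible alternative to copying the .pxd files (an untested sketch; it
assumes a setuptools build that calls Cython.Build.cythonize and that the
installed borghash package ships its .pxd files):

    # hypothetical setup.py fragment: put the site-packages directory that
    # contains the borghash package onto Cython's include path, so that
    # `from borghash cimport _borghash` can locate borghash/_borghash.pxd
    import os
    import borghash
    from Cython.Build import cythonize

    site_dir = os.path.dirname(os.path.dirname(borghash.__file__))
    ext_modules = cythonize("src/borg/hashindex.pyx", include_path=[site_dir])
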
Thomas Waldmann, 7 months ago
commit ad5b18008d
3 changed files with 60 additions and 456 deletions
1. src/borg/hashindex.pyx (+45, -90)
2. src/borg/selftest.py (+3, -9)
3. src/borg/testsuite/hashindex_test.py (+12, -357)

src/borg/hashindex.pyx (+45, -90)

@@ -3,9 +3,10 @@ from collections import namedtuple
 cimport cython
 from libc.stdint cimport uint32_t, UINT32_MAX, uint64_t
 from libc.string cimport memcpy
-from cpython.buffer cimport PyBUF_SIMPLE, PyObject_GetBuffer, PyBuffer_Release
 from cpython.bytes cimport PyBytes_FromStringAndSize, PyBytes_CheckExact, PyBytes_GET_SIZE, PyBytes_AS_STRING
 
+from borghash cimport _borghash
+
 API_VERSION = '1.2_01'
 
 
@@ -349,109 +350,63 @@ cdef class NSKeyIterator1:  # legacy borg 1.x
 ChunkIndexEntry = namedtuple('ChunkIndexEntry', 'refcount size')
 
 
-cdef class ChunkIndex(IndexBase):
+class ChunkIndex:
     """
     Mapping of 32 byte keys to (refcount, size), which are all 32-bit unsigned.
-
-    The reference count cannot overflow. If an overflow would occur, the refcount
-    is fixed to MAX_VALUE and will neither increase nor decrease by incref(), decref()
-    or add().
-
-    Prior signed 32-bit overflow is handled correctly for most cases: All values
-    from UINT32_MAX (2**32-1, inclusive) to MAX_VALUE (exclusive) are reserved and either
-    cause silent data loss (-1, -2) or will raise an AssertionError when accessed.
-    Other values are handled correctly. Note that previously the refcount could also reach
-    0 by *increasing* it.
-
-    Assigning refcounts in this reserved range is an invalid operation and raises AssertionError.
     """
+    MAX_VALUE = 2**32 - 1  # borghash has the full uint32_t range
 
-    value_size = 8
+    def __init__(self, capacity=1000, path=None, permit_compact=False, usable=None):
+        if path:
+            self.ht = _borghash.HashTableNT.read(path)
+        else:
+            if usable is not None:
+                capacity = usable * 2  # load factor 0.5
+            self.ht = _borghash.HashTableNT(key_size=32, value_format="<II", namedtuple_type=ChunkIndexEntry, capacity=capacity)
+
+    def __setitem__(self, key, value):
+        if not isinstance(value, ChunkIndexEntry) and isinstance(value, tuple):
+            value = ChunkIndexEntry(*value)
+        self.ht[key] = value
 
     def __getitem__(self, key):
-        assert len(key) == self.key_size
-        data = <uint32_t *>hashindex_get(self.index, <unsigned char *>key)
-        if not data:
-            raise KeyError(key)
-        cdef uint32_t refcount = _le32toh(data[0])
-        assert refcount <= _MAX_VALUE, "invalid reference count"
-        return ChunkIndexEntry(refcount, _le32toh(data[1]))
+        return self.ht[key]
 
-    def __setitem__(self, key, value):
-        assert len(key) == self.key_size
-        cdef uint32_t[2] data
-        cdef uint32_t refcount = value[0]
-        assert refcount <= _MAX_VALUE, "invalid reference count"
-        data[0] = _htole32(refcount)
-        data[1] = _htole32(value[1])
-        if not hashindex_set(self.index, <unsigned char *>key, data):
-            raise Exception('hashindex_set failed')
+    def __delitem__(self, key):
+        del self.ht[key]
 
     def __contains__(self, key):
-        assert len(key) == self.key_size
-        data = <uint32_t *>hashindex_get(self.index, <unsigned char *>key)
-        if data != NULL:
-            assert _le32toh(data[0]) <= _MAX_VALUE, "invalid reference count"
-        return data != NULL
+        return key in self.ht
 
-    def iteritems(self, marker=None):
-        cdef const unsigned char *key
-        iter = ChunkKeyIterator(self.key_size)
-        iter.idx = self
-        iter.index = self.index
-        if marker:
-            key = hashindex_get(self.index, <unsigned char *>marker)
-            if marker is None:
-                raise IndexError
-            iter.key = key - self.key_size
-        return iter
+    def __len__(self):
+        return len(self.ht)
+
+    def iteritems(self):
+        yield from self.ht.iteritems()
 
     def add(self, key, refs, size):
-        assert len(key) == self.key_size
-        cdef uint32_t[2] data
-        data[0] = _htole32(refs)
-        data[1] = _htole32(size)
-        self._add(<unsigned char*> key, data)
-
-    cdef _add(self, unsigned char *key, uint32_t *data):
-        cdef uint64_t refcount1, refcount2, result64
-        values = <uint32_t*> hashindex_get(self.index, key)
-        if values:
-            refcount1 = _le32toh(values[0])
-            refcount2 = _le32toh(data[0])
-            assert refcount1 <= _MAX_VALUE, "invalid reference count"
-            assert refcount2 <= _MAX_VALUE, "invalid reference count"
-            result64 = refcount1 + refcount2
-            values[0] = _htole32(min(result64, _MAX_VALUE))
-            values[1] = data[1]
-        else:
-            if not hashindex_set(self.index, key, data):
-                raise Exception('hashindex_set failed')
+        v = self.get(key, ChunkIndexEntry(0, 0))
+        refcount = min(self.MAX_VALUE, v.refcount + refs)
+        self[key] = v._replace(refcount=refcount, size=size)
 
+    def get(self, key, default=None):
+        try:
+            return self[key]
+        except KeyError:
+            return default
 
-cdef class ChunkKeyIterator:
-    cdef ChunkIndex idx
-    cdef HashIndex *index
-    cdef const unsigned char *key
-    cdef int key_size
-    cdef int exhausted
+    def compact(self):
+        pass
 
-    def __cinit__(self, key_size):
-        self.key = NULL
-        self.key_size = key_size
-        self.exhausted = 0
+    def clear(self):
+        pass
 
-    def __iter__(self):
-        return self
+    @classmethod
+    def read(cls, path, permit_compact=False):
+        return cls(path=path)
 
-    def __next__(self):
-        if self.exhausted:
-            raise StopIteration
-        self.key = hashindex_next_key(self.index, <unsigned char *>self.key)
-        if not self.key:
-            self.exhausted = 1
-            raise StopIteration
-        cdef uint32_t *value = <uint32_t *>(self.key + self.key_size)
-        cdef uint32_t refcount = _le32toh(value[0])
-        assert refcount <= _MAX_VALUE, "invalid reference count"
-        return (<char *>self.key)[:self.key_size], ChunkIndexEntry(refcount, _le32toh(value[1]))
+    def write(self, path):
+        self.ht.write(path)
+
+    def size(self):
+        return self.ht.size()

src/borg/selftest.py (+3, -9)

@@ -21,19 +21,13 @@ import sys
 import time
 from unittest import TestResult, TestSuite, defaultTestLoader
 
-from .testsuite.hashindex_test import HashIndexDataTestCase, HashIndexRefcountingTestCase, HashIndexTestCase
+from .testsuite.hashindex_test import HashIndexRefcountingTestCase
 from .testsuite.crypto_test import CryptoTestCase
 from .testsuite.chunker_test import ChunkerTestCase
 
-SELFTEST_CASES = [
-    HashIndexDataTestCase,
-    HashIndexRefcountingTestCase,
-    HashIndexTestCase,
-    CryptoTestCase,
-    ChunkerTestCase,
-]
+SELFTEST_CASES = [HashIndexRefcountingTestCase, CryptoTestCase, ChunkerTestCase]
 
-SELFTEST_COUNT = 19
+SELFTEST_COUNT = 13
 
 
 class SelfTestResult(TestResult):

src/borg/testsuite/hashindex_test.py (+12, -357)

@@ -1,16 +1,11 @@
 # Note: these tests are part of the self test, do not use or import pytest functionality here.
 #       See borg.selftest for details. If you add/remove test methods, update SELFTEST_COUNT
 
-import base64
 import hashlib
-import io
-import os
-import tempfile
-import zlib
+import struct
 
 from ..hashindex import NSIndex, ChunkIndex
-from ..crypto.file_integrity import IntegrityCheckedFile, FileIntegrityError
-from . import BaseTestCase, unopened_tempfile
+from . import BaseTestCase
 
 
 def H(x):
@@ -23,361 +18,21 @@ def H2(x):
     return hashlib.sha256(H(x)).digest()
 
 
-class HashIndexTestCase(BaseTestCase):
-    def _generic_test(self, cls, make_value, sha):
-        idx = cls()
-        self.assert_equal(len(idx), 0)
-        # Test set
-        for x in range(100):
-            idx[H(x)] = make_value(x)
-        self.assert_equal(len(idx), 100)
-        for x in range(100):
-            self.assert_equal(idx[H(x)], make_value(x))
-        # Test update
-        for x in range(100):
-            idx[H(x)] = make_value(x * 2)
-        self.assert_equal(len(idx), 100)
-        for x in range(100):
-            self.assert_equal(idx[H(x)], make_value(x * 2))
-        # Test delete
-        for x in range(50):
-            del idx[H(x)]
-        # Test some keys still in there
-        for x in range(50, 100):
-            assert H(x) in idx
-        # Test some keys not there any more
-        for x in range(50):
-            assert H(x) not in idx
-        # Test delete non-existing key
-        for x in range(50):
-            self.assert_raises(KeyError, idx.__delitem__, H(x))
-        self.assert_equal(len(idx), 50)
-        with unopened_tempfile() as filepath:
-            idx.write(filepath)
-            del idx
-            # Verify file contents
-            with open(filepath, "rb") as fd:
-                self.assert_equal(hashlib.sha256(fd.read()).hexdigest(), sha)
-            # Make sure we can open the file
-            idx = cls.read(filepath)
-            self.assert_equal(len(idx), 50)
-            for x in range(50, 100):
-                self.assert_equal(idx[H(x)], make_value(x * 2))
-            idx.clear()
-            self.assert_equal(len(idx), 0)
-            idx.write(filepath)
-            del idx
-            self.assert_equal(len(cls.read(filepath)), 0)
-        idx = cls()
-        # Test setdefault - set non-existing key
-        idx.setdefault(H(0), make_value(42))
-        assert H(0) in idx
-        assert idx[H(0)] == make_value(42)
-        # Test setdefault - do not set existing key
-        idx.setdefault(H(0), make_value(23))
-        assert H(0) in idx
-        assert idx[H(0)] == make_value(42)
-        # Test setdefault - get-like return value, key not present
-        assert idx.setdefault(H(1), make_value(23)) == make_value(23)
-        # Test setdefault - get-like return value, key present
-        assert idx.setdefault(H(0), make_value(23)) == make_value(42)
-        # clean up setdefault test
-        del idx
-
-    def test_nsindex(self):
-        self._generic_test(
-            NSIndex, lambda x: (x, x, x), "640b909cf07884cc11fdf5431ffc27dee399770ceadecce31dffecd130a311a3"
-        )
-
-    def test_chunkindex(self):
-        self._generic_test(
-            ChunkIndex, lambda x: (x, x), "5915fcf986da12e5f3ac68e05242b9c729e6101b0460b1d4e4a9e9f7cdf1b7da"
-        )
-
-    def test_resize(self):
-        n = 2000  # Must be >= MIN_BUCKETS
-        with unopened_tempfile() as filepath:
-            idx = NSIndex()
-            idx.write(filepath)
-            initial_size = os.path.getsize(filepath)
-            self.assert_equal(len(idx), 0)
-            for x in range(n):
-                idx[H(x)] = x, x, x
-            idx.write(filepath)
-            assert initial_size < os.path.getsize(filepath)
-            for x in range(n):
-                del idx[H(x)]
-            self.assert_equal(len(idx), 0)
-            idx.write(filepath)
-            self.assert_equal(initial_size, os.path.getsize(filepath))
-
-    def test_iteritems(self):
-        idx = NSIndex()
-        for x in range(100):
-            idx[H(x)] = x, x, x
-        iterator = idx.iteritems()
-        all = list(iterator)
-        self.assert_equal(len(all), 100)
-        # iterator is already exhausted by list():
-        self.assert_raises(StopIteration, next, iterator)
-        second_half = list(idx.iteritems(marker=all[49][0]))
-        self.assert_equal(len(second_half), 50)
-        self.assert_equal(second_half, all[50:])
-
-
-class HashIndexExtraTestCase(BaseTestCase):
-    """These tests are separate because they should not become part of the selftest."""
-
-    def test_chunk_indexer(self):
-        # see _hashindex.c hash_sizes, we want to be close to the max. load
-        # because interesting errors happen there.
-        key_count = int(65537 * ChunkIndex.MAX_LOAD_FACTOR) - 10
-        index = ChunkIndex(key_count)
-        all_keys = [hashlib.sha256(H(k)).digest() for k in range(key_count)]
-        # we're gonna delete 1/3 of all_keys, so let's split them 2/3 and 1/3:
-        keys, to_delete_keys = all_keys[0 : (2 * key_count // 3)], all_keys[(2 * key_count // 3) :]
-
-        for i, key in enumerate(keys):
-            index[key] = (i, i)
-        for i, key in enumerate(to_delete_keys):
-            index[key] = (i, i)
-
-        for key in to_delete_keys:
-            del index[key]
-        for i, key in enumerate(keys):
-            assert index[key] == (i, i)
-        for key in to_delete_keys:
-            assert index.get(key) is None
-
-        # now delete every key still in the index
-        for key in keys:
-            del index[key]
-        # the index should now be empty
-        assert list(index.iteritems()) == []
-
-
-class HashIndexSizeTestCase(BaseTestCase):
-    def test_size_on_disk(self):
-        idx = ChunkIndex()
-        assert idx.size() == 1024 + 1031 * (32 + 2 * 4)
-
-    def test_size_on_disk_accurate(self):
-        idx = ChunkIndex()
-        for i in range(1234):
-            idx[H(i)] = i, i**2
-        with unopened_tempfile() as filepath:
-            idx.write(filepath)
-            size = os.path.getsize(filepath)
-        assert idx.size() == size
-
-
 class HashIndexRefcountingTestCase(BaseTestCase):
     def test_chunkindex_add(self):
-        idx1 = ChunkIndex()
-        idx1.add(H(1), 5, 6)
-        assert idx1[H(1)] == (5, 6)
-        idx1.add(H(1), 1, 2)
-        assert idx1[H(1)] == (6, 2)
-
-    def test_setitem_raises(self):
-        idx1 = ChunkIndex()
-        with self.assert_raises(AssertionError):
-            idx1[H(1)] = ChunkIndex.MAX_VALUE + 1, 0
+        chunks = ChunkIndex()
+        x = H2(1)
+        chunks.add(x, 5, 6)
+        assert chunks[x] == (5, 6)
+        chunks.add(x, 1, 2)
+        assert chunks[x] == (6, 2)
 
     def test_keyerror(self):
-        idx = ChunkIndex()
+        chunks = ChunkIndex()
         with self.assert_raises(KeyError):
-            idx[H(1)]
-        with self.assert_raises(OverflowError):
-            idx.add(H(1), -1, 0)
-
-
-class HashIndexDataTestCase(BaseTestCase):
-    # This bytestring was created with borg2-pre 2022-09-30
-    HASHINDEX = (
-        b"eJzt0DEKgwAMQNFoBXsMj9DqDUQoToKTR3Hzwr2DZi+0HS19HwIZHhnST/OjHYeljIhLTl1FVDlN7te"
-        b"Q9M/tGcdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHMdxHM"
-        b"dxHMdxHMdxHMdxHMdxHMdxHPfqbu+7F2nKz67Nc9sX97r1+Rt/4TiO4ziO4ziO4ziO4ziO4ziO4ziO4"
-        b"ziO4ziO4ziO4ziO4ziO4ziO4ziO4ziO487lDoRvHEk="
-    )
-
-    def _serialize_hashindex(self, idx):
-        with tempfile.TemporaryDirectory() as tempdir:
-            file = os.path.join(tempdir, "idx")
-            idx.write(file)
-            with open(file, "rb") as f:
-                return self._pack(f.read())
-
-    def _deserialize_hashindex(self, bytestring):
-        with tempfile.TemporaryDirectory() as tempdir:
-            file = os.path.join(tempdir, "idx")
-            with open(file, "wb") as f:
-                f.write(self._unpack(bytestring))
-            return ChunkIndex.read(file)
-
-    def _pack(self, bytestring):
-        return base64.b64encode(zlib.compress(bytestring))
-
-    def _unpack(self, bytestring):
-        return zlib.decompress(base64.b64decode(bytestring))
-
-    def test_identical_creation(self):
-        idx1 = ChunkIndex()
-        idx1[H(1)] = 1, 2
-        idx1[H(2)] = 2**31 - 1, 0
-        idx1[H(3)] = 4294962296, 0  # 4294962296 is -5000 interpreted as an uint32_t
-
-        serialized = self._serialize_hashindex(idx1)
-        assert self._unpack(serialized) == self._unpack(self.HASHINDEX)
-
-
-class HashIndexIntegrityTestCase(HashIndexDataTestCase):
-    def write_integrity_checked_index(self, tempdir):
-        idx = self._deserialize_hashindex(self.HASHINDEX)
-        file = os.path.join(tempdir, "idx")
-        with IntegrityCheckedFile(path=file, write=True) as fd:
-            idx.write(fd)
-        integrity_data = fd.integrity_data
-        assert "final" in integrity_data
-        assert "HashHeader" in integrity_data
-        return file, integrity_data
-
-    def test_integrity_checked_file(self):
-        with tempfile.TemporaryDirectory() as tempdir:
-            file, integrity_data = self.write_integrity_checked_index(tempdir)
-            with open(file, "r+b") as fd:
-                fd.write(b"Foo")
-            with self.assert_raises(FileIntegrityError):
-                with IntegrityCheckedFile(path=file, write=False, integrity_data=integrity_data) as fd:
-                    ChunkIndex.read(fd)
-
-
-class HashIndexCompactTestCase(HashIndexDataTestCase):
-    def index(self, num_entries, num_buckets, num_empty):
-        index_data = io.BytesIO()
-        index_data.write(b"BORG2IDX")
-        # version
-        index_data.write((2).to_bytes(4, "little"))
-        # num_entries
-        index_data.write(num_entries.to_bytes(4, "little"))
-        # num_buckets
-        index_data.write(num_buckets.to_bytes(4, "little"))
-        # num_empty
-        index_data.write(num_empty.to_bytes(4, "little"))
-        # key_size
-        index_data.write((32).to_bytes(4, "little"))
-        # value_size
-        index_data.write((3 * 4).to_bytes(4, "little"))
-        # reserved
-        index_data.write(bytes(1024 - 32))
-
-        self.index_data = index_data
-
-    def index_from_data(self):
-        self.index_data.seek(0)
-        # Since we are trying to carefully control the layout of the hashindex,
-        # we set permit_compact to prevent hashindex_read from resizing the hash table.
-        index = ChunkIndex.read(self.index_data, permit_compact=True)
-        return index
-
-    def write_entry(self, key, *values):
-        self.index_data.write(key)
-        for value in values:
-            self.index_data.write(value.to_bytes(4, "little"))
-
-    def write_empty(self, key):
-        self.write_entry(key, 0xFFFFFFFF, 0, 0)
-
-    def write_deleted(self, key):
-        self.write_entry(key, 0xFFFFFFFE, 0, 0)
-
-    def compare_indexes(self, idx1, idx2):
-        """Check that the two hash tables contain the same data.  idx1
-        is allowed to have "mis-filed" entries, because we only need to
-        iterate over it.  But idx2 needs to support lookup."""
-        for k, v in idx1.iteritems():
-            assert v == idx2[k]
-        assert len(idx1) == len(idx2)
-
-    def compare_compact(self, layout):
-        """A generic test of a hashindex with the specified layout.  layout should
-        be a string consisting only of the characters '*' (filled), 'D' (deleted)
-        and 'E' (empty).
-        """
-        num_buckets = len(layout)
-        num_empty = layout.count("E")
-        num_entries = layout.count("*")
-        self.index(num_entries=num_entries, num_buckets=num_buckets, num_empty=num_empty)
-        k = 0
-        for c in layout:
-            if c == "D":
-                self.write_deleted(H2(k))
-            elif c == "E":
-                self.write_empty(H2(k))
-            else:
-                assert c == "*"
-                self.write_entry(H2(k), 3 * k + 1, 3 * k + 2, 3 * k + 3)
-            k += 1
-        idx = self.index_from_data()
-        cpt = self.index_from_data()
-        cpt.compact()
-        # Note that idx is not a valid hash table, since the entries are not
-        # stored where they should be.  So lookups of the form idx[k] can fail.
-        # But cpt is a valid hash table, since there are no empty buckets.
-        assert idx.size() == 1024 + num_buckets * (32 + 3 * 4)
-        assert cpt.size() == 1024 + num_entries * (32 + 3 * 4)
-        self.compare_indexes(idx, cpt)
-
-    def test_simple(self):
-        self.compare_compact("*DE**E")
-
-    def test_first_empty(self):
-        self.compare_compact("D*E**E")
-
-    def test_last_used(self):
-        self.compare_compact("D*E*E*")
-
-    def test_too_few_empty_slots(self):
-        self.compare_compact("D**EE*")
-
-    def test_empty(self):
-        self.compare_compact("DEDEED")
-
-    def test_num_buckets_zero(self):
-        self.compare_compact("")
-
-    def test_already_compact(self):
-        self.compare_compact("***")
-
-    def test_all_at_front(self):
-        self.compare_compact("*DEEED")
-        self.compare_compact("**DEED")
-        self.compare_compact("***EED")
-        self.compare_compact("****ED")
-        self.compare_compact("*****D")
-
-    def test_all_at_back(self):
-        self.compare_compact("EDEEE*")
-        self.compare_compact("DEDE**")
-        self.compare_compact("DED***")
-        self.compare_compact("ED****")
-        self.compare_compact("D*****")
-
-
-class NSIndexTestCase(BaseTestCase):
-    def test_nsindex_segment_limit(self):
-        idx = NSIndex()
-        with self.assert_raises(AssertionError):
-            idx[H(1)] = NSIndex.MAX_VALUE + 1, 0, 0
-        assert H(1) not in idx
-        idx[H(2)] = NSIndex.MAX_VALUE, 0, 0
-        assert H(2) in idx
-
-
-class AllIndexTestCase(BaseTestCase):
-    def test_max_load_factor(self):
-        assert NSIndex.MAX_LOAD_FACTOR < 1.0
-        assert ChunkIndex.MAX_LOAD_FACTOR < 1.0
+            chunks[H(1)]
+        with self.assert_raises(struct.error):
+            chunks.add(H(1), -1, 0)
 
 
 class IndexCorruptionTestCase(BaseTestCase):