浏览代码

HashIndex: Switch to a non-mmap based implementation

This eliminates unnecessary IO on low memory systems
Jonas Borgström 11 年之前
父节点
当前提交
2f72b9f960
共有 11 个文件被更改，包括 166 次插入和 218 次删除
  1. 7 0
      CHANGES
  2. 99 144
      attic/_hashindex.c
  3. 2 2
      attic/archive.py
  4. 3 3
      attic/cache.py
  5. 25 24
      attic/hashindex.pyx
  6. 1 1
      attic/helpers.py
  7. 1 1
      attic/remote.py
  8. 15 19
      attic/repository.py
  9. 1 1
      attic/testsuite/archiver.py
  10. 11 22
      attic/testsuite/hashindex.py
  11. 1 1
      attic/testsuite/repository.py

+ 7 - 0
CHANGES

@@ -3,6 +3,13 @@ Attic Changelog
 
 Here you can see the full list of changes between each Attic release.
 
+Version 0.14
+------------
+
+(feature release, released on X)
+
+- HashIndex improvements, eliminates unnecessary IO on low memory systems.
+
 Version 0.13
 ------------
 

+ 99 - 144
attic/_hashindex.c

@@ -7,7 +7,6 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <unistd.h>
-#include <sys/mman.h>
 
 #if defined(BYTE_ORDER)&&(BYTE_ORDER == BIG_ENDIAN)
 #define _le32toh(x) __builtin_bswap32(x)
@@ -28,9 +27,7 @@ typedef struct {
 } __attribute__((__packed__)) HashHeader;
 
 typedef struct {
-    char *path;
-    void *map_addr;
-    off_t map_length;
+    void *data;
     void *buckets;
     int num_entries;
     int num_buckets;
@@ -39,7 +36,7 @@ typedef struct {
     int bucket_size;
     int lower_limit;
     int upper_limit;
-    int readonly;
+    int data_len;
 } HashIndex;
 
 #define MAGIC "ATTICIDX"
@@ -58,15 +55,14 @@ typedef struct {
 #define BUCKET_MATCHES_KEY(index, idx, key) (memcmp(key, BUCKET_ADDR(index, idx), index->key_size) == 0)
 
 #define BUCKET_MARK_DELETED(index, idx) (*((uint32_t *)(BUCKET_ADDR(index, idx) + index->key_size)) = DELETED)
+#define BUCKET_MARK_EMPTY(index, idx) (*((uint32_t *)(BUCKET_ADDR(index, idx) + index->key_size)) = EMPTY)
 
-#define EPRINTF(msg, ...) EPRINTF_PATH(index->path, msg, ##__VA_ARGS__)
+#define EPRINTF(msg, ...) fprintf(stderr, "hashindex: " msg "\n", ##__VA_ARGS__)
 #define EPRINTF_PATH(path, msg, ...) fprintf(stderr, "hashindex: %s: " msg "\n", path, ##__VA_ARGS__)
 
-static HashIndex *hashindex_open(const char *path, int readonly);
-static int hashindex_close(HashIndex *index);
-static int hashindex_clear(HashIndex *index);
-static int hashindex_flush(HashIndex *index);
-static HashIndex *hashindex_create(const char *path, int capacity, int key_size, int value_size);
+static HashIndex *hashindex_read(const char *path);
+static int hashindex_write(HashIndex *index, const char *path);
+static HashIndex *hashindex_init(int capacity, int key_size, int value_size);
 static const void *hashindex_get(HashIndex *index, const void *key);
 static int hashindex_set(HashIndex *index, const void *key, const void *value);
 static int hashindex_delete(HashIndex *index, const void *key);
@@ -96,7 +92,7 @@ hashindex_lookup(HashIndex *index, const void *key)
             }
         }
         else if(BUCKET_MATCHES_KEY(index, idx, key)) {
-            if (didx != -1 && !index->readonly) {
+            if (didx != -1) {
                 memcpy(BUCKET_ADDR(index, didx), BUCKET_ADDR(index, idx), index->bucket_size);
                 BUCKET_MARK_DELETED(index, idx);
                 idx = didx;
@@ -113,200 +109,159 @@ hashindex_lookup(HashIndex *index, const void *key)
 static int
 hashindex_resize(HashIndex *index, int capacity)
 {
-    char *new_path = malloc(strlen(index->path) + 5);
-    int ret = 0;
     HashIndex *new;
     void *key = NULL;
-    strcpy(new_path, index->path);
-    strcat(new_path, ".tmp");
 
-    if(!(new = hashindex_create(new_path, capacity, index->key_size, index->value_size))) {
-        free(new_path);
+    if(!(new = hashindex_init(capacity, index->key_size, index->value_size))) {
         return 0;
     }
     while((key = hashindex_next_key(index, key))) {
         hashindex_set(new, key, hashindex_get(index, key));
     }
-    munmap(index->map_addr, index->map_length);
-    index->map_addr = new->map_addr;
-    index->map_length = new->map_length;
+    free(index->data);
+    index->data = new->data;
+    index->data_len = new->data_len;
     index->num_buckets = new->num_buckets;
     index->lower_limit = new->lower_limit;
     index->upper_limit = new->upper_limit;
     index->buckets = new->buckets;
-    if(unlink(index->path) < 0) {
-        EPRINTF("unlink failed");
-        goto out;
-    }
-    if(rename(new_path, index->path) < 0) {
-        EPRINTF_PATH(new_path, "rename failed");
-        goto out;
-    }
-    ret = 1;
-out:
-    free(new_path);
-    free(new->path);
     free(new);
-    return ret;
+    return 1;
 }
 
 /* Public API */
 static HashIndex *
-hashindex_open(const char *path, int readonly)
+hashindex_read(const char *path)
 {
-    void *addr;
-    int fd, oflags, prot;
+    FILE *fd;
     off_t length;
-    HashHeader *header;
-    HashIndex *index;
-
-    if(readonly) {
-        oflags = O_RDONLY;
-        prot = PROT_READ;
-    }
-    else {
-        oflags = O_RDWR;
-        prot = PROT_READ | PROT_WRITE;
-    }
+    HashHeader header;
+    HashIndex *index = NULL;
 
-    if((fd = open(path, oflags)) < 0) {
-        EPRINTF_PATH(path, "open failed");
+    if((fd = fopen(path, "r")) == NULL) {
+        EPRINTF_PATH(path, "fopen failed");
         return NULL;
     }
-    if((length = lseek(fd, 0, SEEK_END)) < 0) {
-        EPRINTF_PATH(path, "lseek failed");
-        if(close(fd) < 0) {
-            EPRINTF_PATH(path, "close failed");
-        }
-        return NULL;
+    if(fread(&header, 1, sizeof(HashHeader), fd) != sizeof(HashHeader)) {
+        EPRINTF_PATH(path, "fread failed");
+        goto fail;
     }
-    addr = mmap(0, length, prot, MAP_SHARED, fd, 0);
-    if(close(fd) < 0) {
-        EPRINTF_PATH(path, "close failed");
-        return NULL;
+    if(fseek(fd, 0, SEEK_END) < 0) {
+        EPRINTF_PATH(path, "fseek failed");
+        goto fail;
     }
-    if(addr == MAP_FAILED) {
-        EPRINTF_PATH(path, "mmap failed");
-        return NULL;
+    if((length = ftell(fd)) < 0) {
+        EPRINTF_PATH(path, "ftell failed");
+        goto fail;
+    }
+    if(fseek(fd, 0, SEEK_SET) < 0) {
+        EPRINTF_PATH(path, "fseek failed");
+        goto fail;
     }
-    header = (HashHeader *)addr;
-    if(memcmp(header->magic, MAGIC, 8)) {
+    if(memcmp(header.magic, MAGIC, 8)) {
         EPRINTF_PATH(path, "Unknown file header");
-        return NULL;
+        goto fail;
     }
-    if(length != sizeof(HashHeader) + _le32toh(header->num_buckets) * (header->key_size + header->value_size)) {
+    if(length != sizeof(HashHeader) + _le32toh(header.num_buckets) * (header.key_size + header.value_size)) {
         EPRINTF_PATH(path, "Incorrect file length");
-        return NULL;
+        goto fail;
     }
     if(!(index = malloc(sizeof(HashIndex)))) {
         EPRINTF_PATH(path, "malloc failed");
-        return NULL;
+        goto fail;
     }
-    index->readonly = readonly;
-    index->map_addr = addr;
-    index->map_length = length;
-    index->num_entries = _le32toh(header->num_entries);
-    index->num_buckets = _le32toh(header->num_buckets);
-    index->key_size = header->key_size;
-    index->value_size = header->value_size;
+    if(!(index->data = malloc(length))) {
+        EPRINTF_PATH(path, "malloc failed");
+        free(index);
+        index = NULL;
+        goto fail;
+    }
+    if(fread(index->data, 1, length, fd) != length) {
+        EPRINTF_PATH(path, "fread failed");
+        free(index->data);
+        free(index);
+        index = NULL;
+        goto fail;
+    }
+    index->data_len = length;
+    index->num_entries = _le32toh(header.num_entries);
+    index->num_buckets = _le32toh(header.num_buckets);
+    index->key_size = header.key_size;
+    index->value_size = header.value_size;
     index->bucket_size = index->key_size + index->value_size;
-    index->buckets = (addr + sizeof(HashHeader));
+    index->buckets = index->data + sizeof(HashHeader);
     index->lower_limit = index->num_buckets > MIN_BUCKETS ? ((int)(index->num_buckets * BUCKET_LOWER_LIMIT)) : 0;
     index->upper_limit = (int)(index->num_buckets * BUCKET_UPPER_LIMIT);
-    if(!(index->path = strdup(path))) {
-        EPRINTF_PATH(path, "strdup failed");
-        free(index);
-        return NULL;
+fail:
+    if(fclose(fd) < 0) {
+        EPRINTF_PATH(path, "fclose failed");
     }
     return index;
 }
 
 static HashIndex *
-hashindex_create(const char *path, int capacity, int key_size, int value_size)
+hashindex_init(int capacity, int key_size, int value_size)
 {
-    FILE *fd;
-    char bucket[MAX_BUCKET_SIZE] = {};
-    int i, bucket_size;
+    HashIndex *index;
     HashHeader header = {
         .magic = MAGIC, .num_entries = 0, .key_size = key_size, .value_size = value_size
     };
+    int i;
     capacity = MAX(MIN_BUCKETS, capacity);
-    header.num_buckets = _htole32(capacity);
 
-    if(!(fd = fopen(path, "w"))) {
-        EPRINTF_PATH(path, "fopen failed");
+    if(!(index = malloc(sizeof(HashIndex)))) {
+        EPRINTF("malloc failed");
         return NULL;
     }
-    bucket_size = key_size + value_size;
-    if(fwrite(&header, 1, sizeof(header), fd) != sizeof(header)) {
-        goto error;
-    }
-    *((uint32_t *)(bucket + key_size)) = EMPTY;
-    for(i = 0; i < capacity; i++) {
-        if(fwrite(bucket, 1, bucket_size, fd) != bucket_size) {
-            goto error;
-        }
-    }
-    if(fclose(fd) < 0) {
-        EPRINTF_PATH(path, "fclose failed");
-        if(unlink(path) < 0) {
-            EPRINTF_PATH(path, "unlink failed");
-        }
+    index->data_len = sizeof(HashHeader) + capacity * (key_size + value_size);
+    if(!(index->data = calloc(index->data_len, 1))) {
+        EPRINTF("malloc failed");
+        free(index);
         return NULL;
     }
-    return hashindex_open(path, 0);
-error:
-    if(unlink(path) < 0) {
-        EPRINTF_PATH(path, "unlink failed");
-    }
-    EPRINTF_PATH(path, "fwrite failed");
-    if(fclose(fd) < 0) {
-        EPRINTF_PATH(path, "fclose failed");
+    index->num_entries = 0;
+    index->key_size = key_size;
+    index->value_size = value_size;
+    index->num_buckets = capacity;
+    index->bucket_size = index->key_size + index->value_size;
+    index->lower_limit = index->num_buckets > MIN_BUCKETS ? ((int)(index->num_buckets * BUCKET_LOWER_LIMIT)) : 0;
+    index->upper_limit = (int)(index->num_buckets * BUCKET_UPPER_LIMIT);
+    index->buckets = index->data + sizeof(HashHeader);
+    memcpy(index->data, &header, sizeof(HashHeader));
+    for(i = 0; i < capacity; i++) {
+        BUCKET_MARK_EMPTY(index, i);
     }
-    return NULL;
+    return index;
 }
 
-static int
-hashindex_clear(HashIndex *index)
+static void
+hashindex_free(HashIndex *index)
 {
-    int i;
-    for(i = 0; i < index->num_buckets; i++) {
-        BUCKET_MARK_DELETED(index, i);
-    }
-    index->num_entries = 0;
-    return hashindex_resize(index, MIN_BUCKETS);
+    free(index->data);
+    free(index);
 }
 
 static int
-hashindex_flush(HashIndex *index)
+hashindex_write(HashIndex *index, const char *path)
 {
-    if(index->readonly) {
-        return 1;
-    }
-    *((uint32_t *)(index->map_addr + 8)) = _htole32(index->num_entries);
-    *((uint32_t *)(index->map_addr + 12)) = _htole32(index->num_buckets);
-    if(msync(index->map_addr, index->map_length, MS_SYNC) < 0) {
-        EPRINTF("msync failed");
+    FILE *fd;
+    int ret = 1;
+
+    if((fd = fopen(path, "w")) == NULL) {
+        EPRINTF_PATH(path, "fopen failed");
         return 0;
     }
-    return 1;
-}
-
-static int
-hashindex_close(HashIndex *index)
-{
-    int rv = 1;
-    if(hashindex_flush(index) < 0) {
-        rv = 0;
+    *((uint32_t *)(index->data + 8)) = _htole32(index->num_entries);
+    *((uint32_t *)(index->data + 12)) = _htole32(index->num_buckets);
+    if(fwrite(index->data, 1, index->data_len, fd) != index->data_len) {
+        EPRINTF_PATH(path, "fwrite failed");
+        ret = 0;
     }
-    if(munmap(index->map_addr, index->map_length) < 0) {
-        EPRINTF("munmap failed");
-        rv = 0;
+    if(fclose(fd) < 0) {
+        EPRINTF_PATH(path, "fclose failed");
     }
-    free(index->path);
-    free(index);
-    return rv;
+    return ret;
 }
 
 static const void *

+ 2 - 2
attic/archive.py

@@ -61,7 +61,7 @@ class ChunkBuffer:
     BUFFER_SIZE = 1 * 1024 * 1024
 
     def __init__(self, key):
         self.buffer = BytesIO()
         self.packer = msgpack.Packer(unicode_errors='surrogateescape')
         self.chunks = []
         self.key = key
@@ -504,7 +504,7 @@ class ArchiveChecker:
         # Explicitly set the initial hash table capacity to avoid performance issues
         # due to hash table "resonance"
         capacity = int(len(self.repository) * 1.2)
-        self.chunks = ChunkIndex.create(os.path.join(self.tmpdir, 'chunks').encode('utf-8'), capacity=capacity)
+        self.chunks = ChunkIndex(capacity)
         marker = None
         while True:
             result = self.repository.list(limit=10000, marker=marker)

+ 3 - 3
attic/cache.py

@@ -49,7 +49,7 @@ class Cache(object):
         config.set('cache', 'manifest', '')
         with open(os.path.join(self.path, 'config'), 'w') as fd:
             config.write(fd)
-        ChunkIndex.create(os.path.join(self.path, 'chunks').encode('utf-8'))
+        ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8'))
         with open(os.path.join(self.path, 'files'), 'w') as fd:
             pass  # empty file
 
@@ -65,7 +65,7 @@ class Cache(object):
         self.id = self.config.get('cache', 'repository')
         self.manifest_id = unhexlify(self.config.get('cache', 'manifest'))
         self.timestamp = self.config.get('cache', 'timestamp', fallback=None)
-        self.chunks = ChunkIndex(os.path.join(self.path, 'chunks').encode('utf-8'))
+        self.chunks = ChunkIndex.read(os.path.join(self.path, 'chunks').encode('utf-8'))
         self.files = None
 
     def close(self):
@@ -113,7 +113,7 @@ class Cache(object):
         self.config.set('cache', 'timestamp', self.manifest.timestamp)
         with open(os.path.join(self.path, 'config'), 'w') as fd:
             self.config.write(fd)
-        self.chunks.flush()
+        self.chunks.write(os.path.join(self.path, 'chunks').encode('utf-8'))
         os.rename(os.path.join(self.path, 'txn.active'),
                   os.path.join(self.path, 'txn.tmp'))
         shutil.rmtree(os.path.join(self.path, 'txn.tmp'))

+ 25 - 24
attic/hashindex.pyx

@@ -1,20 +1,19 @@
 # -*- coding: utf-8 -*-
 import os
 
-API_VERSION = 1
+API_VERSION = 2
 
 
 cdef extern from "_hashindex.c":
     ctypedef struct HashIndex:
         pass
 
-    HashIndex *hashindex_open(char *path, int readonly)
-    HashIndex *hashindex_create(char *path, int capacity, int key_size, int value_size)
+    HashIndex *hashindex_read(char *path)
+    HashIndex *hashindex_init(int capacity, int key_size, int value_size)
+    void hashindex_free(HashIndex *index)
     void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, long long *unique_size, long long *unique_csize)
     int hashindex_get_size(HashIndex *index)
-    int hashindex_clear(HashIndex *index)
-    int hashindex_close(HashIndex *index)
-    int hashindex_flush(HashIndex *index)
+    int hashindex_write(HashIndex *index, char *path)
     void *hashindex_get(HashIndex *index, void *key)
     void *hashindex_next_key(HashIndex *index, void *key)
     int hashindex_delete(HashIndex *index, void *key)
@@ -29,31 +28,33 @@ cdef class IndexBase:
     cdef HashIndex *index
     key_size = 32
 
-    def __cinit__(self, path, readonly=False):
-        self.index = hashindex_open(<bytes>os.fsencode(path), readonly)
-        if not self.index:
-            raise Exception('Failed to open %s' % path)
+    def __cinit__(self, capacity=0, path=None):
+        if path:
+            self.index = hashindex_read(<bytes>os.fsencode(path))
+            if not self.index:
+                raise Exception('hashindex_read failed')
+        else:
+            self.index = hashindex_init(capacity, self.key_size, self.value_size)
+            if not self.index:
+                raise Exception('hashindex_init failed')
 
     def __dealloc__(self):
         if self.index:
-            if not hashindex_close(self.index):
-                raise Exception('hashindex_close failed')
+            hashindex_free(self.index)
 
     @classmethod
-    def create(cls, path, capacity=0):
-        index = hashindex_create(<bytes>os.fsencode(path), capacity, cls.key_size, cls.value_size)
-        if not index:
-            raise Exception('Failed to create %s' % path)
-        hashindex_close(index)
-        return cls(path)
+    def read(cls, path):
+        return cls(path=path)
 
-    def clear(self):
-        if not hashindex_clear(self.index):
-            raise Exception('hashindex_clear failed')
+    def write(self, path):
+        if not hashindex_write(self.index, <bytes>os.fsencode(path)):
+            raise Exception('hashindex_write failed')
 
-    def flush(self):
-        if not hashindex_flush(self.index):
-            raise Exception('hashindex_flush failed')
+    def clear(self):
+        hashindex_free(self.index)
+        self.index = hashindex_init(0, self.key_size, self.value_size)
+        if not self.index:
+            raise Exception('hashindex_init failed')
 
     def setdefault(self, key, value):
         if not key in self:

+ 1 - 1
attic/helpers.py

@@ -73,7 +73,7 @@ class UpgradableLock:
 
 def check_extension_modules():
     import attic.platform
-    if (attic.hashindex.API_VERSION != 1 or
+    if (attic.hashindex.API_VERSION != 2 or
         attic.chunker.API_VERSION != 1 or
         attic.crypto.API_VERSION != 2 or
         attic.platform.API_VERSION != 1):

+ 1 - 1
attic/remote.py

@@ -265,7 +265,7 @@ class RepositoryCache:
 
     def initialize(self):
         self.tmppath = tempfile.mkdtemp()
-        self.index = NSIndex.create(os.path.join(self.tmppath, 'index'))
+        self.index = NSIndex()
         self.data_fd = open(os.path.join(self.tmppath, 'data'), 'a+b')
 
     def cleanup(self):

+ 15 - 19
attic/repository.py

@@ -128,24 +128,22 @@ class Repository(object):
         self.write_index()
         self.rollback()
 
-    def get_read_only_index(self, transaction_id):
+    def open_index(self, transaction_id):
         if transaction_id is None:
-            return {}
-        return NSIndex((os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8'), readonly=True)
+            return NSIndex()
+        return NSIndex.read((os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8'))
 
-    def get_index(self, transaction_id, do_cleanup=True):
+    def prepare_txn(self, transaction_id, do_cleanup=True):
         self._active_txn = True
         self.lock.upgrade()
+        if not self.index:
+            self.index = self.open_index(transaction_id)
         if transaction_id is None:
-            self.index = NSIndex.create(os.path.join(self.path, 'index.tmp').encode('utf-8'))
             self.segments = {}
             self.compact = set()
         else:
             if do_cleanup:
                 self.io.cleanup(transaction_id)
-            shutil.copy(os.path.join(self.path, 'index.%d' % transaction_id),
-                        os.path.join(self.path, 'index.tmp'))
-            self.index = NSIndex(os.path.join(self.path, 'index.tmp').encode('utf-8'))
             hints = read_msgpack(os.path.join(self.path, 'hints.%d' % transaction_id))
             if hints[b'version'] != 1:
                 raise ValueError('Unknown hints file version: %d' % hints[b'version'])
@@ -158,7 +156,7 @@ class Repository(object):
                  b'compact': list(self.compact)}
         transaction_id = self.io.get_segments_transaction_id()
         write_msgpack(os.path.join(self.path, 'hints.%d' % transaction_id), hints)
-        self.index.flush()
+        self.index.write(os.path.join(self.path, 'index.tmp'))
         os.rename(os.path.join(self.path, 'index.tmp'),
                   os.path.join(self.path, 'index.%d' % transaction_id))
         # Remove old indices
@@ -199,7 +197,7 @@ class Repository(object):
         self.compact = set()
 
     def replay_segments(self, index_transaction_id, segments_transaction_id):
-        self.get_index(index_transaction_id, do_cleanup=False)
+        self.prepare_txn(index_transaction_id, do_cleanup=False)
         for segment, filename in self.io.segment_iterator():
             if index_transaction_id is not None and segment <= index_transaction_id:
                 continue
@@ -248,7 +246,7 @@ class Repository(object):
         assert not self._active_txn
         try:
             transaction_id = self.get_transaction_id()
-            current_index = self.get_read_only_index(transaction_id)
+            current_index = self.open_index(transaction_id)
         except Exception:
             transaction_id = self.io.get_segments_transaction_id()
             current_index = None
@@ -259,7 +257,7 @@ class Repository(object):
         if repair:
             self.io.cleanup(transaction_id)
         segments_transaction_id = self.io.get_segments_transaction_id()
-        self.get_index(None)
+        self.prepare_txn(None)
         for segment, filename in self.io.segment_iterator():
             if segment > transaction_id:
                 continue
@@ -310,8 +308,6 @@ class Repository(object):
         if repair:
             self.compact_segments()
             self.write_index()
-        else:
-            os.unlink(os.path.join(self.path, 'index.tmp'))
         self.rollback()
         return not error_found or repair
 
@@ -323,17 +319,17 @@ class Repository(object):
 
     def __len__(self):
         if not self.index:
-            self.index = self.get_read_only_index(self.get_transaction_id())
+            self.index = self.open_index(self.get_transaction_id())
         return len(self.index)
 
     def list(self, limit=None, marker=None):
         if not self.index:
-            self.index = self.get_read_only_index(self.get_transaction_id())
+            self.index = self.open_index(self.get_transaction_id())
         return [id_ for id_, _ in islice(self.index.iteritems(marker=marker), limit)]
 
     def get(self, id_):
         if not self.index:
-            self.index = self.get_read_only_index(self.get_transaction_id())
+            self.index = self.open_index(self.get_transaction_id())
         try:
             segment, offset = self.index[id_]
             return self.io.read(segment, offset, id_)
@@ -346,7 +342,7 @@ class Repository(object):
 
     def put(self, id, data, wait=True):
         if not self._active_txn:
-            self.get_index(self.get_transaction_id())
+            self.prepare_txn(self.get_transaction_id())
         try:
             segment, _ = self.index[id]
             self.segments[segment] -= 1
@@ -363,7 +359,7 @@ class Repository(object):
 
     def delete(self, id, wait=True):
         if not self._active_txn:
-            self.get_index(self.get_transaction_id())
+            self.prepare_txn(self.get_transaction_id())
         try:
             segment, offset = self.index.pop(id)
         except KeyError:

+ 1 - 1
attic/testsuite/archiver.py

@@ -339,7 +339,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
 
         def verify_uniqueness():
             repository = Repository(self.repository_path)
-            for key, _ in repository.get_read_only_index(repository.get_transaction_id()).iteritems():
+            for key, _ in repository.open_index(repository.get_transaction_id()).iteritems():
                 data = repository.get(key)
                 hash = sha256(data).digest()
                 if not hash in seen:

+ 11 - 22
attic/testsuite/hashindex.py

@@ -8,8 +8,7 @@ from attic.testsuite import AtticTestCase
 class HashIndexTestCase(AtticTestCase):
 
     def _generic_test(self, cls, make_value, sha):
-        idx_name = tempfile.NamedTemporaryFile()
-        idx = cls.create(idx_name.name)
+        idx = cls()
         self.assert_equal(len(idx), 0)
         # Test set
         for x in range(100):
@@ -27,19 +26,22 @@ class HashIndexTestCase(AtticTestCase):
         for x in range(50):
             del idx[bytes('%-32d' % x, 'ascii')]
         self.assert_equal(len(idx), 50)
+        idx_name = tempfile.NamedTemporaryFile()
+        idx.write(idx_name.name)
         del idx
         # Verify file contents
         with open(idx_name.name, 'rb') as fd:
             self.assert_equal(hashlib.sha256(fd.read()).hexdigest(), sha)
         # Make sure we can open the file
-        idx = cls(idx_name.name)
+        idx = cls.read(idx_name.name)
         self.assert_equal(len(idx), 50)
         for x in range(50, 100):
             self.assert_equal(idx[bytes('%-32d' % x, 'ascii')], make_value(x * 2))
         idx.clear()
         self.assert_equal(len(idx), 0)
+        idx.write(idx_name.name)
         del idx
-        self.assert_equal(len(cls(idx_name.name)), 0)
+        self.assert_equal(len(cls.read(idx_name.name)), 0)
 
     def test_nsindex(self):
         self._generic_test(NSIndex, lambda x: (x, x), '369a18ae6a52524eb2884a3c0fdc2824947edd017a2688c5d4d7b3510c245ab9')
@@ -50,35 +52,22 @@ class HashIndexTestCase(AtticTestCase):
     def test_resize(self):
         n = 2000  # Must be >= MIN_BUCKETS
         idx_name = tempfile.NamedTemporaryFile()
-        idx = NSIndex.create(idx_name.name)
+        idx = NSIndex()
+        idx.write(idx_name.name)
         initial_size = os.path.getsize(idx_name.name)
         self.assert_equal(len(idx), 0)
         for x in range(n):
             idx[bytes('%-32d' % x, 'ascii')] = x, x
-        idx.flush()
+        idx.write(idx_name.name)
         self.assert_true(initial_size < os.path.getsize(idx_name.name))
         for x in range(n):
             del idx[bytes('%-32d' % x, 'ascii')]
         self.assert_equal(len(idx), 0)
-        idx.flush()
+        idx.write(idx_name.name)
         self.assert_equal(initial_size, os.path.getsize(idx_name.name))
 
-    def test_read_only(self):
-        """Make sure read_only indices work even they contain a lot of tombstones
-        """
-        idx_name = tempfile.NamedTemporaryFile()
-        idx = NSIndex.create(idx_name.name)
-        for x in range(100):
-            idx[bytes('%-0.32d' % x, 'ascii')] = x, x
-        for x in range(99):
-            del idx[bytes('%-0.32d' % x, 'ascii')]
-        idx.flush()
-        idx2 = NSIndex(idx_name.name, readonly=True)
-        self.assert_equal(idx2[bytes('%-0.32d' % 99, 'ascii')], (99, 99))
-
     def test_iteritems(self):
-        idx_name = tempfile.NamedTemporaryFile()
-        idx = NSIndex.create(idx_name.name)
+        idx = NSIndex()
         for x in range(100):
             idx[bytes('%-0.32d' % x, 'ascii')] = x, x
         all = list(idx.iteritems())

+ 1 - 1
attic/testsuite/repository.py

@@ -208,7 +208,7 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
         return sorted(int(n) for n in os.listdir(os.path.join(self.tmppath, 'repository', 'data', '0')) if n.isdigit())[-1]
 
     def open_index(self):
-        return NSIndex(os.path.join(self.tmppath, 'repository', 'index.{}'.format(self.get_head())))
+        return NSIndex.read(os.path.join(self.tmppath, 'repository', 'index.{}'.format(self.get_head())))
 
     def corrupt_object(self, id_):
         idx = self.open_index()