浏览代码

HashIndex: Switch to a non-mmap based implementation

This eliminates unnecessary IO on low memory systems
Jonas Borgström 11 年之前
父节点
当前提交
2f72b9f960
共有 11 个文件被更改，包括 166 次插入和 218 次删除
  1. 7 0
      CHANGES
  2. 99 144
      attic/_hashindex.c
  3. 2 2
      attic/archive.py
  4. 3 3
      attic/cache.py
  5. 25 24
      attic/hashindex.pyx
  6. 1 1
      attic/helpers.py
  7. 1 1
      attic/remote.py
  8. 15 19
      attic/repository.py
  9. 1 1
      attic/testsuite/archiver.py
  10. 11 22
      attic/testsuite/hashindex.py
  11. 1 1
      attic/testsuite/repository.py

+ 7 - 0
CHANGES

@@ -3,6 +3,13 @@ Attic Changelog
 
 Here you can see the full list of changes between each Attic release.
 
+Version 0.14
+------------
+
+(feature release, released on X)
+
+- HashIndex improvements, eliminates unnecessary IO on low memory systems.
+
 Version 0.13
 ------------
 

+ 99 - 144
attic/_hashindex.c

@@ -7,7 +7,6 @@
 #include <sys/stat.h>
 #include <fcntl.h>
 #include <unistd.h>
-#include <sys/mman.h>
 
 #if defined(BYTE_ORDER)&&(BYTE_ORDER == BIG_ENDIAN)
 #define _le32toh(x) __builtin_bswap32(x)
@@ -28,9 +27,7 @@ typedef struct {
 } __attribute__((__packed__)) HashHeader;
 
 typedef struct {
-    char *path;
-    void *map_addr;
-    off_t map_length;
+    void *data;
     void *buckets;
     int num_entries;
     int num_buckets;
@@ -39,7 +36,7 @@ typedef struct {
     int bucket_size;
     int lower_limit;
     int upper_limit;
-    int readonly;
+    int data_len;
 } HashIndex;
 
 #define MAGIC "ATTICIDX"
@@ -58,15 +55,14 @@ typedef struct {
 #define BUCKET_MATCHES_KEY(index, idx, key) (memcmp(key, BUCKET_ADDR(index, idx), index->key_size) == 0)
 
 #define BUCKET_MARK_DELETED(index, idx) (*((uint32_t *)(BUCKET_ADDR(index, idx) + index->key_size)) = DELETED)
+#define BUCKET_MARK_EMPTY(index, idx) (*((uint32_t *)(BUCKET_ADDR(index, idx) + index->key_size)) = EMPTY)
 
-#define EPRINTF(msg, ...) EPRINTF_PATH(index->path, msg, ##__VA_ARGS__)
+#define EPRINTF(msg, ...) fprintf(stderr, "hashindex: " msg "\n", ##__VA_ARGS__)
 #define EPRINTF_PATH(path, msg, ...) fprintf(stderr, "hashindex: %s: " msg "\n", path, ##__VA_ARGS__)
 
-static HashIndex *hashindex_open(const char *path, int readonly);
-static int hashindex_close(HashIndex *index);
-static int hashindex_clear(HashIndex *index);
-static int hashindex_flush(HashIndex *index);
-static HashIndex *hashindex_create(const char *path, int capacity, int key_size, int value_size);
+static HashIndex *hashindex_read(const char *path);
+static int hashindex_write(HashIndex *index, const char *path);
+static HashIndex *hashindex_init(int capacity, int key_size, int value_size);
 static const void *hashindex_get(HashIndex *index, const void *key);
 static int hashindex_set(HashIndex *index, const void *key, const void *value);
 static int hashindex_delete(HashIndex *index, const void *key);
@@ -96,7 +92,7 @@ hashindex_lookup(HashIndex *index, const void *key)
             }
         }
         else if(BUCKET_MATCHES_KEY(index, idx, key)) {
-            if (didx != -1 && !index->readonly) {
+            if (didx != -1) {
                 memcpy(BUCKET_ADDR(index, didx), BUCKET_ADDR(index, idx), index->bucket_size);
                 BUCKET_MARK_DELETED(index, idx);
                 idx = didx;
@@ -113,200 +109,159 @@ hashindex_lookup(HashIndex *index, const void *key)
 static int
 hashindex_resize(HashIndex *index, int capacity)
 {
-    char *new_path = malloc(strlen(index->path) + 5);
-    int ret = 0;
     HashIndex *new;
     void *key = NULL;
-    strcpy(new_path, index->path);
-    strcat(new_path, ".tmp");
 
-    if(!(new = hashindex_create(new_path, capacity, index->key_size, index->value_size))) {
-        free(new_path);
+    if(!(new = hashindex_init(capacity, index->key_size, index->value_size))) {
         return 0;
     }
     while((key = hashindex_next_key(index, key))) {
         hashindex_set(new, key, hashindex_get(index, key));
     }
-    munmap(index->map_addr, index->map_length);
-    index->map_addr = new->map_addr;
-    index->map_length = new->map_length;
+    free(index->data);
+    index->data = new->data;
+    index->data_len = new->data_len;
     index->num_buckets = new->num_buckets;
     index->lower_limit = new->lower_limit;
     index->upper_limit = new->upper_limit;
     index->buckets = new->buckets;
-    if(unlink(index->path) < 0) {
-        EPRINTF("unlink failed");
-        goto out;
-    }
-    if(rename(new_path, index->path) < 0) {
-        EPRINTF_PATH(new_path, "rename failed");
-        goto out;
-    }
-    ret = 1;
-out:
-    free(new_path);
-    free(new->path);
     free(new);
-    return ret;
+    return 1;
 }
 
 /* Public API */
 static HashIndex *
-hashindex_open(const char *path, int readonly)
+hashindex_read(const char *path)
 {
-    void *addr;
-    int fd, oflags, prot;
+    FILE *fd;
     off_t length;
-    HashHeader *header;
-    HashIndex *index;
-
-    if(readonly) {
-        oflags = O_RDONLY;
-        prot = PROT_READ;
-    }
-    else {
-        oflags = O_RDWR;
-        prot = PROT_READ | PROT_WRITE;
-    }
+    HashHeader header;
+    HashIndex *index = NULL;
 
-    if((fd = open(path, oflags)) < 0) {
-        EPRINTF_PATH(path, "open failed");
+    if((fd = fopen(path, "r")) == NULL) {
+        EPRINTF_PATH(path, "fopen failed");
         return NULL;
     }
-    if((length = lseek(fd, 0, SEEK_END)) < 0) {
-        EPRINTF_PATH(path, "lseek failed");
-        if(close(fd) < 0) {
-            EPRINTF_PATH(path, "close failed");
-        }
-        return NULL;
+    if(fread(&header, 1, sizeof(HashHeader), fd) != sizeof(HashHeader)) {
+        EPRINTF_PATH(path, "fread failed");
+        goto fail;
     }
-    addr = mmap(0, length, prot, MAP_SHARED, fd, 0);
-    if(close(fd) < 0) {
-        EPRINTF_PATH(path, "close failed");
-        return NULL;
+    if(fseek(fd, 0, SEEK_END) < 0) {
+        EPRINTF_PATH(path, "fseek failed");
+        goto fail;
     }
-    if(addr == MAP_FAILED) {
-        EPRINTF_PATH(path, "mmap failed");
-        return NULL;
+    if((length = ftell(fd)) < 0) {
+        EPRINTF_PATH(path, "ftell failed");
+        goto fail;
+    }
+    if(fseek(fd, 0, SEEK_SET) < 0) {
+        EPRINTF_PATH(path, "fseek failed");
+        goto fail;
     }
-    header = (HashHeader *)addr;
-    if(memcmp(header->magic, MAGIC, 8)) {
+    if(memcmp(header.magic, MAGIC, 8)) {
         EPRINTF_PATH(path, "Unknown file header");
-        return NULL;
+        goto fail;
     }
-    if(length != sizeof(HashHeader) + _le32toh(header->num_buckets) * (header->key_size + header->value_size)) {
+    if(length != sizeof(HashHeader) + _le32toh(header.num_buckets) * (header.key_size + header.value_size)) {
         EPRINTF_PATH(path, "Incorrect file length");
-        return NULL;
+        goto fail;
     }
     if(!(index = malloc(sizeof(HashIndex)))) {
         EPRINTF_PATH(path, "malloc failed");
-        return NULL;
+        goto fail;
     }
-    index->readonly = readonly;
-    index->map_addr = addr;
-    index->map_length = length;
-    index->num_entries = _le32toh(header->num_entries);
-    index->num_buckets = _le32toh(header->num_buckets);
-    index->key_size = header->key_size;
-    index->value_size = header->value_size;
+    if(!(index->data = malloc(length))) {
+        EPRINTF_PATH(path, "malloc failed");
+        free(index);
+        index = NULL;
+        goto fail;
+    }
+    if(fread(index->data, 1, length, fd) != length) {
+        EPRINTF_PATH(path, "fread failed");
+        free(index->data);
+        free(index);
+        index = NULL;
+        goto fail;
+    }
+    index->data_len = length;
+    index->num_entries = _le32toh(header.num_entries);
+    index->num_buckets = _le32toh(header.num_buckets);
+    index->key_size = header.key_size;
+    index->value_size = header.value_size;
     index->bucket_size = index->key_size + index->value_size;
-    index->buckets = (addr + sizeof(HashHeader));
+    index->buckets = index->data + sizeof(HashHeader);
     index->lower_limit = index->num_buckets > MIN_BUCKETS ? ((int)(index->num_buckets * BUCKET_LOWER_LIMIT)) : 0;
     index->upper_limit = (int)(index->num_buckets * BUCKET_UPPER_LIMIT);
-    if(!(index->path = strdup(path))) {
-        EPRINTF_PATH(path, "strdup failed");
-        free(index);
-        return NULL;
+fail:
+    if(fclose(fd) < 0) {
+        EPRINTF_PATH(path, "fclose failed");
     }
     return index;
 }
 
 static HashIndex *
-hashindex_create(const char *path, int capacity, int key_size, int value_size)
+hashindex_init(int capacity, int key_size, int value_size)
 {
-    FILE *fd;
-    char bucket[MAX_BUCKET_SIZE] = {};
-    int i, bucket_size;
+    HashIndex *index;
     HashHeader header = {
         .magic = MAGIC, .num_entries = 0, .key_size = key_size, .value_size = value_size
     };
+    int i;
     capacity = MAX(MIN_BUCKETS, capacity);
-    header.num_buckets = _htole32(capacity);
 
-    if(!(fd = fopen(path, "w"))) {
-        EPRINTF_PATH(path, "fopen failed");
+    if(!(index = malloc(sizeof(HashIndex)))) {
+        EPRINTF("malloc failed");
         return NULL;
     }
-    bucket_size = key_size + value_size;
-    if(fwrite(&header, 1, sizeof(header), fd) != sizeof(header)) {
-        goto error;
-    }
-    *((uint32_t *)(bucket + key_size)) = EMPTY;
-    for(i = 0; i < capacity; i++) {
-        if(fwrite(bucket, 1, bucket_size, fd) != bucket_size) {
-            goto error;
-        }
-    }
-    if(fclose(fd) < 0) {
-        EPRINTF_PATH(path, "fclose failed");
-        if(unlink(path) < 0) {
-            EPRINTF_PATH(path, "unlink failed");
-        }
+    index->data_len = sizeof(HashHeader) + capacity * (key_size + value_size);
+    if(!(index->data = calloc(index->data_len, 1))) {
+        EPRINTF("malloc failed");
+        free(index);
         return NULL;
     }
-    return hashindex_open(path, 0);
-error:
-    if(unlink(path) < 0) {
-        EPRINTF_PATH(path, "unlink failed");
-    }
-    EPRINTF_PATH(path, "fwrite failed");
-    if(fclose(fd) < 0) {
-        EPRINTF_PATH(path, "fclose failed");
+    index->num_entries = 0;
+    index->key_size = key_size;
+    index->value_size = value_size;
+    index->num_buckets = capacity;
+    index->bucket_size = index->key_size + index->value_size;
+    index->lower_limit = index->num_buckets > MIN_BUCKETS ? ((int)(index->num_buckets * BUCKET_LOWER_LIMIT)) : 0;
+    index->upper_limit = (int)(index->num_buckets * BUCKET_UPPER_LIMIT);
+    index->buckets = index->data + sizeof(HashHeader);
+    memcpy(index->data, &header, sizeof(HashHeader));
+    for(i = 0; i < capacity; i++) {
+        BUCKET_MARK_EMPTY(index, i);
     }
-    return NULL;
+    return index;
 }
 
-static int
-hashindex_clear(HashIndex *index)
+static void
+hashindex_free(HashIndex *index)
 {
-    int i;
-    for(i = 0; i < index->num_buckets; i++) {
-        BUCKET_MARK_DELETED(index, i);
-    }
-    index->num_entries = 0;
-    return hashindex_resize(index, MIN_BUCKETS);
+    free(index->data);
+    free(index);
 }
 
 static int
-hashindex_flush(HashIndex *index)
+hashindex_write(HashIndex *index, const char *path)
 {
-    if(index->readonly) {
-        return 1;
-    }
-    *((uint32_t *)(index->map_addr + 8)) = _htole32(index->num_entries);
-    *((uint32_t *)(index->map_addr + 12)) = _htole32(index->num_buckets);
-    if(msync(index->map_addr, index->map_length, MS_SYNC) < 0) {
-        EPRINTF("msync failed");
+    FILE *fd;
+    int ret = 1;
+
+    if((fd = fopen(path, "w")) == NULL) {
+        EPRINTF_PATH(path, "fopen failed");
         return 0;
     }
-    return 1;
-}
-
-static int
-hashindex_close(HashIndex *index)
-{
-    int rv = 1;
-    if(hashindex_flush(index) < 0) {
-        rv = 0;
+    *((uint32_t *)(index->data + 8)) = _htole32(index->num_entries);
+    *((uint32_t *)(index->data + 12)) = _htole32(index->num_buckets);
+    if(fwrite(index->data, 1, index->data_len, fd) != index->data_len) {
+        EPRINTF_PATH(path, "fwrite failed");
+        ret = 0;
     }
-    if(munmap(index->map_addr, index->map_length) < 0) {
-        EPRINTF("munmap failed");
-        rv = 0;
+    if(fclose(fd) < 0) {
+        EPRINTF_PATH(path, "fclose failed");
     }
-    free(index->path);
-    free(index);
-    return rv;
+    return ret;
 }
 
 static const void *

+ 2 - 2
attic/archive.py

@@ -61,7 +61,7 @@ class ChunkBuffer:
     BUFFER_SIZE = 1 * 1024 * 1024
 
     def __init__(self, key):
         self.buffer = BytesIO()
         self.packer = msgpack.Packer(unicode_errors='surrogateescape')
         self.chunks = []
         self.key = key
@@ -504,7 +504,7 @@ class ArchiveChecker:
         # Explicitly set the initial hash table capacity to avoid performance issues
         # due to hash table "resonance"
         capacity = int(len(self.repository) * 1.2)
-        self.chunks = ChunkIndex.create(os.path.join(self.tmpdir, 'chunks').encode('utf-8'), capacity=capacity)
+        self.chunks = ChunkIndex(capacity)
         marker = None
         while True:
             result = self.repository.list(limit=10000, marker=marker)

+ 3 - 3
attic/cache.py

@@ -49,7 +49,7 @@ class Cache(object):
         config.set('cache', 'manifest', '')
         with open(os.path.join(self.path, 'config'), 'w') as fd:
             config.write(fd)
-        ChunkIndex.create(os.path.join(self.path, 'chunks').encode('utf-8'))
+        ChunkIndex().write(os.path.join(self.path, 'chunks').encode('utf-8'))
         with open(os.path.join(self.path, 'files'), 'w') as fd:
             pass  # empty file
 
@@ -65,7 +65,7 @@ class Cache(object):
         self.id = self.config.get('cache', 'repository')
         self.manifest_id = unhexlify(self.config.get('cache', 'manifest'))
         self.timestamp = self.config.get('cache', 'timestamp', fallback=None)
-        self.chunks = ChunkIndex(os.path.join(self.path, 'chunks').encode('utf-8'))
+        self.chunks = ChunkIndex.read(os.path.join(self.path, 'chunks').encode('utf-8'))
         self.files = None
 
     def close(self):
@@ -113,7 +113,7 @@ class Cache(object):
         self.config.set('cache', 'timestamp', self.manifest.timestamp)
         with open(os.path.join(self.path, 'config'), 'w') as fd:
             self.config.write(fd)
-        self.chunks.flush()
+        self.chunks.write(os.path.join(self.path, 'chunks').encode('utf-8'))
         os.rename(os.path.join(self.path, 'txn.active'),
                   os.path.join(self.path, 'txn.tmp'))
         shutil.rmtree(os.path.join(self.path, 'txn.tmp'))

+ 25 - 24
attic/hashindex.pyx

@@ -1,20 +1,19 @@
 # -*- coding: utf-8 -*-
 import os
 
-API_VERSION = 1
+API_VERSION = 2
 
 
 cdef extern from "_hashindex.c":
     ctypedef struct HashIndex:
         pass
 
-    HashIndex *hashindex_open(char *path, int readonly)
-    HashIndex *hashindex_create(char *path, int capacity, int key_size, int value_size)
+    HashIndex *hashindex_read(char *path)
+    HashIndex *hashindex_init(int capacity, int key_size, int value_size)
+    void hashindex_free(HashIndex *index)
     void hashindex_summarize(HashIndex *index, long long *total_size, long long *total_csize, long long *unique_size, long long *unique_csize)
     int hashindex_get_size(HashIndex *index)
-    int hashindex_clear(HashIndex *index)
-    int hashindex_close(HashIndex *index)
-    int hashindex_flush(HashIndex *index)
+    int hashindex_write(HashIndex *index, char *path)
     void *hashindex_get(HashIndex *index, void *key)
     void *hashindex_next_key(HashIndex *index, void *key)
     int hashindex_delete(HashIndex *index, void *key)
@@ -29,31 +28,33 @@ cdef class IndexBase:
     cdef HashIndex *index
     key_size = 32
 
-    def __cinit__(self, path, readonly=False):
-        self.index = hashindex_open(<bytes>os.fsencode(path), readonly)
-        if not self.index:
-            raise Exception('Failed to open %s' % path)
+    def __cinit__(self, capacity=0, path=None):
+        if path:
+            self.index = hashindex_read(<bytes>os.fsencode(path))
+            if not self.index:
+                raise Exception('hashindex_read failed')
+        else:
+            self.index = hashindex_init(capacity, self.key_size, self.value_size)
+            if not self.index:
+                raise Exception('hashindex_init failed')
 
     def __dealloc__(self):
         if self.index:
-            if not hashindex_close(self.index):
-                raise Exception('hashindex_close failed')
+            hashindex_free(self.index)
 
     @classmethod
-    def create(cls, path, capacity=0):
-        index = hashindex_create(<bytes>os.fsencode(path), capacity, cls.key_size, cls.value_size)
-        if not index:
-            raise Exception('Failed to create %s' % path)
-        hashindex_close(index)
-        return cls(path)
+    def read(cls, path):
+        return cls(path=path)
 
-    def clear(self):
-        if not hashindex_clear(self.index):
-            raise Exception('hashindex_clear failed')
+    def write(self, path):
+        if not hashindex_write(self.index, <bytes>os.fsencode(path)):
+            raise Exception('hashindex_write failed')
 
-    def flush(self):
-        if not hashindex_flush(self.index):
-            raise Exception('hashindex_flush failed')
+    def clear(self):
+        hashindex_free(self.index)
+        self.index = hashindex_init(0, self.key_size, self.value_size)
+        if not self.index:
+            raise Exception('hashindex_init failed')
 
     def setdefault(self, key, value):
         if not key in self:

+ 1 - 1
attic/helpers.py

@@ -73,7 +73,7 @@ class UpgradableLock:
 
 def check_extension_modules():
     import attic.platform
-    if (attic.hashindex.API_VERSION != 1 or
+    if (attic.hashindex.API_VERSION != 2 or
         attic.chunker.API_VERSION != 1 or
         attic.crypto.API_VERSION != 2 or
         attic.platform.API_VERSION != 1):

+ 1 - 1
attic/remote.py

@@ -265,7 +265,7 @@ class RepositoryCache:
 
     def initialize(self):
         self.tmppath = tempfile.mkdtemp()
-        self.index = NSIndex.create(os.path.join(self.tmppath, 'index'))
+        self.index = NSIndex()
         self.data_fd = open(os.path.join(self.tmppath, 'data'), 'a+b')
 
     def cleanup(self):

+ 15 - 19
attic/repository.py

@@ -128,24 +128,22 @@ class Repository(object):
         self.write_index()
         self.rollback()
 
-    def get_read_only_index(self, transaction_id):
+    def open_index(self, transaction_id):
         if transaction_id is None:
-            return {}
-        return NSIndex((os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8'), readonly=True)
+            return NSIndex()
+        return NSIndex.read((os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8'))
 
-    def get_index(self, transaction_id, do_cleanup=True):
+    def prepare_txn(self, transaction_id, do_cleanup=True):
         self._active_txn = True
         self.lock.upgrade()
+        if not self.index:
+            self.index = self.open_index(transaction_id)
         if transaction_id is None:
-            self.index = NSIndex.create(os.path.join(self.path, 'index.tmp').encode('utf-8'))
             self.segments = {}
             self.compact = set()
         else:
             if do_cleanup:
                 self.io.cleanup(transaction_id)
-            shutil.copy(os.path.join(self.path, 'index.%d' % transaction_id),
-                        os.path.join(self.path, 'index.tmp'))
-            self.index = NSIndex(os.path.join(self.path, 'index.tmp').encode('utf-8'))
             hints = read_msgpack(os.path.join(self.path, 'hints.%d' % transaction_id))
             if hints[b'version'] != 1:
                 raise ValueError('Unknown hints file version: %d' % hints[b'version'])
@@ -158,7 +156,7 @@ class Repository(object):
                  b'compact': list(self.compact)}
         transaction_id = self.io.get_segments_transaction_id()
         write_msgpack(os.path.join(self.path, 'hints.%d' % transaction_id), hints)
-        self.index.flush()
+        self.index.write(os.path.join(self.path, 'index.tmp'))
         os.rename(os.path.join(self.path, 'index.tmp'),
                   os.path.join(self.path, 'index.%d' % transaction_id))
         # Remove old indices
@@ -199,7 +197,7 @@ class Repository(object):
         self.compact = set()
 
     def replay_segments(self, index_transaction_id, segments_transaction_id):
-        self.get_index(index_transaction_id, do_cleanup=False)
+        self.prepare_txn(index_transaction_id, do_cleanup=False)
         for segment, filename in self.io.segment_iterator():
             if index_transaction_id is not None and segment <= index_transaction_id:
                 continue
@@ -248,7 +246,7 @@ class Repository(object):
         assert not self._active_txn
         try:
             transaction_id = self.get_transaction_id()
-            current_index = self.get_read_only_index(transaction_id)
+            current_index = self.open_index(transaction_id)
         except Exception:
             transaction_id = self.io.get_segments_transaction_id()
             current_index = None
@@ -259,7 +257,7 @@ class Repository(object):
         if repair:
             self.io.cleanup(transaction_id)
         segments_transaction_id = self.io.get_segments_transaction_id()
-        self.get_index(None)
+        self.prepare_txn(None)
         for segment, filename in self.io.segment_iterator():
             if segment > transaction_id:
                 continue
@@ -310,8 +308,6 @@ class Repository(object):
         if repair:
             self.compact_segments()
             self.write_index()
-        else:
-            os.unlink(os.path.join(self.path, 'index.tmp'))
         self.rollback()
         return not error_found or repair
 
@@ -323,17 +319,17 @@ class Repository(object):
 
     def __len__(self):
         if not self.index:
-            self.index = self.get_read_only_index(self.get_transaction_id())
+            self.index = self.open_index(self.get_transaction_id())
         return len(self.index)
 
     def list(self, limit=None, marker=None):
         if not self.index:
-            self.index = self.get_read_only_index(self.get_transaction_id())
+            self.index = self.open_index(self.get_transaction_id())
         return [id_ for id_, _ in islice(self.index.iteritems(marker=marker), limit)]
 
     def get(self, id_):
         if not self.index:
-            self.index = self.get_read_only_index(self.get_transaction_id())
+            self.index = self.open_index(self.get_transaction_id())
         try:
             segment, offset = self.index[id_]
             return self.io.read(segment, offset, id_)
@@ -346,7 +342,7 @@ class Repository(object):
 
     def put(self, id, data, wait=True):
         if not self._active_txn:
-            self.get_index(self.get_transaction_id())
+            self.prepare_txn(self.get_transaction_id())
         try:
             segment, _ = self.index[id]
             self.segments[segment] -= 1
@@ -363,7 +359,7 @@ class Repository(object):
 
     def delete(self, id, wait=True):
         if not self._active_txn:
-            self.get_index(self.get_transaction_id())
+            self.prepare_txn(self.get_transaction_id())
         try:
             segment, offset = self.index.pop(id)
         except KeyError:

+ 1 - 1
attic/testsuite/archiver.py

@@ -339,7 +339,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
 
         def verify_uniqueness():
             repository = Repository(self.repository_path)
-            for key, _ in repository.get_read_only_index(repository.get_transaction_id()).iteritems():
+            for key, _ in repository.open_index(repository.get_transaction_id()).iteritems():
                 data = repository.get(key)
                 hash = sha256(data).digest()
                 if not hash in seen:

+ 11 - 22
attic/testsuite/hashindex.py

@@ -8,8 +8,7 @@ from attic.testsuite import AtticTestCase
 class HashIndexTestCase(AtticTestCase):
 
     def _generic_test(self, cls, make_value, sha):
-        idx_name = tempfile.NamedTemporaryFile()
-        idx = cls.create(idx_name.name)
+        idx = cls()
         self.assert_equal(len(idx), 0)
         # Test set
         for x in range(100):
@@ -27,19 +26,22 @@ class HashIndexTestCase(AtticTestCase):
         for x in range(50):
             del idx[bytes('%-32d' % x, 'ascii')]
         self.assert_equal(len(idx), 50)
+        idx_name = tempfile.NamedTemporaryFile()
+        idx.write(idx_name.name)
         del idx
         # Verify file contents
         with open(idx_name.name, 'rb') as fd:
             self.assert_equal(hashlib.sha256(fd.read()).hexdigest(), sha)
         # Make sure we can open the file
-        idx = cls(idx_name.name)
+        idx = cls.read(idx_name.name)
         self.assert_equal(len(idx), 50)
         for x in range(50, 100):
             self.assert_equal(idx[bytes('%-32d' % x, 'ascii')], make_value(x * 2))
         idx.clear()
         self.assert_equal(len(idx), 0)
+        idx.write(idx_name.name)
         del idx
-        self.assert_equal(len(cls(idx_name.name)), 0)
+        self.assert_equal(len(cls.read(idx_name.name)), 0)
 
     def test_nsindex(self):
         self._generic_test(NSIndex, lambda x: (x, x), '369a18ae6a52524eb2884a3c0fdc2824947edd017a2688c5d4d7b3510c245ab9')
@@ -50,35 +52,22 @@ class HashIndexTestCase(AtticTestCase):
     def test_resize(self):
         n = 2000  # Must be >= MIN_BUCKETS
         idx_name = tempfile.NamedTemporaryFile()
-        idx = NSIndex.create(idx_name.name)
+        idx = NSIndex()
+        idx.write(idx_name.name)
         initial_size = os.path.getsize(idx_name.name)
         self.assert_equal(len(idx), 0)
         for x in range(n):
             idx[bytes('%-32d' % x, 'ascii')] = x, x
-        idx.flush()
+        idx.write(idx_name.name)
         self.assert_true(initial_size < os.path.getsize(idx_name.name))
         for x in range(n):
             del idx[bytes('%-32d' % x, 'ascii')]
         self.assert_equal(len(idx), 0)
-        idx.flush()
+        idx.write(idx_name.name)
         self.assert_equal(initial_size, os.path.getsize(idx_name.name))
 
-    def test_read_only(self):
-        """Make sure read_only indices work even they contain a lot of tombstones
-        """
-        idx_name = tempfile.NamedTemporaryFile()
-        idx = NSIndex.create(idx_name.name)
-        for x in range(100):
-            idx[bytes('%-0.32d' % x, 'ascii')] = x, x
-        for x in range(99):
-            del idx[bytes('%-0.32d' % x, 'ascii')]
-        idx.flush()
-        idx2 = NSIndex(idx_name.name, readonly=True)
-        self.assert_equal(idx2[bytes('%-0.32d' % 99, 'ascii')], (99, 99))
-
     def test_iteritems(self):
-        idx_name = tempfile.NamedTemporaryFile()
-        idx = NSIndex.create(idx_name.name)
+        idx = NSIndex()
         for x in range(100):
             idx[bytes('%-0.32d' % x, 'ascii')] = x, x
         all = list(idx.iteritems())

+ 1 - 1
attic/testsuite/repository.py

@@ -208,7 +208,7 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
         return sorted(int(n) for n in os.listdir(os.path.join(self.tmppath, 'repository', 'data', '0')) if n.isdigit())[-1]
 
     def open_index(self):
-        return NSIndex(os.path.join(self.tmppath, 'repository', 'index.{}'.format(self.get_head())))
+        return NSIndex.read(os.path.join(self.tmppath, 'repository', 'index.{}'.format(self.get_head())))
 
     def corrupt_object(self, id_):
         idx = self.open_index()