
move capacity calculation to IndexBase, fixes #2646

we now just pass how many "usable" hashtable entries we want and IndexBase computes
the real hashtable capacity internally via int(usable / MAX_LOAD_FACTOR).
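
A rough sketch of the new convention in plain Python (illustrative only, not the
actual Cython code; the 0.93 load factor is an assumed example value, the real one
is HASH_MAX_LOAD, see the hashindex.pyx diff below):

    MAX_LOAD_FACTOR = 0.93  # assumed example value (the real one is HASH_MAX_LOAD)

    def capacity_for(usable):
        # callers state how many entries they need to be able to store;
        # the table over-allocates so the load factor stays below the maximum
        return int(usable / MAX_LOAD_FACTOR)

    capacity_for(1_100_000)  # -> 1182795 buckets for 1.1 M usable entries
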
Thomas Waldmann 7 years ago
parent commit de113bab23
3 changed files with 12 additions and 12 deletions
  1. src/borg/archive.py (3 additions, 4 deletions)
  2. src/borg/cache.py (6 additions, 7 deletions)
  3. src/borg/hashindex.pyx (3 additions, 1 deletion)

+ 3 - 4
src/borg/archive.py

@@ -1332,11 +1332,10 @@ class ArchiveChecker:
     def init_chunks(self):
         """Fetch a list of all object keys from repository
         """
-        # Explicitly set the initial hash table capacity to avoid performance issues
+        # Explicitly set the initial usable hash table capacity to avoid performance issues
         # due to hash table "resonance".
-        # Since reconstruction of archive items can add some new chunks, add 10 % headroom
-        capacity = int(len(self.repository) / ChunkIndex.MAX_LOAD_FACTOR * 1.1)
-        self.chunks = ChunkIndex(capacity)
+        # Since reconstruction of archive items can add some new chunks, add 10 % headroom.
+        self.chunks = ChunkIndex(usable=len(self.repository) * 1.1)
         marker = None
         while True:
             result = self.repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
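
To make the headroom arithmetic concrete (using the same illustrative 0.93 load
factor as above): with 1,000,000 objects in the repository, usable =
1,000,000 * 1.1 = 1,100,000, so the index allocates int(1,100,000 / 0.93) =
1,182,795 buckets, the same capacity the old inline formula
int(1,000,000 / 0.93 * 1.1) produced, just computed in one place.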

+ 6 - 7
src/borg/cache.py

@@ -780,11 +780,11 @@ class LocalCache(CacheStatsMixin):
             # deallocates old hashindex, creates empty hashindex:
             chunk_idx.clear()
             cleanup_outdated(cached_ids - archive_ids)
-            # Explicitly set the initial hash table capacity to avoid performance issues
+            # Explicitly set the usable initial hash table capacity to avoid performance issues
             # due to hash table "resonance".
-            master_index_capacity = int(len(self.repository) / ChunkIndex.MAX_LOAD_FACTOR)
+            master_index_capacity = len(self.repository)
             if archive_ids:
-                chunk_idx = None if not self.do_cache else ChunkIndex(master_index_capacity)
+                chunk_idx = None if not self.do_cache else ChunkIndex(usable=master_index_capacity)
                 pi = ProgressIndicatorPercent(total=len(archive_ids), step=0.1,
                                               msg='%3.0f%% Syncing chunks cache. Processing archive %s',
                                               msgid='cache.sync')
@@ -805,7 +805,7 @@ class LocalCache(CacheStatsMixin):
                         logger.info("Merging into master chunks index ...")
                         chunk_idx.merge(archive_chunk_idx)
                     else:
-                        chunk_idx = chunk_idx or ChunkIndex(master_index_capacity)
+                        chunk_idx = chunk_idx or ChunkIndex(usable=master_index_capacity)
                         logger.info('Fetching archive index for %s ...', archive_name)
                         fetch_and_build_idx(archive_id, decrypted_repository, chunk_idx)
                 if not self.do_cache:
@@ -1087,12 +1087,11 @@ Chunk index:    {0.total_unique_chunks:20d}             unknown"""
 
     def begin_txn(self):
         self._txn_active = True
-        # Explicitly set the initial hash table capacity to avoid performance issues
+        # Explicitly set the initial usable hash table capacity to avoid performance issues
         # due to hash table "resonance".
         # Since we're creating an archive, add 10 % from the start.
         num_chunks = len(self.repository)
-        capacity = int(num_chunks / ChunkIndex.MAX_LOAD_FACTOR * 1.1)
-        self.chunks = ChunkIndex(capacity)
+        self.chunks = ChunkIndex(usable=num_chunks * 1.1)
         pi = ProgressIndicatorPercent(total=num_chunks, msg='Downloading chunk list... %3.0f%%',
                                       msgid='cache.download_chunks')
         t0 = perf_counter()

+ 3 - 1
src/borg/hashindex.pyx

@@ -84,7 +84,7 @@ cdef class IndexBase:
     MAX_LOAD_FACTOR = HASH_MAX_LOAD
     MAX_VALUE = _MAX_VALUE
 
-    def __cinit__(self, capacity=0, path=None, permit_compact=False):
+    def __cinit__(self, capacity=0, path=None, permit_compact=False, usable=None):
         self.key_size = self._key_size
         if path:
             if isinstance(path, (str, bytes)):
@@ -94,6 +94,8 @@ cdef class IndexBase:
                 self.index = hashindex_read(path, permit_compact)
             assert self.index, 'hashindex_read() returned NULL with no exception set'
         else:
+            if usable is not None:
+                capacity = int(usable / self.MAX_LOAD_FACTOR)
             self.index = hashindex_init(capacity, self.key_size, self.value_size)
             if not self.index:
                 raise Exception('hashindex_init failed')
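
For reference, a hedged sketch of both call styles after this change (repository
stands in for any sized container; this is not code from the commit):

    # new style: say how many usable entries are needed
    chunks = ChunkIndex(usable=len(repository) * 1.1)

    # old style still works; capacity remains the first positional parameter
    chunks = ChunkIndex(int(len(repository) / ChunkIndex.MAX_LOAD_FACTOR * 1.1))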