
move capacity calculation to IndexBase, fixes #2646

we now just pass how many "usable" hashtable entries we want and IndexBase computes
the real hashtable capacity internally via int(usable / MAX_LOAD_FACTOR).
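
A rough sketch of the new convention in plain Python (illustrative only, not the
actual Cython code; the 0.93 load factor is an assumed example value, the real one
is HASH_MAX_LOAD, see the hashindex.pyx diff below):

    MAX_LOAD_FACTOR = 0.93  # assumed example value (the real one is HASH_MAX_LOAD)

    def capacity_for(usable):
        # callers state how many entries they need to be able to store;
        # the table over-allocates so the load factor stays below the maximum
        return int(usable / MAX_LOAD_FACTOR)

    capacity_for(1_100_000)  # -> 1182795 buckets for 1.1 M usable entries
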
Thomas Waldmann 7 years ago
parent commit de113bab23
3 changed files with 12 additions and 12 deletions
  1. src/borg/archive.py (3 additions, 4 deletions)
  2. src/borg/cache.py (6 additions, 7 deletions)
  3. src/borg/hashindex.pyx (3 additions, 1 deletion)

+ 3 - 4
src/borg/archive.py

@@ -1332,11 +1332,10 @@ class ArchiveChecker:
     def init_chunks(self):
         """Fetch a list of all object keys from repository
         """
-        # Explicitly set the initial hash table capacity to avoid performance issues
+        # Explicitly set the initial usable hash table capacity to avoid performance issues
         # due to hash table "resonance".
-        # Since reconstruction of archive items can add some new chunks, add 10 % headroom
-        capacity = int(len(self.repository) / ChunkIndex.MAX_LOAD_FACTOR * 1.1)
-        self.chunks = ChunkIndex(capacity)
+        # Since reconstruction of archive items can add some new chunks, add 10 % headroom.
+        self.chunks = ChunkIndex(usable=len(self.repository) * 1.1)
         marker = None
         while True:
             result = self.repository.list(limit=LIST_SCAN_LIMIT, marker=marker)
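
To make the headroom arithmetic concrete (using the same illustrative 0.93 load
factor as above): with 1,000,000 objects in the repository, usable =
1,000,000 * 1.1 = 1,100,000, so the index allocates int(1,100,000 / 0.93) =
1,182,795 buckets, the same capacity the old inline formula
int(1,000,000 / 0.93 * 1.1) produced, just computed in one place.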

+ 6 - 7
src/borg/cache.py

@@ -780,11 +780,11 @@ class LocalCache(CacheStatsMixin):
             # deallocates old hashindex, creates empty hashindex:
             chunk_idx.clear()
             cleanup_outdated(cached_ids - archive_ids)
-            # Explicitly set the initial hash table capacity to avoid performance issues
+            # Explicitly set the usable initial hash table capacity to avoid performance issues
             # due to hash table "resonance".
-            master_index_capacity = int(len(self.repository) / ChunkIndex.MAX_LOAD_FACTOR)
+            master_index_capacity = len(self.repository)
             if archive_ids:
-                chunk_idx = None if not self.do_cache else ChunkIndex(master_index_capacity)
+                chunk_idx = None if not self.do_cache else ChunkIndex(usable=master_index_capacity)
                 pi = ProgressIndicatorPercent(total=len(archive_ids), step=0.1,
                                               msg='%3.0f%% Syncing chunks cache. Processing archive %s',
                                               msgid='cache.sync')
@@ -805,7 +805,7 @@ class LocalCache(CacheStatsMixin):
                         logger.info("Merging into master chunks index ...")
                         chunk_idx.merge(archive_chunk_idx)
                     else:
-                        chunk_idx = chunk_idx or ChunkIndex(master_index_capacity)
+                        chunk_idx = chunk_idx or ChunkIndex(usable=master_index_capacity)
                         logger.info('Fetching archive index for %s ...', archive_name)
                         fetch_and_build_idx(archive_id, decrypted_repository, chunk_idx)
                 if not self.do_cache:
@@ -1087,12 +1087,11 @@ Chunk index:    {0.total_unique_chunks:20d}             unknown"""
 
     def begin_txn(self):
         self._txn_active = True
-        # Explicitly set the initial hash table capacity to avoid performance issues
+        # Explicitly set the initial usable hash table capacity to avoid performance issues
         # due to hash table "resonance".
         # Since we're creating an archive, add 10 % from the start.
         num_chunks = len(self.repository)
-        capacity = int(num_chunks / ChunkIndex.MAX_LOAD_FACTOR * 1.1)
-        self.chunks = ChunkIndex(capacity)
+        self.chunks = ChunkIndex(usable=num_chunks * 1.1)
         pi = ProgressIndicatorPercent(total=num_chunks, msg='Downloading chunk list... %3.0f%%',
                                       msgid='cache.download_chunks')
         t0 = perf_counter()

+ 3 - 1
src/borg/hashindex.pyx

@@ -84,7 +84,7 @@ cdef class IndexBase:
     MAX_LOAD_FACTOR = HASH_MAX_LOAD
     MAX_VALUE = _MAX_VALUE
 
-    def __cinit__(self, capacity=0, path=None, permit_compact=False):
+    def __cinit__(self, capacity=0, path=None, permit_compact=False, usable=None):
         self.key_size = self._key_size
         if path:
             if isinstance(path, (str, bytes)):
@@ -94,6 +94,8 @@ cdef class IndexBase:
                 self.index = hashindex_read(path, permit_compact)
             assert self.index, 'hashindex_read() returned NULL with no exception set'
         else:
+            if usable is not None:
+                capacity = int(usable / self.MAX_LOAD_FACTOR)
             self.index = hashindex_init(capacity, self.key_size, self.value_size)
             if not self.index:
                 raise Exception('hashindex_init failed')
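
For reference, a hedged sketch of both call styles after this change (repository
stands in for any sized container; this is not code from the commit):

    # new style: say how many usable entries are needed
    chunks = ChunkIndex(usable=len(repository) * 1.1)

    # old style still works; capacity remains the first positional parameter
    chunks = ChunkIndex(int(len(repository) / ChunkIndex.MAX_LOAD_FACTOR * 1.1))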