Prechádzať zdrojové kódy

attic check performance improvement and minor progress reporting changes

Jonas Borgström 11 rokov pred
rodič
commit
893120e609

+ 9 - 5
attic/archive.py

@@ -419,7 +419,10 @@ class ArchiveChecker:
         shutil.rmtree(self.tmpdir)
 
     def init_chunks(self):
-        self.chunks = ChunkIndex.create(os.path.join(self.tmpdir, 'chunks').encode('utf-8'))
+        # Explicitly set the initial hash table capacity to avoid performance issues
+        # due to hash table "resonance"
+        capacity = int(len(self.repository) * 1.2)
+        self.chunks = ChunkIndex.create(os.path.join(self.tmpdir, 'chunks').encode('utf-8'), capacity=capacity)
         marker = None
         while True:
             result = self.repository.list(limit=10000, marker=marker)
@@ -466,11 +469,11 @@ class ArchiveChecker:
         if not Manifest.MANIFEST_ID in self.chunks:
             self.manifest = self.rebuild_manifest()
         else:
-            self.manifest, _ = Manifest.load(repository)
+            self.manifest, _ = Manifest.load(repository, key=self.key)
         self.rebuild_chunks()
         self.verify_chunks()
         if not self.error_found:
-            self.report_progress('Archive consistency check complete, no errors found.')
+            self.report_progress('Archive consistency check complete, no problems found.')
         return self.repair or not self.error_found
 
     def verify_chunks(self):
@@ -563,8 +566,9 @@ class ArchiveChecker:
                     for item in unpacker:
                         yield item
 
-        for name, info in list(self.manifest.archives.items()):
-            self.report_progress('Analyzing archive: ' + name)
+        num_archives = len(self.manifest.archives)
+        for i, (name, info) in enumerate(list(self.manifest.archives.items()), 1):
+            self.report_progress('Analyzing archive {} ({}/{})'.format(name, i, num_archives))
             archive_id = info[b'id']
             if not archive_id in self.chunks:
                 self.report_progress('Archive metadata block is missing', error=True)

+ 1 - 1
attic/archiver.py

@@ -64,7 +64,7 @@ class Archiver:
         """
         repository = self.open_repository(args.repository)
         if args.repair:
-            while not os.environ.get('ATTIC_CHECK_I_KWOW_WHAT_I_AM_DOING'):
+            while not os.environ.get('ATTIC_CHECK_I_KNOW_WHAT_I_AM_DOING'):
                 self.print_error("""Warning: 'check --repair' is an experimental feature that might result
 in data loss.
 

+ 2 - 2
attic/hashindex.pyx

@@ -37,8 +37,8 @@ cdef class IndexBase:
                 raise Exception('hashindex_close failed')
 
     @classmethod
-    def create(cls, path):
-        index = hashindex_create(<bytes>os.fsencode(path), 0, cls.key_size, cls.value_size)
+    def create(cls, path, capacity=0):
+        index = hashindex_create(<bytes>os.fsencode(path), capacity, cls.key_size, cls.value_size)
         if not index:
             raise Exception('Failed to create %s' % path)
         hashindex_close(index)

+ 3 - 2
attic/helpers.py

@@ -63,10 +63,11 @@ class Manifest:
         self.repository = repository
 
     @classmethod
-    def load(cls, repository):
+    def load(cls, repository, key=None):
         from .key import key_factory
         cdata = repository.get(cls.MANIFEST_ID)
-        key = key_factory(repository, cdata)
+        if not key:
+            key = key_factory(repository, cdata)
         manifest = cls(key, repository)
         data = key.decrypt(None, cdata)
         manifest.id = key.id_hash(data)

+ 1 - 6
attic/repository.py

@@ -217,7 +217,6 @@ class Repository(object):
         else:
             current_index = None
             report_progress('No suitable index found', error=True)
-        progress_time = None
 
         for segment, filename in self.io.segment_iterator():
             if segment > transaction_id:
@@ -227,10 +226,6 @@ class Repository(object):
                 else:
                     report_progress('Uncommitted segment {} found'.format(segment), error=True)
                 continue
-            if progress:
-                if int(time.time()) != progress_time:
-                    progress_time = int(time.time())
-                    report_progress('Checking segment {}/{}'.format(segment, transaction_id))
             try:
                 objects = list(self.io.iter_objects(segment))
             except (IntegrityError, struct.error):
@@ -272,7 +267,7 @@ class Repository(object):
         if current_index and len(current_index) != len(self.index):
             report_progress('Index object count mismatch. {} != {}'.format(len(current_index), len(self.index)), error=True)
         if not error_found:
-            report_progress('Repository check complete, no errors found.')
+            report_progress('Repository check complete, no problems found.')
         if repair:
             self.write_index()
         else:

+ 1 - 1
attic/testsuite/archiver.py

@@ -42,7 +42,7 @@ class ArchiverTestCaseBase(AtticTestCase):
     prefix = ''
 
     def setUp(self):
-        os.environ['ATTIC_CHECK_I_KWOW_WHAT_I_AM_DOING'] = '1'
+        os.environ['ATTIC_CHECK_I_KNOW_WHAT_I_AM_DOING'] = '1'
         self.archiver = Archiver()
         self.tmpdir = tempfile.mkdtemp()
         self.repository_path = os.path.join(self.tmpdir, 'repository')