فهرست منبع

More repository check/repair improvements

Jonas Borgström 11 سال پیش
والد
کامیت
bbf490bf8b
2فایلهای تغییر یافته به همراه76 افزوده شده و 27 حذف شده
  1. 22 4
      attic/repository.py
  2. 54 23
      attic/testsuite/repository.py

+ 22 - 4
attic/repository.py

@@ -206,7 +206,10 @@ class Repository(object):
         segments_transaction_id = self.io.get_segments_transaction_id(index_transaction_id)
         if index_transaction_id is None and segments_transaction_id is None:
             return True
-        transaction_id = max(index_transaction_id or 0, segments_transaction_id or 0)
+        if segments_transaction_id is not None:
+            transaction_id = segments_transaction_id
+        else:
+            transaction_id = index_transaction_id
         self.get_index(None)
         if index_transaction_id == segments_transaction_id:
             current_index = self.get_read_only_index(transaction_id)
@@ -217,6 +220,11 @@ class Repository(object):
 
         for segment, filename in self.io.segment_iterator():
             if segment > transaction_id:
+                if repair:
+                    report_progress('Deleting uncommitted segment {}'.format(segment), error=True)
+                    self.io.delete_segment(segment)
+                else:
+                    report_progress('Uncommitted segment {} found'.format(segment), error=True)
                 continue
             if progress:
                 if int(time.time()) != progress_time:
@@ -254,12 +262,22 @@ class Repository(object):
                     continue
                 else:
                     report_progress('Unexpected tag {} in segment {}'.format(tag, segment), error=True)
+        # We might need to add a commit tag if no committed segment is found
+        if repair and segments_transaction_id is None:
+            report_progress('Adding commit tag to segment {}'.format(transaction_id))
+            self.io.segment = transaction_id + 1
+            self.io.write_commit()
+            self.io.close_segment()
         if current_index and len(current_index) != len(self.index):
             report_progress('Index object count mismatch. {} != {}'.format(len(current_index), len(self.index)), error=True)
         if not error_found:
             report_progress('Check complete, no errors found.')
         if repair:
             self.write_index()
+        else:
+            # Delete temporary index file
+            self.index = None
+            os.unlink(os.path.join(self.path, 'index.tmp'))
         self.rollback()
         return not error_found or repair
 
@@ -364,9 +382,9 @@ class LoggedIO(object):
         """Verify that the transaction id is consistent with the index transaction id
         """
         for segment, filename in self.segment_iterator(reverse=True):
-            if index_transaction_id is not None and segment < index_transaction_id:
-                # The index is newer than any committed transaction found
-                return -1
+#            if index_transaction_id is not None and segment < index_transaction_id:
+#                # The index is newer than any committed transaction found
+#                return -1
             if self.is_committed_segment(filename):
                 return segment
         return None

+ 54 - 23
attic/testsuite/repository.py

@@ -106,6 +106,14 @@ class RepositoryCheckTestCase(AtticTestCase):
         self.repository.close()
         shutil.rmtree(self.tmppath)
 
+    def list_indices(self):
+        return [name for name in os.listdir(os.path.join(self.tmppath, 'repository')) if name.startswith('index.')]
+
+    def check(self, repair=False, status=True):
+        self.assert_equal(self.repository.check(repair=repair), status)
+        # Make sure no tmp files are left behind
+        self.assert_equal([name for name in os.listdir(os.path.join(self.tmppath, 'repository')) if 'tmp' in name], [], 'Found tmp files')
+
     def get_objects(self, *ids):
         for id_ in ids:
             self.repository.get(('%032d' % id_).encode('ascii'))
@@ -145,75 +153,98 @@ class RepositoryCheckTestCase(AtticTestCase):
     def test_repair_corrupted_segment(self):
         self.add_objects([[1, 2, 3], [4, 5, 6]])
         self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
-        self.assert_equal(True, self.repository.check())
+        self.check(status=True)
         self.corrupt_object(5)
         self.assert_raises(IntegrityError, lambda: self.get_objects(5))
         self.repository.rollback()
         # Make sure a regular check does not repair anything
-        self.assert_equal(False, self.repository.check())
-        self.assert_equal(False, self.repository.check())
+        self.check(status=False)
+        self.check(status=False)
         # Make sure a repair actually repairs the repo
-        self.assert_equal(True, self.repository.check(repair=True))
+        self.check(repair=True, status=True)
         self.get_objects(4)
-        self.assert_equal(True, self.repository.check())
+        self.check(status=True)
         self.assert_equal(set([1, 2, 3, 4, 6]), self.list_objects())
 
     def test_repair_missing_segment(self):
         self.add_objects([[1, 2, 3], [4, 5, 6]])
         self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
-        self.assert_equal(True, self.repository.check())
+        self.check(status=True)
         self.delete_segment(1)
         self.repository.rollback()
-        self.assert_equal(True, self.repository.check(repair=True))
+        self.check(repair=True, status=True)
         self.assert_equal(set([1, 2, 3]), self.list_objects())
 
     def test_repair_missing_commit_segment(self):
         self.add_objects([[1, 2, 3], [4, 5, 6]])
         self.delete_segment(1)
         self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4))
-        self.assert_equal(False, self.repository.check())
+        self.check(status=False)
         self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4))
-        self.assert_equal(True, self.repository.check(repair=True))
+        self.check(repair=True, status=True)
         self.assert_raises(Repository.DoesNotExist, lambda: self.get_objects(4))
         self.assert_equal(set([1, 2, 3]), self.list_objects())
 
     def test_repair_corrupted_commit_segment(self):
         self.add_objects([[1, 2, 3], [4, 5, 6]])
-        with open(os.path.join(self.tmppath, 'repository', 'data', '0', '1'), 'ab') as fd:
+        with open(os.path.join(self.tmppath, 'repository', 'data', '0', '1'), 'r+b') as fd:
+            fd.seek(-1, os.SEEK_END)
             fd.write(b'X')
         self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4))
-        self.assert_equal(False, self.repository.check())
-        self.assert_equal(True, self.repository.check(repair=True))
-        self.get_objects(4)
-        self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
+        self.check(status=False)
+        self.check(repair=True, status=True)
+        self.get_objects(3)
+        self.assert_raises(Repository.DoesNotExist, lambda: self.get_objects(4))
+        self.assert_equal(set([1, 2, 3]), self.list_objects())
+
+    def test_repair_no_commits(self):
+        self.add_objects([[1, 2, 3]])
+        with open(os.path.join(self.tmppath, 'repository', 'data', '0', '0'), 'r+b') as fd:
+            fd.seek(-1, os.SEEK_END)
+            fd.write(b'X')
+        self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4))
+        self.check(status=False)
+        self.check(status=False)
+        self.assert_equal(self.list_indices(), ['index.0'])
+        self.check(repair=True, status=True)
+        self.assert_equal(self.list_indices(), ['index.1'])
+        self.check(status=True)
+        self.get_objects(3)
+        self.assert_equal(set([1, 2, 3]), self.list_objects())
 
     def test_repair_missing_index(self):
         self.add_objects([[1, 2, 3], [4, 5, 6]])
         self.delete_index()
         self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4))
-        self.assert_equal(False, self.repository.check())
-        self.assert_equal(True, self.repository.check(repair=True))
-        self.assert_equal(True, self.repository.check())
+        self.check(status=False)
+        self.check(repair=True, status=True)
+        self.check(status=True)
         self.get_objects(4)
         self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
 
     def test_repair_index_too_old(self):
         self.add_objects([[1, 2, 3], [4, 5, 6]])
+        self.assert_equal(self.list_indices(), ['index.1'])
         self.rename_index('index.0')
+        self.assert_equal(self.list_indices(), ['index.0'])
         self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4))
-        self.assert_equal(False, self.repository.check())
-        self.assert_equal(True, self.repository.check(repair=True))
-        self.assert_equal(True, self.repository.check())
+        self.check(status=False)
+        self.check(repair=True, status=True)
+        self.assert_equal(self.list_indices(), ['index.1'])
+        self.check(status=True)
         self.get_objects(4)
         self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
 
     def test_repair_index_too_new(self):
         self.add_objects([[1, 2, 3], [4, 5, 6]])
+        self.assert_equal(self.list_indices(), ['index.1'])
         self.rename_index('index.100')
+        self.assert_equal(self.list_indices(), ['index.100'])
         self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4))
-        self.assert_equal(False, self.repository.check())
-        self.assert_equal(True, self.repository.check(repair=True))
-        self.assert_equal(True, self.repository.check())
+        self.check(status=False)
+        self.check(repair=True, status=True)
+        self.assert_equal(self.list_indices(), ['index.1'])
+        self.check(status=True)
         self.get_objects(4)
         self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())