2
0
Эх сурвалжийг харах

More attic check --repair improvements

Jonas Borgström 11 жил өмнө
parent
commit
1809ea2f3e

+ 9 - 0
attic/archiver.py

@@ -64,6 +64,15 @@ class Archiver:
         """Check repository consistency
         """
         repository = self.open_repository(args.repository)
+        if args.repair:
+            while True:
+                self.print_error("""Warning: check --repair is an experimental feature that might result
+in data loss. Checking and repairing archive metadata consistency is not yet
+supported so some types of corruptions will be undetected and not repaired.
+
+Type "Yes I am sure" if you understand this and want to continue.\n""")
+                if input('Do you want to continue? ') == 'Yes I am sure':
+                    break
         if args.progress is None:
             args.progress = is_a_terminal(sys.stdout) or args.verbose
         if not repository.check(progress=args.progress, repair=args.repair):

+ 3 - 1
attic/remote.py

@@ -5,7 +5,7 @@ import select
 from subprocess import Popen, PIPE
 import sys
 
-from .helpers import Error
+from .helpers import Error, IntegrityError
 from .repository import Repository
 
 BUFSIZE = 10 * 1024 * 1024
@@ -134,6 +134,8 @@ class RemoteRepository(object):
                             raise Repository.AlreadyExists(self.location.orig)
                         elif error == b'CheckNeeded':
                             raise Repository.CheckNeeded(self.location.orig)
+                        elif error == b'IntegrityError':
+                            raise IntegrityError
                         raise self.RPCError(error)
                     else:
                         yield res

+ 12 - 11
attic/repository.py

@@ -192,8 +192,15 @@ class Repository(object):
         This method verifies all segment checksums and makes sure
         the index is consistent with the data stored in the segments.
         """
+        error_found = False
+        def report_progress(msg, error=False):
+            nonlocal error_found
+            if error:
+                error_found = True
+            if error or progress:
+                print(msg, file=sys.stderr)
+
         assert not self._active_txn
-        assert not self.index
         index_transaction_id = self.get_index_transaction_id()
         segments_transaction_id = self.io.get_segments_transaction_id(index_transaction_id)
         if index_transaction_id is None and segments_transaction_id is None:
@@ -204,15 +211,8 @@ class Repository(object):
             current_index = self.get_read_only_index(transaction_id)
         else:
             current_index = None
+            report_progress('No suitable index found', error=True)
         progress_time = None
-        error_found = False
-
-        def report_progress(msg, error=False):
-            nonlocal error_found
-            if error:
-                error_found = True
-            if error or progress:
-                print(msg, file=sys.stderr)
 
         for segment, filename in self.io.segment_iterator():
             if segment > transaction_id:
@@ -259,7 +259,8 @@ class Repository(object):
             report_progress('Check complete, no errors found.')
         if repair:
             self.write_index()
-        return not error_found
+        self.rollback()
+        return not error_found or repair
 
     def rollback(self):
         """
@@ -362,7 +363,7 @@ class LoggedIO(object):
         """Verify that the transaction id is consistent with the index transaction id
         """
         for segment, filename in self.segment_iterator(reverse=True):
-            if segment < index_transaction_id:
+            if index_transaction_id is not None and segment < index_transaction_id:
                 # The index is newer than any committed transaction found
                 return -1
             if self.is_committed_segment(filename):

+ 84 - 21
attic/testsuite/repository.py

@@ -2,7 +2,7 @@ import os
 import shutil
 import tempfile
 from attic.hashindex import NSIndex
-from attic.helpers import Location
+from attic.helpers import Location, IntegrityError
 from attic.remote import RemoteRepository
 from attic.repository import Repository
 from attic.testsuite import AtticTestCase
@@ -106,10 +106,15 @@ class RepositoryCheckTestCase(AtticTestCase):
         self.repository.close()
         shutil.rmtree(self.tmppath)
 
-    def add_objects(self, ids):
+    def get_objects(self, *ids):
         for id_ in ids:
-            self.repository.put(('%032d' % id_).encode('ascii'), b'data')
-        self.repository.commit()
+            self.repository.get(('%032d' % id_).encode('ascii'))
+
+    def add_objects(self, segments):
+        for ids in segments:
+            for id_ in ids:
+                self.repository.put(('%032d' % id_).encode('ascii'), b'data')
+            self.repository.commit()
 
     def get_head(self):
         return sorted(int(n) for n in os.listdir(os.path.join(self.tmppath, 'repository', 'data', '0')) if n.isdigit())[-1]
@@ -124,36 +129,94 @@ class RepositoryCheckTestCase(AtticTestCase):
             fd.seek(offset)
             fd.write(b'BOOM')
 
+    def delete_segment(self, segment):
+        os.unlink(os.path.join(self.tmppath, 'repository', 'data', '0', str(segment)))
+
+    def delete_index(self):
+        os.unlink(os.path.join(self.tmppath, 'repository', 'index.{}'.format(self.get_head())))
+
+    def rename_index(self, new_name):
+        os.rename(os.path.join(self.tmppath, 'repository', 'index.{}'.format(self.get_head())),
+                  os.path.join(self.tmppath, 'repository', new_name))
+
     def list_objects(self):
         return set((int(key) for key, _ in list(self.open_index().iteritems())))
 
-    def test_check(self):
-        self.add_objects([1, 2, 3])
-        self.add_objects([4, 5, 6])
+    def test_repair_corrupted_segment(self):
+        self.add_objects([[1, 2, 3], [4, 5, 6]])
         self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
         self.assert_equal(True, self.repository.check())
         self.corrupt_object(5)
-        self.reopen()
+        self.assert_raises(IntegrityError, lambda: self.get_objects(5))
+        self.repository.rollback()
+        # Make sure a regular check does not repair anything
         self.assert_equal(False, self.repository.check())
-        self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
-
-    def test_check_repair(self):
-        self.add_objects([1, 2, 3])
-        self.add_objects([4, 5, 6])
-        self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
+        self.assert_equal(False, self.repository.check())
+        # Make sure a repair actually repairs the repo
+        self.assert_equal(True, self.repository.check(repair=True))
+        self.get_objects(4)
         self.assert_equal(True, self.repository.check())
-        self.corrupt_object(5)
-        self.reopen()
-        self.assert_equal(False, self.repository.check(repair=True))
         self.assert_equal(set([1, 2, 3, 4, 6]), self.list_objects())
 
+    def test_repair_missing_segment(self):
+        self.add_objects([[1, 2, 3], [4, 5, 6]])
+        self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
+        self.assert_equal(True, self.repository.check())
+        self.delete_segment(1)
+        self.repository.rollback()
+        self.assert_equal(True, self.repository.check(repair=True))
+        self.assert_equal(set([1, 2, 3]), self.list_objects())
 
-    def test_check_missing_or_corrupt_commit_tag(self):
-        self.add_objects([1, 2, 3])
+    def test_repair_missing_commit_segment(self):
+        self.add_objects([[1, 2, 3], [4, 5, 6]])
+        self.delete_segment(1)
+        self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4))
+        self.assert_equal(False, self.repository.check())
+        self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4))
+        self.assert_equal(True, self.repository.check(repair=True))
+        self.assert_raises(Repository.DoesNotExist, lambda: self.get_objects(4))
         self.assert_equal(set([1, 2, 3]), self.list_objects())
-        with open(os.path.join(self.tmppath, 'repository', 'data', '0', str(self.get_head())), 'ab') as fd:
+
+    def test_repair_corrupted_commit_segment(self):
+        self.add_objects([[1, 2, 3], [4, 5, 6]])
+        with open(os.path.join(self.tmppath, 'repository', 'data', '0', '1'), 'ab') as fd:
             fd.write(b'X')
-        self.assert_raises(Repository.CheckNeeded, lambda: self.repository.get(bytes(32)))
+        self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4))
+        self.assert_equal(False, self.repository.check())
+        self.assert_equal(True, self.repository.check(repair=True))
+        self.get_objects(4)
+        self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
+
+    def test_repair_missing_index(self):
+        self.add_objects([[1, 2, 3], [4, 5, 6]])
+        self.delete_index()
+        self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4))
+        self.assert_equal(False, self.repository.check())
+        self.assert_equal(True, self.repository.check(repair=True))
+        self.assert_equal(True, self.repository.check())
+        self.get_objects(4)
+        self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
+
+    def test_repair_index_too_old(self):
+        self.add_objects([[1, 2, 3], [4, 5, 6]])
+        self.rename_index('index.0')
+        self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4))
+        self.assert_equal(False, self.repository.check())
+        self.assert_equal(True, self.repository.check(repair=True))
+        self.assert_equal(True, self.repository.check())
+        self.get_objects(4)
+        self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
+
+    def test_repair_index_too_new(self):
+        self.add_objects([[1, 2, 3], [4, 5, 6]])
+        self.rename_index('index.100')
+        self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4))
+        self.assert_equal(False, self.repository.check())
+        self.assert_equal(True, self.repository.check(repair=True))
+        self.assert_equal(True, self.repository.check())
+        self.get_objects(4)
+        self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
+
 
 class RemoteRepositoryTestCase(RepositoryTestCase):