Bläddra i källkod

More attic check --repair improvements

Jonas Borgström 11 år sedan
förälder
incheckning
1809ea2f3e
4 ändrade filer med 108 tillägg och 33 borttagningar
  1. 9 0
      attic/archiver.py
  2. 3 1
      attic/remote.py
  3. 12 11
      attic/repository.py
  4. 84 21
      attic/testsuite/repository.py

+ 9 - 0
attic/archiver.py

@@ -64,6 +64,15 @@ class Archiver:
         """Check repository consistency
         """
         repository = self.open_repository(args.repository)
+        if args.repair:
+            while True:
+                self.print_error("""Warning: check --repair is an experimental feature that might result
+in data loss. Checking and repairing archive metadata consistency is not yet
+supported so some types of corruptions will be undetected and not repaired.
+
+Type "Yes I am sure" if you understand this and want to continue.\n""")
+                if input('Do you want to continue? ') == 'Yes I am sure':
+                    break
         if args.progress is None:
             args.progress = is_a_terminal(sys.stdout) or args.verbose
         if not repository.check(progress=args.progress, repair=args.repair):

+ 3 - 1
attic/remote.py

@@ -5,7 +5,7 @@ import select
 from subprocess import Popen, PIPE
 import sys
 
-from .helpers import Error
+from .helpers import Error, IntegrityError
 from .repository import Repository
 
 BUFSIZE = 10 * 1024 * 1024
@@ -134,6 +134,8 @@ class RemoteRepository(object):
                             raise Repository.AlreadyExists(self.location.orig)
                         elif error == b'CheckNeeded':
                             raise Repository.CheckNeeded(self.location.orig)
+                        elif error == b'IntegrityError':
+                            raise IntegrityError
                         raise self.RPCError(error)
                     else:
                         yield res

+ 12 - 11
attic/repository.py

@@ -192,8 +192,15 @@ class Repository(object):
         This method verifies all segment checksums and makes sure
         the index is consistent with the data stored in the segments.
         """
+        error_found = False
+        def report_progress(msg, error=False):
+            nonlocal error_found
+            if error:
+                error_found = True
+            if error or progress:
+                print(msg, file=sys.stderr)
+
         assert not self._active_txn
-        assert not self.index
         index_transaction_id = self.get_index_transaction_id()
         segments_transaction_id = self.io.get_segments_transaction_id(index_transaction_id)
         if index_transaction_id is None and segments_transaction_id is None:
@@ -204,15 +211,8 @@ class Repository(object):
             current_index = self.get_read_only_index(transaction_id)
         else:
             current_index = None
+            report_progress('No suitable index found', error=True)
         progress_time = None
-        error_found = False
-
-        def report_progress(msg, error=False):
-            nonlocal error_found
-            if error:
-                error_found = True
-            if error or progress:
-                print(msg, file=sys.stderr)
 
         for segment, filename in self.io.segment_iterator():
             if segment > transaction_id:
@@ -259,7 +259,8 @@ class Repository(object):
             report_progress('Check complete, no errors found.')
         if repair:
             self.write_index()
-        return not error_found
+        self.rollback()
+        return not error_found or repair
 
     def rollback(self):
         """
@@ -362,7 +363,7 @@ class LoggedIO(object):
         """Verify that the transaction id is consistent with the index transaction id
         """
         for segment, filename in self.segment_iterator(reverse=True):
-            if segment < index_transaction_id:
+            if index_transaction_id is not None and segment < index_transaction_id:
                 # The index is newer than any committed transaction found
                 return -1
             if self.is_committed_segment(filename):

+ 84 - 21
attic/testsuite/repository.py

@@ -2,7 +2,7 @@ import os
 import shutil
 import tempfile
 from attic.hashindex import NSIndex
-from attic.helpers import Location
+from attic.helpers import Location, IntegrityError
 from attic.remote import RemoteRepository
 from attic.repository import Repository
 from attic.testsuite import AtticTestCase
@@ -106,10 +106,15 @@ class RepositoryCheckTestCase(AtticTestCase):
         self.repository.close()
         shutil.rmtree(self.tmppath)
 
-    def add_objects(self, ids):
+    def get_objects(self, *ids):
         for id_ in ids:
-            self.repository.put(('%032d' % id_).encode('ascii'), b'data')
-        self.repository.commit()
+            self.repository.get(('%032d' % id_).encode('ascii'))
+
+    def add_objects(self, segments):
+        for ids in segments:
+            for id_ in ids:
+                self.repository.put(('%032d' % id_).encode('ascii'), b'data')
+            self.repository.commit()
 
     def get_head(self):
         return sorted(int(n) for n in os.listdir(os.path.join(self.tmppath, 'repository', 'data', '0')) if n.isdigit())[-1]
@@ -124,36 +129,94 @@ class RepositoryCheckTestCase(AtticTestCase):
             fd.seek(offset)
             fd.write(b'BOOM')
 
+    def delete_segment(self, segment):
+        os.unlink(os.path.join(self.tmppath, 'repository', 'data', '0', str(segment)))
+
+    def delete_index(self):
+        os.unlink(os.path.join(self.tmppath, 'repository', 'index.{}'.format(self.get_head())))
+
+    def rename_index(self, new_name):
+        os.rename(os.path.join(self.tmppath, 'repository', 'index.{}'.format(self.get_head())),
+                  os.path.join(self.tmppath, 'repository', new_name))
+
     def list_objects(self):
         return set((int(key) for key, _ in list(self.open_index().iteritems())))
 
-    def test_check(self):
-        self.add_objects([1, 2, 3])
-        self.add_objects([4, 5, 6])
+    def test_repair_corrupted_segment(self):
+        self.add_objects([[1, 2, 3], [4, 5, 6]])
         self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
         self.assert_equal(True, self.repository.check())
         self.corrupt_object(5)
-        self.reopen()
+        self.assert_raises(IntegrityError, lambda: self.get_objects(5))
+        self.repository.rollback()
+        # Make sure a regular check does not repair anything
         self.assert_equal(False, self.repository.check())
-        self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
-
-    def test_check_repair(self):
-        self.add_objects([1, 2, 3])
-        self.add_objects([4, 5, 6])
-        self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
+        self.assert_equal(False, self.repository.check())
+        # Make sure a repair actually repairs the repo
+        self.assert_equal(True, self.repository.check(repair=True))
+        self.get_objects(4)
         self.assert_equal(True, self.repository.check())
-        self.corrupt_object(5)
-        self.reopen()
-        self.assert_equal(False, self.repository.check(repair=True))
         self.assert_equal(set([1, 2, 3, 4, 6]), self.list_objects())
 
+    def test_repair_missing_segment(self):
+        self.add_objects([[1, 2, 3], [4, 5, 6]])
+        self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
+        self.assert_equal(True, self.repository.check())
+        self.delete_segment(1)
+        self.repository.rollback()
+        self.assert_equal(True, self.repository.check(repair=True))
+        self.assert_equal(set([1, 2, 3]), self.list_objects())
 
-    def test_check_missing_or_corrupt_commit_tag(self):
-        self.add_objects([1, 2, 3])
+    def test_repair_missing_commit_segment(self):
+        self.add_objects([[1, 2, 3], [4, 5, 6]])
+        self.delete_segment(1)
+        self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4))
+        self.assert_equal(False, self.repository.check())
+        self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4))
+        self.assert_equal(True, self.repository.check(repair=True))
+        self.assert_raises(Repository.DoesNotExist, lambda: self.get_objects(4))
         self.assert_equal(set([1, 2, 3]), self.list_objects())
-        with open(os.path.join(self.tmppath, 'repository', 'data', '0', str(self.get_head())), 'ab') as fd:
+
+    def test_repair_corrupted_commit_segment(self):
+        self.add_objects([[1, 2, 3], [4, 5, 6]])
+        with open(os.path.join(self.tmppath, 'repository', 'data', '0', '1'), 'ab') as fd:
             fd.write(b'X')
-        self.assert_raises(Repository.CheckNeeded, lambda: self.repository.get(bytes(32)))
+        self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4))
+        self.assert_equal(False, self.repository.check())
+        self.assert_equal(True, self.repository.check(repair=True))
+        self.get_objects(4)
+        self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
+
+    def test_repair_missing_index(self):
+        self.add_objects([[1, 2, 3], [4, 5, 6]])
+        self.delete_index()
+        self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4))
+        self.assert_equal(False, self.repository.check())
+        self.assert_equal(True, self.repository.check(repair=True))
+        self.assert_equal(True, self.repository.check())
+        self.get_objects(4)
+        self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
+
+    def test_repair_index_too_old(self):
+        self.add_objects([[1, 2, 3], [4, 5, 6]])
+        self.rename_index('index.0')
+        self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4))
+        self.assert_equal(False, self.repository.check())
+        self.assert_equal(True, self.repository.check(repair=True))
+        self.assert_equal(True, self.repository.check())
+        self.get_objects(4)
+        self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
+
+    def test_repair_index_too_new(self):
+        self.add_objects([[1, 2, 3], [4, 5, 6]])
+        self.rename_index('index.100')
+        self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4))
+        self.assert_equal(False, self.repository.check())
+        self.assert_equal(True, self.repository.check(repair=True))
+        self.assert_equal(True, self.repository.check())
+        self.get_objects(4)
+        self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
+
 
 class RemoteRepositoryTestCase(RepositoryTestCase):