Bläddra i källkod

Basic repository consistency check functionality.

Still no archive metadata validation or repair functionality.
Jonas Borgström 11 år sedan
förälder
incheckning
faedaf8160

+ 8 - 0
CHANGES

@@ -3,6 +3,14 @@ Attic Changelog
 
 Here you can see the full list of changes between each Attic release.
 
+Version 0.11
+------------
+
+(feature release, released on X)
+
+- New "check" command for repository consistency checking (#24)
+- Documentation improvements
+
 Version 0.10
 ------------
 

+ 30 - 1
attic/archiver.py

@@ -13,7 +13,7 @@ from attic.cache import Cache
 from attic.key import key_creator
 from attic.helpers import Error, location_validator, format_time, \
     format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, \
-    get_cache_dir, get_keys_dir, format_timedelta, prune_split, Manifest, remove_surrogates
+    get_cache_dir, get_keys_dir, format_timedelta, prune_split, Manifest, remove_surrogates, is_a_terminal
 from attic.remote import RepositoryServer, RemoteRepository
 
 
@@ -59,6 +59,17 @@ class Archiver:
         repository.commit()
         return self.exit_code
 
+    def do_check(self, args):
+        """Check repository consistency
+        """
+        repository = self.open_repository(args.repository)
+        if args.progress is None:  # neither --progress nor --no-progress given: decide automatically
+            args.progress = is_a_terminal(sys.stdout) or args.verbose
+        if not repository.check(progress=args.progress):
+            # check() returned a falsy result, i.e. problems were found: make the command fail
+            self.exit_code = 1
+        return self.exit_code
+
     def do_change_passphrase(self, args):
         """Change repository key file passphrase
         """
@@ -360,6 +371,24 @@ class Archiver:
                                choices=('none', 'passphrase', 'keyfile'), default='none',
                                help='select encryption method')
 
+        check_epilog = """
+        Progress status will be reported on the standard output stream by default when
+        it is attached to a terminal. Any problems found are printed to the standard error
+        stream and the command will have a non zero exit code.
+        """
+        subparser = subparsers.add_parser('check', parents=[common_parser],
+                                          description=self.do_check.__doc__,
+                                          epilog=check_epilog)
+        subparser.set_defaults(func=self.do_check)
+        subparser.add_argument('repository', metavar='REPOSITORY',
+                               type=location_validator(archive=False),
+                               help='repository to check consistency of')
+        subparser.add_argument('--progress', dest='progress', action='store_true',
+                               default=None,
+                               help='Report progress status to standard output stream')
+        subparser.add_argument('--no-progress', dest='progress', action='store_false',
+                               help='Disable progress reporting')
+
         subparser = subparsers.add_parser('change-passphrase', parents=[common_parser],
                                           description=self.do_change_passphrase.__doc__)
         subparser.set_defaults(func=self.do_change_passphrase)

+ 4 - 0
attic/hashindex.pyx

@@ -107,11 +107,13 @@ cdef class NSIndex(IndexBase):
 
     def iteritems(self, marker=None, limit=0):
         iter = NSKeyIterator()
+        iter.idx = self  # NOTE(review): presumably keeps this index referenced while the iterator uses its raw pointer - confirm
         iter.index = self.index
         return iter
 
 
 cdef class NSKeyIterator:
+    cdef NSIndex idx
     cdef HashIndex *index
     cdef char *key
 
@@ -156,11 +158,13 @@ cdef class ChunkIndex(IndexBase):
 
     def iteritems(self, marker=None, limit=0):
         iter = ChunkKeyIterator()
+        iter.idx = self  # NOTE(review): presumably keeps this index referenced while the iterator uses its raw pointer - confirm
         iter.index = self.index
         return iter
 
 
 cdef class ChunkKeyIterator:
+    cdef ChunkIndex idx
     cdef HashIndex *index
     cdef char *key
 

+ 9 - 0
attic/helpers.py

@@ -431,6 +431,15 @@ def daemonize():
     os.dup2(fd, 2)
 
 
+def is_a_terminal(fd):
+    """Return True if the file object `fd` is associated with a terminal, False otherwise
+    """
+    try:
+        os.ttyname(fd.fileno())  # raises when fd has no usable descriptor or it is not a tty
+        return True
+    except Exception:  # any ordinary failure means "not a terminal"; KeyboardInterrupt/SystemExit propagate
+        return False
+
 if sys.version < '3.3':
     # st_mtime_ns attribute only available in 3.3+
     def st_mtime_ns(st):

+ 3 - 0
attic/remote.py

@@ -178,6 +178,9 @@ class RemoteRepository(object):
                     w_fds = []
         self.ignore_responses |= set(waiting_for)
 
+    def check(self, progress=False):  # same signature as Repository.check in attic/repository.py
+        return self.call('check', progress)  # delegates to self.call with the command name 'check'
+
     def commit(self, *args):
         return self.call('commit')
 

+ 38 - 0
attic/repository.py

@@ -5,6 +5,7 @@ import os
 import re
 import shutil
 import struct
+import sys
 from zlib import crc32
 
 from .hashindex import NSIndex
@@ -198,6 +199,41 @@ class Repository(object):
         if self.io.head is not None:
             self.write_index()
 
+    def check(self, progress=False):
+        """Check repository consistency
+
+        Reads every segment (read failures count as errors) and checks each PUT entry against the
+        index. Problems are printed to stderr. Returns True if none were reported, False otherwise.
+        """
+        error_found = False
+        def report_error(msg):
+            nonlocal error_found
+            error_found = True
+            print(msg, file=sys.stderr)
+        seen = set()  # keys of all PUT entries encountered so far, across segments
+        for segment, filename in self.io._segment_names():
+            if progress:
+                print('Checking segment {}/{}'.format(segment, self.io.head))
+            try:
+                objects = list(self.io.iter_objects(segment))
+            except (IntegrityError, struct.error):  # unreadable segment: report it and treat it as empty
+                report_error('Error reading segment {}'.format(segment))
+                objects = []
+            for tag, key, offset in objects:
+                if tag == TAG_PUT:
+                    if key in seen:
+                        report_error('Key found in more than one segment. Segment={}, key={}'.format(segment, hexlify(key)))
+                    seen.add(key)
+                    if self.index.get(key, (0, 0)) != (segment, offset):  # index must point at exactly this entry
+                        report_error('Index vs segment header mismatch. Segment={}, key={}'.format(segment, hexlify(key)))
+                elif tag == TAG_COMMIT:
+                    continue
+                else:  # any other tag aborts the check with an exception instead of being reported
+                    raise self.RepositoryCheckFailed(self.path, 'Unexpected tag {} in segment {}'.format(tag, segment))
+        if len(self.index) != len(seen):  # index entry count must equal the number of distinct PUT keys seen
+            report_error('Index object count mismatch. {} != {}'.format(len(self.index), len(seen)))
+        return not error_found
+
     def rollback(self):
         """
         """
@@ -309,6 +345,8 @@ class LoggedIO(object):
         """
         self.head = None
         self.segment = 0
+        # FIXME: Only delete segments if we're sure there's at least
+        # one complete segment somewhere
         for segment, filename in self._segment_names(reverse=True):
             if self.is_complete_segment(filename):
                 self.head = segment

+ 2 - 0
attic/testsuite/archiver.py

@@ -205,12 +205,14 @@ class ArchiverTestCase(AtticTestCase):
         self.attic('init', self.repository_location)
         self.create_src_archive('test')
         self.attic('verify', self.repository_location + '::test')
+        self.attic('check', self.repository_location)
         name = sorted(os.listdir(os.path.join(self.tmpdir, 'repository', 'data', '0')), reverse=True)[0]
         fd = open(os.path.join(self.tmpdir, 'repository', 'data', '0', name), 'r+')
         fd.seek(100)
         fd.write('XXXX')
         fd.close()
         self.attic('verify', self.repository_location + '::test', exit_code=1)
+        self.attic('check', self.repository_location, exit_code=1)
 
     def test_readonly_repository(self):
         self.attic('init', self.repository_location)

+ 48 - 0
attic/testsuite/repository.py

@@ -102,7 +102,55 @@ class RepositoryTestCase(AtticTestCase):
         self.repository.commit()
 
 
+class RepositoryCheckTestCase(AtticTestCase):  # exercises Repository.check() on a scratch repository
+
+    def open(self, create=False):
+        return Repository(os.path.join(self.tmppath, 'repository'), create=create)
+
+    def setUp(self):
+        self.tmppath = tempfile.mkdtemp()
+        self.repository = self.open(create=True)
+
+    def tearDown(self):
+        self.repository.close()
+        shutil.rmtree(self.tmppath)
+
+    def add_objects(self, ids):  # store b'data' under a zero-padded 32-digit key per id, then commit
+        for id_ in ids:
+            self.repository.put(('%032d' % id_).encode('ascii'), b'data')
+        self.repository.commit()
+
+    def open_index(self):  # open the highest-numbered index.<n> file, so an older leftover index is never picked
+        head = sorted(int(n[6:]) for n in os.listdir(os.path.join(self.tmppath, 'repository')) if n.startswith('index.') and n[6:].isdigit())[-1]
+        return NSIndex(os.path.join(self.tmppath, 'repository', 'index.{}'.format(head)))
+
+    def corrupt_object(self, id_):  # overwrite 4 bytes at the object's indexed offset inside its segment file
+        idx = self.open_index()
+        segment, offset = idx[('%032d' % id_).encode('ascii')]
+        with open(os.path.join(self.tmppath, 'repository', 'data', '0', str(segment)), 'r+b') as fd:
+            fd.seek(offset)
+            fd.write(b'BOOM')
+
+    def list_objects(self):  # integer ids of every key present in the on-disk index
+        return {int(key) for key, _ in self.open_index().iteritems()}
+
+    def test_check(self):
+        self.add_objects([1, 2, 3])
+        self.add_objects([4, 5, 6])
+        self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
+        self.assert_equal(True, self.repository.check())
+        self.corrupt_object(5)
+        self.assert_equal(False, self.repository.check())  # corruption must be detected ...
+        self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())  # ... without check() altering the index
+
+
 class RemoteRepositoryTestCase(RepositoryTestCase):
 
     def open(self, create=False):
         return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create)
+
+
+class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase):  # reruns the inherited check tests via RemoteRepository
+
+    def open(self, create=False):
+        return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create)

+ 1 - 1
docs/update_usage.sh

@@ -2,7 +2,7 @@
 if [ ! -d usage ]; then
   mkdir usage
 fi
-for cmd in change-passphrase create delete extract info init list mount prune verify; do
+for cmd in change-passphrase check create delete extract info init list mount prune verify; do
   FILENAME="usage/$cmd.rst.inc"
   LINE=`echo -n attic $cmd | tr 'a-z- ' '-'`
   echo -e ".. _attic_$cmd:\n" > $FILENAME

+ 6 - 0
docs/usage.rst

@@ -93,6 +93,12 @@ not corrupt. |project_name| will not compare the the archived files with the
 files on disk.
 
 
+.. include:: usage/check.rst.inc
+
+The check command verifies the consistency of a repository. Any inconsistencies
+found are reported to the standard error stream and the command will have a
+non-zero exit code.
+
 .. include:: usage/delete.rst.inc
 
 This command deletes an archive from the repository. Any disk space not

+ 28 - 0
docs/usage/check.rst.inc

@@ -0,0 +1,28 @@
+.. _attic_check:
+
+attic check
+-----------
+::
+
+
+    usage: attic check [-h] [-v] [--progress] [--no-progress] REPOSITORY
+    
+    Check repository consistency
+    
+    positional arguments:
+      REPOSITORY     repository to check consistency of
+    
+    optional arguments:
+      -h, --help     show this help message and exit
+      -v, --verbose  verbose output
+      --progress     Report progress status to standard output stream
+      --no-progress  Disable progress reporting
+    
+    Progress status will be reported on the standard output stream by default when
+    it is attached to a terminal. Any problems found are printed to the standard
+    error stream and the command will have a non zero exit code.
+
+Description
+~~~~~~~~~~~
+
+