Bladeren bron

Basic repository consistency check functionality.

Still no archive metadata validation or repair functionality.
Jonas Borgström 11 jaren geleden
bovenliggende
commit
faedaf8160
11 gewijzigde bestanden met toevoegingen van 177 en 2 verwijderingen
  1. 8 0
      CHANGES
  2. 30 1
      attic/archiver.py
  3. 4 0
      attic/hashindex.pyx
  4. 9 0
      attic/helpers.py
  5. 3 0
      attic/remote.py
  6. 38 0
      attic/repository.py
  7. 2 0
      attic/testsuite/archiver.py
  8. 48 0
      attic/testsuite/repository.py
  9. 1 1
      docs/update_usage.sh
  10. 6 0
      docs/usage.rst
  11. 28 0
      docs/usage/check.rst.inc

+ 8 - 0
CHANGES

@@ -3,6 +3,14 @@ Attic Changelog
 
 Here you can see the full list of changes between each Attic release.
 
+Version 0.11
+------------
+
+(feature release, released on X)
+
+- New "check" command for repository consistency checking (#24)
+- Documentation improvements
+
 Version 0.10
 ------------
 

+ 30 - 1
attic/archiver.py

@@ -13,7 +13,7 @@ from attic.cache import Cache
 from attic.key import key_creator
 from attic.helpers import Error, location_validator, format_time, \
     format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, \
-    get_cache_dir, get_keys_dir, format_timedelta, prune_split, Manifest, remove_surrogates
+    get_cache_dir, get_keys_dir, format_timedelta, prune_split, Manifest, remove_surrogates, is_a_terminal
 from attic.remote import RepositoryServer, RemoteRepository
 
 
@@ -59,6 +59,17 @@ class Archiver:
         repository.commit()
         return self.exit_code
 
+    def do_check(self, args):
+        """Check repository consistency
+        """
+        # NOTE: the docstring above is also used verbatim as the argparse
+        # description of the `check` sub-command, so it is kept short.
+        repository = self.open_repository(args.repository)
+        # args.progress stays None when neither --progress nor --no-progress
+        # was given; in that case report progress when stdout is a terminal
+        # or when verbose output was requested.
+        if args.progress is None:
+            args.progress = is_a_terminal(sys.stdout) or args.verbose
+        # check() returns True when no problem was reported.
+        if repository.check(progress=args.progress):
+            if args.progress:
+                print('No problems found', file=sys.stderr)
+        else:
+            # The individual problems have already been printed by check();
+            # signal them to the caller through a non zero exit code, as the
+            # command's help text promises.
+            self.exit_code = 1
+        return self.exit_code
+
     def do_change_passphrase(self, args):
         """Change repository key file passphrase
         """
@@ -360,6 +371,24 @@ class Archiver:
                                choices=('none', 'passphrase', 'keyfile'), default='none',
                                help='select encryption method')
 
+        check_epilog = """
+        Progress status will be reported on the standard output stream by default when
+        it is attached to a terminal. Any problems found are printed to the standard error
+        stream and the command will have a non zero exit code.
+        """
+        subparser = subparsers.add_parser('check', parents=[common_parser],
+                                          description=self.do_check.__doc__,
+                                          epilog=check_epilog)
+        subparser.set_defaults(func=self.do_check)
+        subparser.add_argument('repository', metavar='REPOSITORY',
+                               type=location_validator(archive=False),
+                               help='repository to check consistency of')
+        subparser.add_argument('--progress', dest='progress', action='store_true',
+                               default=None,
+                               help='Report progress status to standard output stream')
+        subparser.add_argument('--no-progress', dest='progress', action='store_false',
+                               help='Disable progress reporting')
+
         subparser = subparsers.add_parser('change-passphrase', parents=[common_parser],
                                           description=self.do_change_passphrase.__doc__)
         subparser.set_defaults(func=self.do_change_passphrase)

+ 4 - 0
attic/hashindex.pyx

@@ -107,11 +107,13 @@ cdef class NSIndex(IndexBase):
 
     def iteritems(self, marker=None, limit=0):
         iter = NSKeyIterator()
+        iter.idx = self
         iter.index = self.index
         return iter
 
 
 cdef class NSKeyIterator:
+    cdef NSIndex idx
     cdef HashIndex *index
     cdef char *key
 
@@ -156,11 +158,13 @@ cdef class ChunkIndex(IndexBase):
 
     def iteritems(self, marker=None, limit=0):
         iter = ChunkKeyIterator()
+        iter.idx = self
         iter.index = self.index
         return iter
 
 
 cdef class ChunkKeyIterator:
+    cdef ChunkIndex idx
     cdef HashIndex *index
     cdef char *key
 

+ 9 - 0
attic/helpers.py

@@ -431,6 +431,15 @@ def daemonize():
     os.dup2(fd, 2)
 
 
+def is_a_terminal(fd):
+    """Determine if `fd` is associated with a terminal or not
+
+    `fd` is a file-like object (for example ``sys.stdout``), not a raw
+    descriptor number: its ``fileno()`` is handed to ``os.ttyname()``.
+    Returns True when that lookup succeeds and False otherwise.
+    """
+    try:
+        os.ttyname(fd.fileno())
+        return True
+    except (AttributeError, OSError, TypeError, ValueError):
+        # AttributeError: no fileno() on the object (or no os.ttyname()).
+        # OSError: descriptor is not a terminal.
+        # ValueError: closed file or fileno() unsupported.
+        # TypeError: fileno() did not return an integer.
+        # Anything else (e.g. KeyboardInterrupt) is deliberately not swallowed.
+        return False
+
 if sys.version < '3.3':
     # st_mtime_ns attribute only available in 3.3+
     def st_mtime_ns(st):

+ 3 - 0
attic/remote.py

@@ -178,6 +178,9 @@ class RemoteRepository(object):
                     w_fds = []
         self.ignore_responses |= set(waiting_for)
 
+    def check(self, progress=False):
+        """Run the repository consistency check through the remote 'check' call
+
+        `progress` is forwarded unchanged and the result of the call is returned.
+        """
+        return self.call('check', progress)
+
     def commit(self, *args):
         return self.call('commit')
 

+ 38 - 0
attic/repository.py

@@ -5,6 +5,7 @@ import os
 import re
 import shutil
 import struct
+import sys
 from zlib import crc32
 
 from .hashindex import NSIndex
@@ -198,6 +199,41 @@ class Repository(object):
         if self.io.head is not None:
             self.write_index()
 
+    def check(self, progress=False):
+        """Check repository consistency
+
+        This method verifies all segment checksums and makes sure
+        the index is consistent with the data stored in the segments.
+
+        If `progress` is true, a status line is printed to standard output
+        for every segment. Problems are printed to standard error. Returns
+        True when no problem was reported and False otherwise; an unexpected
+        tag raises RepositoryCheckFailed instead of being reported.
+        """
+        error_found = False
+        # Prints a problem to stderr and remembers that the check has failed.
+        def report_error(msg):
+            nonlocal error_found
+            error_found = True
+            print(msg, file=sys.stderr)
+        # Keys of all TAG_PUT entries encountered so far, across all segments.
+        seen = set()
+        for segment, filename in self.io._segment_names():
+            if progress:
+                print('Checking segment {}/{}'.format(segment, self.io.head))
+            try:
+                # Materialize the whole segment first so that a read error
+                # surfaces before any of its entries is processed.
+                objects = list(self.io.iter_objects(segment))
+            except (IntegrityError, struct.error):
+                report_error('Error reading segment {}'.format(segment))
+                # An unreadable segment contributes no entries at all.
+                objects = []
+            for tag, key, offset in objects:
+                if tag == TAG_PUT:
+                    if key in seen:
+                        report_error('Key found in more than one segment. Segment={}, key={}'.format(segment, hexlify(key)))
+                    seen.add(key)
+                    # A key missing from the index is looked up as (0, 0).
+                    if self.index.get(key, (0, 0)) != (segment, offset):
+                        report_error('Index vs segment header mismatch. Segment={}, key={}'.format(segment, hexlify(key)))
+                elif tag == TAG_COMMIT:
+                    continue
+                else:
+                    # NOTE(review): any other tag aborts the whole check with
+                    # an exception rather than being reported -- confirm that
+                    # no other legitimate tag can appear in a segment.
+                    raise self.RepositoryCheckFailed(self.path, 'Unexpected tag {} in segment {}'.format(tag, segment))
+        # Every index entry must correspond to exactly one PUT seen above.
+        if len(self.index) != len(seen):
+            report_error('Index object count mismatch. {} != {}'.format(len(self.index), len(seen)))
+        return not error_found
+
     def rollback(self):
         """
         """
@@ -309,6 +345,8 @@ class LoggedIO(object):
         """
         self.head = None
         self.segment = 0
+        # FIXME: Only delete segments if we're sure there's at least
+        # one complete segment somewhere
         for segment, filename in self._segment_names(reverse=True):
             if self.is_complete_segment(filename):
                 self.head = segment

+ 2 - 0
attic/testsuite/archiver.py

@@ -205,12 +205,14 @@ class ArchiverTestCase(AtticTestCase):
         self.attic('init', self.repository_location)
         self.create_src_archive('test')
         self.attic('verify', self.repository_location + '::test')
+        self.attic('check', self.repository_location)
         name = sorted(os.listdir(os.path.join(self.tmpdir, 'repository', 'data', '0')), reverse=True)[0]
         fd = open(os.path.join(self.tmpdir, 'repository', 'data', '0', name), 'r+')
         fd.seek(100)
         fd.write('XXXX')
         fd.close()
         self.attic('verify', self.repository_location + '::test', exit_code=1)
+        self.attic('check', self.repository_location, exit_code=1)
 
     def test_readonly_repository(self):
         self.attic('init', self.repository_location)

+ 48 - 0
attic/testsuite/repository.py

@@ -102,7 +102,55 @@ class RepositoryTestCase(AtticTestCase):
         self.repository.commit()
 
 
+class RepositoryCheckTestCase(AtticTestCase):
+    """Tests for the repository ``check()`` method on a temporary repository"""
+
+    def open(self, create=False):
+        """Open (or create) the repository below the temporary directory"""
+        return Repository(os.path.join(self.tmppath, 'repository'), create=create)
+
+    def setUp(self):
+        self.tmppath = tempfile.mkdtemp()
+        self.repository = self.open(create=True)
+
+    def tearDown(self):
+        self.repository.close()
+        shutil.rmtree(self.tmppath)
+
+    def add_objects(self, ids):
+        """Store b'data' under a zero padded 32 digit key per id, then commit"""
+        for id_ in ids:
+            self.repository.put(('%032d' % id_).encode('ascii'), b'data')
+        self.repository.commit()
+
+    def open_index(self):
+        """Open the highest numbered ``index.N`` file of the test repository"""
+        repo_path = os.path.join(self.tmppath, 'repository')
+        # n[6:] is the part after the 'index.' prefix. Use the highest number
+        # so that a stale older index file can never be picked up.
+        head = max(int(n[6:]) for n in os.listdir(repo_path)
+                   if n.startswith('index') and n[6:].isdigit())
+        return NSIndex(os.path.join(repo_path, 'index.{}'.format(head)))
+
+    def corrupt_object(self, id_):
+        """Overwrite the start of the stored entry for `id_` with b'BOOM'"""
+        idx = self.open_index()
+        segment, offset = idx[('%032d' % id_).encode('ascii')]
+        with open(os.path.join(self.tmppath, 'repository', 'data', '0', str(segment)), 'r+b') as fd:
+            fd.seek(offset)
+            fd.write(b'BOOM')
+
+    def list_objects(self):
+        """Return the set of integer ids found in the on-disk index"""
+        return {int(key) for key, _ in self.open_index().iteritems()}
+
+    def test_check(self):
+        self.add_objects([1, 2, 3])
+        self.add_objects([4, 5, 6])
+        self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
+        self.assert_equal(True, self.repository.check())
+        self.corrupt_object(5)
+        self.assert_equal(False, self.repository.check())
+        # check() only reports problems; the index must be left untouched.
+        self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
+
+
 class RemoteRepositoryTestCase(RepositoryTestCase):
 
     def open(self, create=False):
         return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create)
+
+
+class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase):
+    """Runs the RepositoryCheckTestCase tests against a RemoteRepository"""
+
+    def open(self, create=False):
+        return RemoteRepository(Location('__testsuite__:' + os.path.join(self.tmppath, 'repository')), create=create)

+ 1 - 1
docs/update_usage.sh

@@ -2,7 +2,7 @@
 if [ ! -d usage ]; then
   mkdir usage
 fi
-for cmd in change-passphrase create delete extract info init list mount prune verify; do
+for cmd in change-passphrase check create delete extract info init list mount prune verify; do
   FILENAME="usage/$cmd.rst.inc"
   LINE=`echo -n attic $cmd | tr 'a-z- ' '-'`
   echo -e ".. _attic_$cmd:\n" > $FILENAME

+ 6 - 0
docs/usage.rst

@@ -93,6 +93,12 @@ not corrupt. |project_name| will not compare the the archived files with the
 files on disk.
 
 
+.. include:: usage/check.rst.inc
+
+The check command verifies the consistency of a repository. Any inconsistencies
+found are reported to the standard error stream and the command will have a
+non-zero exit code.
+
 .. include:: usage/delete.rst.inc
 
 This command deletes an archive from the repository. Any disk space not

+ 28 - 0
docs/usage/check.rst.inc

@@ -0,0 +1,28 @@
+.. _attic_check:
+
+attic check
+-----------
+::
+
+
+    usage: attic check [-h] [-v] [--progress] [--no-progress] REPOSITORY
+    
+    Check repository consistency
+    
+    positional arguments:
+      REPOSITORY     repository to check consistency of
+    
+    optional arguments:
+      -h, --help     show this help message and exit
+      -v, --verbose  verbose output
+      --progress     Report progress status to standard output stream
+      --no-progress  Disable progress reporting
+    
+    Progress status will be reported on the standard output stream by default when
+    it is attached to a terminal. Any problems found are printed to the standard
+    error stream and the command will have a non zero exit code.
+
+Description
+~~~~~~~~~~~
+
+