
Merge branch '1.0-maint'

Also: add missing keys to ARCHIVE_KEYS set.
Thomas Waldmann 9 years ago
parent
commit
9a64835b4d
6 changed files with 142 additions and 32 deletions
  1. docs/resources.rst (+1 -1)
  2. src/borg/archive.py (+65 -18)
  3. src/borg/archiver.py (+14 -6)
  4. src/borg/constants.py (+14 -3)
  5. src/borg/helpers.py (+14 -2)
  6. src/borg/testsuite/archive.py (+34 -2)

+ 1 - 1
docs/resources.rst

@@ -38,4 +38,4 @@ Software
 
 - `BorgWeb - a very simple web UI for BorgBackup <https://borgweb.readthedocs.io/>`_
 - some other stuff found at the `BorgBackup Github organisation <https://github.com/borgbackup/>`_
-- `atticmatic <https://github.com/witten/atticmatic/>`_ (includes borgmatic)
+- `borgmatic <https://torsion.org/borgmatic/>`_ - simple wrapper script for BorgBackup that creates and prunes backups

+ 65 - 18
src/borg/archive.py

@@ -690,12 +690,40 @@ Number of files: {0.stats.nfiles}'''.format(
             return os.open(path, flags_normal)
 
 
+def valid_msgpacked_dict(d, keys_serialized):
+    """check if the data <d> looks like a msgpacked dict"""
+    d_len = len(d)
+    if d_len == 0:
+        return False
+    if d[0] & 0xf0 == 0x80:  # object is a fixmap (up to 15 elements)
+        offs = 1
+    elif d[0] == 0xde:  # object is a map16 (up to 2^16-1 elements)
+        offs = 3
+    else:
+        # object is not a map (dict)
+        # note: we must not have dicts with > 2^16-1 elements
+        return False
+    if d_len <= offs:
+        return False
+    # is the first dict key a bytestring?
+    if d[offs] & 0xe0 == 0xa0:  # key is a small bytestring (up to 31 chars)
+        pass
+    elif d[offs] in (0xd9, 0xda, 0xdb):  # key is a str8, str16 or str32
+        pass
+    else:
+        # key is not a bytestring
+        return False
+    # is the bytestring any of the expected key names?
+    key_serialized = d[offs:]
+    return any(key_serialized.startswith(pattern) for pattern in keys_serialized)
+
+
 class RobustUnpacker:
     """A restartable/robust version of the streaming msgpack unpacker
     """
-    def __init__(self, validator):
+    def __init__(self, validator, item_keys):
         super().__init__()
-        self.item_keys = [msgpack.packb(name.encode()) for name in ITEM_KEYS]
+        self.item_keys = [msgpack.packb(name.encode()) for name in item_keys]
         self.validator = validator
         self._buffered_data = []
         self._resync = False
@@ -720,18 +748,10 @@ class RobustUnpacker:
             while self._resync:
                 if not data:
                     raise StopIteration
-                # Abort early if the data does not look like a serialized dict
-                if len(data) < 2 or ((data[0] & 0xf0) != 0x80) or ((data[1] & 0xe0) != 0xa0):
-                    data = data[1:]
-                    continue
-                # Make sure it looks like an item dict
-                for pattern in self.item_keys:
-                    if data[1:].startswith(pattern):
-                        break
-                else:
+                # Abort early if the data does not look like a serialized item dict
+                if not valid_msgpacked_dict(data, self.item_keys):
                     data = data[1:]
                     continue
-
                 self._unpacker = msgpack.Unpacker(object_hook=StableDict)
                 self._unpacker.feed(data)
                 try:
@@ -806,7 +826,12 @@ class ArchiveChecker:
                 self.chunks[id_] = init_entry
 
     def identify_key(self, repository):
-        cdata = repository.get(next(self.chunks.iteritems())[0])
+        try:
+            some_chunkid, _ = next(self.chunks.iteritems())
+        except StopIteration:
+            # repo is completely empty, no chunks
+            return None
+        cdata = repository.get(some_chunkid)
         return key_factory(repository, cdata)
 
     def verify_data(self):
@@ -834,13 +859,26 @@ class ArchiveChecker:
 
         Iterates through all objects in the repository looking for archive metadata blocks.
         """
+        required_archive_keys = frozenset(key.encode() for key in REQUIRED_ARCHIVE_KEYS)
+
+        def valid_archive(obj):
+            if not isinstance(obj, dict):
+                return False
+            keys = set(obj)
+            return required_archive_keys.issubset(keys)
+
         logger.info('Rebuilding missing manifest, this might take some time...')
+        # as we have lost the manifest, we do not know any more what valid item keys we had.
+        # collecting any key we encounter in a damaged repo seems unwise, thus we just use
+        # the hardcoded list from the source code. thus, it is not recommended to rebuild a
+        # lost manifest with an older borg version than the most recent one that was ever used
+        # within this repository (assuming that newer borg versions support more item keys).
         manifest = Manifest(self.key, self.repository)
+        archive_keys_serialized = [msgpack.packb(name.encode()) for name in ARCHIVE_KEYS]
         for chunk_id, _ in self.chunks.iteritems():
             cdata = self.repository.get(chunk_id)
             _, data = self.key.decrypt(chunk_id, cdata)
-            # Some basic sanity checks of the payload before feeding it into msgpack
-            if len(data) < 2 or ((data[0] & 0xf0) != 0x80) or ((data[1] & 0xe0) != 0xa0):
+            if not valid_msgpacked_dict(data, archive_keys_serialized):
                 continue
             if b'cmdline' not in data or b'\xa7version\x01' not in data:
                 continue
@@ -850,7 +888,7 @@ class ArchiveChecker:
             # msgpack with invalid data
             except (TypeError, ValueError, StopIteration):
                 continue
-            if isinstance(archive, dict) and b'items' in archive and b'cmdline' in archive:
+            if valid_archive(archive):
                 logger.info('Found archive %s', archive[b'name'].decode('utf-8'))
                 manifest.archives[archive[b'name'].decode('utf-8')] = {b'id': chunk_id, b'time': archive[b'time']}
         logger.info('Manifest rebuild complete.')
@@ -912,7 +950,10 @@ class ArchiveChecker:
 
             Missing item chunks will be skipped and the msgpack stream will be restarted
             """
-            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and 'path' in item)
+            item_keys = frozenset(key.encode() for key in self.manifest.item_keys)
+            required_item_keys = frozenset(key.encode() for key in REQUIRED_ITEM_KEYS)
+            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and 'path' in item,
+                                      self.manifest.item_keys)
             _state = 0
 
             def missing_chunk_detector(chunk_id):
@@ -927,6 +968,12 @@ class ArchiveChecker:
                 self.error_found = True
                 logger.error(msg)
 
+            def valid_item(obj):
+                if not isinstance(obj, StableDict):
+                    return False
+                keys = set(obj)
+                return required_item_keys.issubset(keys) and keys.issubset(item_keys)
+
             i = 0
             for state, items in groupby(archive[b'items'], missing_chunk_detector):
                 items = list(items)
@@ -942,7 +989,7 @@ class ArchiveChecker:
                     unpacker.feed(data)
                     try:
                         for item in unpacker:
-                            if isinstance(item, dict):
+                            if valid_item(item):
                                 yield Item(internal_dict=item)
                             else:
                                 report('Did not get expected metadata dict when unpacking item metadata', chunk_id, i)
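For context: the byte checks in valid_msgpacked_dict follow the msgpack wire format. A fixmap header is 0x80 | count, map16 is 0xde, a fixstr key header is 0xa0 | length, and longer keys use str8/str16/str32 (0xd9/0xda/0xdb). A minimal sketch of what the function accepts, assuming msgpack-python with use_bin_type=False (the legacy "raw" encoding used for keys here):

    import msgpack

    # a small item-like dict packs to: fixmap header, fixstr key, fixstr value
    packed = msgpack.packb({'path': '/etc/hosts'}, use_bin_type=False)
    assert packed[0] & 0xf0 == 0x80   # fixmap (fewer than 16 entries)
    assert packed[1] & 0xe0 == 0xa0   # first key is a fixstr (up to 31 bytes)
    # valid_msgpacked_dict then matches the serialized key against known key names
    assert packed[1:].startswith(msgpack.packb('path', use_bin_type=False))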

+ 14 - 6
src/borg/archiver.py

@@ -26,7 +26,8 @@ from . import helpers
 from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics
 from .cache import Cache
 from .constants import *  # NOQA
-from .helpers import Error
+from .helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
+from .helpers import Error, NoManifestError
 from .helpers import location_validator, archivename_validator, ChunkerParams, CompressionSpec
 from .helpers import ItemFormatter, format_time, format_file_size, format_archive
 from .helpers import safe_encode, remove_surrogates, bin_to_hex
@@ -665,10 +666,11 @@ class Archiver:
         cache.commit()
         return self.exit_code
 
-    @with_repository(exclusive=True)
-    def do_delete(self, args, repository, manifest, key):
+    @with_repository(exclusive=True, manifest=False)
+    def do_delete(self, args, repository):
         """Delete an existing repository or archive"""
         if args.location.archive:
+            manifest, key = Manifest.load(repository)
             with Cache(repository, key, manifest, lock_wait=self.lock_wait) as cache:
                 archive = Archive(repository, key, manifest, args.location.archive, cache=cache)
                 stats = Statistics()
@@ -685,9 +687,15 @@ class Archiver:
         else:
             if not args.cache_only:
                 msg = []
-                msg.append("You requested to completely DELETE the repository *including* all archives it contains:")
-                for archive_info in manifest.list_archive_infos(sort_by='ts'):
-                    msg.append(format_archive(archive_info))
+                try:
+                    manifest, key = Manifest.load(repository)
+                except NoManifestError:
+                    msg.append("You requested to completely DELETE the repository *including* all archives it may contain.")
+                    msg.append("This repository seems to have no manifest, so we can't tell anything about its contents.")
+                else:
+                    msg.append("You requested to completely DELETE the repository *including* all archives it contains:")
+                    for archive_info in manifest.list_archive_infos(sort_by='ts'):
+                        msg.append(format_archive(archive_info))
                 msg.append("Type 'YES' if you understand this and want to continue: ")
                 msg = '\n'.join(msg)
                 if not yes(msg, false_msg="Aborting.", truish=('YES', ),
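The key change in do_delete is that the decorator no longer loads the manifest eagerly (manifest=False), so a full-repository delete still works when the manifest chunk is missing or unreadable. A sketch of the pattern using names from this diff; describe_repo_contents is a hypothetical helper, not part of the commit:

    from borg.helpers import Manifest, NoManifestError, format_archive

    def describe_repo_contents(repository):
        """Hypothetical helper: list what a full repository delete would destroy."""
        try:
            manifest, key = Manifest.load(repository)
        except NoManifestError:
            # no manifest: we cannot enumerate archives, but deletion can proceed
            return ["This repository seems to have no manifest; contents unknown."]
        return [format_archive(info) for info in manifest.list_archive_infos(sort_by='ts')]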

+ 14 - 3
src/borg/constants.py

@@ -1,7 +1,18 @@
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
-ITEM_KEYS = set(['path', 'source', 'rdev', 'chunks', 'hardlink_master',
-                 'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime',
-                 'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended', ])
+ITEM_KEYS = frozenset(['path', 'source', 'rdev', 'chunks', 'hardlink_master',
+                       'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime',
+                       'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended', ])
+
+# this is the set of keys that are always present in items:
+REQUIRED_ITEM_KEYS = frozenset(['path', 'mtime', ])
+
+# this set must be kept complete, otherwise rebuild_manifest might malfunction:
+ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'hostname', 'username', 'time', 'time_end',
+                          'comment', 'chunker_params',
+                          'recreate_cmdline', 'recreate_source_id', 'recreate_args'])
+
+# this is the set of keys that are always present in archives:
+REQUIRED_ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'time', ])
 
 ARCHIVE_TEXT_KEYS = (b'name', b'comment', b'hostname', b'username', b'time', b'time_end')
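The split into ITEM_KEYS/ARCHIVE_KEYS (everything a dict may contain) and REQUIRED_*_KEYS (everything it must contain) is what the subset checks in archive.py build on: required keys bound the dict from below, known keys from above. A small sketch mirroring valid_archive from this diff (unpacked msgpack dicts have bytes keys at that point):

    REQUIRED_ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'time', ])
    required = frozenset(key.encode() for key in REQUIRED_ARCHIVE_KEYS)

    def valid_archive(obj):
        return isinstance(obj, dict) and required.issubset(set(obj))

    assert valid_archive({b'version': 1, b'name': b'a', b'items': [], b'cmdline': [], b'time': b't'})
    assert not valid_archive({b'name': b'a'})  # required keys missing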
 

+ 14 - 2
src/borg/helpers.py

@@ -84,6 +84,10 @@ class ExtensionModuleError(Error):
     """The Borg binary extension modules do not seem to be properly installed"""
 
 
+class NoManifestError(Error):
+    """Repository has no manifest."""
+
+
 def check_extension_modules():
     from . import platform
     if hashindex.API_VERSION != 2:
@@ -100,11 +104,12 @@ class Manifest:
 
     MANIFEST_ID = b'\0' * 32
 
-    def __init__(self, key, repository):
+    def __init__(self, key, repository, item_keys=None):
         self.archives = {}
         self.config = {}
         self.key = key
         self.repository = repository
+        self.item_keys = frozenset(item_keys) if item_keys is not None else ITEM_KEYS
 
     @property
     def id_str(self):
@@ -113,7 +118,11 @@ class Manifest:
     @classmethod
     def load(cls, repository, key=None):
         from .key import key_factory
-        cdata = repository.get(cls.MANIFEST_ID)
+        from .repository import Repository
+        try:
+            cdata = repository.get(cls.MANIFEST_ID)
+        except Repository.ObjectNotFound:
+            raise NoManifestError
         if not key:
             key = key_factory(repository, cdata)
         manifest = cls(key, repository)
@@ -127,6 +136,8 @@ class Manifest:
         if manifest.timestamp:
             manifest.timestamp = manifest.timestamp.decode('ascii')
         manifest.config = m[b'config']
+        # valid item keys are whatever is known in the repo or every key we know
+        manifest.item_keys = ITEM_KEYS | frozenset(key.decode() for key in m.get(b'item_keys', []))
         return manifest, key
 
     def write(self):
@@ -136,6 +147,7 @@ class Manifest:
             'archives': self.archives,
             'timestamp': self.timestamp,
             'config': self.config,
+            'item_keys': tuple(self.item_keys),
         }))
         self.id = self.key.id_hash(data)
         self.repository.put(self.MANIFEST_ID, self.key.encrypt(Chunk(data)))
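Persisting item_keys in the manifest lets a newer borg record keys this code base does not know yet; on load, the recorded keys are unioned with the hardcoded ITEM_KEYS. A round-trip sketch, not from the commit (ITEM_KEYS is abbreviated and the msgpack decode options are an assumption):

    import msgpack

    ITEM_KEYS = frozenset(['path', 'mode', 'mtime'])  # abbreviated for the sketch
    # write(): store whatever keys this borg version knows (plus a future one here)
    stored = msgpack.packb({'item_keys': tuple(ITEM_KEYS | {'future_key'})})
    # load(): valid keys are everything recorded in the repo, unioned with our own set
    m = msgpack.unpackb(stored, raw=False)
    item_keys = ITEM_KEYS | frozenset(m['item_keys'])
    assert 'future_key' in item_keys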

+ 34 - 2
src/borg/testsuite/archive.py

@@ -6,7 +6,7 @@ from unittest.mock import Mock
 import pytest
 import msgpack
 
-from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, Statistics
+from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_dict, ITEM_KEYS, Statistics
 from ..item import Item
 from ..key import PlaintextKey
 from ..helpers import Manifest
@@ -139,7 +139,7 @@ class RobustUnpackerTestCase(BaseTestCase):
         return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz')
 
     def process(self, input):
-        unpacker = RobustUnpacker(validator=self._validator)
+        unpacker = RobustUnpacker(validator=self._validator, item_keys=ITEM_KEYS)
         result = []
         for should_sync, chunks in input:
             if should_sync:
@@ -184,3 +184,35 @@ class RobustUnpackerTestCase(BaseTestCase):
         input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])]
         result = self.process(input)
         self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
+
+
+@pytest.fixture
+def item_keys_serialized():
+    return [msgpack.packb(name) for name in ITEM_KEYS]
+
+
+@pytest.mark.parametrize('packed',
+    [b'', b'x', b'foobar', ] +
+    [msgpack.packb(o) for o in (
+        [None, 0, 0.0, False, '', {}, [], ()] +
+        [42, 23.42, True, b'foobar', {b'foo': b'bar'}, [b'foo', b'bar'], (b'foo', b'bar')]
+    )])
+def test_invalid_msgpacked_item(packed, item_keys_serialized):
+    assert not valid_msgpacked_dict(packed, item_keys_serialized)
+
+
+@pytest.mark.parametrize('packed',
+    [msgpack.packb(o) for o in [
+        {b'path': b'/a/b/c'},  # small (different msgpack mapping type!)
+        dict((k, b'') for k in ITEM_KEYS),  # as big (key count) as it gets
+        dict((k, b'x' * 1000) for k in ITEM_KEYS),  # as big (key count and volume) as it gets
+    ]])
+def test_valid_msgpacked_items(packed, item_keys_serialized):
+    assert valid_msgpacked_dict(packed, item_keys_serialized)
+
+
+def test_key_length_msgpacked_items():
+    key = b'x' * 32  # 31 bytes is the limit for fixstr msgpack type
+    data = {key: b''}
+    item_keys_serialized = [msgpack.packb(key), ]
+    assert valid_msgpacked_dict(msgpack.packb(data), item_keys_serialized)
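
test_key_length_msgpacked_items pins down the fixstr boundary: 31 bytes is the longest fixstr, so the 32-byte key forces a multi-byte string header, the case valid_msgpacked_dict covers with 0xd9/0xda/0xdb. A quick sketch of that boundary (assuming msgpack-python; whether a 32-byte raw packs as str8 or raw16 depends on the library version, hence the membership check):

    import msgpack

    # fixstr header is 0xa0 | length, valid for lengths 0..31
    assert msgpack.packb(b'x' * 31, use_bin_type=False)[0] == 0xa0 | 31
    # 32 bytes no longer fits a fixstr: expect str8 (0xd9) or raw16 (0xda)
    assert msgpack.packb(b'x' * 32, use_bin_type=False)[0] in (0xd9, 0xda)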