@@ -690,12 +690,46 @@ Number of files: {0.stats.nfiles}'''.format(
     return os.open(path, flags_normal)


+def valid_msgpacked_dict(d, keys_serialized):
+    """check if the data <d> looks like a msgpacked dict"""
+    d_len = len(d)
+    if d_len == 0:
+        return False
+    if d[0] & 0xf0 == 0x80:  # object is a fixmap (up to 15 elements)
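+        # the fixmap marker is a single byte, so the first key starts at offset 1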
+        offs = 1
+    elif d[0] == 0xde:  # object is a map16 (up to 2^16-1 elements)
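+        # 0xde is followed by a 16-bit big-endian element count, so the first key starts at offset 3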
+        offs = 3
+    else:
+        # object is not a map (dict)
+        # note: we must not have dicts with > 2^16-1 elements
+        return False
+    if d_len <= offs:
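+        # too short to contain even the start of the first key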
+        return False
+    # is the first dict key a bytestring?
+    if d[offs] & 0xe0 == 0xa0:  # key is a small bytestring (up to 31 bytes)
+        pass
+    elif d[offs] in (0xd9, 0xda, 0xdb):  # key is a str8, str16 or str32
+        pass
+    else:
+        # key is not a bytestring
+        return False
+    # is the bytestring any of the expected key names?
+    key_serialized = d[offs:]
+    return any(key_serialized.startswith(pattern) for pattern in keys_serialized)
+
+
 class RobustUnpacker:
     """A restartable/robust version of the streaming msgpack unpacker
     """
-    def __init__(self, validator):
+    def __init__(self, validator, item_keys):
         super().__init__()
-        self.item_keys = [msgpack.packb(name.encode()) for name in ITEM_KEYS]
+        self.item_keys = [msgpack.packb(name.encode()) for name in item_keys]
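+        # the serialized key names are used to spot the start of an item dict when re-syncing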
         self.validator = validator
         self._buffered_data = []
         self._resync = False
@@ -720,18 +754,10 @@ class RobustUnpacker:
             while self._resync:
                 if not data:
                     raise StopIteration
-                # Abort early if the data does not look like a serialized dict
-                if len(data) < 2 or ((data[0] & 0xf0) != 0x80) or ((data[1] & 0xe0) != 0xa0):
-                    data = data[1:]
-                    continue
-                # Make sure it looks like an item dict
-                for pattern in self.item_keys:
-                    if data[1:].startswith(pattern):
-                        break
-                else:
+                # Abort early if the data does not look like a serialized item dict
+                if not valid_msgpacked_dict(data, self.item_keys):
                    data = data[1:]
                    continue
-
                self._unpacker = msgpack.Unpacker(object_hook=StableDict)
                self._unpacker.feed(data)
                try:
@@ -806,7 +832,12 @@ class ArchiveChecker:
             self.chunks[id_] = init_entry

     def identify_key(self, repository):
-        cdata = repository.get(next(self.chunks.iteritems())[0])
+        try:
+            some_chunkid, _ = next(self.chunks.iteritems())
+        except StopIteration:
+            # repo is completely empty, no chunks
+            return None
+        cdata = repository.get(some_chunkid)
         return key_factory(repository, cdata)

     def verify_data(self):
@@ -834,13 +865,30 @@ class ArchiveChecker:

         Iterates through all objects in the repository looking for archive metadata blocks.
         """
+        required_archive_keys = frozenset(key.encode() for key in REQUIRED_ARCHIVE_KEYS)
+
+        def valid_archive(obj):
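+            # archive metadata must be a dict carrying at least all required archive keys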
+            if not isinstance(obj, dict):
+                return False
+            keys = set(obj)
+            return required_archive_keys.issubset(keys)
+
         logger.info('Rebuilding missing manifest, this might take some time...')
+        # As we have lost the manifest, we no longer know which item keys were valid.
+        # Collecting every key we encounter in a damaged repo seems unwise, so we just
+        # use the hardcoded list from the source code. It is therefore not recommended
+        # to rebuild a lost manifest with a borg version older than the most recent one
+        # ever used within this repository (newer borg versions may support more item keys).
         manifest = Manifest(self.key, self.repository)
+        archive_keys_serialized = [msgpack.packb(name.encode()) for name in ARCHIVE_KEYS]
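+        # pre-serialize the known archive keys once so the per-chunk precheck can match raw bytes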
         for chunk_id, _ in self.chunks.iteritems():
             cdata = self.repository.get(chunk_id)
             _, data = self.key.decrypt(chunk_id, cdata)
-            # Some basic sanity checks of the payload before feeding it into msgpack
-            if len(data) < 2 or ((data[0] & 0xf0) != 0x80) or ((data[1] & 0xe0) != 0xa0):
+            if not valid_msgpacked_dict(data, archive_keys_serialized):
                 continue
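+            # b'\xa7version\x01' is the msgpacked b'version' key (0xa7 = fixstr of
+            # length 7) directly followed by the integer value 1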
             if b'cmdline' not in data or b'\xa7version\x01' not in data:
                 continue
@@ -850,7 +898,7 @@ class ArchiveChecker:
             # msgpack with invalid data
             except (TypeError, ValueError, StopIteration):
                 continue
-            if isinstance(archive, dict) and b'items' in archive and b'cmdline' in archive:
+            if valid_archive(archive):
                 logger.info('Found archive %s', archive[b'name'].decode('utf-8'))
                 manifest.archives[archive[b'name'].decode('utf-8')] = {b'id': chunk_id, b'time': archive[b'time']}
         logger.info('Manifest rebuild complete.')
@@ -912,7 +960,11 @@ class ArchiveChecker:

             Missing item chunks will be skipped and the msgpack stream will be restarted
             """
-            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and 'path' in item)
+            item_keys = frozenset(key.encode() for key in self.manifest.item_keys)
+            required_item_keys = frozenset(key.encode() for key in REQUIRED_ITEM_KEYS)
+            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and 'path' in item,
+                                      self.manifest.item_keys)
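+            # the validator only does a cheap plausibility check; item_keys drive the re-sync heuristic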
             _state = 0

             def missing_chunk_detector(chunk_id):
@@ -927,6 +979,13 @@ class ArchiveChecker:
                 self.error_found = True
                 logger.error(msg)

+            def valid_item(obj):
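+                # an item must contain all required keys and only keys the manifest knows about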
+                if not isinstance(obj, StableDict):
+                    return False
+                keys = set(obj)
+                return required_item_keys.issubset(keys) and keys.issubset(item_keys)
+
             i = 0
             for state, items in groupby(archive[b'items'], missing_chunk_detector):
                 items = list(items)
@@ -942,7 +1001,7 @@
                     unpacker.feed(data)
                     try:
                         for item in unpacker:
-                            if isinstance(item, dict):
+                            if valid_item(item):
                                 yield Item(internal_dict=item)
                             else:
                                 report('Did not get expected metadata dict when unpacking item metadata', chunk_id, i)