Prechádzať zdrojové kódy

Merge pull request #1148 from ThomasWaldmann/fix-1135

fix resync and msgpacked item qualifier, fixes #1135
TW 9 rokov pred
rodič
commit
d9b77d57c8
2 zmenil súbory, kde vykonal 64 pridanie a 11 odobranie
  1. 30 10
      borg/archive.py
  2. 34 1
      borg/testsuite/archive.py

+ 30 - 10
borg/archive.py

@@ -592,6 +592,34 @@ ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks',
                  b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
                  b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
 
 
 
 
+def valid_msgpacked_item(d, item_keys_serialized):
+    """check if the data <d> looks like a msgpacked item metadata dict"""
+    d_len = len(d)
+    if d_len == 0:
+        return False
+    if d[0] & 0xf0 == 0x80:  # object is a fixmap (up to 15 elements)
+        offs = 1
+    elif d[0] == 0xde:  # object is a map16 (up to 2^16-1 elements)
+        offs = 3
+    else:
+        # object is not a map (dict)
+        # note: we must not have item dicts with > 2^16-1 elements
+        return False
+    if d_len <= offs:
+        return False
+    # is the first dict key a bytestring?
+    if d[offs] & 0xe0 == 0xa0:  # key is a small bytestring (up to 31 chars)
+        pass
+    elif d[offs] in (0xd9, 0xda, 0xdb):  # key is a str8, str16 or str32
+        pass
+    else:
+        # key is not a bytestring
+        return False
+    # is the bytestring any of the expected key names?
+    key_serialized = d[offs:]
+    return any(key_serialized.startswith(pattern) for pattern in item_keys_serialized)
+
+
 class RobustUnpacker:
 class RobustUnpacker:
     """A restartable/robust version of the streaming msgpack unpacker
     """A restartable/robust version of the streaming msgpack unpacker
     """
     """
@@ -622,18 +650,10 @@ class RobustUnpacker:
             while self._resync:
             while self._resync:
                 if not data:
                 if not data:
                     raise StopIteration
                     raise StopIteration
-                # Abort early if the data does not look like a serialized dict
-                if len(data) < 2 or ((data[0] & 0xf0) != 0x80) or ((data[1] & 0xe0) != 0xa0):
-                    data = data[1:]
-                    continue
-                # Make sure it looks like an item dict
-                for pattern in self.item_keys:
-                    if data[1:].startswith(pattern):
-                        break
-                else:
+                # Abort early if the data does not look like a serialized item dict
+                if not valid_msgpacked_item(data, self.item_keys):
                     data = data[1:]
                     data = data[1:]
                     continue
                     continue
-
                 self._unpacker = msgpack.Unpacker(object_hook=StableDict)
                 self._unpacker = msgpack.Unpacker(object_hook=StableDict)
                 self._unpacker.feed(data)
                 self._unpacker.feed(data)
                 try:
                 try:

+ 34 - 1
borg/testsuite/archive.py

@@ -2,8 +2,9 @@ from datetime import datetime, timezone
 from unittest.mock import Mock
 from unittest.mock import Mock
 
 
 import msgpack
 import msgpack
+import pytest
 
 
-from ..archive import Archive, CacheChunkBuffer, RobustUnpacker
+from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_item, ITEM_KEYS
 from ..key import PlaintextKey
 from ..key import PlaintextKey
 from ..helpers import Manifest
 from ..helpers import Manifest
 from . import BaseTestCase
 from . import BaseTestCase
@@ -112,3 +113,35 @@ class RobustUnpackerTestCase(BaseTestCase):
         input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])]
         input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])]
         result = self.process(input)
         result = self.process(input)
         self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
         self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
+
+
+@pytest.fixture
+def item_keys_serialized():
+    return [msgpack.packb(name) for name in ITEM_KEYS]
+
+
+@pytest.mark.parametrize('packed',
+    [b'', b'x', b'foobar', ] +
+    [msgpack.packb(o) for o in (
+        [None, 0, 0.0, False, '', {}, [], ()] +
+        [42, 23.42, True, b'foobar', {b'foo': b'bar'}, [b'foo', b'bar'], (b'foo', b'bar')]
+    )])
+def test_invalid_msgpacked_item(packed, item_keys_serialized):
+    assert not valid_msgpacked_item(packed, item_keys_serialized)
+
+
+@pytest.mark.parametrize('packed',
+    [msgpack.packb(o) for o in [
+        {b'path': b'/a/b/c'},  # small (different msgpack mapping type!)
+        dict((k, b'') for k in ITEM_KEYS),  # as big (key count) as it gets
+        dict((k, b'x' * 1000) for k in ITEM_KEYS),  # as big (key count and volume) as it gets
+    ]])
+def test_valid_msgpacked_items(packed, item_keys_serialized):
+    assert valid_msgpacked_item(packed, item_keys_serialized)
+
+
+def test_key_length_msgpacked_items():
+    key = b'x' * 32  # 31 bytes is the limit for fixstr msgpack type
+    data = {key: b''}
+    item_keys_serialized = [msgpack.packb(key), ]
+    assert valid_msgpacked_item(msgpack.packb(data), item_keys_serialized)