Browse Source

Merge pull request #2731 from ThomasWaldmann/limit-unpacker-manifest

Manifest: use limited unpacker
TW 8 years ago
parent
commit
1932fe8ea9
6 changed files with 58 additions and 34 deletions
  1. 16 0
      src/borg/constants.py
  2. 4 3
      src/borg/crypto/key.py
  3. 33 0
      src/borg/helpers.py
  4. 3 24
      src/borg/remote.py
  5. 1 6
      src/borg/repository.py
  6. 1 1
      src/borg/testsuite/key.py

+ 16 - 0
src/borg/constants.py

@@ -31,6 +31,22 @@ DEFAULT_MAX_SEGMENT_SIZE = 500 * 1024 * 1024
 # the header, and the total size was set to 20 MiB).
 MAX_DATA_SIZE = 20971479
 
+# MAX_OBJECT_SIZE = <20 MiB (MAX_DATA_SIZE) + 41 bytes for a Repository PUT header, which consists of
+# a 1 byte tag ID, 4 byte CRC, 4 byte size and 32 bytes for the ID.
+MAX_OBJECT_SIZE = MAX_DATA_SIZE + 41  # see LoggedIO.put_header_fmt.size assertion in repository module
+assert MAX_OBJECT_SIZE == 20971520 == 20 * 1024 * 1024
+
+# borg.remote read() buffer size
+BUFSIZE = 10 * 1024 * 1024
+
+# to use a safe, limited unpacker, we need to set a upper limit to the archive count in the manifest.
+# this does not mean that you can always really reach that number, because it also needs to be less than
+# MAX_DATA_SIZE or it will trigger the check for that.
+MAX_ARCHIVES = 400000
+
+# repo.list() / .scan() result count limit the borg client uses
+LIST_SCAN_LIMIT = 100000
+
 DEFAULT_SEGMENTS_PER_DIR = 1000
 
 CHUNK_MIN_EXP = 19  # 2**19 == 512kiB

+ 4 - 3
src/borg/crypto/key.py

@@ -21,6 +21,7 @@ from ..helpers import StableDict
 from ..helpers import Error, IntegrityError
 from ..helpers import yes
 from ..helpers import get_keys_dir, get_security_dir
+from ..helpers import get_limited_unpacker
 from ..helpers import bin_to_hex
 from ..item import Key, EncryptedKey
 from ..platform import SaveFile
@@ -216,9 +217,9 @@ class KeyBase:
             logger.warning('Manifest authentication DISABLED.')
             tam_required = False
         data = bytearray(data)
-        # Since we don't trust these bytes we use the slower Python unpacker,
-        # which is assumed to have a lower probability of security issues.
-        unpacked = msgpack.fallback.unpackb(data, object_hook=StableDict, unicode_errors='surrogateescape')
+        unpacker = get_limited_unpacker('manifest')
+        unpacker.feed(data)
+        unpacked = unpacker.unpack()
         if b'tam' not in unpacked:
             if tam_required:
                 raise TAMRequiredError(self.repository._location.canonical_path())

+ 33 - 0
src/borg/helpers.py

@@ -145,6 +145,35 @@ def check_extension_modules():
         raise ExtensionModuleError
 
 
+def get_limited_unpacker(kind):
+    """return a limited Unpacker because we should not trust msgpack data received from remote"""
+    args = dict(use_list=False,  # return tuples, not lists
+                max_bin_len=0,  # not used
+                max_ext_len=0,  # not used
+                max_buffer_size=3 * max(BUFSIZE, MAX_OBJECT_SIZE),
+                max_str_len=MAX_OBJECT_SIZE,  # a chunk or other repo object
+                )
+    if kind == 'server':
+        args.update(dict(max_array_len=100,  # misc. cmd tuples
+                         max_map_len=100,  # misc. cmd dicts
+                         ))
+    elif kind == 'client':
+        args.update(dict(max_array_len=LIST_SCAN_LIMIT,  # result list from repo.list() / .scan()
+                         max_map_len=100,  # misc. result dicts
+                         ))
+    elif kind == 'manifest':
+        args.update(dict(use_list=True,  # default value
+                         max_array_len=100,  # ITEM_KEYS ~= 22
+                         max_map_len=MAX_ARCHIVES,  # list of archives
+                         max_str_len=255,  # archive name
+                         object_hook=StableDict,
+                         unicode_errors='surrogateescape',
+                         ))
+    else:
+        raise ValueError('kind must be "server", "client" or "manifest"')
+    return msgpack.Unpacker(**args)
+
+
 ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts')
 
 
@@ -351,6 +380,10 @@ class Manifest:
             prev_ts = self.last_timestamp
             incremented = (prev_ts + timedelta(microseconds=1)).isoformat()
             self.timestamp = max(incremented, datetime.utcnow().isoformat())
+        # include checks for limits as enforced by limited unpacker (used by load())
+        assert len(self.archives) <= MAX_ARCHIVES
+        assert all(len(name) <= 255 for name in self.archives)
+        assert len(self.item_keys) <= 100
         manifest = ManifestItem(
             version=1,
             archives=StableDict(self.archives.get_raw_dict()),

+ 3 - 24
src/borg/remote.py

@@ -20,16 +20,18 @@ import msgpack
 
 from . import __version__
 from .compress import LZ4
+from .constants import *  # NOQA
 from .helpers import Error, IntegrityError
 from .helpers import bin_to_hex
 from .helpers import get_home_dir
+from .helpers import get_limited_unpacker
 from .helpers import hostname_is_unique
 from .helpers import replace_placeholders
 from .helpers import sysinfo
 from .helpers import format_file_size
 from .helpers import truncate_and_unlink
 from .logger import create_logger, setup_logging
-from .repository import Repository, MAX_OBJECT_SIZE, LIST_SCAN_LIMIT
+from .repository import Repository
 from .version import parse_version, format_version
 from .algorithms.checksums import xxh64
 
@@ -39,8 +41,6 @@ RPC_PROTOCOL_VERSION = 2
 BORG_VERSION = parse_version(__version__)
 MSGID, MSG, ARGS, RESULT = b'i', b'm', b'a', b'r'
 
-BUFSIZE = 10 * 1024 * 1024
-
 MAX_INFLIGHT = 100
 
 RATELIMIT_PERIOD = 0.1
@@ -65,27 +65,6 @@ def os_write(fd, data):
     return amount
 
 
-def get_limited_unpacker(kind):
-    """return a limited Unpacker because we should not trust msgpack data received from remote"""
-    args = dict(use_list=False,  # return tuples, not lists
-                max_bin_len=0,  # not used
-                max_ext_len=0,  # not used
-                max_buffer_size=3 * max(BUFSIZE, MAX_OBJECT_SIZE),
-                max_str_len=MAX_OBJECT_SIZE,  # a chunk or other repo object
-                )
-    if kind == 'server':
-        args.update(dict(max_array_len=100,  # misc. cmd tuples
-                         max_map_len=100,  # misc. cmd dicts
-                         ))
-    elif kind == 'client':
-        args.update(dict(max_array_len=LIST_SCAN_LIMIT,  # result list from repo.list() / .scan()
-                         max_map_len=100,  # misc. result dicts
-                         ))
-    else:
-        raise ValueError('kind must be "server" or "client"')
-    return msgpack.Unpacker(**args)
-
-
 class ConnectionClosed(Error):
     """Connection closed by remote host"""
 

+ 1 - 6
src/borg/repository.py

@@ -34,8 +34,6 @@ TAG_PUT = 0
 TAG_DELETE = 1
 TAG_COMMIT = 2
 
-LIST_SCAN_LIMIT = 100000  # repo.list() / .scan() result count limit the borg client uses
-
 FreeSpace = partial(defaultdict, int)
 
 
@@ -1411,7 +1409,4 @@ class LoggedIO:
         return self.segment - 1  # close_segment() increments it
 
 
-# MAX_OBJECT_SIZE = <20 MiB (MAX_DATA_SIZE) + 41 bytes for a Repository PUT header, which consists of
-# a 1 byte tag ID, 4 byte CRC, 4 byte size and 32 bytes for the ID.
-MAX_OBJECT_SIZE = MAX_DATA_SIZE + LoggedIO.put_header_fmt.size
-assert MAX_OBJECT_SIZE == 20971520 == 20 * 1024 * 1024
+assert LoggedIO.put_header_fmt.size == 41  # see constants.MAX_OBJECT_SIZE

+ 1 - 1
src/borg/testsuite/key.py

@@ -331,7 +331,7 @@ class TestTAM:
             key.unpack_and_verify_manifest(blob)
 
         blob = b'\xc1\xc1\xc1'
-        with pytest.raises(msgpack.UnpackException):
+        with pytest.raises((ValueError, msgpack.UnpackException)):
             key.unpack_and_verify_manifest(blob)
 
     def test_missing_when_required(self, key):