Browse code

cleanup msgpack related str/bytes mess, fixes #968

see ticket and borg.helpers.msgpack docstring.

this changeset implements the full migration to
msgpack 2.0 spec (use_bin_type=True, raw=False).

still needed compat to the past is done via want_bytes decoder in borg.item.
Thomas Waldmann, 3 years ago
parent
commit
8e87f1111b

+ 2 - 5
src/borg/archive.py

@@ -1718,13 +1718,10 @@ class ArchiveChecker:
 
 
         Iterates through all objects in the repository looking for archive metadata blocks.
         Iterates through all objects in the repository looking for archive metadata blocks.
         """
         """
-        required_archive_keys = frozenset(key.encode() for key in REQUIRED_ARCHIVE_KEYS)
-
         def valid_archive(obj):
         def valid_archive(obj):
             if not isinstance(obj, dict):
             if not isinstance(obj, dict):
                 return False
                 return False
-            keys = set(obj)
-            return required_archive_keys.issubset(keys)
+            return REQUIRED_ARCHIVE_KEYS.issubset(obj)
 
 
         logger.info('Rebuilding missing manifest, this might take some time...')
         logger.info('Rebuilding missing manifest, this might take some time...')
         # as we have lost the manifest, we do not know any more what valid item keys we had.
         # as we have lost the manifest, we do not know any more what valid item keys we had.
@@ -1904,7 +1901,7 @@ class ArchiveChecker:
             def valid_item(obj):
             def valid_item(obj):
                 if not isinstance(obj, StableDict):
                 if not isinstance(obj, StableDict):
                     return False, 'not a dictionary'
                     return False, 'not a dictionary'
-                keys = set(k.decode('utf-8', errors='replace') for k in obj)
+                keys = set(obj)
                 if not required_item_keys.issubset(keys):
                 if not required_item_keys.issubset(keys):
                     return False, 'missing required keys: ' + list_keys_safe(required_item_keys - keys)
                     return False, 'missing required keys: ' + list_keys_safe(required_item_keys - keys)
                 if not keys.issubset(item_keys):
                 if not keys.issubset(item_keys):

+ 1 - 1
src/borg/archiver.py

@@ -2331,7 +2331,7 @@ class Archiver:
 
 
             unpacker = msgpack.Unpacker(use_list=False, object_hook=StableDict)
             unpacker = msgpack.Unpacker(use_list=False, object_hook=StableDict)
             first = True
             first = True
-            for item_id in archive_org_dict[b'items']:
+            for item_id in archive_org_dict['items']:
                 data = key.decrypt(item_id, repository.get(item_id))
                 data = key.decrypt(item_id, repository.get(item_id))
                 unpacker.feed(data)
                 unpacker.feed(data)
                 for item in unpacker:
                 for item in unpacker:

+ 5 - 5
src/borg/crypto/key.py

@@ -232,24 +232,24 @@ class KeyBase:
         unpacker = get_limited_unpacker('manifest')
         unpacker = get_limited_unpacker('manifest')
         unpacker.feed(data)
         unpacker.feed(data)
         unpacked = unpacker.unpack()
         unpacked = unpacker.unpack()
-        if b'tam' not in unpacked:
+        if 'tam' not in unpacked:
             if tam_required:
             if tam_required:
                 raise TAMRequiredError(self.repository._location.canonical_path())
                 raise TAMRequiredError(self.repository._location.canonical_path())
             else:
             else:
                 logger.debug('TAM not found and not required')
                 logger.debug('TAM not found and not required')
                 return unpacked, False
                 return unpacked, False
-        tam = unpacked.pop(b'tam', None)
+        tam = unpacked.pop('tam', None)
         if not isinstance(tam, dict):
         if not isinstance(tam, dict):
             raise TAMInvalid()
             raise TAMInvalid()
-        tam_type = tam.get(b'type', b'<none>').decode('ascii', 'replace')
+        tam_type = tam.get('type', '<none>')
         if tam_type != 'HKDF_HMAC_SHA512':
         if tam_type != 'HKDF_HMAC_SHA512':
             if tam_required:
             if tam_required:
                 raise TAMUnsupportedSuiteError(repr(tam_type))
                 raise TAMUnsupportedSuiteError(repr(tam_type))
             else:
             else:
                 logger.debug('Ignoring TAM made with unsupported suite, since TAM is not required: %r', tam_type)
                 logger.debug('Ignoring TAM made with unsupported suite, since TAM is not required: %r', tam_type)
                 return unpacked, False
                 return unpacked, False
-        tam_hmac = tam.get(b'hmac')
-        tam_salt = tam.get(b'salt')
+        tam_hmac = tam.get('hmac')
+        tam_salt = tam.get('salt')
         if not isinstance(tam_salt, bytes) or not isinstance(tam_hmac, bytes):
         if not isinstance(tam_salt, bytes) or not isinstance(tam_hmac, bytes):
             raise TAMInvalid()
             raise TAMInvalid()
         offset = data.index(tam_hmac)
         offset = data.index(tam_hmac)

+ 15 - 19
src/borg/helpers/msgpack.py

@@ -2,8 +2,7 @@
 wrapping msgpack
 wrapping msgpack
 ================
 ================
 
 
-Due to the planned breaking api changes in upstream msgpack, we wrap it the way we need it -
-to avoid having lots of clutter in the calling code. see tickets #968 and #3632.
+We wrap msgpack here the way we need it - to avoid having lots of clutter in the calling code.
 
 
 Packing
 Packing
 -------
 -------
@@ -22,30 +21,27 @@ Packing
 
 
 Unpacking
 Unpacking
 ---------
 ---------
-- raw = True (the old way, used by borg <= 1.3)
-  This is currently still needed to not try to decode "raw" msgpack objects.
-  These could come either from str (new or old msgpack) or bytes (old msgpack).
-  Thus, we basically must know what we want and either keep the bytes we get
-  or decode them to str, if we want str.
-
-- raw = False (the new way)
-  This can be used in future, when we do not have to deal with data any more that was packed the old way.
+- raw = False (used by borg since borg 1.3)
+  We already can use this with borg 1.3 due to the want_bytes decoder.
+  This decoder can be removed in future, when we do not have to deal with data any more that was packed the old way.
   It will then unpack according to the msgpack 2.0 spec format and directly output bytes or str.
   It will then unpack according to the msgpack 2.0 spec format and directly output bytes or str.
 
 
+- raw = True (the old way, used by borg < 1.3)
+
 - unicode_errors = 'surrogateescape' -> see description above (will be used when raw is False).
 - unicode_errors = 'surrogateescape' -> see description above (will be used when raw is False).
 
 
-As of borg 1.3, we have the first part on the way to fix the msgpack str/bytes mess, #968.
-borg now still needs to **read** old repos, archives, keys, ... so we can not yet fix it completely.
-But from now on, borg only **writes** new data according to the new msgpack spec,
-thus we can complete the fix for #968 in a later borg release.
+As of borg 1.3, we have fixed most of the msgpack str/bytes mess, #968.
+Borg now still needs to **read** old repos, archives, keys, ... so we can not yet fix it completely.
+But from now on, borg only **writes** new data according to the new msgpack 2.0 spec,
+thus we can remove some legacy support in a later borg release (some places are marked with "legacy").
 
 
 current way in msgpack terms
 current way in msgpack terms
 ----------------------------
 ----------------------------
 
 
 - pack with use_bin_type=True (according to msgpack 2.0 spec)
 - pack with use_bin_type=True (according to msgpack 2.0 spec)
 - packs str -> raw and bytes -> bin
 - packs str -> raw and bytes -> bin
-- unpack with raw=True (aka "the old way")
-- unpacks raw to bytes (thus we always need to decode manually if we want str)
+- unpack with raw=False (according to msgpack 2.0 spec, using unicode_errors='surrogateescape')
+- unpacks bin to bytes and raw to str (thus we need to re-encode manually if we want bytes from "raw")
 """
 """
 
 
 from .datastruct import StableDict
 from .datastruct import StableDict
@@ -66,8 +62,8 @@ from msgpack import OutOfData
 version = mp_version
 version = mp_version
 
 
 USE_BIN_TYPE = True
 USE_BIN_TYPE = True
-RAW = True  # should become False later when we do not need to read old stuff any more
-UNICODE_ERRORS = 'surrogateescape'  # previously done by safe_encode, safe_decode
+RAW = False
+UNICODE_ERRORS = 'surrogateescape'
 
 
 
 
 class PackException(Exception):
 class PackException(Exception):
@@ -161,7 +157,7 @@ def unpackb(packed, *, raw=RAW, unicode_errors=UNICODE_ERRORS,
 def unpack(stream, *, raw=RAW, unicode_errors=UNICODE_ERRORS,
 def unpack(stream, *, raw=RAW, unicode_errors=UNICODE_ERRORS,
            strict_map_key=False,
            strict_map_key=False,
            **kwargs):
            **kwargs):
-    # assert raw == RAW
+    assert raw == RAW
     assert unicode_errors == UNICODE_ERRORS
     assert unicode_errors == UNICODE_ERRORS
     try:
     try:
         kw = dict(raw=raw, unicode_errors=unicode_errors,
         kw = dict(raw=raw, unicode_errors=unicode_errors,

+ 22 - 13
src/borg/item.pyx

@@ -60,6 +60,15 @@ def fix_tuple_of_str_and_int(t):
     return t
     return t
 
 
 
 
+def want_bytes(v):
+    """we know that we want bytes and the value should be bytes"""
+    # legacy support: it being str can be caused by msgpack unpack decoding old data that was packed with use_bin_type=False
+    if isinstance(v, str):
+        v = v.encode('utf-8', errors='surrogateescape')
+    assert isinstance(v, bytes)
+    return v
+
+
 class PropDict:
 class PropDict:
     """
     """
     Manage a dictionary via properties.
     Manage a dictionary via properties.
@@ -204,10 +213,10 @@ class Item(PropDict):
     user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None')
     user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None')
     group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None')
     group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None')
 
 
-    acl_access = PropDict._make_property('acl_access', bytes)
-    acl_default = PropDict._make_property('acl_default', bytes)
-    acl_extended = PropDict._make_property('acl_extended', bytes)
-    acl_nfs4 = PropDict._make_property('acl_nfs4', bytes)
+    acl_access = PropDict._make_property('acl_access', bytes, decode=want_bytes)
+    acl_default = PropDict._make_property('acl_default', bytes, decode=want_bytes)
+    acl_extended = PropDict._make_property('acl_extended', bytes, decode=want_bytes)
+    acl_nfs4 = PropDict._make_property('acl_nfs4', bytes, decode=want_bytes)
 
 
     mode = PropDict._make_property('mode', int)
     mode = PropDict._make_property('mode', int)
     uid = PropDict._make_property('uid', int)
     uid = PropDict._make_property('uid', int)
@@ -224,7 +233,7 @@ class Item(PropDict):
     # compatibility note: this is a new feature, in old archives size will be missing.
     # compatibility note: this is a new feature, in old archives size will be missing.
     size = PropDict._make_property('size', int)
     size = PropDict._make_property('size', int)
 
 
-    hlid = PropDict._make_property('hlid', bytes)  # hard link id: same value means same hard link.
+    hlid = PropDict._make_property('hlid', bytes, decode=want_bytes)  # hard link id: same value means same hard link.
     hardlink_master = PropDict._make_property('hardlink_master', bool)  # legacy
     hardlink_master = PropDict._make_property('hardlink_master', bool)  # legacy
 
 
     chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None')
     chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None')
@@ -363,9 +372,9 @@ class EncryptedKey(PropDict):
     version = PropDict._make_property('version', int)
     version = PropDict._make_property('version', int)
     algorithm = PropDict._make_property('algorithm', str)
     algorithm = PropDict._make_property('algorithm', str)
     iterations = PropDict._make_property('iterations', int)
     iterations = PropDict._make_property('iterations', int)
-    salt = PropDict._make_property('salt', bytes)
-    hash = PropDict._make_property('hash', bytes)
-    data = PropDict._make_property('data', bytes)
+    salt = PropDict._make_property('salt', bytes, decode=want_bytes)
+    hash = PropDict._make_property('hash', bytes, decode=want_bytes)
+    data = PropDict._make_property('data', bytes, decode=want_bytes)
     argon2_time_cost = PropDict._make_property('argon2_time_cost', int)
     argon2_time_cost = PropDict._make_property('argon2_time_cost', int)
     argon2_memory_cost = PropDict._make_property('argon2_memory_cost', int)
     argon2_memory_cost = PropDict._make_property('argon2_memory_cost', int)
     argon2_parallelism = PropDict._make_property('argon2_parallelism', int)
     argon2_parallelism = PropDict._make_property('argon2_parallelism', int)
@@ -399,10 +408,10 @@ class Key(PropDict):
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
 
 
     version = PropDict._make_property('version', int)
     version = PropDict._make_property('version', int)
-    repository_id = PropDict._make_property('repository_id', bytes)
-    enc_key = PropDict._make_property('enc_key', bytes)
-    enc_hmac_key = PropDict._make_property('enc_hmac_key', bytes)
-    id_key = PropDict._make_property('id_key', bytes)
+    repository_id = PropDict._make_property('repository_id', bytes, decode=want_bytes)
+    enc_key = PropDict._make_property('enc_key', bytes, decode=want_bytes)
+    enc_hmac_key = PropDict._make_property('enc_hmac_key', bytes, decode=want_bytes)
+    id_key = PropDict._make_property('id_key', bytes, decode=want_bytes)
     chunk_seed = PropDict._make_property('chunk_seed', int)
     chunk_seed = PropDict._make_property('chunk_seed', int)
     tam_required = PropDict._make_property('tam_required', bool)
     tam_required = PropDict._make_property('tam_required', bool)
 
 
@@ -443,7 +452,7 @@ class ArchiveItem(PropDict):
     chunker_params = PropDict._make_property('chunker_params', tuple)
     chunker_params = PropDict._make_property('chunker_params', tuple)
     recreate_cmdline = PropDict._make_property('recreate_cmdline', list)  # list of s-e-str
     recreate_cmdline = PropDict._make_property('recreate_cmdline', list)  # list of s-e-str
     # recreate_source_id, recreate_args, recreate_partial_chunks were used in 1.1.0b1 .. b2
     # recreate_source_id, recreate_args, recreate_partial_chunks were used in 1.1.0b1 .. b2
-    recreate_source_id = PropDict._make_property('recreate_source_id', bytes)
+    recreate_source_id = PropDict._make_property('recreate_source_id', bytes, decode=want_bytes)
     recreate_args = PropDict._make_property('recreate_args', list)  # list of s-e-str
     recreate_args = PropDict._make_property('recreate_args', list)  # list of s-e-str
     recreate_partial_chunks = PropDict._make_property('recreate_partial_chunks', list)  # list of tuples
     recreate_partial_chunks = PropDict._make_property('recreate_partial_chunks', list)  # list of tuples
     size = PropDict._make_property('size', int)
     size = PropDict._make_property('size', int)

+ 35 - 42
src/borg/remote.py

@@ -38,8 +38,7 @@ logger = create_logger(__name__)
 
 
 RPC_PROTOCOL_VERSION = 2
 RPC_PROTOCOL_VERSION = 2
 BORG_VERSION = parse_version(__version__)
 BORG_VERSION = parse_version(__version__)
-MSGID, MSG, ARGS, RESULT = 'i', 'm', 'a', 'r'  # pack
-MSGIDB, MSGB, ARGSB, RESULTB = b'i', b'm', b'a', b'r'  # unpack
+MSGID, MSG, ARGS, RESULT = 'i', 'm', 'a', 'r'
 
 
 MAX_INFLIGHT = 100
 MAX_INFLIGHT = 100
 
 
@@ -139,10 +138,6 @@ compatMap = {
 }
 }
 
 
 
 
-def decode_keys(d):
-    return {k.decode(): d[k] for k in d}
-
-
 class RepositoryServer:  # pragma: no cover
 class RepositoryServer:  # pragma: no cover
     rpc_methods = (
     rpc_methods = (
         '__len__',
         '__len__',
@@ -217,14 +212,13 @@ class RepositoryServer:  # pragma: no cover
                 for unpacked in unpacker:
                 for unpacked in unpacker:
                     if isinstance(unpacked, dict):
                     if isinstance(unpacked, dict):
                         dictFormat = True
                         dictFormat = True
-                        msgid = unpacked[MSGIDB]
-                        method = unpacked[MSGB].decode()
-                        args = decode_keys(unpacked[ARGSB])
+                        msgid = unpacked[MSGID]
+                        method = unpacked[MSG]
+                        args = unpacked[ARGS]
                     elif isinstance(unpacked, tuple) and len(unpacked) == 4:
                     elif isinstance(unpacked, tuple) and len(unpacked) == 4:
                         dictFormat = False
                         dictFormat = False
                         # The first field 'type' was always 1 and has always been ignored
                         # The first field 'type' was always 1 and has always been ignored
                         _, msgid, method, args = unpacked
                         _, msgid, method, args = unpacked
-                        method = method.decode()
                         args = self.positional_to_named(method, args)
                         args = self.positional_to_named(method, args)
                     else:
                     else:
                         if self.repository is not None:
                         if self.repository is not None:
@@ -308,7 +302,7 @@ class RepositoryServer:  # pragma: no cover
         # clients since 1.1.0b3 use a dict as client_data
         # clients since 1.1.0b3 use a dict as client_data
         # clients since 1.1.0b6 support json log format from server
         # clients since 1.1.0b6 support json log format from server
         if isinstance(client_data, dict):
         if isinstance(client_data, dict):
-            self.client_version = client_data[b'client_version']
+            self.client_version = client_data['client_version']
             level = logging.getLevelName(logging.getLogger('').level)
             level = logging.getLevelName(logging.getLogger('').level)
             setup_logging(is_serve=True, json=True, level=level)
             setup_logging(is_serve=True, json=True, level=level)
             logger.debug('Initialized logging system for JSON-based protocol')
             logger.debug('Initialized logging system for JSON-based protocol')
@@ -370,7 +364,6 @@ class RepositoryServer:  # pragma: no cover
         return self.repository.id
         return self.repository.id
 
 
     def inject_exception(self, kind):
     def inject_exception(self, kind):
-        kind = kind.decode()
         s1 = 'test string'
         s1 = 'test string'
         s2 = 'test string2'
         s2 = 'test string2'
         if kind == 'DoesNotExist':
         if kind == 'DoesNotExist':
@@ -484,35 +477,35 @@ class RemoteRepository:
 
 
     class RPCError(Exception):
     class RPCError(Exception):
         def __init__(self, unpacked):
         def __init__(self, unpacked):
-            # for borg < 1.1: unpacked only has b'exception_class' as key
-            # for borg 1.1+: unpacked has keys: b'exception_args', b'exception_full', b'exception_short', b'sysinfo'
+            # for borg < 1.1: unpacked only has 'exception_class' as key
+            # for borg 1.1+: unpacked has keys: 'exception_args', 'exception_full', 'exception_short', 'sysinfo'
             self.unpacked = unpacked
             self.unpacked = unpacked
 
 
         def get_message(self):
         def get_message(self):
-            if b'exception_short' in self.unpacked:
-                return b'\n'.join(self.unpacked[b'exception_short']).decode()
+            if 'exception_short' in self.unpacked:
+                return '\n'.join(self.unpacked['exception_short'])
             else:
             else:
                 return self.exception_class
                 return self.exception_class
 
 
         @property
         @property
         def traceback(self):
         def traceback(self):
-            return self.unpacked.get(b'exception_trace', True)
+            return self.unpacked.get('exception_trace', True)
 
 
         @property
         @property
         def exception_class(self):
         def exception_class(self):
-            return self.unpacked[b'exception_class'].decode()
+            return self.unpacked['exception_class']
 
 
         @property
         @property
         def exception_full(self):
         def exception_full(self):
-            if b'exception_full' in self.unpacked:
-                return b'\n'.join(self.unpacked[b'exception_full']).decode()
+            if 'exception_full' in self.unpacked:
+                return '\n'.join(self.unpacked['exception_full'])
             else:
             else:
                 return self.get_message() + '\nRemote Exception (see remote log for the traceback)'
                 return self.get_message() + '\nRemote Exception (see remote log for the traceback)'
 
 
         @property
         @property
         def sysinfo(self):
         def sysinfo(self):
-            if b'sysinfo' in self.unpacked:
-                return self.unpacked[b'sysinfo'].decode()
+            if 'sysinfo' in self.unpacked:
+                return self.unpacked['sysinfo']
             else:
             else:
                 return ''
                 return ''
 
 
@@ -577,9 +570,9 @@ class RemoteRepository:
                 raise ConnectionClosedWithHint('Is borg working on the server?') from None
                 raise ConnectionClosedWithHint('Is borg working on the server?') from None
             if version == RPC_PROTOCOL_VERSION:
             if version == RPC_PROTOCOL_VERSION:
                 self.dictFormat = False
                 self.dictFormat = False
-            elif isinstance(version, dict) and b'server_version' in version:
+            elif isinstance(version, dict) and 'server_version' in version:
                 self.dictFormat = True
                 self.dictFormat = True
-                self.server_version = version[b'server_version']
+                self.server_version = version['server_version']
             else:
             else:
                 raise Exception('Server insisted on using unsupported protocol version %s' % version)
                 raise Exception('Server insisted on using unsupported protocol version %s' % version)
 
 
@@ -734,9 +727,9 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
             return msgid
             return msgid
 
 
         def handle_error(unpacked):
         def handle_error(unpacked):
-            error = unpacked[b'exception_class'].decode()
-            old_server = b'exception_args' not in unpacked
-            args = unpacked.get(b'exception_args')
+            error = unpacked['exception_class']
+            old_server = 'exception_args' not in unpacked
+            args = unpacked.get('exception_args')
 
 
             if error == 'DoesNotExist':
             if error == 'DoesNotExist':
                 raise Repository.DoesNotExist(self.location.processed)
                 raise Repository.DoesNotExist(self.location.processed)
@@ -748,29 +741,29 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
                 if old_server:
                 if old_server:
                     raise IntegrityError('(not available)')
                     raise IntegrityError('(not available)')
                 else:
                 else:
-                    raise IntegrityError(args[0].decode())
+                    raise IntegrityError(args[0])
             elif error == 'AtticRepository':
             elif error == 'AtticRepository':
                 if old_server:
                 if old_server:
                     raise Repository.AtticRepository('(not available)')
                     raise Repository.AtticRepository('(not available)')
                 else:
                 else:
-                    raise Repository.AtticRepository(args[0].decode())
+                    raise Repository.AtticRepository(args[0])
             elif error == 'PathNotAllowed':
             elif error == 'PathNotAllowed':
                 if old_server:
                 if old_server:
                     raise PathNotAllowed('(unknown)')
                     raise PathNotAllowed('(unknown)')
                 else:
                 else:
-                    raise PathNotAllowed(args[0].decode())
+                    raise PathNotAllowed(args[0])
             elif error == 'ParentPathDoesNotExist':
             elif error == 'ParentPathDoesNotExist':
-                raise Repository.ParentPathDoesNotExist(args[0].decode())
+                raise Repository.ParentPathDoesNotExist(args[0])
             elif error == 'ObjectNotFound':
             elif error == 'ObjectNotFound':
                 if old_server:
                 if old_server:
                     raise Repository.ObjectNotFound('(not available)', self.location.processed)
                     raise Repository.ObjectNotFound('(not available)', self.location.processed)
                 else:
                 else:
-                    raise Repository.ObjectNotFound(args[0].decode(), self.location.processed)
+                    raise Repository.ObjectNotFound(args[0], self.location.processed)
             elif error == 'InvalidRPCMethod':
             elif error == 'InvalidRPCMethod':
                 if old_server:
                 if old_server:
                     raise InvalidRPCMethod('(not available)')
                     raise InvalidRPCMethod('(not available)')
                 else:
                 else:
-                    raise InvalidRPCMethod(args[0].decode())
+                    raise InvalidRPCMethod(args[0])
             else:
             else:
                 raise self.RPCError(unpacked)
                 raise self.RPCError(unpacked)
 
 
@@ -789,10 +782,10 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
                 try:
                 try:
                     unpacked = self.responses.pop(waiting_for[0])
                     unpacked = self.responses.pop(waiting_for[0])
                     waiting_for.pop(0)
                     waiting_for.pop(0)
-                    if b'exception_class' in unpacked:
+                    if 'exception_class' in unpacked:
                         handle_error(unpacked)
                         handle_error(unpacked)
                     else:
                     else:
-                        yield unpacked[RESULTB]
+                        yield unpacked[RESULT]
                         if not waiting_for and not calls:
                         if not waiting_for and not calls:
                             return
                             return
                 except KeyError:
                 except KeyError:
@@ -809,10 +802,10 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
                         else:
                         else:
                             return
                             return
                     else:
                     else:
-                        if b'exception_class' in unpacked:
+                        if 'exception_class' in unpacked:
                             handle_error(unpacked)
                             handle_error(unpacked)
                         else:
                         else:
-                            yield unpacked[RESULTB]
+                            yield unpacked[RESULT]
             if self.to_send or ((calls or self.preload_ids) and len(waiting_for) < MAX_INFLIGHT):
             if self.to_send or ((calls or self.preload_ids) and len(waiting_for) < MAX_INFLIGHT):
                 w_fds = [self.stdin_fd]
                 w_fds = [self.stdin_fd]
             else:
             else:
@@ -829,26 +822,26 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
                     self.unpacker.feed(data)
                     self.unpacker.feed(data)
                     for unpacked in self.unpacker:
                     for unpacked in self.unpacker:
                         if isinstance(unpacked, dict):
                         if isinstance(unpacked, dict):
-                            msgid = unpacked[MSGIDB]
+                            msgid = unpacked[MSGID]
                         elif isinstance(unpacked, tuple) and len(unpacked) == 4:
                         elif isinstance(unpacked, tuple) and len(unpacked) == 4:
                             # The first field 'type' was always 1 and has always been ignored
                             # The first field 'type' was always 1 and has always been ignored
                             _, msgid, error, res = unpacked
                             _, msgid, error, res = unpacked
                             if error:
                             if error:
                                 # ignore res, because it is only a fixed string anyway.
                                 # ignore res, because it is only a fixed string anyway.
-                                unpacked = {MSGIDB: msgid, b'exception_class': error}
+                                unpacked = {MSGID: msgid, 'exception_class': error}
                             else:
                             else:
-                                unpacked = {MSGIDB: msgid, RESULTB: res}
+                                unpacked = {MSGID: msgid, RESULT: res}
                         else:
                         else:
                             raise UnexpectedRPCDataFormatFromServer(data)
                             raise UnexpectedRPCDataFormatFromServer(data)
                         if msgid in self.ignore_responses:
                         if msgid in self.ignore_responses:
                             self.ignore_responses.remove(msgid)
                             self.ignore_responses.remove(msgid)
                             # async methods never return values, but may raise exceptions.
                             # async methods never return values, but may raise exceptions.
-                            if b'exception_class' in unpacked:
+                            if 'exception_class' in unpacked:
                                 self.async_responses[msgid] = unpacked
                                 self.async_responses[msgid] = unpacked
                             else:
                             else:
                                 # we currently do not have async result values except "None",
                                 # we currently do not have async result values except "None",
                                 # so we do not add them into async_responses.
                                 # so we do not add them into async_responses.
-                                if unpacked[RESULTB] is not None:
+                                if unpacked[RESULT] is not None:
                                     self.async_responses[msgid] = unpacked
                                     self.async_responses[msgid] = unpacked
                         else:
                         else:
                             self.responses[msgid] = unpacked
                             self.responses[msgid] = unpacked

+ 22 - 22
src/borg/repository.py

@@ -516,16 +516,16 @@ class Repository:
                 integrity = msgpack.unpack(fd)
                 integrity = msgpack.unpack(fd)
         except FileNotFoundError:
         except FileNotFoundError:
             return
             return
-        if integrity.get(b'version') != 2:
-            logger.warning('Unknown integrity data version %r in %s', integrity.get(b'version'), integrity_file)
+        if integrity.get('version') != 2:
+            logger.warning('Unknown integrity data version %r in %s', integrity.get('version'), integrity_file)
             return
             return
-        return integrity[key].decode()
+        return integrity[key]
 
 
     def open_index(self, transaction_id, auto_recover=True):
     def open_index(self, transaction_id, auto_recover=True):
         if transaction_id is None:
         if transaction_id is None:
             return NSIndex()
             return NSIndex()
         index_path = os.path.join(self.path, 'index.%d' % transaction_id)
         index_path = os.path.join(self.path, 'index.%d' % transaction_id)
-        integrity_data = self._read_integrity(transaction_id, b'index')
+        integrity_data = self._read_integrity(transaction_id, 'index')
         try:
         try:
             with IntegrityCheckedFile(index_path, write=False, integrity_data=integrity_data) as fd:
             with IntegrityCheckedFile(index_path, write=False, integrity_data=integrity_data) as fd:
                 return NSIndex.read(fd)
                 return NSIndex.read(fd)
@@ -575,7 +575,7 @@ class Repository:
                 self.io.cleanup(transaction_id)
                 self.io.cleanup(transaction_id)
             hints_path = os.path.join(self.path, 'hints.%d' % transaction_id)
             hints_path = os.path.join(self.path, 'hints.%d' % transaction_id)
             index_path = os.path.join(self.path, 'index.%d' % transaction_id)
             index_path = os.path.join(self.path, 'index.%d' % transaction_id)
-            integrity_data = self._read_integrity(transaction_id, b'hints')
+            integrity_data = self._read_integrity(transaction_id, 'hints')
             try:
             try:
                 with IntegrityCheckedFile(hints_path, write=False, integrity_data=integrity_data) as fd:
                 with IntegrityCheckedFile(hints_path, write=False, integrity_data=integrity_data) as fd:
                     hints = msgpack.unpack(fd)
                     hints = msgpack.unpack(fd)
@@ -588,23 +588,23 @@ class Repository:
                 self.check_transaction()
                 self.check_transaction()
                 self.prepare_txn(transaction_id)
                 self.prepare_txn(transaction_id)
                 return
                 return
-            if hints[b'version'] == 1:
+            if hints['version'] == 1:
                 logger.debug('Upgrading from v1 hints.%d', transaction_id)
                 logger.debug('Upgrading from v1 hints.%d', transaction_id)
-                self.segments = hints[b'segments']
+                self.segments = hints['segments']
                 self.compact = FreeSpace()
                 self.compact = FreeSpace()
                 self.storage_quota_use = 0
                 self.storage_quota_use = 0
                 self.shadow_index = {}
                 self.shadow_index = {}
-                for segment in sorted(hints[b'compact']):
+                for segment in sorted(hints['compact']):
                     logger.debug('Rebuilding sparse info for segment %d', segment)
                     logger.debug('Rebuilding sparse info for segment %d', segment)
                     self._rebuild_sparse(segment)
                     self._rebuild_sparse(segment)
                 logger.debug('Upgrade to v2 hints complete')
                 logger.debug('Upgrade to v2 hints complete')
-            elif hints[b'version'] != 2:
-                raise ValueError('Unknown hints file version: %d' % hints[b'version'])
+            elif hints['version'] != 2:
+                raise ValueError('Unknown hints file version: %d' % hints['version'])
             else:
             else:
-                self.segments = hints[b'segments']
-                self.compact = FreeSpace(hints[b'compact'])
-                self.storage_quota_use = hints.get(b'storage_quota_use', 0)
-                self.shadow_index = hints.get(b'shadow_index', {})
+                self.segments = hints['segments']
+                self.compact = FreeSpace(hints['compact'])
+                self.storage_quota_use = hints.get('storage_quota_use', 0)
+                self.shadow_index = hints.get('shadow_index', {})
             self.log_storage_quota()
             self.log_storage_quota()
             # Drop uncommitted segments in the shadow index
             # Drop uncommitted segments in the shadow index
             for key, shadowed_segments in self.shadow_index.items():
             for key, shadowed_segments in self.shadow_index.items():
@@ -621,16 +621,16 @@ class Repository:
             os.rename(file + '.tmp', file)
             os.rename(file + '.tmp', file)
 
 
         hints = {
         hints = {
-            b'version': 2,
-            b'segments': self.segments,
-            b'compact': self.compact,
-            b'storage_quota_use': self.storage_quota_use,
-            b'shadow_index': self.shadow_index,
+            'version': 2,
+            'segments': self.segments,
+            'compact': self.compact,
+            'storage_quota_use': self.storage_quota_use,
+            'shadow_index': self.shadow_index,
         }
         }
         integrity = {
         integrity = {
             # Integrity version started at 2, the current hints version.
             # Integrity version started at 2, the current hints version.
             # Thus, integrity version == hints version, for now.
             # Thus, integrity version == hints version, for now.
-            b'version': 2,
+            'version': 2,
         }
         }
         transaction_id = self.io.get_segments_transaction_id()
         transaction_id = self.io.get_segments_transaction_id()
         assert transaction_id is not None
         assert transaction_id is not None
@@ -647,7 +647,7 @@ class Repository:
         with IntegrityCheckedFile(hints_file + '.tmp', filename=hints_name, write=True) as fd:
         with IntegrityCheckedFile(hints_file + '.tmp', filename=hints_name, write=True) as fd:
             msgpack.pack(hints, fd)
             msgpack.pack(hints, fd)
             flush_and_sync(fd)
             flush_and_sync(fd)
-        integrity[b'hints'] = fd.integrity_data
+        integrity['hints'] = fd.integrity_data
 
 
         # Write repository index
         # Write repository index
         index_name = 'index.%d' % transaction_id
         index_name = 'index.%d' % transaction_id
@@ -656,7 +656,7 @@ class Repository:
             # XXX: Consider using SyncFile for index write-outs.
             # XXX: Consider using SyncFile for index write-outs.
             self.index.write(fd)
             self.index.write(fd)
             flush_and_sync(fd)
             flush_and_sync(fd)
-        integrity[b'index'] = fd.integrity_data
+        integrity['index'] = fd.integrity_data
 
 
         # Write integrity file, containing checksums of the hints and index files
         # Write integrity file, containing checksums of the hints and index files
         integrity_name = 'integrity.%d' % transaction_id
         integrity_name = 'integrity.%d' % transaction_id

+ 7 - 7
src/borg/testsuite/archive.py

@@ -171,7 +171,7 @@ class RobustUnpackerTestCase(BaseTestCase):
         return b''.join(msgpack.packb({'path': item}) for item in items)
         return b''.join(msgpack.packb({'path': item}) for item in items)
 
 
     def _validator(self, value):
     def _validator(self, value):
-        return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz')
+        return isinstance(value, dict) and value.get('path') in ('foo', 'bar', 'boo', 'baz')
 
 
     def process(self, input):
     def process(self, input):
         unpacker = RobustUnpacker(validator=self._validator, item_keys=ITEM_KEYS)
         unpacker = RobustUnpacker(validator=self._validator, item_keys=ITEM_KEYS)
@@ -190,10 +190,10 @@ class RobustUnpackerTestCase(BaseTestCase):
                   (False, [b'garbage'] + [self.make_chunks(['boo', 'baz'])])]
                   (False, [b'garbage'] + [self.make_chunks(['boo', 'baz'])])]
         result = self.process(chunks)
         result = self.process(chunks)
         self.assert_equal(result, [
         self.assert_equal(result, [
-            {b'path': b'foo'}, {b'path': b'bar'},
+            {'path': 'foo'}, {'path': 'bar'},
             103, 97, 114, 98, 97, 103, 101,
             103, 97, 114, 98, 97, 103, 101,
-            {b'path': b'boo'},
-            {b'path': b'baz'}])
+            {'path': 'boo'},
+            {'path': 'baz'}])
 
 
     def split(self, left, length):
     def split(self, left, length):
         parts = []
         parts = []
@@ -206,19 +206,19 @@ class RobustUnpackerTestCase(BaseTestCase):
         chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 2)
         chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 2)
         input = [(False, chunks)]
         input = [(False, chunks)]
         result = self.process(input)
         result = self.process(input)
-        self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'bar'}, {b'path': b'boo'}, {b'path': b'baz'}])
+        self.assert_equal(result, [{'path': 'foo'}, {'path': 'bar'}, {'path': 'boo'}, {'path': 'baz'}])
 
 
     def test_missing_chunk(self):
     def test_missing_chunk(self):
         chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 4)
         chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 4)
         input = [(False, chunks[:3]), (True, chunks[4:])]
         input = [(False, chunks[:3]), (True, chunks[4:])]
         result = self.process(input)
         result = self.process(input)
-        self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
+        self.assert_equal(result, [{'path': 'foo'}, {'path': 'boo'}, {'path': 'baz'}])
 
 
     def test_corrupt_chunk(self):
     def test_corrupt_chunk(self):
         chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 4)
         chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 4)
         input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])]
         input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])]
         result = self.process(input)
         result = self.process(input)
-        self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
+        self.assert_equal(result, [{'path': 'foo'}, {'path': 'boo'}, {'path': 'baz'}])
 
 
 
 
 @pytest.fixture
 @pytest.fixture

+ 5 - 5
src/borg/testsuite/archiver.py

@@ -3623,14 +3623,14 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.cmd('init', '--encryption=repokey', self.repository_location)
         with Repository(self.repository_path) as repository:
         with Repository(self.repository_path) as repository:
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
-        assert key[b'algorithm'] == b'argon2 chacha20-poly1305'
+        assert key['algorithm'] == 'argon2 chacha20-poly1305'
 
 
     def test_init_with_explicit_key_algorithm(self):
     def test_init_with_explicit_key_algorithm(self):
         """https://github.com/borgbackup/borg/issues/747#issuecomment-1076160401"""
         """https://github.com/borgbackup/borg/issues/747#issuecomment-1076160401"""
         self.cmd('init', '--encryption=repokey', '--key-algorithm=pbkdf2', self.repository_location)
         self.cmd('init', '--encryption=repokey', '--key-algorithm=pbkdf2', self.repository_location)
         with Repository(self.repository_path) as repository:
         with Repository(self.repository_path) as repository:
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
-        assert key[b'algorithm'] == b'sha256'
+        assert key['algorithm'] == 'sha256'
 
 
     def verify_change_passphrase_does_not_change_algorithm(self, given_algorithm, expected_algorithm):
     def verify_change_passphrase_does_not_change_algorithm(self, given_algorithm, expected_algorithm):
         self.cmd('init', '--encryption=repokey', '--key-algorithm', given_algorithm, self.repository_location)
         self.cmd('init', '--encryption=repokey', '--key-algorithm', given_algorithm, self.repository_location)
@@ -3640,7 +3640,7 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
 
 
         with Repository(self.repository_path) as repository:
         with Repository(self.repository_path) as repository:
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
-            assert key[b'algorithm'] == expected_algorithm.encode()
+            assert key['algorithm'] == expected_algorithm
 
 
     def test_change_passphrase_does_not_change_algorithm_argon2(self):
     def test_change_passphrase_does_not_change_algorithm_argon2(self):
         self.verify_change_passphrase_does_not_change_algorithm('argon2', 'argon2 chacha20-poly1305')
         self.verify_change_passphrase_does_not_change_algorithm('argon2', 'argon2 chacha20-poly1305')
@@ -3655,7 +3655,7 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
 
 
         with Repository(self.repository_path) as repository:
         with Repository(self.repository_path) as repository:
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
-            assert key[b'algorithm'] == expected_algorithm.encode()
+            assert key['algorithm'] == expected_algorithm
 
 
     def test_change_location_does_not_change_algorithm_argon2(self):
     def test_change_location_does_not_change_algorithm_argon2(self):
         self.verify_change_location_does_not_change_algorithm('argon2', 'argon2 chacha20-poly1305')
         self.verify_change_location_does_not_change_algorithm('argon2', 'argon2 chacha20-poly1305')
@@ -3969,7 +3969,7 @@ class ManifestAuthenticationTest(ArchiverTestCaseBase):
             key.change_passphrase(key._passphrase)
             key.change_passphrase(key._passphrase)
 
 
             manifest = msgpack.unpackb(key.decrypt(Manifest.MANIFEST_ID, repository.get(Manifest.MANIFEST_ID)))
             manifest = msgpack.unpackb(key.decrypt(Manifest.MANIFEST_ID, repository.get(Manifest.MANIFEST_ID)))
-            del manifest[b'tam']
+            del manifest['tam']
             repository.put(Manifest.MANIFEST_ID, key.encrypt(Manifest.MANIFEST_ID, msgpack.packb(manifest)))
             repository.put(Manifest.MANIFEST_ID, key.encrypt(Manifest.MANIFEST_ID, msgpack.packb(manifest)))
             repository.commit(compact=False)
             repository.commit(compact=False)
         output = self.cmd('list', '--debug', self.repository_location)
         output = self.cmd('list', '--debug', self.repository_location)

+ 8 - 8
src/borg/testsuite/key.py

@@ -360,23 +360,23 @@ class TestTAM:
         assert blob.startswith(b'\x82')
         assert blob.startswith(b'\x82')
 
 
         unpacked = msgpack.unpackb(blob)
         unpacked = msgpack.unpackb(blob)
-        assert unpacked[b'tam'][b'type'] == b'HKDF_HMAC_SHA512'
+        assert unpacked['tam']['type'] == 'HKDF_HMAC_SHA512'
 
 
         unpacked, verified = key.unpack_and_verify_manifest(blob)
         unpacked, verified = key.unpack_and_verify_manifest(blob)
         assert verified
         assert verified
-        assert unpacked[b'foo'] == b'bar'
-        assert b'tam' not in unpacked
+        assert unpacked['foo'] == 'bar'
+        assert 'tam' not in unpacked
 
 
-    @pytest.mark.parametrize('which', (b'hmac', b'salt'))
+    @pytest.mark.parametrize('which', ('hmac', 'salt'))
     def test_tampered(self, key, which):
     def test_tampered(self, key, which):
         data = {'foo': 'bar'}
         data = {'foo': 'bar'}
         blob = key.pack_and_authenticate_metadata(data)
         blob = key.pack_and_authenticate_metadata(data)
         assert blob.startswith(b'\x82')
         assert blob.startswith(b'\x82')
 
 
         unpacked = msgpack.unpackb(blob, object_hook=StableDict)
         unpacked = msgpack.unpackb(blob, object_hook=StableDict)
-        assert len(unpacked[b'tam'][which]) == 64
-        unpacked[b'tam'][which] = unpacked[b'tam'][which][0:32] + bytes(32)
-        assert len(unpacked[b'tam'][which]) == 64
+        assert len(unpacked['tam'][which]) == 64
+        unpacked['tam'][which] = unpacked['tam'][which][0:32] + bytes(32)
+        assert len(unpacked['tam'][which]) == 64
         blob = msgpack.packb(unpacked)
         blob = msgpack.packb(unpacked)
 
 
         with pytest.raises(TAMInvalid):
         with pytest.raises(TAMInvalid):
@@ -421,4 +421,4 @@ def test_key_file_roundtrip(monkeypatch, cli_argument, expected_algorithm):
     load_me = RepoKey.detect(repository, manifest_data=None)
     load_me = RepoKey.detect(repository, manifest_data=None)
 
 
     assert to_dict(load_me) == to_dict(save_me)
     assert to_dict(load_me) == to_dict(save_me)
-    assert msgpack.unpackb(a2b_base64(saved))[b'algorithm'] == expected_algorithm.encode()
+    assert msgpack.unpackb(a2b_base64(saved))['algorithm'] == expected_algorithm

+ 2 - 2
src/borg/testsuite/repository.py

@@ -655,8 +655,8 @@ class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase):
             hints = msgpack.unpack(fd)
             hints = msgpack.unpack(fd)
             fd.seek(0)
             fd.seek(0)
             # Corrupt segment refcount
             # Corrupt segment refcount
-            assert hints[b'segments'][2] == 1
-            hints[b'segments'][2] = 0
+            assert hints['segments'][2] == 1
+            hints['segments'][2] = 0
             msgpack.pack(hints, fd)
             msgpack.pack(hints, fd)
             fd.truncate()
             fd.truncate()