Преглед изворни кода

cleanup msgpack related str/bytes mess, fixes #968

see ticket and borg.helpers.msgpack docstring.

this changeset implements the full migration to
msgpack 2.0 spec (use_bin_type=True, raw=False).

Still-needed compatibility with data packed the old way is handled via the want_bytes decoder in borg.item.
Thomas Waldmann пре 3 година
родитељ
комит
8e87f1111b

+ 2 - 5
src/borg/archive.py

@@ -1718,13 +1718,10 @@ class ArchiveChecker:
 
         Iterates through all objects in the repository looking for archive metadata blocks.
         """
-        required_archive_keys = frozenset(key.encode() for key in REQUIRED_ARCHIVE_KEYS)
-
         def valid_archive(obj):
             if not isinstance(obj, dict):
                 return False
-            keys = set(obj)
-            return required_archive_keys.issubset(keys)
+            return REQUIRED_ARCHIVE_KEYS.issubset(obj)
 
         logger.info('Rebuilding missing manifest, this might take some time...')
         # as we have lost the manifest, we do not know any more what valid item keys we had.
@@ -1904,7 +1901,7 @@ class ArchiveChecker:
             def valid_item(obj):
                 if not isinstance(obj, StableDict):
                     return False, 'not a dictionary'
-                keys = set(k.decode('utf-8', errors='replace') for k in obj)
+                keys = set(obj)
                 if not required_item_keys.issubset(keys):
                     return False, 'missing required keys: ' + list_keys_safe(required_item_keys - keys)
                 if not keys.issubset(item_keys):

+ 1 - 1
src/borg/archiver.py

@@ -2331,7 +2331,7 @@ class Archiver:
 
             unpacker = msgpack.Unpacker(use_list=False, object_hook=StableDict)
             first = True
-            for item_id in archive_org_dict[b'items']:
+            for item_id in archive_org_dict['items']:
                 data = key.decrypt(item_id, repository.get(item_id))
                 unpacker.feed(data)
                 for item in unpacker:

+ 5 - 5
src/borg/crypto/key.py

@@ -232,24 +232,24 @@ class KeyBase:
         unpacker = get_limited_unpacker('manifest')
         unpacker.feed(data)
         unpacked = unpacker.unpack()
-        if b'tam' not in unpacked:
+        if 'tam' not in unpacked:
             if tam_required:
                 raise TAMRequiredError(self.repository._location.canonical_path())
             else:
                 logger.debug('TAM not found and not required')
                 return unpacked, False
-        tam = unpacked.pop(b'tam', None)
+        tam = unpacked.pop('tam', None)
         if not isinstance(tam, dict):
             raise TAMInvalid()
-        tam_type = tam.get(b'type', b'<none>').decode('ascii', 'replace')
+        tam_type = tam.get('type', '<none>')
         if tam_type != 'HKDF_HMAC_SHA512':
             if tam_required:
                 raise TAMUnsupportedSuiteError(repr(tam_type))
             else:
                 logger.debug('Ignoring TAM made with unsupported suite, since TAM is not required: %r', tam_type)
                 return unpacked, False
-        tam_hmac = tam.get(b'hmac')
-        tam_salt = tam.get(b'salt')
+        tam_hmac = tam.get('hmac')
+        tam_salt = tam.get('salt')
         if not isinstance(tam_salt, bytes) or not isinstance(tam_hmac, bytes):
             raise TAMInvalid()
         offset = data.index(tam_hmac)

+ 15 - 19
src/borg/helpers/msgpack.py

@@ -2,8 +2,7 @@
 wrapping msgpack
 ================
 
-Due to the planned breaking api changes in upstream msgpack, we wrap it the way we need it -
-to avoid having lots of clutter in the calling code. see tickets #968 and #3632.
+We wrap msgpack here the way we need it - to avoid having lots of clutter in the calling code.
 
 Packing
 -------
@@ -22,30 +21,27 @@ Packing
 
 Unpacking
 ---------
-- raw = True (the old way, used by borg <= 1.3)
-  This is currently still needed to not try to decode "raw" msgpack objects.
-  These could come either from str (new or old msgpack) or bytes (old msgpack).
-  Thus, we basically must know what we want and either keep the bytes we get
-  or decode them to str, if we want str.
-
-- raw = False (the new way)
-  This can be used in future, when we do not have to deal with data any more that was packed the old way.
+- raw = False (used by borg since borg 1.3)
+  We already can use this with borg 1.3 due to the want_bytes decoder.
+  This decoder can be removed in future, when we do not have to deal with data any more that was packed the old way.
   It will then unpack according to the msgpack 2.0 spec format and directly output bytes or str.
 
+- raw = True (the old way, used by borg < 1.3)
+
 - unicode_errors = 'surrogateescape' -> see description above (will be used when raw is False).
 
-As of borg 1.3, we have the first part on the way to fix the msgpack str/bytes mess, #968.
-borg now still needs to **read** old repos, archives, keys, ... so we can not yet fix it completely.
-But from now on, borg only **writes** new data according to the new msgpack spec,
-thus we can complete the fix for #968 in a later borg release.
+As of borg 1.3, we have fixed most of the msgpack str/bytes mess, #968.
+Borg now still needs to **read** old repos, archives, keys, ... so we can not yet fix it completely.
+But from now on, borg only **writes** new data according to the new msgpack 2.0 spec,
+thus we can remove some legacy support in a later borg release (some places are marked with "legacy").
 
 current way in msgpack terms
 ----------------------------
 
 - pack with use_bin_type=True (according to msgpack 2.0 spec)
 - packs str -> raw and bytes -> bin
-- unpack with raw=True (aka "the old way")
-- unpacks raw to bytes (thus we always need to decode manually if we want str)
+- unpack with raw=False (according to msgpack 2.0 spec, using unicode_errors='surrogateescape')
+- unpacks bin to bytes and raw to str (thus we need to re-encode manually if we want bytes from "raw")
 """
 
 from .datastruct import StableDict
@@ -66,8 +62,8 @@ from msgpack import OutOfData
 version = mp_version
 
 USE_BIN_TYPE = True
-RAW = True  # should become False later when we do not need to read old stuff any more
-UNICODE_ERRORS = 'surrogateescape'  # previously done by safe_encode, safe_decode
+RAW = False
+UNICODE_ERRORS = 'surrogateescape'
 
 
 class PackException(Exception):
@@ -161,7 +157,7 @@ def unpackb(packed, *, raw=RAW, unicode_errors=UNICODE_ERRORS,
 def unpack(stream, *, raw=RAW, unicode_errors=UNICODE_ERRORS,
            strict_map_key=False,
            **kwargs):
-    # assert raw == RAW
+    assert raw == RAW
     assert unicode_errors == UNICODE_ERRORS
     try:
         kw = dict(raw=raw, unicode_errors=unicode_errors,

+ 22 - 13
src/borg/item.pyx

@@ -60,6 +60,15 @@ def fix_tuple_of_str_and_int(t):
     return t
 
 
+def want_bytes(v):
+    """we know that we want bytes and the value should be bytes"""
+    # legacy support: it being str can be caused by msgpack unpack decoding old data that was packed with use_bin_type=False
+    if isinstance(v, str):
+        v = v.encode('utf-8', errors='surrogateescape')
+    assert isinstance(v, bytes)
+    return v
+
+
 class PropDict:
     """
     Manage a dictionary via properties.
@@ -204,10 +213,10 @@ class Item(PropDict):
     user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None')
     group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None')
 
-    acl_access = PropDict._make_property('acl_access', bytes)
-    acl_default = PropDict._make_property('acl_default', bytes)
-    acl_extended = PropDict._make_property('acl_extended', bytes)
-    acl_nfs4 = PropDict._make_property('acl_nfs4', bytes)
+    acl_access = PropDict._make_property('acl_access', bytes, decode=want_bytes)
+    acl_default = PropDict._make_property('acl_default', bytes, decode=want_bytes)
+    acl_extended = PropDict._make_property('acl_extended', bytes, decode=want_bytes)
+    acl_nfs4 = PropDict._make_property('acl_nfs4', bytes, decode=want_bytes)
 
     mode = PropDict._make_property('mode', int)
     uid = PropDict._make_property('uid', int)
@@ -224,7 +233,7 @@ class Item(PropDict):
     # compatibility note: this is a new feature, in old archives size will be missing.
     size = PropDict._make_property('size', int)
 
-    hlid = PropDict._make_property('hlid', bytes)  # hard link id: same value means same hard link.
+    hlid = PropDict._make_property('hlid', bytes, decode=want_bytes)  # hard link id: same value means same hard link.
     hardlink_master = PropDict._make_property('hardlink_master', bool)  # legacy
 
     chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None')
@@ -363,9 +372,9 @@ class EncryptedKey(PropDict):
     version = PropDict._make_property('version', int)
     algorithm = PropDict._make_property('algorithm', str)
     iterations = PropDict._make_property('iterations', int)
-    salt = PropDict._make_property('salt', bytes)
-    hash = PropDict._make_property('hash', bytes)
-    data = PropDict._make_property('data', bytes)
+    salt = PropDict._make_property('salt', bytes, decode=want_bytes)
+    hash = PropDict._make_property('hash', bytes, decode=want_bytes)
+    data = PropDict._make_property('data', bytes, decode=want_bytes)
     argon2_time_cost = PropDict._make_property('argon2_time_cost', int)
     argon2_memory_cost = PropDict._make_property('argon2_memory_cost', int)
     argon2_parallelism = PropDict._make_property('argon2_parallelism', int)
@@ -399,10 +408,10 @@ class Key(PropDict):
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
 
     version = PropDict._make_property('version', int)
-    repository_id = PropDict._make_property('repository_id', bytes)
-    enc_key = PropDict._make_property('enc_key', bytes)
-    enc_hmac_key = PropDict._make_property('enc_hmac_key', bytes)
-    id_key = PropDict._make_property('id_key', bytes)
+    repository_id = PropDict._make_property('repository_id', bytes, decode=want_bytes)
+    enc_key = PropDict._make_property('enc_key', bytes, decode=want_bytes)
+    enc_hmac_key = PropDict._make_property('enc_hmac_key', bytes, decode=want_bytes)
+    id_key = PropDict._make_property('id_key', bytes, decode=want_bytes)
     chunk_seed = PropDict._make_property('chunk_seed', int)
     tam_required = PropDict._make_property('tam_required', bool)
 
@@ -443,7 +452,7 @@ class ArchiveItem(PropDict):
     chunker_params = PropDict._make_property('chunker_params', tuple)
     recreate_cmdline = PropDict._make_property('recreate_cmdline', list)  # list of s-e-str
     # recreate_source_id, recreate_args, recreate_partial_chunks were used in 1.1.0b1 .. b2
-    recreate_source_id = PropDict._make_property('recreate_source_id', bytes)
+    recreate_source_id = PropDict._make_property('recreate_source_id', bytes, decode=want_bytes)
     recreate_args = PropDict._make_property('recreate_args', list)  # list of s-e-str
     recreate_partial_chunks = PropDict._make_property('recreate_partial_chunks', list)  # list of tuples
     size = PropDict._make_property('size', int)

+ 35 - 42
src/borg/remote.py

@@ -38,8 +38,7 @@ logger = create_logger(__name__)
 
 RPC_PROTOCOL_VERSION = 2
 BORG_VERSION = parse_version(__version__)
-MSGID, MSG, ARGS, RESULT = 'i', 'm', 'a', 'r'  # pack
-MSGIDB, MSGB, ARGSB, RESULTB = b'i', b'm', b'a', b'r'  # unpack
+MSGID, MSG, ARGS, RESULT = 'i', 'm', 'a', 'r'
 
 MAX_INFLIGHT = 100
 
@@ -139,10 +138,6 @@ compatMap = {
 }
 
 
-def decode_keys(d):
-    return {k.decode(): d[k] for k in d}
-
-
 class RepositoryServer:  # pragma: no cover
     rpc_methods = (
         '__len__',
@@ -217,14 +212,13 @@ class RepositoryServer:  # pragma: no cover
                 for unpacked in unpacker:
                     if isinstance(unpacked, dict):
                         dictFormat = True
-                        msgid = unpacked[MSGIDB]
-                        method = unpacked[MSGB].decode()
-                        args = decode_keys(unpacked[ARGSB])
+                        msgid = unpacked[MSGID]
+                        method = unpacked[MSG]
+                        args = unpacked[ARGS]
                     elif isinstance(unpacked, tuple) and len(unpacked) == 4:
                         dictFormat = False
                         # The first field 'type' was always 1 and has always been ignored
                         _, msgid, method, args = unpacked
-                        method = method.decode()
                         args = self.positional_to_named(method, args)
                     else:
                         if self.repository is not None:
@@ -308,7 +302,7 @@ class RepositoryServer:  # pragma: no cover
         # clients since 1.1.0b3 use a dict as client_data
         # clients since 1.1.0b6 support json log format from server
         if isinstance(client_data, dict):
-            self.client_version = client_data[b'client_version']
+            self.client_version = client_data['client_version']
             level = logging.getLevelName(logging.getLogger('').level)
             setup_logging(is_serve=True, json=True, level=level)
             logger.debug('Initialized logging system for JSON-based protocol')
@@ -370,7 +364,6 @@ class RepositoryServer:  # pragma: no cover
         return self.repository.id
 
     def inject_exception(self, kind):
-        kind = kind.decode()
         s1 = 'test string'
         s2 = 'test string2'
         if kind == 'DoesNotExist':
@@ -484,35 +477,35 @@ class RemoteRepository:
 
     class RPCError(Exception):
         def __init__(self, unpacked):
-            # for borg < 1.1: unpacked only has b'exception_class' as key
-            # for borg 1.1+: unpacked has keys: b'exception_args', b'exception_full', b'exception_short', b'sysinfo'
+            # for borg < 1.1: unpacked only has 'exception_class' as key
+            # for borg 1.1+: unpacked has keys: 'exception_args', 'exception_full', 'exception_short', 'sysinfo'
             self.unpacked = unpacked
 
         def get_message(self):
-            if b'exception_short' in self.unpacked:
-                return b'\n'.join(self.unpacked[b'exception_short']).decode()
+            if 'exception_short' in self.unpacked:
+                return '\n'.join(self.unpacked['exception_short'])
             else:
                 return self.exception_class
 
         @property
         def traceback(self):
-            return self.unpacked.get(b'exception_trace', True)
+            return self.unpacked.get('exception_trace', True)
 
         @property
         def exception_class(self):
-            return self.unpacked[b'exception_class'].decode()
+            return self.unpacked['exception_class']
 
         @property
         def exception_full(self):
-            if b'exception_full' in self.unpacked:
-                return b'\n'.join(self.unpacked[b'exception_full']).decode()
+            if 'exception_full' in self.unpacked:
+                return '\n'.join(self.unpacked['exception_full'])
             else:
                 return self.get_message() + '\nRemote Exception (see remote log for the traceback)'
 
         @property
         def sysinfo(self):
-            if b'sysinfo' in self.unpacked:
-                return self.unpacked[b'sysinfo'].decode()
+            if 'sysinfo' in self.unpacked:
+                return self.unpacked['sysinfo']
             else:
                 return ''
 
@@ -577,9 +570,9 @@ class RemoteRepository:
                 raise ConnectionClosedWithHint('Is borg working on the server?') from None
             if version == RPC_PROTOCOL_VERSION:
                 self.dictFormat = False
-            elif isinstance(version, dict) and b'server_version' in version:
+            elif isinstance(version, dict) and 'server_version' in version:
                 self.dictFormat = True
-                self.server_version = version[b'server_version']
+                self.server_version = version['server_version']
             else:
                 raise Exception('Server insisted on using unsupported protocol version %s' % version)
 
@@ -734,9 +727,9 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
             return msgid
 
         def handle_error(unpacked):
-            error = unpacked[b'exception_class'].decode()
-            old_server = b'exception_args' not in unpacked
-            args = unpacked.get(b'exception_args')
+            error = unpacked['exception_class']
+            old_server = 'exception_args' not in unpacked
+            args = unpacked.get('exception_args')
 
             if error == 'DoesNotExist':
                 raise Repository.DoesNotExist(self.location.processed)
@@ -748,29 +741,29 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
                 if old_server:
                     raise IntegrityError('(not available)')
                 else:
-                    raise IntegrityError(args[0].decode())
+                    raise IntegrityError(args[0])
             elif error == 'AtticRepository':
                 if old_server:
                     raise Repository.AtticRepository('(not available)')
                 else:
-                    raise Repository.AtticRepository(args[0].decode())
+                    raise Repository.AtticRepository(args[0])
             elif error == 'PathNotAllowed':
                 if old_server:
                     raise PathNotAllowed('(unknown)')
                 else:
-                    raise PathNotAllowed(args[0].decode())
+                    raise PathNotAllowed(args[0])
             elif error == 'ParentPathDoesNotExist':
-                raise Repository.ParentPathDoesNotExist(args[0].decode())
+                raise Repository.ParentPathDoesNotExist(args[0])
             elif error == 'ObjectNotFound':
                 if old_server:
                     raise Repository.ObjectNotFound('(not available)', self.location.processed)
                 else:
-                    raise Repository.ObjectNotFound(args[0].decode(), self.location.processed)
+                    raise Repository.ObjectNotFound(args[0], self.location.processed)
             elif error == 'InvalidRPCMethod':
                 if old_server:
                     raise InvalidRPCMethod('(not available)')
                 else:
-                    raise InvalidRPCMethod(args[0].decode())
+                    raise InvalidRPCMethod(args[0])
             else:
                 raise self.RPCError(unpacked)
 
@@ -789,10 +782,10 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
                 try:
                     unpacked = self.responses.pop(waiting_for[0])
                     waiting_for.pop(0)
-                    if b'exception_class' in unpacked:
+                    if 'exception_class' in unpacked:
                         handle_error(unpacked)
                     else:
-                        yield unpacked[RESULTB]
+                        yield unpacked[RESULT]
                         if not waiting_for and not calls:
                             return
                 except KeyError:
@@ -809,10 +802,10 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
                         else:
                             return
                     else:
-                        if b'exception_class' in unpacked:
+                        if 'exception_class' in unpacked:
                             handle_error(unpacked)
                         else:
-                            yield unpacked[RESULTB]
+                            yield unpacked[RESULT]
             if self.to_send or ((calls or self.preload_ids) and len(waiting_for) < MAX_INFLIGHT):
                 w_fds = [self.stdin_fd]
             else:
@@ -829,26 +822,26 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
                     self.unpacker.feed(data)
                     for unpacked in self.unpacker:
                         if isinstance(unpacked, dict):
-                            msgid = unpacked[MSGIDB]
+                            msgid = unpacked[MSGID]
                         elif isinstance(unpacked, tuple) and len(unpacked) == 4:
                             # The first field 'type' was always 1 and has always been ignored
                             _, msgid, error, res = unpacked
                             if error:
                                 # ignore res, because it is only a fixed string anyway.
-                                unpacked = {MSGIDB: msgid, b'exception_class': error}
+                                unpacked = {MSGID: msgid, 'exception_class': error}
                             else:
-                                unpacked = {MSGIDB: msgid, RESULTB: res}
+                                unpacked = {MSGID: msgid, RESULT: res}
                         else:
                             raise UnexpectedRPCDataFormatFromServer(data)
                         if msgid in self.ignore_responses:
                             self.ignore_responses.remove(msgid)
                             # async methods never return values, but may raise exceptions.
-                            if b'exception_class' in unpacked:
+                            if 'exception_class' in unpacked:
                                 self.async_responses[msgid] = unpacked
                             else:
                                 # we currently do not have async result values except "None",
                                 # so we do not add them into async_responses.
-                                if unpacked[RESULTB] is not None:
+                                if unpacked[RESULT] is not None:
                                     self.async_responses[msgid] = unpacked
                         else:
                             self.responses[msgid] = unpacked

+ 22 - 22
src/borg/repository.py

@@ -516,16 +516,16 @@ class Repository:
                 integrity = msgpack.unpack(fd)
         except FileNotFoundError:
             return
-        if integrity.get(b'version') != 2:
-            logger.warning('Unknown integrity data version %r in %s', integrity.get(b'version'), integrity_file)
+        if integrity.get('version') != 2:
+            logger.warning('Unknown integrity data version %r in %s', integrity.get('version'), integrity_file)
             return
-        return integrity[key].decode()
+        return integrity[key]
 
     def open_index(self, transaction_id, auto_recover=True):
         if transaction_id is None:
             return NSIndex()
         index_path = os.path.join(self.path, 'index.%d' % transaction_id)
-        integrity_data = self._read_integrity(transaction_id, b'index')
+        integrity_data = self._read_integrity(transaction_id, 'index')
         try:
             with IntegrityCheckedFile(index_path, write=False, integrity_data=integrity_data) as fd:
                 return NSIndex.read(fd)
@@ -575,7 +575,7 @@ class Repository:
                 self.io.cleanup(transaction_id)
             hints_path = os.path.join(self.path, 'hints.%d' % transaction_id)
             index_path = os.path.join(self.path, 'index.%d' % transaction_id)
-            integrity_data = self._read_integrity(transaction_id, b'hints')
+            integrity_data = self._read_integrity(transaction_id, 'hints')
             try:
                 with IntegrityCheckedFile(hints_path, write=False, integrity_data=integrity_data) as fd:
                     hints = msgpack.unpack(fd)
@@ -588,23 +588,23 @@ class Repository:
                 self.check_transaction()
                 self.prepare_txn(transaction_id)
                 return
-            if hints[b'version'] == 1:
+            if hints['version'] == 1:
                 logger.debug('Upgrading from v1 hints.%d', transaction_id)
-                self.segments = hints[b'segments']
+                self.segments = hints['segments']
                 self.compact = FreeSpace()
                 self.storage_quota_use = 0
                 self.shadow_index = {}
-                for segment in sorted(hints[b'compact']):
+                for segment in sorted(hints['compact']):
                     logger.debug('Rebuilding sparse info for segment %d', segment)
                     self._rebuild_sparse(segment)
                 logger.debug('Upgrade to v2 hints complete')
-            elif hints[b'version'] != 2:
-                raise ValueError('Unknown hints file version: %d' % hints[b'version'])
+            elif hints['version'] != 2:
+                raise ValueError('Unknown hints file version: %d' % hints['version'])
             else:
-                self.segments = hints[b'segments']
-                self.compact = FreeSpace(hints[b'compact'])
-                self.storage_quota_use = hints.get(b'storage_quota_use', 0)
-                self.shadow_index = hints.get(b'shadow_index', {})
+                self.segments = hints['segments']
+                self.compact = FreeSpace(hints['compact'])
+                self.storage_quota_use = hints.get('storage_quota_use', 0)
+                self.shadow_index = hints.get('shadow_index', {})
             self.log_storage_quota()
             # Drop uncommitted segments in the shadow index
             for key, shadowed_segments in self.shadow_index.items():
@@ -621,16 +621,16 @@ class Repository:
             os.rename(file + '.tmp', file)
 
         hints = {
-            b'version': 2,
-            b'segments': self.segments,
-            b'compact': self.compact,
-            b'storage_quota_use': self.storage_quota_use,
-            b'shadow_index': self.shadow_index,
+            'version': 2,
+            'segments': self.segments,
+            'compact': self.compact,
+            'storage_quota_use': self.storage_quota_use,
+            'shadow_index': self.shadow_index,
         }
         integrity = {
             # Integrity version started at 2, the current hints version.
             # Thus, integrity version == hints version, for now.
-            b'version': 2,
+            'version': 2,
         }
         transaction_id = self.io.get_segments_transaction_id()
         assert transaction_id is not None
@@ -647,7 +647,7 @@ class Repository:
         with IntegrityCheckedFile(hints_file + '.tmp', filename=hints_name, write=True) as fd:
             msgpack.pack(hints, fd)
             flush_and_sync(fd)
-        integrity[b'hints'] = fd.integrity_data
+        integrity['hints'] = fd.integrity_data
 
         # Write repository index
         index_name = 'index.%d' % transaction_id
@@ -656,7 +656,7 @@ class Repository:
             # XXX: Consider using SyncFile for index write-outs.
             self.index.write(fd)
             flush_and_sync(fd)
-        integrity[b'index'] = fd.integrity_data
+        integrity['index'] = fd.integrity_data
 
         # Write integrity file, containing checksums of the hints and index files
         integrity_name = 'integrity.%d' % transaction_id

+ 7 - 7
src/borg/testsuite/archive.py

@@ -171,7 +171,7 @@ class RobustUnpackerTestCase(BaseTestCase):
         return b''.join(msgpack.packb({'path': item}) for item in items)
 
     def _validator(self, value):
-        return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz')
+        return isinstance(value, dict) and value.get('path') in ('foo', 'bar', 'boo', 'baz')
 
     def process(self, input):
         unpacker = RobustUnpacker(validator=self._validator, item_keys=ITEM_KEYS)
@@ -190,10 +190,10 @@ class RobustUnpackerTestCase(BaseTestCase):
                   (False, [b'garbage'] + [self.make_chunks(['boo', 'baz'])])]
         result = self.process(chunks)
         self.assert_equal(result, [
-            {b'path': b'foo'}, {b'path': b'bar'},
+            {'path': 'foo'}, {'path': 'bar'},
             103, 97, 114, 98, 97, 103, 101,
-            {b'path': b'boo'},
-            {b'path': b'baz'}])
+            {'path': 'boo'},
+            {'path': 'baz'}])
 
     def split(self, left, length):
         parts = []
@@ -206,19 +206,19 @@ class RobustUnpackerTestCase(BaseTestCase):
         chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 2)
         input = [(False, chunks)]
         result = self.process(input)
-        self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'bar'}, {b'path': b'boo'}, {b'path': b'baz'}])
+        self.assert_equal(result, [{'path': 'foo'}, {'path': 'bar'}, {'path': 'boo'}, {'path': 'baz'}])
 
     def test_missing_chunk(self):
         chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 4)
         input = [(False, chunks[:3]), (True, chunks[4:])]
         result = self.process(input)
-        self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
+        self.assert_equal(result, [{'path': 'foo'}, {'path': 'boo'}, {'path': 'baz'}])
 
     def test_corrupt_chunk(self):
         chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 4)
         input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])]
         result = self.process(input)
-        self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
+        self.assert_equal(result, [{'path': 'foo'}, {'path': 'boo'}, {'path': 'baz'}])
 
 
 @pytest.fixture

+ 5 - 5
src/borg/testsuite/archiver.py

@@ -3623,14 +3623,14 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
         self.cmd('init', '--encryption=repokey', self.repository_location)
         with Repository(self.repository_path) as repository:
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
-        assert key[b'algorithm'] == b'argon2 chacha20-poly1305'
+        assert key['algorithm'] == 'argon2 chacha20-poly1305'
 
     def test_init_with_explicit_key_algorithm(self):
         """https://github.com/borgbackup/borg/issues/747#issuecomment-1076160401"""
         self.cmd('init', '--encryption=repokey', '--key-algorithm=pbkdf2', self.repository_location)
         with Repository(self.repository_path) as repository:
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
-        assert key[b'algorithm'] == b'sha256'
+        assert key['algorithm'] == 'sha256'
 
     def verify_change_passphrase_does_not_change_algorithm(self, given_algorithm, expected_algorithm):
         self.cmd('init', '--encryption=repokey', '--key-algorithm', given_algorithm, self.repository_location)
@@ -3640,7 +3640,7 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
 
         with Repository(self.repository_path) as repository:
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
-            assert key[b'algorithm'] == expected_algorithm.encode()
+            assert key['algorithm'] == expected_algorithm
 
     def test_change_passphrase_does_not_change_algorithm_argon2(self):
         self.verify_change_passphrase_does_not_change_algorithm('argon2', 'argon2 chacha20-poly1305')
@@ -3655,7 +3655,7 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
 
         with Repository(self.repository_path) as repository:
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
-            assert key[b'algorithm'] == expected_algorithm.encode()
+            assert key['algorithm'] == expected_algorithm
 
     def test_change_location_does_not_change_algorithm_argon2(self):
         self.verify_change_location_does_not_change_algorithm('argon2', 'argon2 chacha20-poly1305')
@@ -3969,7 +3969,7 @@ class ManifestAuthenticationTest(ArchiverTestCaseBase):
             key.change_passphrase(key._passphrase)
 
             manifest = msgpack.unpackb(key.decrypt(Manifest.MANIFEST_ID, repository.get(Manifest.MANIFEST_ID)))
-            del manifest[b'tam']
+            del manifest['tam']
             repository.put(Manifest.MANIFEST_ID, key.encrypt(Manifest.MANIFEST_ID, msgpack.packb(manifest)))
             repository.commit(compact=False)
         output = self.cmd('list', '--debug', self.repository_location)

+ 8 - 8
src/borg/testsuite/key.py

@@ -360,23 +360,23 @@ class TestTAM:
         assert blob.startswith(b'\x82')
 
         unpacked = msgpack.unpackb(blob)
-        assert unpacked[b'tam'][b'type'] == b'HKDF_HMAC_SHA512'
+        assert unpacked['tam']['type'] == 'HKDF_HMAC_SHA512'
 
         unpacked, verified = key.unpack_and_verify_manifest(blob)
         assert verified
-        assert unpacked[b'foo'] == b'bar'
-        assert b'tam' not in unpacked
+        assert unpacked['foo'] == 'bar'
+        assert 'tam' not in unpacked
 
-    @pytest.mark.parametrize('which', (b'hmac', b'salt'))
+    @pytest.mark.parametrize('which', ('hmac', 'salt'))
     def test_tampered(self, key, which):
         data = {'foo': 'bar'}
         blob = key.pack_and_authenticate_metadata(data)
         assert blob.startswith(b'\x82')
 
         unpacked = msgpack.unpackb(blob, object_hook=StableDict)
-        assert len(unpacked[b'tam'][which]) == 64
-        unpacked[b'tam'][which] = unpacked[b'tam'][which][0:32] + bytes(32)
-        assert len(unpacked[b'tam'][which]) == 64
+        assert len(unpacked['tam'][which]) == 64
+        unpacked['tam'][which] = unpacked['tam'][which][0:32] + bytes(32)
+        assert len(unpacked['tam'][which]) == 64
         blob = msgpack.packb(unpacked)
 
         with pytest.raises(TAMInvalid):
@@ -421,4 +421,4 @@ def test_key_file_roundtrip(monkeypatch, cli_argument, expected_algorithm):
     load_me = RepoKey.detect(repository, manifest_data=None)
 
     assert to_dict(load_me) == to_dict(save_me)
-    assert msgpack.unpackb(a2b_base64(saved))[b'algorithm'] == expected_algorithm.encode()
+    assert msgpack.unpackb(a2b_base64(saved))['algorithm'] == expected_algorithm

+ 2 - 2
src/borg/testsuite/repository.py

@@ -655,8 +655,8 @@ class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase):
             hints = msgpack.unpack(fd)
             fd.seek(0)
             # Corrupt segment refcount
-            assert hints[b'segments'][2] == 1
-            hints[b'segments'][2] = 0
+            assert hints['segments'][2] == 1
+            hints['segments'][2] = 0
             msgpack.pack(hints, fd)
             fd.truncate()