Browse Source

cleanup msgpack related str/bytes mess, see #968

see ticket and borg.helpers.msgpack docstring.
Thomas Waldmann 3 years ago
parent
commit
f8dbe5b542

+ 7 - 9
src/borg/archive.py

@@ -34,7 +34,7 @@ from .helpers import Error, IntegrityError, set_ec
 from .platform import uid2user, user2uid, gid2group, group2gid
 from .platform import uid2user, user2uid, gid2group, group2gid
 from .helpers import parse_timestamp, to_localtime
 from .helpers import parse_timestamp, to_localtime
 from .helpers import OutputTimestamp, format_timedelta, format_file_size, file_status, FileSize
 from .helpers import OutputTimestamp, format_timedelta, format_file_size, file_status, FileSize
-from .helpers import safe_encode, safe_decode, make_path_safe, remove_surrogates
+from .helpers import safe_encode, make_path_safe, remove_surrogates
 from .helpers import StableDict
 from .helpers import StableDict
 from .helpers import bin_to_hex
 from .helpers import bin_to_hex
 from .helpers import safe_ns
 from .helpers import safe_ns
@@ -479,7 +479,6 @@ class Archive:
     def load(self, id):
     def load(self, id):
         self.id = id
         self.id = id
         self.metadata = self._load_meta(self.id)
         self.metadata = self._load_meta(self.id)
-        self.metadata.cmdline = [safe_decode(arg) for arg in self.metadata.cmdline]
         self.name = self.metadata.name
         self.name = self.metadata.name
         self.comment = self.metadata.get('comment', '')
         self.comment = self.metadata.get('comment', '')
 
 
@@ -1515,7 +1514,7 @@ class RobustUnpacker:
     """
     """
     def __init__(self, validator, item_keys):
     def __init__(self, validator, item_keys):
         super().__init__()
         super().__init__()
-        self.item_keys = [msgpack.packb(name.encode()) for name in item_keys]
+        self.item_keys = [msgpack.packb(name) for name in item_keys]
         self.validator = validator
         self.validator = validator
         self._buffered_data = []
         self._buffered_data = []
         self._resync = False
         self._resync = False
@@ -1734,7 +1733,7 @@ class ArchiveChecker:
 # lost manifest on an older borg version than the most recent one that was ever used
 # lost manifest on an older borg version than the most recent one that was ever used
         # within this repository (assuming that newer borg versions support more item keys).
         # within this repository (assuming that newer borg versions support more item keys).
         manifest = Manifest(self.key, self.repository)
         manifest = Manifest(self.key, self.repository)
-        archive_keys_serialized = [msgpack.packb(name.encode()) for name in ARCHIVE_KEYS]
+        archive_keys_serialized = [msgpack.packb(name) for name in ARCHIVE_KEYS]
         pi = ProgressIndicatorPercent(total=len(self.chunks), msg="Rebuilding manifest %6.2f%%", step=0.01,
         pi = ProgressIndicatorPercent(total=len(self.chunks), msg="Rebuilding manifest %6.2f%%", step=0.01,
                                       msgid='check.rebuild_manifest')
                                       msgid='check.rebuild_manifest')
         for chunk_id, _ in self.chunks.iteritems():
         for chunk_id, _ in self.chunks.iteritems():
@@ -1881,9 +1880,9 @@ class ArchiveChecker:
 
 
             Missing item chunks will be skipped and the msgpack stream will be restarted
             Missing item chunks will be skipped and the msgpack stream will be restarted
             """
             """
-            item_keys = frozenset(key.encode() for key in self.manifest.item_keys)
-            required_item_keys = frozenset(key.encode() for key in REQUIRED_ITEM_KEYS)
-            unpacker = RobustUnpacker(lambda item: isinstance(item, StableDict) and b'path' in item,
+            item_keys = self.manifest.item_keys
+            required_item_keys = REQUIRED_ITEM_KEYS
+            unpacker = RobustUnpacker(lambda item: isinstance(item, StableDict) and 'path' in item,
                                       self.manifest.item_keys)
                                       self.manifest.item_keys)
             _state = 0
             _state = 0
 
 
@@ -1905,7 +1904,7 @@ class ArchiveChecker:
             def valid_item(obj):
             def valid_item(obj):
                 if not isinstance(obj, StableDict):
                 if not isinstance(obj, StableDict):
                     return False, 'not a dictionary'
                     return False, 'not a dictionary'
-                keys = set(obj)
+                keys = set(k.decode('utf-8', errors='replace') for k in obj)
                 if not required_item_keys.issubset(keys):
                 if not required_item_keys.issubset(keys):
                     return False, 'missing required keys: ' + list_keys_safe(required_item_keys - keys)
                     return False, 'missing required keys: ' + list_keys_safe(required_item_keys - keys)
                 if not keys.issubset(item_keys):
                 if not keys.issubset(item_keys):
@@ -1991,7 +1990,6 @@ class ArchiveChecker:
                 archive = ArchiveItem(internal_dict=msgpack.unpackb(data))
                 archive = ArchiveItem(internal_dict=msgpack.unpackb(data))
                 if archive.version != 2:
                 if archive.version != 2:
                     raise Exception('Unknown archive metadata version')
                     raise Exception('Unknown archive metadata version')
-                archive.cmdline = [safe_decode(arg) for arg in archive.cmdline]
                 items_buffer = ChunkBuffer(self.key)
                 items_buffer = ChunkBuffer(self.key)
                 items_buffer.write_chunk = add_callback
                 items_buffer.write_chunk = add_callback
                 for item in robust_iterator(archive):
                 for item in robust_iterator(archive):

+ 6 - 6
src/borg/archiver.py

@@ -55,7 +55,7 @@ try:
     from .helpers import PrefixSpec, GlobSpec, CommentSpec, SortBySpec, FilesCacheMode
     from .helpers import PrefixSpec, GlobSpec, CommentSpec, SortBySpec, FilesCacheMode
     from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter
     from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter
     from .helpers import format_timedelta, format_file_size, parse_file_size, format_archive
     from .helpers import format_timedelta, format_file_size, parse_file_size, format_archive
-    from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict, eval_escapes
+    from .helpers import remove_surrogates, bin_to_hex, prepare_dump_dict, eval_escapes
     from .helpers import interval, prune_within, prune_split, PRUNING_PATTERNS
     from .helpers import interval, prune_within, prune_split, PRUNING_PATTERNS
     from .helpers import timestamp
     from .helpers import timestamp
     from .helpers import get_cache_dir, os_stat
     from .helpers import get_cache_dir, os_stat
@@ -1944,12 +1944,12 @@ class Archiver:
                 print('This repository is not encrypted, cannot enable TAM.')
                 print('This repository is not encrypted, cannot enable TAM.')
                 return EXIT_ERROR
                 return EXIT_ERROR
 
 
-            if not manifest.tam_verified or not manifest.config.get(b'tam_required', False):
+            if not manifest.tam_verified or not manifest.config.get('tam_required', False):
                 # The standard archive listing doesn't include the archive ID like in borg 1.1.x
                 # The standard archive listing doesn't include the archive ID like in borg 1.1.x
                 print('Manifest contents:')
                 print('Manifest contents:')
                 for archive_info in manifest.archives.list(sort_by=['ts']):
                 for archive_info in manifest.archives.list(sort_by=['ts']):
                     print(format_archive(archive_info), '[%s]' % bin_to_hex(archive_info.id))
                     print(format_archive(archive_info), '[%s]' % bin_to_hex(archive_info.id))
-                manifest.config[b'tam_required'] = True
+                manifest.config['tam_required'] = True
                 manifest.write()
                 manifest.write()
                 repository.commit(compact=False)
                 repository.commit(compact=False)
             if not key.tam_required:
             if not key.tam_required:
@@ -1972,7 +1972,7 @@ class Archiver:
                 print('Key updated')
                 print('Key updated')
                 if hasattr(key, 'find_key'):
                 if hasattr(key, 'find_key'):
                     print('Key location:', key.find_key())
                     print('Key location:', key.find_key())
-            manifest.config[b'tam_required'] = False
+            manifest.config['tam_required'] = False
             manifest.write()
             manifest.write()
             repository.commit(compact=False)
             repository.commit(compact=False)
         else:
         else:
@@ -2304,7 +2304,7 @@ class Archiver:
         """dump decoded archive metadata (not: data)"""
         """dump decoded archive metadata (not: data)"""
 
 
         try:
         try:
-            archive_meta_orig = manifest.archives.get_raw_dict()[safe_encode(args.location.archive)]
+            archive_meta_orig = manifest.archives.get_raw_dict()[args.location.archive]
         except KeyError:
         except KeyError:
             raise Archive.DoesNotExist(args.location.archive)
             raise Archive.DoesNotExist(args.location.archive)
 
 
@@ -2321,7 +2321,7 @@ class Archiver:
             fd.write(do_indent(prepare_dump_dict(archive_meta_orig)))
             fd.write(do_indent(prepare_dump_dict(archive_meta_orig)))
             fd.write(',\n')
             fd.write(',\n')
 
 
-            data = key.decrypt(archive_meta_orig[b'id'], repository.get(archive_meta_orig[b'id']))
+            data = key.decrypt(archive_meta_orig['id'], repository.get(archive_meta_orig['id']))
             archive_org_dict = msgpack.unpackb(data, object_hook=StableDict)
             archive_org_dict = msgpack.unpackb(data, object_hook=StableDict)
 
 
             fd.write('    "_meta":\n')
             fd.write('    "_meta":\n')

+ 20 - 16
src/borg/cache_sync/unpack.h

@@ -384,19 +384,11 @@ static inline int unpack_callback_map_end(unpack_user* u)
 
 
 static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* p, unsigned int length)
 static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* p, unsigned int length)
 {
 {
-    /* raw = what Borg uses for binary stuff and strings as well */
+    /* raw = what Borg uses for text stuff */
     /* Note: p points to an internal buffer which contains l bytes. */
     /* Note: p points to an internal buffer which contains l bytes. */
     (void)b;
     (void)b;
 
 
     switch(u->expect) {
     switch(u->expect) {
-    case expect_key:
-        if(length != 32) {
-            SET_LAST_ERROR("Incorrect key length");
-            return -1;
-        }
-        memcpy(u->current.key, p, 32);
-        u->expect = expect_size;
-        break;
     case expect_map_key:
     case expect_map_key:
         if(length == 6 && !memcmp("chunks", p, 6)) {
         if(length == 6 && !memcmp("chunks", p, 6)) {
             u->expect = expect_chunks_begin;
             u->expect = expect_chunks_begin;
@@ -409,19 +401,31 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char*
             u->expect = expect_map_item_end;
             u->expect = expect_map_item_end;
         }
         }
         break;
         break;
-    default:
-        if(u->inside_chunks) {
-            SET_LAST_ERROR("Unexpected bytes in chunks structure");
-            return -1;
-        }
     }
     }
     return 0;
     return 0;
 }
 }
 
 
 static inline int unpack_callback_bin(unpack_user* u, const char* b, const char* p, unsigned int length)
 static inline int unpack_callback_bin(unpack_user* u, const char* b, const char* p, unsigned int length)
 {
 {
-    (void)u; (void)b; (void)p; (void)length;
-    UNEXPECTED("bin");
+    /* bin = what Borg uses for binary stuff */
+    /* Note: p points to an internal buffer which contains l bytes. */
+    (void)b;
+
+    switch(u->expect) {
+    case expect_key:
+        if(length != 32) {
+            SET_LAST_ERROR("Incorrect key length");
+            return -1;
+        }
+        memcpy(u->current.key, p, 32);
+        u->expect = expect_size;
+        break;
+    default:
+        if(u->inside_chunks) {
+            SET_LAST_ERROR("Unexpected bytes in chunks structure");
+            return -1;
+        }
+    }
     return 0;
     return 0;
 }
 }
 
 

+ 21 - 25
src/borg/helpers/manifest.py

@@ -12,7 +12,7 @@ from ..logger import create_logger
 logger = create_logger()
 logger = create_logger()
 
 
 from .datastruct import StableDict
 from .datastruct import StableDict
-from .parseformat import bin_to_hex, safe_encode, safe_decode
+from .parseformat import bin_to_hex
 from .time import parse_timestamp
 from .time import parse_timestamp
 from .. import shellpattern
 from .. import shellpattern
 from ..constants import *  # NOQA
 from ..constants import *  # NOQA
@@ -39,39 +39,35 @@ class Archives(abc.MutableMapping):
     str timestamps or datetime timestamps.
     str timestamps or datetime timestamps.
     """
     """
     def __init__(self):
     def __init__(self):
-        # key: encoded archive name, value: dict(b'id': bytes_id, b'time': bytes_iso_ts)
+        # key: str archive name, value: dict('id': bytes_id, 'time': str_iso_ts)
         self._archives = {}
         self._archives = {}
 
 
     def __len__(self):
     def __len__(self):
         return len(self._archives)
         return len(self._archives)
 
 
     def __iter__(self):
     def __iter__(self):
-        return iter(safe_decode(name) for name in self._archives)
+        return iter(self._archives)
 
 
     def __getitem__(self, name):
     def __getitem__(self, name):
         assert isinstance(name, str)
         assert isinstance(name, str)
-        _name = safe_encode(name)
-        values = self._archives.get(_name)
+        values = self._archives.get(name)
         if values is None:
         if values is None:
             raise KeyError
             raise KeyError
-        ts = parse_timestamp(values[b'time'].decode())
-        return ArchiveInfo(name=name, id=values[b'id'], ts=ts)
+        ts = parse_timestamp(values['time'])
+        return ArchiveInfo(name=name, id=values['id'], ts=ts)
 
 
     def __setitem__(self, name, info):
     def __setitem__(self, name, info):
         assert isinstance(name, str)
         assert isinstance(name, str)
-        name = safe_encode(name)
         assert isinstance(info, tuple)
         assert isinstance(info, tuple)
         id, ts = info
         id, ts = info
         assert isinstance(id, bytes)
         assert isinstance(id, bytes)
         if isinstance(ts, datetime):
         if isinstance(ts, datetime):
             ts = ts.replace(tzinfo=None).strftime(ISO_FORMAT)
             ts = ts.replace(tzinfo=None).strftime(ISO_FORMAT)
         assert isinstance(ts, str)
         assert isinstance(ts, str)
-        ts = ts.encode()
-        self._archives[name] = {b'id': id, b'time': ts}
+        self._archives[name] = {'id': id, 'time': ts}
 
 
     def __delitem__(self, name):
     def __delitem__(self, name):
         assert isinstance(name, str)
         assert isinstance(name, str)
-        name = safe_encode(name)
         del self._archives[name]
         del self._archives[name]
 
 
     def list(self, *, glob=None, match_end=r'\Z', sort_by=(), consider_checkpoints=True, first=None, last=None, reverse=False):
     def list(self, *, glob=None, match_end=r'\Z', sort_by=(), consider_checkpoints=True, first=None, last=None, reverse=False):
@@ -116,8 +112,8 @@ class Archives(abc.MutableMapping):
     def set_raw_dict(self, d):
     def set_raw_dict(self, d):
         """set the dict we get from the msgpack unpacker"""
         """set the dict we get from the msgpack unpacker"""
         for k, v in d.items():
         for k, v in d.items():
-            assert isinstance(k, bytes)
-            assert isinstance(v, dict) and b'id' in v and b'time' in v
+            assert isinstance(k, str)
+            assert isinstance(v, dict) and 'id' in v and 'time' in v
             self._archives[k] = v
             self._archives[k] = v
 
 
     def get_raw_dict(self):
     def get_raw_dict(self):
@@ -196,10 +192,10 @@ class Manifest:
         manifest.timestamp = m.get('timestamp')
         manifest.timestamp = m.get('timestamp')
         manifest.config = m.config
         manifest.config = m.config
         # valid item keys are whatever is known in the repo or every key we know
         # valid item keys are whatever is known in the repo or every key we know
-        manifest.item_keys = ITEM_KEYS | frozenset(key.decode() for key in m.get('item_keys', []))
+        manifest.item_keys = ITEM_KEYS | frozenset(m.get('item_keys', []))
 
 
         if manifest.tam_verified:
         if manifest.tam_verified:
-            manifest_required = manifest.config.get(b'tam_required', False)
+            manifest_required = manifest.config.get('tam_required', False)
             security_required = tam_required(repository)
             security_required = tam_required(repository)
             if manifest_required and not security_required:
             if manifest_required and not security_required:
                 logger.debug('Manifest is TAM verified and says TAM is required, updating security database...')
                 logger.debug('Manifest is TAM verified and says TAM is required, updating security database...')
@@ -214,32 +210,32 @@ class Manifest:
     def check_repository_compatibility(self, operations):
     def check_repository_compatibility(self, operations):
         for operation in operations:
         for operation in operations:
             assert isinstance(operation, self.Operation)
             assert isinstance(operation, self.Operation)
-            feature_flags = self.config.get(b'feature_flags', None)
+            feature_flags = self.config.get('feature_flags', None)
             if feature_flags is None:
             if feature_flags is None:
                 return
                 return
-            if operation.value.encode() not in feature_flags:
+            if operation.value not in feature_flags:
                 continue
                 continue
-            requirements = feature_flags[operation.value.encode()]
-            if b'mandatory' in requirements:
-                unsupported = set(requirements[b'mandatory']) - self.SUPPORTED_REPO_FEATURES
+            requirements = feature_flags[operation.value]
+            if 'mandatory' in requirements:
+                unsupported = set(requirements['mandatory']) - self.SUPPORTED_REPO_FEATURES
                 if unsupported:
                 if unsupported:
-                    raise MandatoryFeatureUnsupported([f.decode() for f in unsupported])
+                    raise MandatoryFeatureUnsupported(list(unsupported))
 
 
     def get_all_mandatory_features(self):
     def get_all_mandatory_features(self):
         result = {}
         result = {}
-        feature_flags = self.config.get(b'feature_flags', None)
+        feature_flags = self.config.get('feature_flags', None)
         if feature_flags is None:
         if feature_flags is None:
             return result
             return result
 
 
         for operation, requirements in feature_flags.items():
         for operation, requirements in feature_flags.items():
-            if b'mandatory' in requirements:
-                result[operation.decode()] = {feature.decode() for feature in requirements[b'mandatory']}
+            if 'mandatory' in requirements:
+                result[operation] = set(requirements['mandatory'])
         return result
         return result
 
 
     def write(self):
     def write(self):
         from ..item import ManifestItem
         from ..item import ManifestItem
         if self.key.tam_required:
         if self.key.tam_required:
-            self.config[b'tam_required'] = True
+            self.config['tam_required'] = True
         # self.timestamp needs to be strictly monotonically increasing. Clocks often are not set correctly
         # self.timestamp needs to be strictly monotonically increasing. Clocks often are not set correctly
         if self.timestamp is None:
         if self.timestamp is None:
             self.timestamp = datetime.utcnow().strftime(ISO_FORMAT)
             self.timestamp = datetime.utcnow().strftime(ISO_FORMAT)

+ 71 - 30
src/borg/helpers/msgpack.py

@@ -1,21 +1,56 @@
+"""
+wrapping msgpack
+================
+
+Due to the planned breaking api changes in upstream msgpack, we wrap it the way we need it -
+to avoid having lots of clutter in the calling code. see tickets #968 and #3632.
+
+Packing
+-------
+- use_bin_type = True (used by borg since borg 1.3)
+  This is used to generate output according to new msgpack 2.0 spec.
+  This cleanly keeps bytes and str types apart.
+
+- use_bin_type = False (used by borg < 1.3)
+  This creates output according to the older msgpack spec.
+  BAD: str and bytes were packed into same "raw" representation.
+
+- unicode_errors = 'surrogateescape'
+  Guess backup applications are one of the rare cases when this needs to be used.
+  It is needed because borg also needs to deal with data that does not cleanly encode/decode using utf-8.
+  There's a lot of crap out there, e.g. in filenames and as a backup tool, we must keep them as good as possible.
+
+Unpacking
+---------
+- raw = True (the old way, used by borg <= 1.3)
+  This is currently still needed to not try to decode "raw" msgpack objects.
+  These could come either from str (new or old msgpack) or bytes (old msgpack).
+  Thus, we basically must know what we want and either keep the bytes we get
+  or decode them to str, if we want str.
+
+- raw = False (the new way)
+  This can be used in future, when we do not have to deal with data any more that was packed the old way.
+  It will then unpack according to the msgpack 2.0 spec format and directly output bytes or str.
+
+- unicode_errors = 'surrogateescape' -> see description above (will be used when raw is False).
+
+As of borg 1.3, we have the first part on the way to fix the msgpack str/bytes mess, #968.
+borg now still needs to **read** old repos, archives, keys, ... so we can not yet fix it completely.
+But from now on, borg only **writes** new data according to the new msgpack spec,
+thus we can complete the fix for #968 in a later borg release.
+
+current way in msgpack terms
+----------------------------
+
+- pack with use_bin_type=True (according to msgpack 2.0 spec)
+- packs str -> raw and bytes -> bin
+- unpack with raw=True (aka "the old way")
+- unpacks raw to bytes (thus we always need to decode manually if we want str)
+"""
+
 from .datastruct import StableDict
 from .datastruct import StableDict
 from ..constants import *  # NOQA
 from ..constants import *  # NOQA
 
 
-# wrapping msgpack ---------------------------------------------------------------------------------------------------
-#
-# due to the planned breaking api changes in upstream msgpack, we wrap it the way we need it -
-# to avoid having lots of clutter in the calling code. see tickets #968 and #3632.
-#
-# Packing
-# -------
-# use_bin_type = False is needed to generate the old msgpack format (not msgpack 2.0 spec) as borg always did.
-# unicode_errors = None is needed because usage of it is deprecated
-#
-# Unpacking
-# ---------
-# raw = True is needed to unpack the old msgpack format to bytes (not str, about the decoding see item.pyx).
-# unicode_errors = None is needed because usage of it is deprecated
-
 from msgpack import Packer as mp_Packer
 from msgpack import Packer as mp_Packer
 from msgpack import packb as mp_packb
 from msgpack import packb as mp_packb
 from msgpack import pack as mp_pack
 from msgpack import pack as mp_pack
@@ -30,6 +65,10 @@ from msgpack import OutOfData
 
 
 version = mp_version
 version = mp_version
 
 
+USE_BIN_TYPE = True
+RAW = True  # should become False later when we do not need to read old stuff any more
+UNICODE_ERRORS = 'surrogateescape'  # previously done by safe_encode, safe_decode
+
 
 
 class PackException(Exception):
 class PackException(Exception):
     """Exception while msgpack packing"""
     """Exception while msgpack packing"""
@@ -40,10 +79,10 @@ class UnpackException(Exception):
 
 
 
 
 class Packer(mp_Packer):
 class Packer(mp_Packer):
-    def __init__(self, *, default=None, unicode_errors=None,
-                 use_single_float=False, autoreset=True, use_bin_type=False,
+    def __init__(self, *, default=None, unicode_errors=UNICODE_ERRORS,
+                 use_single_float=False, autoreset=True, use_bin_type=USE_BIN_TYPE,
                  strict_types=False):
                  strict_types=False):
-        assert unicode_errors is None
+        assert unicode_errors == UNICODE_ERRORS
         super().__init__(default=default, unicode_errors=unicode_errors,
         super().__init__(default=default, unicode_errors=unicode_errors,
                          use_single_float=use_single_float, autoreset=autoreset, use_bin_type=use_bin_type,
                          use_single_float=use_single_float, autoreset=autoreset, use_bin_type=use_bin_type,
                          strict_types=strict_types)
                          strict_types=strict_types)
@@ -55,16 +94,16 @@ class Packer(mp_Packer):
             raise PackException(e)
             raise PackException(e)
 
 
 
 
-def packb(o, *, use_bin_type=False, unicode_errors=None, **kwargs):
-    assert unicode_errors is None
+def packb(o, *, use_bin_type=USE_BIN_TYPE, unicode_errors=UNICODE_ERRORS, **kwargs):
+    assert unicode_errors == UNICODE_ERRORS
     try:
     try:
         return mp_packb(o, use_bin_type=use_bin_type, unicode_errors=unicode_errors, **kwargs)
         return mp_packb(o, use_bin_type=use_bin_type, unicode_errors=unicode_errors, **kwargs)
     except Exception as e:
     except Exception as e:
         raise PackException(e)
         raise PackException(e)
 
 
 
 
-def pack(o, stream, *, use_bin_type=False, unicode_errors=None, **kwargs):
-    assert unicode_errors is None
+def pack(o, stream, *, use_bin_type=USE_BIN_TYPE, unicode_errors=UNICODE_ERRORS, **kwargs):
+    assert unicode_errors == UNICODE_ERRORS
     try:
     try:
         return mp_pack(o, stream, use_bin_type=use_bin_type, unicode_errors=unicode_errors, **kwargs)
         return mp_pack(o, stream, use_bin_type=use_bin_type, unicode_errors=unicode_errors, **kwargs)
     except Exception as e:
     except Exception as e:
@@ -72,13 +111,13 @@ def pack(o, stream, *, use_bin_type=False, unicode_errors=None, **kwargs):
 
 
 
 
 class Unpacker(mp_Unpacker):
 class Unpacker(mp_Unpacker):
-    def __init__(self, file_like=None, *, read_size=0, use_list=True, raw=True,
+    def __init__(self, file_like=None, *, read_size=0, use_list=True, raw=RAW,
                  object_hook=None, object_pairs_hook=None, list_hook=None,
                  object_hook=None, object_pairs_hook=None, list_hook=None,
-                 unicode_errors=None, max_buffer_size=0,
+                 unicode_errors=UNICODE_ERRORS, max_buffer_size=0,
                  ext_hook=ExtType,
                  ext_hook=ExtType,
                  strict_map_key=False):
                  strict_map_key=False):
-        assert raw is True
-        assert unicode_errors is None
+        assert raw == RAW
+        assert unicode_errors == UNICODE_ERRORS
         kw = dict(file_like=file_like, read_size=read_size, use_list=use_list, raw=raw,
         kw = dict(file_like=file_like, read_size=read_size, use_list=use_list, raw=raw,
                   object_hook=object_hook, object_pairs_hook=object_pairs_hook, list_hook=list_hook,
                   object_hook=object_hook, object_pairs_hook=object_pairs_hook, list_hook=list_hook,
                   unicode_errors=unicode_errors, max_buffer_size=max_buffer_size,
                   unicode_errors=unicode_errors, max_buffer_size=max_buffer_size,
@@ -105,10 +144,11 @@ class Unpacker(mp_Unpacker):
     next = __next__
     next = __next__
 
 
 
 
-def unpackb(packed, *, raw=True, unicode_errors=None,
+def unpackb(packed, *, raw=RAW, unicode_errors=UNICODE_ERRORS,
             strict_map_key=False,
             strict_map_key=False,
             **kwargs):
             **kwargs):
-    assert unicode_errors is None
+    assert raw == RAW
+    assert unicode_errors == UNICODE_ERRORS
     try:
     try:
         kw = dict(raw=raw, unicode_errors=unicode_errors,
         kw = dict(raw=raw, unicode_errors=unicode_errors,
                   strict_map_key=strict_map_key)
                   strict_map_key=strict_map_key)
@@ -118,10 +158,11 @@ def unpackb(packed, *, raw=True, unicode_errors=None,
         raise UnpackException(e)
         raise UnpackException(e)
 
 
 
 
-def unpack(stream, *, raw=True, unicode_errors=None,
+def unpack(stream, *, raw=RAW, unicode_errors=UNICODE_ERRORS,
            strict_map_key=False,
            strict_map_key=False,
            **kwargs):
            **kwargs):
-    assert unicode_errors is None
+    # assert raw == RAW
+    assert unicode_errors == UNICODE_ERRORS
     try:
     try:
         kw = dict(raw=raw, unicode_errors=unicode_errors,
         kw = dict(raw=raw, unicode_errors=unicode_errors,
                   strict_map_key=strict_map_key)
                   strict_map_key=strict_map_key)

+ 144 - 28
src/borg/item.pyx

@@ -2,7 +2,6 @@ import stat
 from collections import namedtuple
 from collections import namedtuple
 
 
 from .constants import ITEM_KEYS, ARCHIVE_KEYS
 from .constants import ITEM_KEYS, ARCHIVE_KEYS
-from .helpers import safe_encode, safe_decode
 from .helpers import StableDict
 from .helpers import StableDict
 from .helpers import format_file_size
 from .helpers import format_file_size
 from .helpers.msgpack import timestamp_to_int, int_to_timestamp
 from .helpers.msgpack import timestamp_to_int, int_to_timestamp
@@ -16,6 +15,51 @@ cdef extern from "_item.c":
 API_VERSION = '1.2_01'
 API_VERSION = '1.2_01'
 
 
 
 
+def fix_key(data, key):
+    """if key is a bytes-typed key, migrate key/value to a str-typed key in dict data"""
+    if isinstance(key, bytes):
+        value = data.pop(key)
+        key = key.decode()
+        data[key] = value
+    assert isinstance(key, str)
+    return key
+
+
+def fix_str_value(data, key, errors='surrogateescape'):
+    """makes sure that data[key] is a str (decode if it is bytes)"""
+    assert isinstance(key, str)  # fix_key must be called first
+    value = data[key]
+    if isinstance(value, bytes):
+        value = value.decode('utf-8', errors=errors)
+        data[key] = value
+    assert isinstance(value, str)
+    return value
+
+
+def fix_list_of_str(t):
+    """make sure we have a list of str"""
+    assert isinstance(t, (tuple, list))
+    l = [e.decode() if isinstance(e, bytes) else e for e in t]
+    assert all(isinstance(e, str) for e in l), repr(l)
+    return l
+
+
+def fix_tuple_of_str(t):
+    """make sure we have a tuple of str"""
+    assert isinstance(t, (tuple, list))
+    t = tuple(e.decode() if isinstance(e, bytes) else e for e in t)
+    assert all(isinstance(e, str) for e in t), repr(t)
+    return t
+
+
+def fix_tuple_of_str_and_int(t):
+    """make sure we have a tuple of str and int"""
+    assert isinstance(t, (tuple, list))
+    t = tuple(e.decode() if isinstance(e, bytes) else e for e in t)
+    assert all(isinstance(e, (str, int)) for e in t), repr(t)
+    return t
+
+
 class PropDict:
 class PropDict:
     """
     """
     Manage a dictionary via properties.
     Manage a dictionary via properties.
@@ -155,10 +199,10 @@ class Item(PropDict):
 
 
     # properties statically defined, so that IDEs can know their names:
     # properties statically defined, so that IDEs can know their names:
 
 
-    path = PropDict._make_property('path', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
-    source = PropDict._make_property('source', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
-    user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode)
-    group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode)
+    path = PropDict._make_property('path', str, 'surrogate-escaped str')
+    source = PropDict._make_property('source', str, 'surrogate-escaped str')
+    user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None')
+    group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None')
 
 
     acl_access = PropDict._make_property('acl_access', bytes)
     acl_access = PropDict._make_property('acl_access', bytes)
     acl_default = PropDict._make_property('acl_default', bytes)
     acl_default = PropDict._make_property('acl_default', bytes)
@@ -290,6 +334,14 @@ class Item(PropDict):
         except AttributeError:
         except AttributeError:
             return False
             return False
 
 
+    def update_internal(self, d):
+        # legacy support for migration (data from old msgpacks comes in as bytes always, but sometimes we want str)
+        for k, v in list(d.items()):
+            k = fix_key(d, k)
+            if k in ('path', 'source', 'user', 'group'):
+                v = fix_str_value(d, k)
+            self._dict[k] = v
+
 
 
 class EncryptedKey(PropDict):
 class EncryptedKey(PropDict):
     """
     """
@@ -309,7 +361,7 @@ class EncryptedKey(PropDict):
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
 
 
     version = PropDict._make_property('version', int)
     version = PropDict._make_property('version', int)
-    algorithm = PropDict._make_property('algorithm', str, encode=str.encode, decode=bytes.decode)
+    algorithm = PropDict._make_property('algorithm', str)
     iterations = PropDict._make_property('iterations', int)
     iterations = PropDict._make_property('iterations', int)
     salt = PropDict._make_property('salt', bytes)
     salt = PropDict._make_property('salt', bytes)
     hash = PropDict._make_property('hash', bytes)
     hash = PropDict._make_property('hash', bytes)
@@ -317,7 +369,17 @@ class EncryptedKey(PropDict):
     argon2_time_cost = PropDict._make_property('argon2_time_cost', int)
     argon2_time_cost = PropDict._make_property('argon2_time_cost', int)
     argon2_memory_cost = PropDict._make_property('argon2_memory_cost', int)
     argon2_memory_cost = PropDict._make_property('argon2_memory_cost', int)
     argon2_parallelism = PropDict._make_property('argon2_parallelism', int)
     argon2_parallelism = PropDict._make_property('argon2_parallelism', int)
-    argon2_type = PropDict._make_property('argon2_type', str, encode=str.encode, decode=bytes.decode)
+    argon2_type = PropDict._make_property('argon2_type', str)
+
+    def update_internal(self, d):
+        # legacy support for migration (data from old msgpacks comes in as bytes always, but sometimes we want str)
+        for k, v in list(d.items()):
+            k = fix_key(d, k)
+            if k == 'version':
+                assert isinstance(v, int)
+            if k in ('algorithm', 'argon2_type'):
+                v = fix_str_value(d, k)
+            self._dict[k] = v
 
 
 
 
 class Key(PropDict):
 class Key(PropDict):
@@ -344,17 +406,13 @@ class Key(PropDict):
     chunk_seed = PropDict._make_property('chunk_seed', int)
     chunk_seed = PropDict._make_property('chunk_seed', int)
     tam_required = PropDict._make_property('tam_required', bool)
     tam_required = PropDict._make_property('tam_required', bool)
 
 
-
-def tuple_encode(t):
-    """encode a tuple that might contain str items"""
-    # we have str, but want to give bytes to msgpack.pack
-    return tuple(safe_encode(e) if isinstance(e, str) else e for e in t)
-
-
-def tuple_decode(t):
-    """decode a tuple that might contain bytes items"""
-    # we get bytes objects from msgpack.unpack, but want str
-    return tuple(safe_decode(e) if isinstance(e, bytes) else e for e in t)
+    def update_internal(self, d):
+        # legacy support for migration (data from old msgpacks comes in as bytes always, but sometimes we want str)
+        for k, v in list(d.items()):
+            k = fix_key(d, k)
+            if k == 'version':
+                assert isinstance(v, int)
+            self._dict[k] = v
 
 
 
 
 class ArchiveItem(PropDict):
 class ArchiveItem(PropDict):
@@ -374,15 +432,15 @@ class ArchiveItem(PropDict):
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
 
 
     version = PropDict._make_property('version', int)
     version = PropDict._make_property('version', int)
-    name = PropDict._make_property('name', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
+    name = PropDict._make_property('name', str, 'surrogate-escaped str')
     items = PropDict._make_property('items', list)
     items = PropDict._make_property('items', list)
     cmdline = PropDict._make_property('cmdline', list)  # list of s-e-str
     cmdline = PropDict._make_property('cmdline', list)  # list of s-e-str
-    hostname = PropDict._make_property('hostname', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
-    username = PropDict._make_property('username', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
-    time = PropDict._make_property('time', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
-    time_end = PropDict._make_property('time_end', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
-    comment = PropDict._make_property('comment', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
-    chunker_params = PropDict._make_property('chunker_params', tuple, 'chunker-params tuple', encode=tuple_encode, decode=tuple_decode)
+    hostname = PropDict._make_property('hostname', str, 'surrogate-escaped str')
+    username = PropDict._make_property('username', str, 'surrogate-escaped str')
+    time = PropDict._make_property('time', str)
+    time_end = PropDict._make_property('time_end', str)
+    comment = PropDict._make_property('comment', str, 'surrogate-escaped str')
+    chunker_params = PropDict._make_property('chunker_params', tuple)
     recreate_cmdline = PropDict._make_property('recreate_cmdline', list)  # list of s-e-str
     recreate_cmdline = PropDict._make_property('recreate_cmdline', list)  # list of s-e-str
     # recreate_source_id, recreate_args, recreate_partial_chunks were used in 1.1.0b1 .. b2
     # recreate_source_id, recreate_args, recreate_partial_chunks were used in 1.1.0b1 .. b2
     recreate_source_id = PropDict._make_property('recreate_source_id', bytes)
     recreate_source_id = PropDict._make_property('recreate_source_id', bytes)
@@ -395,6 +453,22 @@ class ArchiveItem(PropDict):
     csize_parts = PropDict._make_property('csize_parts', int)
     csize_parts = PropDict._make_property('csize_parts', int)
     nfiles_parts = PropDict._make_property('nfiles_parts', int)
     nfiles_parts = PropDict._make_property('nfiles_parts', int)
 
 
+    def update_internal(self, d):
+        # legacy support for migration (data from old msgpacks comes in as bytes always, but sometimes we want str)
+        for k, v in list(d.items()):
+            k = fix_key(d, k)
+            if k == 'version':
+                assert isinstance(v, int)
+            if k in ('name', 'hostname', 'username', 'comment'):
+                v = fix_str_value(d, k)
+            if k in ('time', 'time_end'):
+                v = fix_str_value(d, k, 'replace')
+            if k == 'chunker_params':
+                v = fix_tuple_of_str_and_int(v)
+            if k in ('cmdline', 'recreate_cmdline'):
+                v = fix_list_of_str(v)
+            self._dict[k] = v
+
 
 
 class ManifestItem(PropDict):
 class ManifestItem(PropDict):
     """
     """
@@ -413,10 +487,52 @@ class ManifestItem(PropDict):
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
 
 
     version = PropDict._make_property('version', int)
     version = PropDict._make_property('version', int)
-    archives = PropDict._make_property('archives', dict)  # name -> dict
-    timestamp = PropDict._make_property('timestamp', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
+    archives = PropDict._make_property('archives', dict, 'dict of str -> dict')  # name -> dict
+    timestamp = PropDict._make_property('timestamp', str)
     config = PropDict._make_property('config', dict)
     config = PropDict._make_property('config', dict)
-    item_keys = PropDict._make_property('item_keys', tuple)
+    item_keys = PropDict._make_property('item_keys', tuple, 'tuple of str')
+
+    def update_internal(self, d):
+        # legacy support for migration (data from old msgpacks comes in as bytes always, but sometimes we want str)
+        for k, v in list(d.items()):
+            k = fix_key(d, k)
+            if k == 'version':
+                assert isinstance(v, int)
+            if k == 'archives':
+                ad = v
+                assert isinstance(ad, dict)
+                for ak, av in list(ad.items()):
+                    ak = fix_key(ad, ak)
+                    assert isinstance(av, dict)
+                    for ik, iv in list(av.items()):
+                        ik = fix_key(av, ik)
+                    assert set(av) == {'id', 'time'}
+                    assert isinstance(av['id'], bytes)
+                    fix_str_value(av, 'time')
+            if k == 'timestamp':
+                v = fix_str_value(d, k, 'replace')
+            if k == 'config':
+                cd = v
+                assert isinstance(cd, dict)
+                for ck, cv in list(cd.items()):
+                    ck = fix_key(cd, ck)
+                    if ck == 'tam_required':
+                        assert isinstance(cv, bool)
+                    if ck == 'feature_flags':
+                        assert isinstance(cv, dict)
+                        ops = {'read', 'check', 'write', 'delete'}
+                        for op, specs in list(cv.items()):
+                            op = fix_key(cv, op)
+                            assert op in ops
+                            for speck, specv in list(specs.items()):
+                                speck = fix_key(specs, speck)
+                                if speck == 'mandatory':
+                                    specs[speck] = fix_tuple_of_str(specv)
+                        assert set(cv).issubset(ops)
+            if k == 'item_keys':
+                v = fix_tuple_of_str(v)
+            self._dict[k] = v
+
 
 
 class ItemDiff:
 class ItemDiff:
     """
     """

+ 25 - 24
src/borg/remote.py

@@ -38,7 +38,8 @@ logger = create_logger(__name__)
 
 
 RPC_PROTOCOL_VERSION = 2
 RPC_PROTOCOL_VERSION = 2
 BORG_VERSION = parse_version(__version__)
 BORG_VERSION = parse_version(__version__)
-MSGID, MSG, ARGS, RESULT = b'i', b'm', b'a', b'r'
+MSGID, MSG, ARGS, RESULT = 'i', 'm', 'a', 'r'  # pack
+MSGIDB, MSGB, ARGSB, RESULTB = b'i', b'm', b'a', b'r'  # unpack
 
 
 MAX_INFLIGHT = 100
 MAX_INFLIGHT = 100
 
 
@@ -216,9 +217,9 @@ class RepositoryServer:  # pragma: no cover
                 for unpacked in unpacker:
                 for unpacked in unpacker:
                     if isinstance(unpacked, dict):
                     if isinstance(unpacked, dict):
                         dictFormat = True
                         dictFormat = True
-                        msgid = unpacked[MSGID]
-                        method = unpacked[MSG].decode()
-                        args = decode_keys(unpacked[ARGS])
+                        msgid = unpacked[MSGIDB]
+                        method = unpacked[MSGB].decode()
+                        args = decode_keys(unpacked[ARGSB])
                     elif isinstance(unpacked, tuple) and len(unpacked) == 4:
                     elif isinstance(unpacked, tuple) and len(unpacked) == 4:
                         dictFormat = False
                         dictFormat = False
                         # The first field 'type' was always 1 and has always been ignored
                         # The first field 'type' was always 1 and has always been ignored
@@ -256,21 +257,21 @@ class RepositoryServer:  # pragma: no cover
 
 
                             try:
                             try:
                                 msg = msgpack.packb({MSGID: msgid,
                                 msg = msgpack.packb({MSGID: msgid,
-                                                    b'exception_class': e.__class__.__name__,
-                                                    b'exception_args': e.args,
-                                                    b'exception_full': ex_full,
-                                                    b'exception_short': ex_short,
-                                                    b'exception_trace': ex_trace,
-                                                    b'sysinfo': sysinfo()})
+                                                    'exception_class': e.__class__.__name__,
+                                                    'exception_args': e.args,
+                                                    'exception_full': ex_full,
+                                                    'exception_short': ex_short,
+                                                    'exception_trace': ex_trace,
+                                                    'sysinfo': sysinfo()})
                             except TypeError:
                             except TypeError:
                                 msg = msgpack.packb({MSGID: msgid,
                                 msg = msgpack.packb({MSGID: msgid,
-                                                    b'exception_class': e.__class__.__name__,
-                                                    b'exception_args': [x if isinstance(x, (str, bytes, int)) else None
-                                                                        for x in e.args],
-                                                    b'exception_full': ex_full,
-                                                    b'exception_short': ex_short,
-                                                    b'exception_trace': ex_trace,
-                                                    b'sysinfo': sysinfo()})
+                                                    'exception_class': e.__class__.__name__,
+                                                    'exception_args': [x if isinstance(x, (str, bytes, int)) else None
+                                                                       for x in e.args],
+                                                    'exception_full': ex_full,
+                                                    'exception_short': ex_short,
+                                                    'exception_trace': ex_trace,
+                                                    'sysinfo': sysinfo()})
 
 
                             os_write(stdout_fd, msg)
                             os_write(stdout_fd, msg)
                         else:
                         else:
@@ -570,7 +571,7 @@ class RemoteRepository:
         try:
         try:
             try:
             try:
                 version = self.call('negotiate', {'client_data': {
                 version = self.call('negotiate', {'client_data': {
-                    b'client_version': BORG_VERSION,
+                    'client_version': BORG_VERSION,
                 }})
                 }})
             except ConnectionClosed:
             except ConnectionClosed:
                 raise ConnectionClosedWithHint('Is borg working on the server?') from None
                 raise ConnectionClosedWithHint('Is borg working on the server?') from None
@@ -791,7 +792,7 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
                     if b'exception_class' in unpacked:
                     if b'exception_class' in unpacked:
                         handle_error(unpacked)
                         handle_error(unpacked)
                     else:
                     else:
-                        yield unpacked[RESULT]
+                        yield unpacked[RESULTB]
                         if not waiting_for and not calls:
                         if not waiting_for and not calls:
                             return
                             return
                 except KeyError:
                 except KeyError:
@@ -811,7 +812,7 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
                         if b'exception_class' in unpacked:
                         if b'exception_class' in unpacked:
                             handle_error(unpacked)
                             handle_error(unpacked)
                         else:
                         else:
-                            yield unpacked[RESULT]
+                            yield unpacked[RESULTB]
             if self.to_send or ((calls or self.preload_ids) and len(waiting_for) < MAX_INFLIGHT):
             if self.to_send or ((calls or self.preload_ids) and len(waiting_for) < MAX_INFLIGHT):
                 w_fds = [self.stdin_fd]
                 w_fds = [self.stdin_fd]
             else:
             else:
@@ -828,15 +829,15 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
                     self.unpacker.feed(data)
                     self.unpacker.feed(data)
                     for unpacked in self.unpacker:
                     for unpacked in self.unpacker:
                         if isinstance(unpacked, dict):
                         if isinstance(unpacked, dict):
-                            msgid = unpacked[MSGID]
+                            msgid = unpacked[MSGIDB]
                         elif isinstance(unpacked, tuple) and len(unpacked) == 4:
                         elif isinstance(unpacked, tuple) and len(unpacked) == 4:
                             # The first field 'type' was always 1 and has always been ignored
                             # The first field 'type' was always 1 and has always been ignored
                             _, msgid, error, res = unpacked
                             _, msgid, error, res = unpacked
                             if error:
                             if error:
                                 # ignore res, because it is only a fixed string anyway.
                                 # ignore res, because it is only a fixed string anyway.
-                                unpacked = {MSGID: msgid, b'exception_class': error}
+                                unpacked = {MSGIDB: msgid, b'exception_class': error}
                             else:
                             else:
-                                unpacked = {MSGID: msgid, RESULT: res}
+                                unpacked = {MSGIDB: msgid, RESULTB: res}
                         else:
                         else:
                             raise UnexpectedRPCDataFormatFromServer(data)
                             raise UnexpectedRPCDataFormatFromServer(data)
                         if msgid in self.ignore_responses:
                         if msgid in self.ignore_responses:
@@ -847,7 +848,7 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
                             else:
                             else:
                                 # we currently do not have async result values except "None",
                                 # we currently do not have async result values except "None",
                                 # so we do not add them into async_responses.
                                 # so we do not add them into async_responses.
-                                if unpacked[RESULT] is not None:
+                                if unpacked[RESULTB] is not None:
                                     self.async_responses[msgid] = unpacked
                                     self.async_responses[msgid] = unpacked
                         else:
                         else:
                             self.responses[msgid] = unpacked
                             self.responses[msgid] = unpacked

+ 7 - 7
src/borg/testsuite/archive.py

@@ -186,8 +186,8 @@ class RobustUnpackerTestCase(BaseTestCase):
         return result
         return result
 
 
     def test_extra_garbage_no_sync(self):
     def test_extra_garbage_no_sync(self):
-        chunks = [(False, [self.make_chunks([b'foo', b'bar'])]),
-                  (False, [b'garbage'] + [self.make_chunks([b'boo', b'baz'])])]
+        chunks = [(False, [self.make_chunks(['foo', 'bar'])]),
+                  (False, [b'garbage'] + [self.make_chunks(['boo', 'baz'])])]
         result = self.process(chunks)
         result = self.process(chunks)
         self.assert_equal(result, [
         self.assert_equal(result, [
             {b'path': b'foo'}, {b'path': b'bar'},
             {b'path': b'foo'}, {b'path': b'bar'},
@@ -203,19 +203,19 @@ class RobustUnpackerTestCase(BaseTestCase):
         return parts
         return parts
 
 
     def test_correct_stream(self):
     def test_correct_stream(self):
-        chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 2)
+        chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 2)
         input = [(False, chunks)]
         input = [(False, chunks)]
         result = self.process(input)
         result = self.process(input)
         self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'bar'}, {b'path': b'boo'}, {b'path': b'baz'}])
         self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'bar'}, {b'path': b'boo'}, {b'path': b'baz'}])
 
 
     def test_missing_chunk(self):
     def test_missing_chunk(self):
-        chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 4)
+        chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 4)
         input = [(False, chunks[:3]), (True, chunks[4:])]
         input = [(False, chunks[:3]), (True, chunks[4:])]
         result = self.process(input)
         result = self.process(input)
         self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
         self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
 
 
     def test_corrupt_chunk(self):
     def test_corrupt_chunk(self):
-        chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 4)
+        chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 4)
         input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])]
         input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])]
         result = self.process(input)
         result = self.process(input)
         self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
         self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
@@ -242,7 +242,7 @@ IK = sorted(list(ITEM_KEYS))
 
 
 @pytest.mark.parametrize('packed',
 @pytest.mark.parametrize('packed',
     [msgpack.packb(o) for o in [
     [msgpack.packb(o) for o in [
-        {b'path': b'/a/b/c'},  # small (different msgpack mapping type!)
+        {'path': b'/a/b/c'},  # small (different msgpack mapping type!)
         OrderedDict((k, b'') for k in IK),  # as big (key count) as it gets
         OrderedDict((k, b'') for k in IK),  # as big (key count) as it gets
         OrderedDict((k, b'x' * 1000) for k in IK),  # as big (key count and volume) as it gets
         OrderedDict((k, b'x' * 1000) for k in IK),  # as big (key count and volume) as it gets
     ]])
     ]])
@@ -251,7 +251,7 @@ def test_valid_msgpacked_items(packed, item_keys_serialized):
 
 
 
 
 def test_key_length_msgpacked_items():
 def test_key_length_msgpacked_items():
-    key = b'x' * 32  # 31 bytes is the limit for fixstr msgpack type
+    key = 'x' * 32  # 31 bytes is the limit for fixstr msgpack type
     data = {key: b''}
     data = {key: b''}
     item_keys_serialized = [msgpack.packb(key), ]
     item_keys_serialized = [msgpack.packb(key), ]
     assert valid_msgpacked_dict(msgpack.packb(data), item_keys_serialized)
     assert valid_msgpacked_dict(msgpack.packb(data), item_keys_serialized)

+ 7 - 7
src/borg/testsuite/archiver.py

@@ -1810,7 +1810,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
     def add_unknown_feature(self, operation):
     def add_unknown_feature(self, operation):
         with Repository(self.repository_path, exclusive=True) as repository:
         with Repository(self.repository_path, exclusive=True) as repository:
             manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
             manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
-            manifest.config[b'feature_flags'] = {operation.value.encode(): {b'mandatory': [b'unknown-feature']}}
+            manifest.config['feature_flags'] = {operation.value: {'mandatory': ['unknown-feature']}}
             manifest.write()
             manifest.write()
             repository.commit(compact=False)
             repository.commit(compact=False)
 
 
@@ -3640,13 +3640,13 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
 
 
         with Repository(self.repository_path) as repository:
         with Repository(self.repository_path) as repository:
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
-            assert key[b'algorithm'] == expected_algorithm
+            assert key[b'algorithm'] == expected_algorithm.encode()
 
 
     def test_change_passphrase_does_not_change_algorithm_argon2(self):
     def test_change_passphrase_does_not_change_algorithm_argon2(self):
-        self.verify_change_passphrase_does_not_change_algorithm('argon2', b'argon2 chacha20-poly1305')
+        self.verify_change_passphrase_does_not_change_algorithm('argon2', 'argon2 chacha20-poly1305')
 
 
     def test_change_passphrase_does_not_change_algorithm_pbkdf2(self):
     def test_change_passphrase_does_not_change_algorithm_pbkdf2(self):
-        self.verify_change_passphrase_does_not_change_algorithm('pbkdf2', b'sha256')
+        self.verify_change_passphrase_does_not_change_algorithm('pbkdf2', 'sha256')
 
 
     def verify_change_location_does_not_change_algorithm(self, given_algorithm, expected_algorithm):
     def verify_change_location_does_not_change_algorithm(self, given_algorithm, expected_algorithm):
         self.cmd('init', '--encryption=keyfile', '--key-algorithm', given_algorithm, self.repository_location)
         self.cmd('init', '--encryption=keyfile', '--key-algorithm', given_algorithm, self.repository_location)
@@ -3655,13 +3655,13 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
 
 
         with Repository(self.repository_path) as repository:
         with Repository(self.repository_path) as repository:
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
-            assert key[b'algorithm'] == expected_algorithm
+            assert key[b'algorithm'] == expected_algorithm.encode()
 
 
     def test_change_location_does_not_change_algorithm_argon2(self):
     def test_change_location_does_not_change_algorithm_argon2(self):
-        self.verify_change_location_does_not_change_algorithm('argon2', b'argon2 chacha20-poly1305')
+        self.verify_change_location_does_not_change_algorithm('argon2', 'argon2 chacha20-poly1305')
 
 
     def test_change_location_does_not_change_algorithm_pbkdf2(self):
     def test_change_location_does_not_change_algorithm_pbkdf2(self):
-        self.verify_change_location_does_not_change_algorithm('pbkdf2', b'sha256')
+        self.verify_change_location_does_not_change_algorithm('pbkdf2', 'sha256')
 
 
     def test_key_change_algorithm(self):
     def test_key_change_algorithm(self):
         self.cmd('init', '--encryption=repokey', '--key-algorithm=pbkdf2', self.repository_location)
         self.cmd('init', '--encryption=repokey', '--key-algorithm=pbkdf2', self.repository_location)

+ 3 - 3
src/borg/testsuite/item.py

@@ -102,7 +102,7 @@ def test_item_se_str_property():
     item = Item()
     item = Item()
     item.path = '/a/b/c'
     item.path = '/a/b/c'
     assert item.path == '/a/b/c'
     assert item.path == '/a/b/c'
-    assert item.as_dict() == {'path': b'/a/b/c'}
+    assert item.as_dict() == {'path': '/a/b/c'}
     del item.path
     del item.path
     assert item.as_dict() == {}
     assert item.as_dict() == {}
     with pytest.raises(TypeError):
     with pytest.raises(TypeError):
@@ -111,11 +111,11 @@ def test_item_se_str_property():
     # non-utf-8 path, needing surrogate-escaping for latin-1 u-umlaut
     # non-utf-8 path, needing surrogate-escaping for latin-1 u-umlaut
     item = Item(internal_dict={'path': b'/a/\xfc/c'})
     item = Item(internal_dict={'path': b'/a/\xfc/c'})
     assert item.path == '/a/\udcfc/c'  # getting a surrogate-escaped representation
     assert item.path == '/a/\udcfc/c'  # getting a surrogate-escaped representation
-    assert item.as_dict() == {'path': b'/a/\xfc/c'}
+    assert item.as_dict() == {'path': '/a/\udcfc/c'}
     del item.path
     del item.path
     assert 'path' not in item
     assert 'path' not in item
     item.path = '/a/\udcfc/c'  # setting using a surrogate-escaped representation
     item.path = '/a/\udcfc/c'  # setting using a surrogate-escaped representation
-    assert item.as_dict() == {'path': b'/a/\xfc/c'}
+    assert item.as_dict() == {'path': '/a/\udcfc/c'}
 
 
 
 
 def test_item_list_property():
 def test_item_list_property():