Bläddra i källkod

Merge pull request #6704 from ThomasWaldmann/msgpack-str-bytes-cleanup

borg2: cleanup msgpack related str/bytes mess
TW 3 år sedan
förälder
incheckning
2c9be35886

+ 19 - 19
docs/internals/data-structures.rst

@@ -329,17 +329,17 @@ or modified. It looks like this:
 .. code-block:: python
 .. code-block:: python
 
 
     {
     {
-        b'version': 1,
-        b'timestamp': b'2017-05-05T12:42:23.042864',
-        b'item_keys': [b'acl_access', b'acl_default', ...],
-        b'config': {},
-        b'archives': {
-            b'2017-05-05-system-backup': {
-                b'id': b'<32 byte binary object ID>',
-                b'time': b'2017-05-05T12:42:22.942864',
+        'version': 1,
+        'timestamp': '2017-05-05T12:42:23.042864',
+        'item_keys': ['acl_access', 'acl_default', ...],
+        'config': {},
+        'archives': {
+            '2017-05-05-system-backup': {
+                'id': b'<32 byte binary object ID>',
+                'time': '2017-05-05T12:42:22.942864',
             },
             },
         },
         },
-        b'tam': ...,
+        'tam': ...,
     }
     }
 
 
 The *version* field can be either 1 or 2. The versions differ in the
 The *version* field can be either 1 or 2. The versions differ in the
@@ -393,15 +393,15 @@ The *config* key stores the feature flags enabled on a repository:
 .. code-block:: python
 .. code-block:: python
 
 
     config = {
     config = {
-        b'feature_flags': {
-            b'read': {
-                b'mandatory': [b'some_feature'],
+        'feature_flags': {
+            'read': {
+                'mandatory': ['some_feature'],
             },
             },
-            b'check': {
-                b'mandatory': [b'other_feature'],
+            'check': {
+                'mandatory': ['other_feature'],
             }
             }
-            b'write': ...,
-            b'delete': ...
+            'write': ...,
+            'delete': ...
         },
         },
     }
     }
 
 
@@ -1220,9 +1220,9 @@ transaction ID in the file names. Integrity data is stored in a third file
 .. code-block:: python
 .. code-block:: python
 
 
     {
     {
-        b'version': 2,
-        b'hints': b'{"algorithm": "XXH64", "digests": {"final": "411208db2aa13f1a"}}',
-        b'index': b'{"algorithm": "XXH64", "digests": {"HashHeader": "846b7315f91b8e48", "final": "cb3e26cadc173e40"}}'
+        'version': 2,
+        'hints': '{"algorithm": "XXH64", "digests": {"final": "411208db2aa13f1a"}}',
+        'index': '{"algorithm": "XXH64", "digests": {"HashHeader": "846b7315f91b8e48", "final": "cb3e26cadc173e40"}}'
     }
     }
 
 
 The *version* key started at 2, the same version used for the hints. Since Borg has
 The *version* key started at 2, the same version used for the hints. Since Borg has

+ 21 - 21
src/borg/archive.py

@@ -34,7 +34,7 @@ from .helpers import Error, IntegrityError, set_ec
 from .platform import uid2user, user2uid, gid2group, group2gid
 from .platform import uid2user, user2uid, gid2group, group2gid
 from .helpers import parse_timestamp, to_localtime
 from .helpers import parse_timestamp, to_localtime
 from .helpers import OutputTimestamp, format_timedelta, format_file_size, file_status, FileSize
 from .helpers import OutputTimestamp, format_timedelta, format_file_size, file_status, FileSize
-from .helpers import safe_encode, safe_decode, make_path_safe, remove_surrogates
+from .helpers import safe_encode, make_path_safe, remove_surrogates
 from .helpers import StableDict
 from .helpers import StableDict
 from .helpers import bin_to_hex
 from .helpers import bin_to_hex
 from .helpers import safe_ns
 from .helpers import safe_ns
@@ -392,14 +392,14 @@ def get_item_uid_gid(item, *, numeric, uid_forced=None, gid_forced=None, uid_def
     if uid_forced is not None:
     if uid_forced is not None:
         uid = uid_forced
         uid = uid_forced
     else:
     else:
-        uid = None if numeric else user2uid(item.user)
+        uid = None if numeric else user2uid(item.get('user'))
         uid = item.uid if uid is None else uid
         uid = item.uid if uid is None else uid
         if uid < 0:
         if uid < 0:
             uid = uid_default
             uid = uid_default
     if gid_forced is not None:
     if gid_forced is not None:
         gid = gid_forced
         gid = gid_forced
     else:
     else:
-        gid = None if numeric else group2gid(item.group)
+        gid = None if numeric else group2gid(item.get('group'))
         gid = item.gid if gid is None else gid
         gid = item.gid if gid is None else gid
         if gid < 0:
         if gid < 0:
             gid = gid_default
             gid = gid_default
@@ -479,7 +479,6 @@ class Archive:
     def load(self, id):
     def load(self, id):
         self.id = id
         self.id = id
         self.metadata = self._load_meta(self.id)
         self.metadata = self._load_meta(self.id)
-        self.metadata.cmdline = [safe_decode(arg) for arg in self.metadata.cmdline]
         self.name = self.metadata.name
         self.name = self.metadata.name
         self.comment = self.metadata.get('comment', '')
         self.comment = self.metadata.get('comment', '')
 
 
@@ -1090,11 +1089,13 @@ class MetadataCollector:
         if not self.nobirthtime and hasattr(st, 'st_birthtime'):
         if not self.nobirthtime and hasattr(st, 'st_birthtime'):
             # sadly, there's no stat_result.st_birthtime_ns
             # sadly, there's no stat_result.st_birthtime_ns
             attrs['birthtime'] = safe_ns(int(st.st_birthtime * 10**9))
             attrs['birthtime'] = safe_ns(int(st.st_birthtime * 10**9))
-        if self.numeric_ids:
-            attrs['user'] = attrs['group'] = None
-        else:
-            attrs['user'] = uid2user(st.st_uid)
-            attrs['group'] = gid2group(st.st_gid)
+        if not self.numeric_ids:
+            user = uid2user(st.st_uid)
+            if user is not None:
+                attrs['user'] = user
+            group = gid2group(st.st_gid)
+            if group is not None:
+                attrs['group'] = group
         return attrs
         return attrs
 
 
     def stat_ext_attrs(self, st, path, fd=None):
     def stat_ext_attrs(self, st, path, fd=None):
@@ -1427,8 +1428,11 @@ class TarfileObjectProcessors:
                 return safe_ns(int(float(s) * 1e9))
                 return safe_ns(int(float(s) * 1e9))
 
 
             item = Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type,
             item = Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type,
-                        uid=tarinfo.uid, gid=tarinfo.gid, user=tarinfo.uname or None, group=tarinfo.gname or None,
-                        mtime=s_to_ns(tarinfo.mtime))
+                        uid=tarinfo.uid, gid=tarinfo.gid, mtime=s_to_ns(tarinfo.mtime))
+            if tarinfo.uname:
+                item.user = tarinfo.uname
+            if tarinfo.gname:
+                item.group = tarinfo.gname
             if ph:
             if ph:
                 # note: for mtime this is a bit redundant as it is already done by tarfile module,
                 # note: for mtime this is a bit redundant as it is already done by tarfile module,
                 #       but we just do it in our way to be consistent for sure.
                 #       but we just do it in our way to be consistent for sure.
@@ -1515,7 +1519,7 @@ class RobustUnpacker:
     """
     """
     def __init__(self, validator, item_keys):
     def __init__(self, validator, item_keys):
         super().__init__()
         super().__init__()
-        self.item_keys = [msgpack.packb(name.encode()) for name in item_keys]
+        self.item_keys = [msgpack.packb(name) for name in item_keys]
         self.validator = validator
         self.validator = validator
         self._buffered_data = []
         self._buffered_data = []
         self._resync = False
         self._resync = False
@@ -1719,13 +1723,10 @@ class ArchiveChecker:
 
 
         Iterates through all objects in the repository looking for archive metadata blocks.
         Iterates through all objects in the repository looking for archive metadata blocks.
         """
         """
-        required_archive_keys = frozenset(key.encode() for key in REQUIRED_ARCHIVE_KEYS)
-
         def valid_archive(obj):
         def valid_archive(obj):
             if not isinstance(obj, dict):
             if not isinstance(obj, dict):
                 return False
                 return False
-            keys = set(obj)
-            return required_archive_keys.issubset(keys)
+            return REQUIRED_ARCHIVE_KEYS.issubset(obj)
 
 
         logger.info('Rebuilding missing manifest, this might take some time...')
         logger.info('Rebuilding missing manifest, this might take some time...')
         # as we have lost the manifest, we do not know any more what valid item keys we had.
         # as we have lost the manifest, we do not know any more what valid item keys we had.
@@ -1734,7 +1735,7 @@ class ArchiveChecker:
         # lost manifest on a older borg version than the most recent one that was ever used
         # lost manifest on a older borg version than the most recent one that was ever used
         # within this repository (assuming that newer borg versions support more item keys).
         # within this repository (assuming that newer borg versions support more item keys).
         manifest = Manifest(self.key, self.repository)
         manifest = Manifest(self.key, self.repository)
-        archive_keys_serialized = [msgpack.packb(name.encode()) for name in ARCHIVE_KEYS]
+        archive_keys_serialized = [msgpack.packb(name) for name in ARCHIVE_KEYS]
         pi = ProgressIndicatorPercent(total=len(self.chunks), msg="Rebuilding manifest %6.2f%%", step=0.01,
         pi = ProgressIndicatorPercent(total=len(self.chunks), msg="Rebuilding manifest %6.2f%%", step=0.01,
                                       msgid='check.rebuild_manifest')
                                       msgid='check.rebuild_manifest')
         for chunk_id, _ in self.chunks.iteritems():
         for chunk_id, _ in self.chunks.iteritems():
@@ -1881,9 +1882,9 @@ class ArchiveChecker:
 
 
             Missing item chunks will be skipped and the msgpack stream will be restarted
             Missing item chunks will be skipped and the msgpack stream will be restarted
             """
             """
-            item_keys = frozenset(key.encode() for key in self.manifest.item_keys)
-            required_item_keys = frozenset(key.encode() for key in REQUIRED_ITEM_KEYS)
-            unpacker = RobustUnpacker(lambda item: isinstance(item, StableDict) and b'path' in item,
+            item_keys = self.manifest.item_keys
+            required_item_keys = REQUIRED_ITEM_KEYS
+            unpacker = RobustUnpacker(lambda item: isinstance(item, StableDict) and 'path' in item,
                                       self.manifest.item_keys)
                                       self.manifest.item_keys)
             _state = 0
             _state = 0
 
 
@@ -1991,7 +1992,6 @@ class ArchiveChecker:
                 archive = ArchiveItem(internal_dict=msgpack.unpackb(data))
                 archive = ArchiveItem(internal_dict=msgpack.unpackb(data))
                 if archive.version != 2:
                 if archive.version != 2:
                     raise Exception('Unknown archive metadata version')
                     raise Exception('Unknown archive metadata version')
-                archive.cmdline = [safe_decode(arg) for arg in archive.cmdline]
                 items_buffer = ChunkBuffer(self.key)
                 items_buffer = ChunkBuffer(self.key)
                 items_buffer.write_chunk = add_callback
                 items_buffer.write_chunk = add_callback
                 for item in robust_iterator(archive):
                 for item in robust_iterator(archive):

+ 9 - 13
src/borg/archiver.py

@@ -55,7 +55,7 @@ try:
     from .helpers import PrefixSpec, GlobSpec, CommentSpec, SortBySpec, FilesCacheMode
     from .helpers import PrefixSpec, GlobSpec, CommentSpec, SortBySpec, FilesCacheMode
     from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter
     from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter
     from .helpers import format_timedelta, format_file_size, parse_file_size, format_archive
     from .helpers import format_timedelta, format_file_size, parse_file_size, format_archive
-    from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict, eval_escapes
+    from .helpers import remove_surrogates, bin_to_hex, prepare_dump_dict, eval_escapes
     from .helpers import interval, prune_within, prune_split, PRUNING_PATTERNS
     from .helpers import interval, prune_within, prune_split, PRUNING_PATTERNS
     from .helpers import timestamp
     from .helpers import timestamp
     from .helpers import get_cache_dir, os_stat
     from .helpers import get_cache_dir, os_stat
@@ -366,10 +366,6 @@ class Archiver:
                 if chunks_healthy is not None:
                 if chunks_healthy is not None:
                     item._dict['chunks_healthy'] = chunks
                     item._dict['chunks_healthy'] = chunks
                 item._dict.pop('source')  # not used for hardlinks any more, replaced by hlid
                 item._dict.pop('source')  # not used for hardlinks any more, replaced by hlid
-            for attr in 'atime', 'ctime', 'mtime', 'birthtime':
-                if attr in item:
-                    ns = getattr(item, attr)  # decode (bigint or Timestamp) --> int ns
-                    setattr(item, attr, ns)  # encode int ns --> msgpack.Timestamp only, no bigint any more
             # make sure we only have desired stuff in the new item. specifically, make sure to get rid of:
             # make sure we only have desired stuff in the new item. specifically, make sure to get rid of:
             # - 'acl' remnants of bug in attic <= 0.13
             # - 'acl' remnants of bug in attic <= 0.13
             # - 'hardlink_master' (superseded by hlid)
             # - 'hardlink_master' (superseded by hlid)
@@ -1359,8 +1355,8 @@ class Archiver:
             tarinfo.mode = stat.S_IMODE(item.mode)
             tarinfo.mode = stat.S_IMODE(item.mode)
             tarinfo.uid = item.uid
             tarinfo.uid = item.uid
             tarinfo.gid = item.gid
             tarinfo.gid = item.gid
-            tarinfo.uname = item.user or ''
-            tarinfo.gname = item.group or ''
+            tarinfo.uname = item.get('user', '')
+            tarinfo.gname = item.get('group',  '')
             # The linkname in tar has 2 uses:
             # The linkname in tar has 2 uses:
             # for symlinks it means the destination, while for hardlinks it refers to the file.
             # for symlinks it means the destination, while for hardlinks it refers to the file.
             # Since hardlinks in tar have a different type code (LNKTYPE) the format might
             # Since hardlinks in tar have a different type code (LNKTYPE) the format might
@@ -1944,12 +1940,12 @@ class Archiver:
                 print('This repository is not encrypted, cannot enable TAM.')
                 print('This repository is not encrypted, cannot enable TAM.')
                 return EXIT_ERROR
                 return EXIT_ERROR
 
 
-            if not manifest.tam_verified or not manifest.config.get(b'tam_required', False):
+            if not manifest.tam_verified or not manifest.config.get('tam_required', False):
                 # The standard archive listing doesn't include the archive ID like in borg 1.1.x
                 # The standard archive listing doesn't include the archive ID like in borg 1.1.x
                 print('Manifest contents:')
                 print('Manifest contents:')
                 for archive_info in manifest.archives.list(sort_by=['ts']):
                 for archive_info in manifest.archives.list(sort_by=['ts']):
                     print(format_archive(archive_info), '[%s]' % bin_to_hex(archive_info.id))
                     print(format_archive(archive_info), '[%s]' % bin_to_hex(archive_info.id))
-                manifest.config[b'tam_required'] = True
+                manifest.config['tam_required'] = True
                 manifest.write()
                 manifest.write()
                 repository.commit(compact=False)
                 repository.commit(compact=False)
             if not key.tam_required:
             if not key.tam_required:
@@ -1972,7 +1968,7 @@ class Archiver:
                 print('Key updated')
                 print('Key updated')
                 if hasattr(key, 'find_key'):
                 if hasattr(key, 'find_key'):
                     print('Key location:', key.find_key())
                     print('Key location:', key.find_key())
-            manifest.config[b'tam_required'] = False
+            manifest.config['tam_required'] = False
             manifest.write()
             manifest.write()
             repository.commit(compact=False)
             repository.commit(compact=False)
         else:
         else:
@@ -2304,7 +2300,7 @@ class Archiver:
         """dump decoded archive metadata (not: data)"""
         """dump decoded archive metadata (not: data)"""
 
 
         try:
         try:
-            archive_meta_orig = manifest.archives.get_raw_dict()[safe_encode(args.location.archive)]
+            archive_meta_orig = manifest.archives.get_raw_dict()[args.location.archive]
         except KeyError:
         except KeyError:
             raise Archive.DoesNotExist(args.location.archive)
             raise Archive.DoesNotExist(args.location.archive)
 
 
@@ -2321,7 +2317,7 @@ class Archiver:
             fd.write(do_indent(prepare_dump_dict(archive_meta_orig)))
             fd.write(do_indent(prepare_dump_dict(archive_meta_orig)))
             fd.write(',\n')
             fd.write(',\n')
 
 
-            data = key.decrypt(archive_meta_orig[b'id'], repository.get(archive_meta_orig[b'id']))
+            data = key.decrypt(archive_meta_orig['id'], repository.get(archive_meta_orig['id']))
             archive_org_dict = msgpack.unpackb(data, object_hook=StableDict)
             archive_org_dict = msgpack.unpackb(data, object_hook=StableDict)
 
 
             fd.write('    "_meta":\n')
             fd.write('    "_meta":\n')
@@ -2331,7 +2327,7 @@ class Archiver:
 
 
             unpacker = msgpack.Unpacker(use_list=False, object_hook=StableDict)
             unpacker = msgpack.Unpacker(use_list=False, object_hook=StableDict)
             first = True
             first = True
-            for item_id in archive_org_dict[b'items']:
+            for item_id in archive_org_dict['items']:
                 data = key.decrypt(item_id, repository.get(item_id))
                 data = key.decrypt(item_id, repository.get(item_id))
                 unpacker.feed(data)
                 unpacker.feed(data)
                 for item in unpacker:
                 for item in unpacker:

+ 20 - 16
src/borg/cache_sync/unpack.h

@@ -384,19 +384,11 @@ static inline int unpack_callback_map_end(unpack_user* u)
 
 
 static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* p, unsigned int length)
 static inline int unpack_callback_raw(unpack_user* u, const char* b, const char* p, unsigned int length)
 {
 {
-    /* raw = what Borg uses for binary stuff and strings as well */
+    /* raw = what Borg uses for text stuff */
     /* Note: p points to an internal buffer which contains l bytes. */
     /* Note: p points to an internal buffer which contains l bytes. */
     (void)b;
     (void)b;
 
 
     switch(u->expect) {
     switch(u->expect) {
-    case expect_key:
-        if(length != 32) {
-            SET_LAST_ERROR("Incorrect key length");
-            return -1;
-        }
-        memcpy(u->current.key, p, 32);
-        u->expect = expect_size;
-        break;
     case expect_map_key:
     case expect_map_key:
         if(length == 6 && !memcmp("chunks", p, 6)) {
         if(length == 6 && !memcmp("chunks", p, 6)) {
             u->expect = expect_chunks_begin;
             u->expect = expect_chunks_begin;
@@ -409,19 +401,31 @@ static inline int unpack_callback_raw(unpack_user* u, const char* b, const char*
             u->expect = expect_map_item_end;
             u->expect = expect_map_item_end;
         }
         }
         break;
         break;
-    default:
-        if(u->inside_chunks) {
-            SET_LAST_ERROR("Unexpected bytes in chunks structure");
-            return -1;
-        }
     }
     }
     return 0;
     return 0;
 }
 }
 
 
 static inline int unpack_callback_bin(unpack_user* u, const char* b, const char* p, unsigned int length)
 static inline int unpack_callback_bin(unpack_user* u, const char* b, const char* p, unsigned int length)
 {
 {
-    (void)u; (void)b; (void)p; (void)length;
-    UNEXPECTED("bin");
+    /* bin = what Borg uses for binary stuff */
+    /* Note: p points to an internal buffer which contains l bytes. */
+    (void)b;
+
+    switch(u->expect) {
+    case expect_key:
+        if(length != 32) {
+            SET_LAST_ERROR("Incorrect key length");
+            return -1;
+        }
+        memcpy(u->current.key, p, 32);
+        u->expect = expect_size;
+        break;
+    default:
+        if(u->inside_chunks) {
+            SET_LAST_ERROR("Unexpected bytes in chunks structure");
+            return -1;
+        }
+    }
     return 0;
     return 0;
 }
 }
 
 

+ 1 - 1
src/borg/constants.py

@@ -128,7 +128,7 @@ class KeyType:
     # upper 4 bits are ciphersuite, 0 == legacy AES-CTR
     # upper 4 bits are ciphersuite, 0 == legacy AES-CTR
     KEYFILE = 0x00
     KEYFILE = 0x00
     # repos with PASSPHRASE mode could not be created any more since borg 1.0, see #97.
     # repos with PASSPHRASE mode could not be created any more since borg 1.0, see #97.
-    # in borg 1.3 all of its code and also the "borg key migrate-to-repokey" command was removed.
+    # in borg 2. all of its code and also the "borg key migrate-to-repokey" command was removed.
     # if you still need to, you can use "borg key migrate-to-repokey" with borg 1.0, 1.1 and 1.2.
     # if you still need to, you can use "borg key migrate-to-repokey" with borg 1.0, 1.1 and 1.2.
     # Nowadays, we just dispatch this to RepoKey and assume the passphrase was migrated to a repokey.
     # Nowadays, we just dispatch this to RepoKey and assume the passphrase was migrated to a repokey.
     PASSPHRASE = 0x01  # legacy, attic and borg < 1.0
     PASSPHRASE = 0x01  # legacy, attic and borg < 1.0

+ 9 - 7
src/borg/crypto/key.py

@@ -22,7 +22,7 @@ from ..helpers import bin_to_hex
 from ..helpers.passphrase import Passphrase, PasswordRetriesExceeded, PassphraseWrong
 from ..helpers.passphrase import Passphrase, PasswordRetriesExceeded, PassphraseWrong
 from ..helpers import msgpack
 from ..helpers import msgpack
 from ..helpers.manifest import Manifest
 from ..helpers.manifest import Manifest
-from ..item import Key, EncryptedKey
+from ..item import Key, EncryptedKey, want_bytes
 from ..platform import SaveFile
 from ..platform import SaveFile
 
 
 from .nonces import NonceManager
 from .nonces import NonceManager
@@ -232,26 +232,28 @@ class KeyBase:
         unpacker = get_limited_unpacker('manifest')
         unpacker = get_limited_unpacker('manifest')
         unpacker.feed(data)
         unpacker.feed(data)
         unpacked = unpacker.unpack()
         unpacked = unpacker.unpack()
-        if b'tam' not in unpacked:
+        if 'tam' not in unpacked:
             if tam_required:
             if tam_required:
                 raise TAMRequiredError(self.repository._location.canonical_path())
                 raise TAMRequiredError(self.repository._location.canonical_path())
             else:
             else:
                 logger.debug('TAM not found and not required')
                 logger.debug('TAM not found and not required')
                 return unpacked, False
                 return unpacked, False
-        tam = unpacked.pop(b'tam', None)
+        tam = unpacked.pop('tam', None)
         if not isinstance(tam, dict):
         if not isinstance(tam, dict):
             raise TAMInvalid()
             raise TAMInvalid()
-        tam_type = tam.get(b'type', b'<none>').decode('ascii', 'replace')
+        tam_type = tam.get('type', '<none>')
         if tam_type != 'HKDF_HMAC_SHA512':
         if tam_type != 'HKDF_HMAC_SHA512':
             if tam_required:
             if tam_required:
                 raise TAMUnsupportedSuiteError(repr(tam_type))
                 raise TAMUnsupportedSuiteError(repr(tam_type))
             else:
             else:
                 logger.debug('Ignoring TAM made with unsupported suite, since TAM is not required: %r', tam_type)
                 logger.debug('Ignoring TAM made with unsupported suite, since TAM is not required: %r', tam_type)
                 return unpacked, False
                 return unpacked, False
-        tam_hmac = tam.get(b'hmac')
-        tam_salt = tam.get(b'salt')
-        if not isinstance(tam_salt, bytes) or not isinstance(tam_hmac, bytes):
+        tam_hmac = tam.get('hmac')
+        tam_salt = tam.get('salt')
+        if not isinstance(tam_salt, (bytes, str)) or not isinstance(tam_hmac, (bytes, str)):
             raise TAMInvalid()
             raise TAMInvalid()
+        tam_hmac = want_bytes(tam_hmac)  # legacy
+        tam_salt = want_bytes(tam_salt)  # legacy
         offset = data.index(tam_hmac)
         offset = data.index(tam_hmac)
         data[offset:offset + 64] = bytes(64)
         data[offset:offset + 64] = bytes(64)
         tam_key = self._tam_key(tam_salt, context=b'manifest')
         tam_key = self._tam_key(tam_salt, context=b'manifest')

+ 1 - 1
src/borg/helpers/__init__.py

@@ -18,7 +18,7 @@ from .progress import *  # NOQA
 from .time import *  # NOQA
 from .time import *  # NOQA
 from .yes import *  # NOQA
 from .yes import *  # NOQA
 
 
-from .msgpack import is_slow_msgpack, is_supported_msgpack, int_to_bigint, bigint_to_int, get_limited_unpacker
+from .msgpack import is_slow_msgpack, is_supported_msgpack, get_limited_unpacker
 from . import msgpack
 from . import msgpack
 
 
 # generic mechanism to enable users to invoke workarounds by setting the
 # generic mechanism to enable users to invoke workarounds by setting the

+ 2 - 2
src/borg/helpers/fs.py

@@ -205,8 +205,8 @@ class HardLinkManager:
 
 
     def hardlink_id_from_path(self, path):
     def hardlink_id_from_path(self, path):
         """compute a hardlink id from a path"""
         """compute a hardlink id from a path"""
-        assert isinstance(path, bytes)
-        return hashlib.sha256(path).digest()
+        assert isinstance(path, str)
+        return hashlib.sha256(path.encode('utf-8', errors='surrogateescape')).digest()
 
 
     def hardlink_id_from_inode(self, *, ino, dev):
     def hardlink_id_from_inode(self, *, ino, dev):
         """compute a hardlink id from an inode"""
         """compute a hardlink id from an inode"""

+ 21 - 25
src/borg/helpers/manifest.py

@@ -12,7 +12,7 @@ from ..logger import create_logger
 logger = create_logger()
 logger = create_logger()
 
 
 from .datastruct import StableDict
 from .datastruct import StableDict
-from .parseformat import bin_to_hex, safe_encode, safe_decode
+from .parseformat import bin_to_hex
 from .time import parse_timestamp
 from .time import parse_timestamp
 from .. import shellpattern
 from .. import shellpattern
 from ..constants import *  # NOQA
 from ..constants import *  # NOQA
@@ -39,39 +39,35 @@ class Archives(abc.MutableMapping):
     str timestamps or datetime timestamps.
     str timestamps or datetime timestamps.
     """
     """
     def __init__(self):
     def __init__(self):
-        # key: encoded archive name, value: dict(b'id': bytes_id, b'time': bytes_iso_ts)
+        # key: str archive name, value: dict('id': bytes_id, 'time': str_iso_ts)
         self._archives = {}
         self._archives = {}
 
 
     def __len__(self):
     def __len__(self):
         return len(self._archives)
         return len(self._archives)
 
 
     def __iter__(self):
     def __iter__(self):
-        return iter(safe_decode(name) for name in self._archives)
+        return iter(self._archives)
 
 
     def __getitem__(self, name):
     def __getitem__(self, name):
         assert isinstance(name, str)
         assert isinstance(name, str)
-        _name = safe_encode(name)
-        values = self._archives.get(_name)
+        values = self._archives.get(name)
         if values is None:
         if values is None:
             raise KeyError
             raise KeyError
-        ts = parse_timestamp(values[b'time'].decode())
-        return ArchiveInfo(name=name, id=values[b'id'], ts=ts)
+        ts = parse_timestamp(values['time'])
+        return ArchiveInfo(name=name, id=values['id'], ts=ts)
 
 
     def __setitem__(self, name, info):
     def __setitem__(self, name, info):
         assert isinstance(name, str)
         assert isinstance(name, str)
-        name = safe_encode(name)
         assert isinstance(info, tuple)
         assert isinstance(info, tuple)
         id, ts = info
         id, ts = info
         assert isinstance(id, bytes)
         assert isinstance(id, bytes)
         if isinstance(ts, datetime):
         if isinstance(ts, datetime):
             ts = ts.replace(tzinfo=None).strftime(ISO_FORMAT)
             ts = ts.replace(tzinfo=None).strftime(ISO_FORMAT)
         assert isinstance(ts, str)
         assert isinstance(ts, str)
-        ts = ts.encode()
-        self._archives[name] = {b'id': id, b'time': ts}
+        self._archives[name] = {'id': id, 'time': ts}
 
 
     def __delitem__(self, name):
     def __delitem__(self, name):
         assert isinstance(name, str)
         assert isinstance(name, str)
-        name = safe_encode(name)
         del self._archives[name]
         del self._archives[name]
 
 
     def list(self, *, glob=None, match_end=r'\Z', sort_by=(), consider_checkpoints=True, first=None, last=None, reverse=False):
     def list(self, *, glob=None, match_end=r'\Z', sort_by=(), consider_checkpoints=True, first=None, last=None, reverse=False):
@@ -116,8 +112,8 @@ class Archives(abc.MutableMapping):
     def set_raw_dict(self, d):
     def set_raw_dict(self, d):
         """set the dict we get from the msgpack unpacker"""
         """set the dict we get from the msgpack unpacker"""
         for k, v in d.items():
         for k, v in d.items():
-            assert isinstance(k, bytes)
-            assert isinstance(v, dict) and b'id' in v and b'time' in v
+            assert isinstance(k, str)
+            assert isinstance(v, dict) and 'id' in v and 'time' in v
             self._archives[k] = v
             self._archives[k] = v
 
 
     def get_raw_dict(self):
     def get_raw_dict(self):
@@ -196,10 +192,10 @@ class Manifest:
         manifest.timestamp = m.get('timestamp')
         manifest.timestamp = m.get('timestamp')
         manifest.config = m.config
         manifest.config = m.config
         # valid item keys are whatever is known in the repo or every key we know
         # valid item keys are whatever is known in the repo or every key we know
-        manifest.item_keys = ITEM_KEYS | frozenset(key.decode() for key in m.get('item_keys', []))
+        manifest.item_keys = ITEM_KEYS | frozenset(m.get('item_keys', []))
 
 
         if manifest.tam_verified:
         if manifest.tam_verified:
-            manifest_required = manifest.config.get(b'tam_required', False)
+            manifest_required = manifest.config.get('tam_required', False)
             security_required = tam_required(repository)
             security_required = tam_required(repository)
             if manifest_required and not security_required:
             if manifest_required and not security_required:
                 logger.debug('Manifest is TAM verified and says TAM is required, updating security database...')
                 logger.debug('Manifest is TAM verified and says TAM is required, updating security database...')
@@ -214,32 +210,32 @@ class Manifest:
     def check_repository_compatibility(self, operations):
     def check_repository_compatibility(self, operations):
         for operation in operations:
         for operation in operations:
             assert isinstance(operation, self.Operation)
             assert isinstance(operation, self.Operation)
-            feature_flags = self.config.get(b'feature_flags', None)
+            feature_flags = self.config.get('feature_flags', None)
             if feature_flags is None:
             if feature_flags is None:
                 return
                 return
-            if operation.value.encode() not in feature_flags:
+            if operation.value not in feature_flags:
                 continue
                 continue
-            requirements = feature_flags[operation.value.encode()]
-            if b'mandatory' in requirements:
-                unsupported = set(requirements[b'mandatory']) - self.SUPPORTED_REPO_FEATURES
+            requirements = feature_flags[operation.value]
+            if 'mandatory' in requirements:
+                unsupported = set(requirements['mandatory']) - self.SUPPORTED_REPO_FEATURES
                 if unsupported:
                 if unsupported:
-                    raise MandatoryFeatureUnsupported([f.decode() for f in unsupported])
+                    raise MandatoryFeatureUnsupported(list(unsupported))
 
 
     def get_all_mandatory_features(self):
     def get_all_mandatory_features(self):
         result = {}
         result = {}
-        feature_flags = self.config.get(b'feature_flags', None)
+        feature_flags = self.config.get('feature_flags', None)
         if feature_flags is None:
         if feature_flags is None:
             return result
             return result
 
 
         for operation, requirements in feature_flags.items():
         for operation, requirements in feature_flags.items():
-            if b'mandatory' in requirements:
-                result[operation.decode()] = {feature.decode() for feature in requirements[b'mandatory']}
+            if 'mandatory' in requirements:
+                result[operation] = set(requirements['mandatory'])
         return result
         return result
 
 
     def write(self):
     def write(self):
         from ..item import ManifestItem
         from ..item import ManifestItem
         if self.key.tam_required:
         if self.key.tam_required:
-            self.config[b'tam_required'] = True
+            self.config['tam_required'] = True
         # self.timestamp needs to be strictly monotonically increasing. Clocks often are not set correctly
         # self.timestamp needs to be strictly monotonically increasing. Clocks often are not set correctly
         if self.timestamp is None:
         if self.timestamp is None:
             self.timestamp = datetime.utcnow().strftime(ISO_FORMAT)
             self.timestamp = datetime.utcnow().strftime(ISO_FORMAT)

+ 71 - 52
src/borg/helpers/msgpack.py

@@ -1,21 +1,53 @@
+"""
+wrapping msgpack
+================
+
+We wrap msgpack here the way we need it - to avoid having lots of clutter in the calling code.
+
+Packing
+-------
+- use_bin_type = True (used by borg since borg 2.0)
+  This is used to generate output according to new msgpack 2.0 spec.
+  This cleanly keeps bytes and str types apart.
+
+- use_bin_type = False (used by borg < 1.3)
+  This creates output according to the older msgpack spec.
+  BAD: str and bytes were packed into same "raw" representation.
+
+- unicode_errors = 'surrogateescape'
+  Guess backup applications are one of the rare cases when this needs to be used.
+  It is needed because borg also needs to deal with data that does not cleanly encode/decode using utf-8.
+  There's a lot of crap out there, e.g. in filenames and as a backup tool, we must keep them as good as possible.
+
+Unpacking
+---------
+- raw = False (used by borg since borg 2.0)
+  We already can use this with borg 2.0 due to the type conversion to the desired type in item.py update_internal
+  methods. This type conversion code can be removed in future, when we do not have to deal with data any more
+  that was packed the old way.
+  It will then unpack according to the msgpack 2.0 spec format and directly output bytes or str.
+
+- raw = True (the old way, used by borg < 1.3)
+
+- unicode_errors = 'surrogateescape' -> see description above (will be used when raw is False).
+
+As of borg 2.0, we have fixed most of the msgpack str/bytes mess, #968.
+Borg now still needs to **read** old repos, archives, keys, ... so we can not yet fix it completely.
+But from now on, borg only **writes** new data according to the new msgpack 2.0 spec,
+thus we can remove some legacy support in a later borg release (some places are marked with "legacy").
+
+current way in msgpack terms
+----------------------------
+
+- pack with use_bin_type=True (according to msgpack 2.0 spec)
+- packs str -> raw and bytes -> bin
+- unpack with raw=False (according to msgpack 2.0 spec, using unicode_errors='surrogateescape')
+- unpacks bin to bytes and raw to str (thus we need to convert to desired type if we want bytes from "raw")
+"""
+
 from .datastruct import StableDict
 from .datastruct import StableDict
 from ..constants import *  # NOQA
 from ..constants import *  # NOQA
 
 
-# wrapping msgpack ---------------------------------------------------------------------------------------------------
-#
-# due to the planned breaking api changes in upstream msgpack, we wrap it the way we need it -
-# to avoid having lots of clutter in the calling code. see tickets #968 and #3632.
-#
-# Packing
-# -------
-# use_bin_type = False is needed to generate the old msgpack format (not msgpack 2.0 spec) as borg always did.
-# unicode_errors = None is needed because usage of it is deprecated
-#
-# Unpacking
-# ---------
-# raw = True is needed to unpack the old msgpack format to bytes (not str, about the decoding see item.pyx).
-# unicode_errors = None is needed because usage of it is deprecated
-
 from msgpack import Packer as mp_Packer
 from msgpack import Packer as mp_Packer
 from msgpack import packb as mp_packb
 from msgpack import packb as mp_packb
 from msgpack import pack as mp_pack
 from msgpack import pack as mp_pack
@@ -30,6 +62,10 @@ from msgpack import OutOfData
 
 
 version = mp_version
 version = mp_version
 
 
+USE_BIN_TYPE = True
+RAW = False
+UNICODE_ERRORS = 'surrogateescape'
+
 
 
 class PackException(Exception):
 class PackException(Exception):
     """Exception while msgpack packing"""
     """Exception while msgpack packing"""
@@ -40,10 +76,10 @@ class UnpackException(Exception):
 
 
 
 
 class Packer(mp_Packer):
 class Packer(mp_Packer):
-    def __init__(self, *, default=None, unicode_errors=None,
-                 use_single_float=False, autoreset=True, use_bin_type=False,
+    def __init__(self, *, default=None, unicode_errors=UNICODE_ERRORS,
+                 use_single_float=False, autoreset=True, use_bin_type=USE_BIN_TYPE,
                  strict_types=False):
                  strict_types=False):
-        assert unicode_errors is None
+        assert unicode_errors == UNICODE_ERRORS
         super().__init__(default=default, unicode_errors=unicode_errors,
         super().__init__(default=default, unicode_errors=unicode_errors,
                          use_single_float=use_single_float, autoreset=autoreset, use_bin_type=use_bin_type,
                          use_single_float=use_single_float, autoreset=autoreset, use_bin_type=use_bin_type,
                          strict_types=strict_types)
                          strict_types=strict_types)
@@ -55,16 +91,16 @@ class Packer(mp_Packer):
             raise PackException(e)
             raise PackException(e)
 
 
 
 
-def packb(o, *, use_bin_type=False, unicode_errors=None, **kwargs):
-    assert unicode_errors is None
+def packb(o, *, use_bin_type=USE_BIN_TYPE, unicode_errors=UNICODE_ERRORS, **kwargs):
+    assert unicode_errors == UNICODE_ERRORS
     try:
     try:
         return mp_packb(o, use_bin_type=use_bin_type, unicode_errors=unicode_errors, **kwargs)
         return mp_packb(o, use_bin_type=use_bin_type, unicode_errors=unicode_errors, **kwargs)
     except Exception as e:
     except Exception as e:
         raise PackException(e)
         raise PackException(e)
 
 
 
 
-def pack(o, stream, *, use_bin_type=False, unicode_errors=None, **kwargs):
-    assert unicode_errors is None
+def pack(o, stream, *, use_bin_type=USE_BIN_TYPE, unicode_errors=UNICODE_ERRORS, **kwargs):
+    assert unicode_errors == UNICODE_ERRORS
     try:
     try:
         return mp_pack(o, stream, use_bin_type=use_bin_type, unicode_errors=unicode_errors, **kwargs)
         return mp_pack(o, stream, use_bin_type=use_bin_type, unicode_errors=unicode_errors, **kwargs)
     except Exception as e:
     except Exception as e:
@@ -72,13 +108,13 @@ def pack(o, stream, *, use_bin_type=False, unicode_errors=None, **kwargs):
 
 
 
 
 class Unpacker(mp_Unpacker):
 class Unpacker(mp_Unpacker):
-    def __init__(self, file_like=None, *, read_size=0, use_list=True, raw=True,
+    def __init__(self, file_like=None, *, read_size=0, use_list=True, raw=RAW,
                  object_hook=None, object_pairs_hook=None, list_hook=None,
                  object_hook=None, object_pairs_hook=None, list_hook=None,
-                 unicode_errors=None, max_buffer_size=0,
+                 unicode_errors=UNICODE_ERRORS, max_buffer_size=0,
                  ext_hook=ExtType,
                  ext_hook=ExtType,
                  strict_map_key=False):
                  strict_map_key=False):
-        assert raw is True
-        assert unicode_errors is None
+        assert raw == RAW
+        assert unicode_errors == UNICODE_ERRORS
         kw = dict(file_like=file_like, read_size=read_size, use_list=use_list, raw=raw,
         kw = dict(file_like=file_like, read_size=read_size, use_list=use_list, raw=raw,
                   object_hook=object_hook, object_pairs_hook=object_pairs_hook, list_hook=list_hook,
                   object_hook=object_hook, object_pairs_hook=object_pairs_hook, list_hook=list_hook,
                   unicode_errors=unicode_errors, max_buffer_size=max_buffer_size,
                   unicode_errors=unicode_errors, max_buffer_size=max_buffer_size,
@@ -105,10 +141,11 @@ class Unpacker(mp_Unpacker):
     next = __next__
     next = __next__
 
 
 
 
-def unpackb(packed, *, raw=True, unicode_errors=None,
+def unpackb(packed, *, raw=RAW, unicode_errors=UNICODE_ERRORS,
             strict_map_key=False,
             strict_map_key=False,
             **kwargs):
             **kwargs):
-    assert unicode_errors is None
+    assert raw == RAW
+    assert unicode_errors == UNICODE_ERRORS
     try:
     try:
         kw = dict(raw=raw, unicode_errors=unicode_errors,
         kw = dict(raw=raw, unicode_errors=unicode_errors,
                   strict_map_key=strict_map_key)
                   strict_map_key=strict_map_key)
@@ -118,10 +155,11 @@ def unpackb(packed, *, raw=True, unicode_errors=None,
         raise UnpackException(e)
         raise UnpackException(e)
 
 
 
 
-def unpack(stream, *, raw=True, unicode_errors=None,
+def unpack(stream, *, raw=RAW, unicode_errors=UNICODE_ERRORS,
            strict_map_key=False,
            strict_map_key=False,
            **kwargs):
            **kwargs):
-    assert unicode_errors is None
+    assert raw == RAW
+    assert unicode_errors == UNICODE_ERRORS
     try:
     try:
         kw = dict(raw=raw, unicode_errors=unicode_errors,
         kw = dict(raw=raw, unicode_errors=unicode_errors,
                   strict_map_key=strict_map_key)
                   strict_map_key=strict_map_key)
@@ -164,30 +202,11 @@ def get_limited_unpacker(kind):
     return Unpacker(**args)
     return Unpacker(**args)
 
 
 
 
-def bigint_to_int(mtime):  # legacy
-    """Convert bytearray to int
-    """
-    if isinstance(mtime, bytes):
-        return int.from_bytes(mtime, 'little', signed=True)
-    return mtime
-
-
-def int_to_bigint(value):  # legacy
-    """Convert integers larger than 64 bits to bytearray
-
-    Smaller integers are left alone
-    """
-    if value.bit_length() > 63:
-        return value.to_bytes((value.bit_length() + 9) // 8, 'little', signed=True)
-    return value
-
-
 def int_to_timestamp(ns):
 def int_to_timestamp(ns):
+    assert isinstance(ns, int)
     return Timestamp.from_unix_nano(ns)
     return Timestamp.from_unix_nano(ns)
 
 
 
 
 def timestamp_to_int(ts):
 def timestamp_to_int(ts):
-    if isinstance(ts, Timestamp):
-        return ts.to_unix_nano()
-    # legacy support note: we need to keep the bigint conversion for compatibility with borg < 1.3 archives.
-    return bigint_to_int(ts)
+    assert isinstance(ts, Timestamp)
+    return ts.to_unix_nano()

+ 2 - 2
src/borg/helpers/parseformat.py

@@ -808,8 +808,8 @@ class ItemFormatter(BaseFormatter):
         hlid = bin_to_hex(hlid) if hlid else ''
         hlid = bin_to_hex(hlid) if hlid else ''
         item_data['type'] = item_type
         item_data['type'] = item_type
         item_data['mode'] = mode
         item_data['mode'] = mode
-        item_data['user'] = item.user or item.uid
-        item_data['group'] = item.group or item.gid
+        item_data['user'] = item.get('user', str(item.uid))
+        item_data['group'] = item.get('group', str(item.gid))
         item_data['uid'] = item.uid
         item_data['uid'] = item.uid
         item_data['gid'] = item.gid
         item_data['gid'] = item.gid
         item_data['path'] = remove_surrogates(item.path)
         item_data['path'] = remove_surrogates(item.path)

+ 235 - 66
src/borg/item.pyx

@@ -2,10 +2,9 @@ import stat
 from collections import namedtuple
 from collections import namedtuple
 
 
 from .constants import ITEM_KEYS, ARCHIVE_KEYS
 from .constants import ITEM_KEYS, ARCHIVE_KEYS
-from .helpers import safe_encode, safe_decode
 from .helpers import StableDict
 from .helpers import StableDict
 from .helpers import format_file_size
 from .helpers import format_file_size
-from .helpers.msgpack import timestamp_to_int, int_to_timestamp
+from .helpers.msgpack import timestamp_to_int, int_to_timestamp, Timestamp
 
 
 
 
 cdef extern from "_item.c":
 cdef extern from "_item.c":
@@ -16,6 +15,102 @@ cdef extern from "_item.c":
 API_VERSION = '1.2_01'
 API_VERSION = '1.2_01'
 
 
 
 
+def fix_key(data, key, *, errors='strict'):
+    """if k is a bytes-typed key, migrate key/value to a str-typed key in dict data"""
+    if isinstance(key, bytes):
+        value = data.pop(key)
+        key = key.decode('utf-8', errors=errors)
+        data[key] = value
+    assert isinstance(key, str)
+    return key
+
+
+def fix_str_value(data, key, errors='surrogateescape'):
+    """makes sure that data[key] is a str (decode if it is bytes)"""
+    assert isinstance(key, str)  # fix_key must be called first
+    value = data[key]
+    value = want_str(value, errors=errors)
+    data[key] = value
+    return value
+
+
+def fix_bytes_value(data, key):
+    """makes sure that data[key] is bytes (encode if it is str)"""
+    assert isinstance(key, str)  # fix_key must be called first
+    value = data[key]
+    value = want_bytes(value)
+    data[key] = value
+    return value
+
+
+def fix_list_of_str(v):
+    """make sure we have a list of str"""
+    assert isinstance(v, (tuple, list))
+    return [want_str(e) for e in v]
+
+
+def fix_list_of_bytes(v):
+    """make sure we have a list of bytes"""
+    assert isinstance(v, (tuple, list))
+    return [want_bytes(e) for e in v]
+
+
+def fix_list_of_chunkentries(v):
+    """make sure we have a list of correct chunkentries"""
+    assert isinstance(v, (tuple, list))
+    chunks = []
+    for ce in v:
+        assert isinstance(ce, (tuple, list))
+        assert len(ce) == 3  # id, size, csize
+        assert isinstance(ce[1], int)
+        assert isinstance(ce[2], int)
+        ce_fixed = [want_bytes(ce[0]), ce[1], ce[2]]  # list!
+        chunks.append(ce_fixed)  # create a list of lists
+    return chunks
+
+
+def fix_tuple_of_str(v):
+    """make sure we have a tuple of str"""
+    assert isinstance(v, (tuple, list))
+    return tuple(want_str(e) for e in v)
+
+
+def fix_tuple_of_str_and_int(v):
+    """make sure we have a tuple of str"""
+    assert isinstance(v, (tuple, list))
+    t = tuple(e.decode() if isinstance(e, bytes) else e for e in v)
+    assert all(isinstance(e, (str, int)) for e in t), repr(t)
+    return t
+
+
+def fix_timestamp(v):
+    """make sure v is a Timestamp"""
+    if isinstance(v, Timestamp):
+        return v
+    # legacy support
+    if isinstance(v, bytes):  # was: bigint_to_int()
+        v = int.from_bytes(v, 'little', signed=True)
+    assert isinstance(v, int)
+    return int_to_timestamp(v)
+
+
+def want_bytes(v, *, errors='surrogateescape'):
+    """we know that we want bytes and the value should be bytes"""
+    # legacy support: it being str can be caused by msgpack unpack decoding old data that was packed with use_bin_type=False
+    if isinstance(v, str):
+        v = v.encode('utf-8', errors=errors)
+    assert isinstance(v, bytes), f'not a bytes object, but {v!r}'
+    return v
+
+
+def want_str(v, *, errors='surrogateescape'):
+    """we know that we want str and the value should be str"""
+    if isinstance(v, bytes):
+        v = v.decode('utf-8', errors=errors)
+    assert isinstance(v, str), f'not a str object, but {v!r}'
+    return v
+
+
 class PropDict:
 class PropDict:
     """
     """
     Manage a dictionary via properties.
     Manage a dictionary via properties.
@@ -112,6 +207,8 @@ class PropDict:
                 raise AttributeError(attr_error_msg) from None
                 raise AttributeError(attr_error_msg) from None
             if decode is not None:
             if decode is not None:
                 value = decode(value)
                 value = decode(value)
+            if not isinstance(value, value_type):
+                raise TypeError(type_error_msg)
             return value
             return value
 
 
         def _set(self, value):
         def _set(self, value):
@@ -139,14 +236,9 @@ class Item(PropDict):
     Items are created either from msgpack unpacker output, from another dict, from kwargs or
     Items are created either from msgpack unpacker output, from another dict, from kwargs or
     built step-by-step by setting attributes.
     built step-by-step by setting attributes.
 
 
-    msgpack gives us a dict with bytes-typed keys, just give it to Item(internal_dict=d) and use item.key_name later.
-    msgpack gives us byte-typed values for stuff that should be str, we automatically decode when getting
-    such a property and encode when setting it.
+    msgpack unpacker gives us a dict, just give it to Item(internal_dict=d) and use item.key_name later.
 
 
     If an Item shall be serialized, give as_dict() method output to msgpack packer.
     If an Item shall be serialized, give as_dict() method output to msgpack packer.
-
-    A bug in Attic up to and including release 0.13 added a (meaningless) 'acl' key to every item.
-    We must never re-use this key. See test_attic013_acl_bug for details.
     """
     """
 
 
     VALID_KEYS = ITEM_KEYS | {'deleted', 'nlink', }  # str-typed keys
     VALID_KEYS = ITEM_KEYS | {'deleted', 'nlink', }  # str-typed keys
@@ -155,10 +247,10 @@ class Item(PropDict):
 
 
     # properties statically defined, so that IDEs can know their names:
     # properties statically defined, so that IDEs can know their names:
 
 
-    path = PropDict._make_property('path', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
-    source = PropDict._make_property('source', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
-    user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode)
-    group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode)
+    path = PropDict._make_property('path', str, 'surrogate-escaped str')
+    source = PropDict._make_property('source', str, 'surrogate-escaped str')
+    user = PropDict._make_property('user', str, 'surrogate-escaped str')
+    group = PropDict._make_property('group', str, 'surrogate-escaped str')
 
 
     acl_access = PropDict._make_property('acl_access', bytes)
     acl_access = PropDict._make_property('acl_access', bytes)
     acl_default = PropDict._make_property('acl_default', bytes)
     acl_default = PropDict._make_property('acl_default', bytes)
@@ -177,14 +269,13 @@ class Item(PropDict):
     birthtime = PropDict._make_property('birthtime', int, 'int (ns)', encode=int_to_timestamp, decode=timestamp_to_int)
     birthtime = PropDict._make_property('birthtime', int, 'int (ns)', encode=int_to_timestamp, decode=timestamp_to_int)
 
 
     # size is only present for items with a chunk list and then it is sum(chunk_sizes)
     # size is only present for items with a chunk list and then it is sum(chunk_sizes)
-    # compatibility note: this is a new feature, in old archives size will be missing.
     size = PropDict._make_property('size', int)
     size = PropDict._make_property('size', int)
 
 
     hlid = PropDict._make_property('hlid', bytes)  # hard link id: same value means same hard link.
     hlid = PropDict._make_property('hlid', bytes)  # hard link id: same value means same hard link.
     hardlink_master = PropDict._make_property('hardlink_master', bool)  # legacy
     hardlink_master = PropDict._make_property('hardlink_master', bool)  # legacy
 
 
-    chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None')
-    chunks_healthy = PropDict._make_property('chunks_healthy', (list, type(None)), 'list or None')
+    chunks = PropDict._make_property('chunks', list, 'list')
+    chunks_healthy = PropDict._make_property('chunks_healthy', list, 'list')
 
 
     xattrs = PropDict._make_property('xattrs', StableDict)
     xattrs = PropDict._make_property('xattrs', StableDict)
 
 
@@ -193,12 +284,10 @@ class Item(PropDict):
 
 
     part = PropDict._make_property('part', int)
     part = PropDict._make_property('part', int)
 
 
-    def get_size(self, hardlink_masters=None, memorize=False, compressed=False, from_chunks=False, consider_ids=None):
+    def get_size(self, memorize=False, compressed=False, from_chunks=False, consider_ids=None):
         """
         """
         Determine the (uncompressed or compressed) size of this item.
         Determine the (uncompressed or compressed) size of this item.
 
 
-        :param hardlink_masters: If given, the size of hardlink slaves is computed via the hardlink master's chunk list,
-        otherwise size will be returned as 0.
         :param memorize: Whether the computed size value will be stored into the item.
         :param memorize: Whether the computed size value will be stored into the item.
         :param compressed: Whether the compressed or uncompressed size will be returned.
         :param compressed: Whether the compressed or uncompressed size will be returned.
         :param from_chunks: If true, size is computed from chunks even if a precomputed value is available.
         :param from_chunks: If true, size is computed from chunks even if a precomputed value is available.
@@ -218,31 +307,14 @@ class Item(PropDict):
             # no precomputed (c)size value available, compute it:
             # no precomputed (c)size value available, compute it:
             try:
             try:
                 chunks = getattr(self, 'chunks')
                 chunks = getattr(self, 'chunks')
-                having_chunks = True
             except AttributeError:
             except AttributeError:
-                having_chunks = False
-                # this item has no (own) chunks list, but if this is a hardlink slave
-                # and we know the master, we can still compute the size.
-                if hardlink_masters is None:
-                    chunks = None
-                else:
-                    try:
-                        master = getattr(self, 'source')
-                    except AttributeError:
-                        # not a hardlink slave, likely a directory or special file w/o chunks
-                        chunks = None
-                    else:
-                        # hardlink slave, try to fetch hardlink master's chunks list
-                        # todo: put precomputed size into hardlink_masters' values and use it, if present
-                        chunks, _ = hardlink_masters.get(master, (None, None))
-                if chunks is None:
-                    return 0
+                return 0
             if consider_ids is not None:
             if consider_ids is not None:
                 size = sum(getattr(ChunkListEntry(*chunk), attr) for chunk in chunks if chunk.id in consider_ids)
                 size = sum(getattr(ChunkListEntry(*chunk), attr) for chunk in chunks if chunk.id in consider_ids)
             else:
             else:
                 size = sum(getattr(ChunkListEntry(*chunk), attr) for chunk in chunks)
                 size = sum(getattr(ChunkListEntry(*chunk), attr) for chunk in chunks)
             # if requested, memorize the precomputed (c)size for items that have an own chunks list:
             # if requested, memorize the precomputed (c)size for items that have an own chunks list:
-            if memorize and having_chunks:
+            if memorize:
                 setattr(self, attr, size)
                 setattr(self, attr, size)
         return size
         return size
 
 
@@ -290,6 +362,31 @@ class Item(PropDict):
         except AttributeError:
         except AttributeError:
             return False
             return False
 
 
+    def update_internal(self, d):
+        # legacy support for migration (data from old msgpacks comes in as bytes always, but sometimes we want str),
+        # also need to fix old timestamp data types.
+        for k, v in list(d.items()):
+            k = fix_key(d, k)
+            if k in ('path', 'source', 'user', 'group'):
+                v = fix_str_value(d, k)
+            if k in ('chunks', 'chunks_healthy'):
+                v = fix_list_of_chunkentries(v)
+            if k in ('atime', 'ctime', 'mtime', 'birthtime'):
+                v = fix_timestamp(v)
+            if k in ('acl_access', 'acl_default', 'acl_extended', 'acl_nfs4'):
+                v = fix_bytes_value(d, k)
+            if k == 'xattrs':
+                if not isinstance(v, StableDict):
+                    v = StableDict(v)
+                v_new = StableDict()
+                for xk, xv in list(v.items()):
+                    xk = want_bytes(xk)
+                    # old borg used to store None instead of a b'' value
+                    xv = b'' if xv is None else want_bytes(xv)
+                    v_new[xk] = xv
+                v = v_new  # xattrs is a StableDict(bytes keys -> bytes values)
+            self._dict[k] = v
+
 
 
 class EncryptedKey(PropDict):
 class EncryptedKey(PropDict):
     """
     """
@@ -298,18 +395,18 @@ class EncryptedKey(PropDict):
     A EncryptedKey is created either from msgpack unpacker output, from another dict, from kwargs or
     A EncryptedKey is created either from msgpack unpacker output, from another dict, from kwargs or
     built step-by-step by setting attributes.
     built step-by-step by setting attributes.
 
 
-    msgpack gives us a dict with bytes-typed keys, just give it to EncryptedKey(d) and use enc_key.xxx later.
+    msgpack unpacker gives us a dict, just give it to EncryptedKey(d) and use enc_key.xxx later.
 
 
     If a EncryptedKey shall be serialized, give as_dict() method output to msgpack packer.
     If a EncryptedKey shall be serialized, give as_dict() method output to msgpack packer.
     """
     """
 
 
-    VALID_KEYS = { 'version', 'algorithm', 'iterations', 'salt', 'hash', 'data',
-                   'argon2_time_cost', 'argon2_memory_cost', 'argon2_parallelism', 'argon2_type' }
+    VALID_KEYS = {'version', 'algorithm', 'iterations', 'salt', 'hash', 'data',
+                  'argon2_time_cost', 'argon2_memory_cost', 'argon2_parallelism', 'argon2_type'}
 
 
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
 
 
     version = PropDict._make_property('version', int)
     version = PropDict._make_property('version', int)
-    algorithm = PropDict._make_property('algorithm', str, encode=str.encode, decode=bytes.decode)
+    algorithm = PropDict._make_property('algorithm', str)
     iterations = PropDict._make_property('iterations', int)
     iterations = PropDict._make_property('iterations', int)
     salt = PropDict._make_property('salt', bytes)
     salt = PropDict._make_property('salt', bytes)
     hash = PropDict._make_property('hash', bytes)
     hash = PropDict._make_property('hash', bytes)
@@ -317,7 +414,19 @@ class EncryptedKey(PropDict):
     argon2_time_cost = PropDict._make_property('argon2_time_cost', int)
     argon2_time_cost = PropDict._make_property('argon2_time_cost', int)
     argon2_memory_cost = PropDict._make_property('argon2_memory_cost', int)
     argon2_memory_cost = PropDict._make_property('argon2_memory_cost', int)
     argon2_parallelism = PropDict._make_property('argon2_parallelism', int)
     argon2_parallelism = PropDict._make_property('argon2_parallelism', int)
-    argon2_type = PropDict._make_property('argon2_type', str, encode=str.encode, decode=bytes.decode)
+    argon2_type = PropDict._make_property('argon2_type', str)
+
+    def update_internal(self, d):
+        # legacy support for migration (data from old msgpacks comes in as bytes always, but sometimes we want str)
+        for k, v in list(d.items()):
+            k = fix_key(d, k)
+            if k == 'version':
+                assert isinstance(v, int)
+            if k in ('algorithm', 'argon2_type'):
+                v = fix_str_value(d, k)
+            if k in ('salt', 'hash', 'data'):
+                v = fix_bytes_value(d, k)
+            self._dict[k] = v
 
 
 
 
 class Key(PropDict):
 class Key(PropDict):
@@ -327,7 +436,7 @@ class Key(PropDict):
     A Key is created either from msgpack unpacker output, from another dict, from kwargs or
     A Key is created either from msgpack unpacker output, from another dict, from kwargs or
     built step-by-step by setting attributes.
     built step-by-step by setting attributes.
 
 
-    msgpack gives us a dict with bytes-typed keys, just give it to Key(d) and use key.xxx later.
+    msgpack unpacker gives us a dict, just give it to Key(d) and use key.xxx later.
 
 
     If a Key shall be serialized, give as_dict() method output to msgpack packer.
     If a Key shall be serialized, give as_dict() method output to msgpack packer.
     """
     """
@@ -344,17 +453,15 @@ class Key(PropDict):
     chunk_seed = PropDict._make_property('chunk_seed', int)
     chunk_seed = PropDict._make_property('chunk_seed', int)
     tam_required = PropDict._make_property('tam_required', bool)
     tam_required = PropDict._make_property('tam_required', bool)
 
 
-
-def tuple_encode(t):
-    """encode a tuple that might contain str items"""
-    # we have str, but want to give bytes to msgpack.pack
-    return tuple(safe_encode(e) if isinstance(e, str) else e for e in t)
-
-
-def tuple_decode(t):
-    """decode a tuple that might contain bytes items"""
-    # we get bytes objects from msgpack.unpack, but want str
-    return tuple(safe_decode(e) if isinstance(e, bytes) else e for e in t)
+    def update_internal(self, d):
+        # legacy support for migration (data from old msgpacks comes in as bytes always, but sometimes we want str)
+        for k, v in list(d.items()):
+            k = fix_key(d, k)
+            if k == 'version':
+                assert isinstance(v, int)
+            if k in ('repository_id', 'enc_key', 'enc_hmac_key', 'id_key'):
+                v = fix_bytes_value(d, k)
+            self._dict[k] = v
 
 
 
 
 class ArchiveItem(PropDict):
 class ArchiveItem(PropDict):
@@ -364,7 +471,7 @@ class ArchiveItem(PropDict):
     An ArchiveItem is created either from msgpack unpacker output, from another dict, from kwargs or
     An ArchiveItem is created either from msgpack unpacker output, from another dict, from kwargs or
     built step-by-step by setting attributes.
     built step-by-step by setting attributes.
 
 
-    msgpack gives us a dict with bytes-typed keys, just give it to ArchiveItem(d) and use arch.xxx later.
+    msgpack unpacker gives us a dict, just give it to ArchiveItem(d) and use arch.xxx later.
 
 
     If a ArchiveItem shall be serialized, give as_dict() method output to msgpack packer.
     If a ArchiveItem shall be serialized, give as_dict() method output to msgpack packer.
     """
     """
@@ -374,15 +481,15 @@ class ArchiveItem(PropDict):
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
 
 
     version = PropDict._make_property('version', int)
     version = PropDict._make_property('version', int)
-    name = PropDict._make_property('name', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
+    name = PropDict._make_property('name', str, 'surrogate-escaped str')
     items = PropDict._make_property('items', list)
     items = PropDict._make_property('items', list)
     cmdline = PropDict._make_property('cmdline', list)  # list of s-e-str
     cmdline = PropDict._make_property('cmdline', list)  # list of s-e-str
-    hostname = PropDict._make_property('hostname', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
-    username = PropDict._make_property('username', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
-    time = PropDict._make_property('time', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
-    time_end = PropDict._make_property('time_end', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
-    comment = PropDict._make_property('comment', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
-    chunker_params = PropDict._make_property('chunker_params', tuple, 'chunker-params tuple', encode=tuple_encode, decode=tuple_decode)
+    hostname = PropDict._make_property('hostname', str, 'surrogate-escaped str')
+    username = PropDict._make_property('username', str, 'surrogate-escaped str')
+    time = PropDict._make_property('time', str)
+    time_end = PropDict._make_property('time_end', str)
+    comment = PropDict._make_property('comment', str, 'surrogate-escaped str')
+    chunker_params = PropDict._make_property('chunker_params', tuple)
     recreate_cmdline = PropDict._make_property('recreate_cmdline', list)  # list of s-e-str
     recreate_cmdline = PropDict._make_property('recreate_cmdline', list)  # list of s-e-str
     # recreate_source_id, recreate_args, recreate_partial_chunks were used in 1.1.0b1 .. b2
     # recreate_source_id, recreate_args, recreate_partial_chunks were used in 1.1.0b1 .. b2
     recreate_source_id = PropDict._make_property('recreate_source_id', bytes)
     recreate_source_id = PropDict._make_property('recreate_source_id', bytes)
@@ -395,6 +502,24 @@ class ArchiveItem(PropDict):
     csize_parts = PropDict._make_property('csize_parts', int)
     csize_parts = PropDict._make_property('csize_parts', int)
     nfiles_parts = PropDict._make_property('nfiles_parts', int)
     nfiles_parts = PropDict._make_property('nfiles_parts', int)
 
 
+    def update_internal(self, d):
+        # legacy support for migration (data from old msgpacks comes in as bytes always, but sometimes we want str)
+        for k, v in list(d.items()):
+            k = fix_key(d, k)
+            if k == 'version':
+                assert isinstance(v, int)
+            if k in ('name', 'hostname', 'username', 'comment'):
+                v = fix_str_value(d, k)
+            if k in ('time', 'time_end'):
+                v = fix_str_value(d, k, 'replace')
+            if k == 'chunker_params':
+                v = fix_tuple_of_str_and_int(v)
+            if k in ('cmdline', 'recreate_cmdline'):
+                v = fix_list_of_str(v)
+            if k == 'items':
+                v = fix_list_of_bytes(v)
+            self._dict[k] = v
+
 
 
 class ManifestItem(PropDict):
 class ManifestItem(PropDict):
     """
     """
@@ -403,7 +528,7 @@ class ManifestItem(PropDict):
     A ManifestItem is created either from msgpack unpacker output, from another dict, from kwargs or
     A ManifestItem is created either from msgpack unpacker output, from another dict, from kwargs or
     built step-by-step by setting attributes.
     built step-by-step by setting attributes.
 
 
-    msgpack gives us a dict with bytes-typed keys, just give it to ManifestItem(d) and use manifest.xxx later.
+    msgpack unpacker gives us a dict, just give it to ManifestItem(d) and use manifest.xxx later.
 
 
     If a ManifestItem shall be serialized, give as_dict() method output to msgpack packer.
     If a ManifestItem shall be serialized, give as_dict() method output to msgpack packer.
     """
     """
@@ -413,10 +538,54 @@ class ManifestItem(PropDict):
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
 
 
     version = PropDict._make_property('version', int)
     version = PropDict._make_property('version', int)
-    archives = PropDict._make_property('archives', dict)  # name -> dict
-    timestamp = PropDict._make_property('timestamp', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
+    archives = PropDict._make_property('archives', dict, 'dict of str -> dict')  # name -> dict
+    timestamp = PropDict._make_property('timestamp', str)
     config = PropDict._make_property('config', dict)
     config = PropDict._make_property('config', dict)
-    item_keys = PropDict._make_property('item_keys', tuple)
+    item_keys = PropDict._make_property('item_keys', tuple, 'tuple of str')
+
+    def update_internal(self, d):
+        # legacy support for migration (data from old msgpacks comes in as bytes always, but sometimes we want str)
+        for k, v in list(d.items()):
+            k = fix_key(d, k)
+            if k == 'version':
+                assert isinstance(v, int)
+            if k == 'archives':
+                ad = v
+                assert isinstance(ad, dict)
+                for ak, av in list(ad.items()):
+                    ak = fix_key(ad, ak, errors='surrogateescape')
+                    assert isinstance(av, dict)
+                    for ik, iv in list(av.items()):
+                        ik = fix_key(av, ik)
+                        if ik == 'id':
+                            fix_bytes_value(av, 'id')
+                        if ik == 'time':
+                            fix_str_value(av, 'time')
+                    assert set(av) == {'id', 'time'}
+            if k == 'timestamp':
+                v = fix_str_value(d, k, 'replace')
+            if k == 'config':
+                cd = v
+                assert isinstance(cd, dict)
+                for ck, cv in list(cd.items()):
+                    ck = fix_key(cd, ck)
+                    if ck == 'tam_required':
+                        assert isinstance(cv, bool)
+                    if ck == 'feature_flags':
+                        assert isinstance(cv, dict)
+                        ops = {'read', 'check', 'write', 'delete'}
+                        for op, specs in list(cv.items()):
+                            op = fix_key(cv, op)
+                            assert op in ops
+                            for speck, specv in list(specs.items()):
+                                speck = fix_key(specs, speck)
+                                if speck == 'mandatory':
+                                    specs[speck] = fix_tuple_of_str(specv)
+                        assert set(cv).issubset(ops)
+            if k == 'item_keys':
+                v = fix_tuple_of_str(v)
+            self._dict[k] = v
+
 
 
 class ItemDiff:
 class ItemDiff:
     """
     """

+ 43 - 49
src/borg/remote.py

@@ -38,7 +38,7 @@ logger = create_logger(__name__)
 
 
 RPC_PROTOCOL_VERSION = 2
 RPC_PROTOCOL_VERSION = 2
 BORG_VERSION = parse_version(__version__)
 BORG_VERSION = parse_version(__version__)
-MSGID, MSG, ARGS, RESULT = b'i', b'm', b'a', b'r'
+MSGID, MSG, ARGS, RESULT = 'i', 'm', 'a', 'r'
 
 
 MAX_INFLIGHT = 100
 MAX_INFLIGHT = 100
 
 
@@ -138,10 +138,6 @@ compatMap = {
 }
 }
 
 
 
 
-def decode_keys(d):
-    return {k.decode(): d[k] for k in d}
-
-
 class RepositoryServer:  # pragma: no cover
 class RepositoryServer:  # pragma: no cover
     rpc_methods = (
     rpc_methods = (
         '__len__',
         '__len__',
@@ -217,13 +213,12 @@ class RepositoryServer:  # pragma: no cover
                     if isinstance(unpacked, dict):
                     if isinstance(unpacked, dict):
                         dictFormat = True
                         dictFormat = True
                         msgid = unpacked[MSGID]
                         msgid = unpacked[MSGID]
-                        method = unpacked[MSG].decode()
-                        args = decode_keys(unpacked[ARGS])
+                        method = unpacked[MSG]
+                        args = unpacked[ARGS]
                     elif isinstance(unpacked, tuple) and len(unpacked) == 4:
                     elif isinstance(unpacked, tuple) and len(unpacked) == 4:
                         dictFormat = False
                         dictFormat = False
                         # The first field 'type' was always 1 and has always been ignored
                         # The first field 'type' was always 1 and has always been ignored
                         _, msgid, method, args = unpacked
                         _, msgid, method, args = unpacked
-                        method = method.decode()
                         args = self.positional_to_named(method, args)
                         args = self.positional_to_named(method, args)
                     else:
                     else:
                         if self.repository is not None:
                         if self.repository is not None:
@@ -256,21 +251,21 @@ class RepositoryServer:  # pragma: no cover
 
 
                             try:
                             try:
                                 msg = msgpack.packb({MSGID: msgid,
                                 msg = msgpack.packb({MSGID: msgid,
-                                                    b'exception_class': e.__class__.__name__,
-                                                    b'exception_args': e.args,
-                                                    b'exception_full': ex_full,
-                                                    b'exception_short': ex_short,
-                                                    b'exception_trace': ex_trace,
-                                                    b'sysinfo': sysinfo()})
+                                                    'exception_class': e.__class__.__name__,
+                                                    'exception_args': e.args,
+                                                    'exception_full': ex_full,
+                                                    'exception_short': ex_short,
+                                                    'exception_trace': ex_trace,
+                                                    'sysinfo': sysinfo()})
                             except TypeError:
                             except TypeError:
                                 msg = msgpack.packb({MSGID: msgid,
                                 msg = msgpack.packb({MSGID: msgid,
-                                                    b'exception_class': e.__class__.__name__,
-                                                    b'exception_args': [x if isinstance(x, (str, bytes, int)) else None
-                                                                        for x in e.args],
-                                                    b'exception_full': ex_full,
-                                                    b'exception_short': ex_short,
-                                                    b'exception_trace': ex_trace,
-                                                    b'sysinfo': sysinfo()})
+                                                    'exception_class': e.__class__.__name__,
+                                                    'exception_args': [x if isinstance(x, (str, bytes, int)) else None
+                                                                       for x in e.args],
+                                                    'exception_full': ex_full,
+                                                    'exception_short': ex_short,
+                                                    'exception_trace': ex_trace,
+                                                    'sysinfo': sysinfo()})
 
 
                             os_write(stdout_fd, msg)
                             os_write(stdout_fd, msg)
                         else:
                         else:
@@ -307,7 +302,7 @@ class RepositoryServer:  # pragma: no cover
         # clients since 1.1.0b3 use a dict as client_data
         # clients since 1.1.0b3 use a dict as client_data
         # clients since 1.1.0b6 support json log format from server
         # clients since 1.1.0b6 support json log format from server
         if isinstance(client_data, dict):
         if isinstance(client_data, dict):
-            self.client_version = client_data[b'client_version']
+            self.client_version = client_data['client_version']
             level = logging.getLevelName(logging.getLogger('').level)
             level = logging.getLevelName(logging.getLogger('').level)
             setup_logging(is_serve=True, json=True, level=level)
             setup_logging(is_serve=True, json=True, level=level)
             logger.debug('Initialized logging system for JSON-based protocol')
             logger.debug('Initialized logging system for JSON-based protocol')
@@ -369,7 +364,6 @@ class RepositoryServer:  # pragma: no cover
         return self.repository.id
         return self.repository.id
 
 
     def inject_exception(self, kind):
     def inject_exception(self, kind):
-        kind = kind.decode()
         s1 = 'test string'
         s1 = 'test string'
         s2 = 'test string2'
         s2 = 'test string2'
         if kind == 'DoesNotExist':
         if kind == 'DoesNotExist':
@@ -483,35 +477,35 @@ class RemoteRepository:
 
 
     class RPCError(Exception):
     class RPCError(Exception):
         def __init__(self, unpacked):
         def __init__(self, unpacked):
-            # for borg < 1.1: unpacked only has b'exception_class' as key
-            # for borg 1.1+: unpacked has keys: b'exception_args', b'exception_full', b'exception_short', b'sysinfo'
+            # for borg < 1.1: unpacked only has 'exception_class' as key
+            # for borg 1.1+: unpacked has keys: 'exception_args', 'exception_full', 'exception_short', 'sysinfo'
             self.unpacked = unpacked
             self.unpacked = unpacked
 
 
         def get_message(self):
         def get_message(self):
-            if b'exception_short' in self.unpacked:
-                return b'\n'.join(self.unpacked[b'exception_short']).decode()
+            if 'exception_short' in self.unpacked:
+                return '\n'.join(self.unpacked['exception_short'])
             else:
             else:
                 return self.exception_class
                 return self.exception_class
 
 
         @property
         @property
         def traceback(self):
         def traceback(self):
-            return self.unpacked.get(b'exception_trace', True)
+            return self.unpacked.get('exception_trace', True)
 
 
         @property
         @property
         def exception_class(self):
         def exception_class(self):
-            return self.unpacked[b'exception_class'].decode()
+            return self.unpacked['exception_class']
 
 
         @property
         @property
         def exception_full(self):
         def exception_full(self):
-            if b'exception_full' in self.unpacked:
-                return b'\n'.join(self.unpacked[b'exception_full']).decode()
+            if 'exception_full' in self.unpacked:
+                return '\n'.join(self.unpacked['exception_full'])
             else:
             else:
                 return self.get_message() + '\nRemote Exception (see remote log for the traceback)'
                 return self.get_message() + '\nRemote Exception (see remote log for the traceback)'
 
 
         @property
         @property
         def sysinfo(self):
         def sysinfo(self):
-            if b'sysinfo' in self.unpacked:
-                return self.unpacked[b'sysinfo'].decode()
+            if 'sysinfo' in self.unpacked:
+                return self.unpacked['sysinfo']
             else:
             else:
                 return ''
                 return ''
 
 
@@ -570,15 +564,15 @@ class RemoteRepository:
         try:
         try:
             try:
             try:
                 version = self.call('negotiate', {'client_data': {
                 version = self.call('negotiate', {'client_data': {
-                    b'client_version': BORG_VERSION,
+                    'client_version': BORG_VERSION,
                 }})
                 }})
             except ConnectionClosed:
             except ConnectionClosed:
                 raise ConnectionClosedWithHint('Is borg working on the server?') from None
                 raise ConnectionClosedWithHint('Is borg working on the server?') from None
             if version == RPC_PROTOCOL_VERSION:
             if version == RPC_PROTOCOL_VERSION:
                 self.dictFormat = False
                 self.dictFormat = False
-            elif isinstance(version, dict) and b'server_version' in version:
+            elif isinstance(version, dict) and 'server_version' in version:
                 self.dictFormat = True
                 self.dictFormat = True
-                self.server_version = version[b'server_version']
+                self.server_version = version['server_version']
             else:
             else:
                 raise Exception('Server insisted on using unsupported protocol version %s' % version)
                 raise Exception('Server insisted on using unsupported protocol version %s' % version)
 
 
@@ -733,9 +727,9 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
             return msgid
             return msgid
 
 
         def handle_error(unpacked):
         def handle_error(unpacked):
-            error = unpacked[b'exception_class'].decode()
-            old_server = b'exception_args' not in unpacked
-            args = unpacked.get(b'exception_args')
+            error = unpacked['exception_class']
+            old_server = 'exception_args' not in unpacked
+            args = unpacked.get('exception_args')
 
 
             if error == 'DoesNotExist':
             if error == 'DoesNotExist':
                 raise Repository.DoesNotExist(self.location.processed)
                 raise Repository.DoesNotExist(self.location.processed)
@@ -747,29 +741,29 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
                 if old_server:
                 if old_server:
                     raise IntegrityError('(not available)')
                     raise IntegrityError('(not available)')
                 else:
                 else:
-                    raise IntegrityError(args[0].decode())
+                    raise IntegrityError(args[0])
             elif error == 'AtticRepository':
             elif error == 'AtticRepository':
                 if old_server:
                 if old_server:
                     raise Repository.AtticRepository('(not available)')
                     raise Repository.AtticRepository('(not available)')
                 else:
                 else:
-                    raise Repository.AtticRepository(args[0].decode())
+                    raise Repository.AtticRepository(args[0])
             elif error == 'PathNotAllowed':
             elif error == 'PathNotAllowed':
                 if old_server:
                 if old_server:
                     raise PathNotAllowed('(unknown)')
                     raise PathNotAllowed('(unknown)')
                 else:
                 else:
-                    raise PathNotAllowed(args[0].decode())
+                    raise PathNotAllowed(args[0])
             elif error == 'ParentPathDoesNotExist':
             elif error == 'ParentPathDoesNotExist':
-                raise Repository.ParentPathDoesNotExist(args[0].decode())
+                raise Repository.ParentPathDoesNotExist(args[0])
             elif error == 'ObjectNotFound':
             elif error == 'ObjectNotFound':
                 if old_server:
                 if old_server:
                     raise Repository.ObjectNotFound('(not available)', self.location.processed)
                     raise Repository.ObjectNotFound('(not available)', self.location.processed)
                 else:
                 else:
-                    raise Repository.ObjectNotFound(args[0].decode(), self.location.processed)
+                    raise Repository.ObjectNotFound(args[0], self.location.processed)
             elif error == 'InvalidRPCMethod':
             elif error == 'InvalidRPCMethod':
                 if old_server:
                 if old_server:
                     raise InvalidRPCMethod('(not available)')
                     raise InvalidRPCMethod('(not available)')
                 else:
                 else:
-                    raise InvalidRPCMethod(args[0].decode())
+                    raise InvalidRPCMethod(args[0])
             else:
             else:
                 raise self.RPCError(unpacked)
                 raise self.RPCError(unpacked)
 
 
@@ -788,7 +782,7 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
                 try:
                 try:
                     unpacked = self.responses.pop(waiting_for[0])
                     unpacked = self.responses.pop(waiting_for[0])
                     waiting_for.pop(0)
                     waiting_for.pop(0)
-                    if b'exception_class' in unpacked:
+                    if 'exception_class' in unpacked:
                         handle_error(unpacked)
                         handle_error(unpacked)
                     else:
                     else:
                         yield unpacked[RESULT]
                         yield unpacked[RESULT]
@@ -808,7 +802,7 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
                         else:
                         else:
                             return
                             return
                     else:
                     else:
-                        if b'exception_class' in unpacked:
+                        if 'exception_class' in unpacked:
                             handle_error(unpacked)
                             handle_error(unpacked)
                         else:
                         else:
                             yield unpacked[RESULT]
                             yield unpacked[RESULT]
@@ -834,7 +828,7 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
                             _, msgid, error, res = unpacked
                             _, msgid, error, res = unpacked
                             if error:
                             if error:
                                 # ignore res, because it is only a fixed string anyway.
                                 # ignore res, because it is only a fixed string anyway.
-                                unpacked = {MSGID: msgid, b'exception_class': error}
+                                unpacked = {MSGID: msgid, 'exception_class': error}
                             else:
                             else:
                                 unpacked = {MSGID: msgid, RESULT: res}
                                 unpacked = {MSGID: msgid, RESULT: res}
                         else:
                         else:
@@ -842,7 +836,7 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
                         if msgid in self.ignore_responses:
                         if msgid in self.ignore_responses:
                             self.ignore_responses.remove(msgid)
                             self.ignore_responses.remove(msgid)
                             # async methods never return values, but may raise exceptions.
                             # async methods never return values, but may raise exceptions.
-                            if b'exception_class' in unpacked:
+                            if 'exception_class' in unpacked:
                                 self.async_responses[msgid] = unpacked
                                 self.async_responses[msgid] = unpacked
                             else:
                             else:
                                 # we currently do not have async result values except "None",
                                 # we currently do not have async result values except "None",

+ 22 - 22
src/borg/repository.py

@@ -516,16 +516,16 @@ class Repository:
                 integrity = msgpack.unpack(fd)
                 integrity = msgpack.unpack(fd)
         except FileNotFoundError:
         except FileNotFoundError:
             return
             return
-        if integrity.get(b'version') != 2:
-            logger.warning('Unknown integrity data version %r in %s', integrity.get(b'version'), integrity_file)
+        if integrity.get('version') != 2:
+            logger.warning('Unknown integrity data version %r in %s', integrity.get('version'), integrity_file)
             return
             return
-        return integrity[key].decode()
+        return integrity[key]
 
 
     def open_index(self, transaction_id, auto_recover=True):
     def open_index(self, transaction_id, auto_recover=True):
         if transaction_id is None:
         if transaction_id is None:
             return NSIndex()
             return NSIndex()
         index_path = os.path.join(self.path, 'index.%d' % transaction_id)
         index_path = os.path.join(self.path, 'index.%d' % transaction_id)
-        integrity_data = self._read_integrity(transaction_id, b'index')
+        integrity_data = self._read_integrity(transaction_id, 'index')
         try:
         try:
             with IntegrityCheckedFile(index_path, write=False, integrity_data=integrity_data) as fd:
             with IntegrityCheckedFile(index_path, write=False, integrity_data=integrity_data) as fd:
                 return NSIndex.read(fd)
                 return NSIndex.read(fd)
@@ -575,7 +575,7 @@ class Repository:
                 self.io.cleanup(transaction_id)
                 self.io.cleanup(transaction_id)
             hints_path = os.path.join(self.path, 'hints.%d' % transaction_id)
             hints_path = os.path.join(self.path, 'hints.%d' % transaction_id)
             index_path = os.path.join(self.path, 'index.%d' % transaction_id)
             index_path = os.path.join(self.path, 'index.%d' % transaction_id)
-            integrity_data = self._read_integrity(transaction_id, b'hints')
+            integrity_data = self._read_integrity(transaction_id, 'hints')
             try:
             try:
                 with IntegrityCheckedFile(hints_path, write=False, integrity_data=integrity_data) as fd:
                 with IntegrityCheckedFile(hints_path, write=False, integrity_data=integrity_data) as fd:
                     hints = msgpack.unpack(fd)
                     hints = msgpack.unpack(fd)
@@ -588,23 +588,23 @@ class Repository:
                 self.check_transaction()
                 self.check_transaction()
                 self.prepare_txn(transaction_id)
                 self.prepare_txn(transaction_id)
                 return
                 return
-            if hints[b'version'] == 1:
+            if hints['version'] == 1:
                 logger.debug('Upgrading from v1 hints.%d', transaction_id)
                 logger.debug('Upgrading from v1 hints.%d', transaction_id)
-                self.segments = hints[b'segments']
+                self.segments = hints['segments']
                 self.compact = FreeSpace()
                 self.compact = FreeSpace()
                 self.storage_quota_use = 0
                 self.storage_quota_use = 0
                 self.shadow_index = {}
                 self.shadow_index = {}
-                for segment in sorted(hints[b'compact']):
+                for segment in sorted(hints['compact']):
                     logger.debug('Rebuilding sparse info for segment %d', segment)
                     logger.debug('Rebuilding sparse info for segment %d', segment)
                     self._rebuild_sparse(segment)
                     self._rebuild_sparse(segment)
                 logger.debug('Upgrade to v2 hints complete')
                 logger.debug('Upgrade to v2 hints complete')
-            elif hints[b'version'] != 2:
-                raise ValueError('Unknown hints file version: %d' % hints[b'version'])
+            elif hints['version'] != 2:
+                raise ValueError('Unknown hints file version: %d' % hints['version'])
             else:
             else:
-                self.segments = hints[b'segments']
-                self.compact = FreeSpace(hints[b'compact'])
-                self.storage_quota_use = hints.get(b'storage_quota_use', 0)
-                self.shadow_index = hints.get(b'shadow_index', {})
+                self.segments = hints['segments']
+                self.compact = FreeSpace(hints['compact'])
+                self.storage_quota_use = hints.get('storage_quota_use', 0)
+                self.shadow_index = hints.get('shadow_index', {})
             self.log_storage_quota()
             self.log_storage_quota()
             # Drop uncommitted segments in the shadow index
             # Drop uncommitted segments in the shadow index
             for key, shadowed_segments in self.shadow_index.items():
             for key, shadowed_segments in self.shadow_index.items():
@@ -621,16 +621,16 @@ class Repository:
             os.rename(file + '.tmp', file)
             os.rename(file + '.tmp', file)
 
 
         hints = {
         hints = {
-            b'version': 2,
-            b'segments': self.segments,
-            b'compact': self.compact,
-            b'storage_quota_use': self.storage_quota_use,
-            b'shadow_index': self.shadow_index,
+            'version': 2,
+            'segments': self.segments,
+            'compact': self.compact,
+            'storage_quota_use': self.storage_quota_use,
+            'shadow_index': self.shadow_index,
         }
         }
         integrity = {
         integrity = {
             # Integrity version started at 2, the current hints version.
             # Integrity version started at 2, the current hints version.
             # Thus, integrity version == hints version, for now.
             # Thus, integrity version == hints version, for now.
-            b'version': 2,
+            'version': 2,
         }
         }
         transaction_id = self.io.get_segments_transaction_id()
         transaction_id = self.io.get_segments_transaction_id()
         assert transaction_id is not None
         assert transaction_id is not None
@@ -647,7 +647,7 @@ class Repository:
         with IntegrityCheckedFile(hints_file + '.tmp', filename=hints_name, write=True) as fd:
         with IntegrityCheckedFile(hints_file + '.tmp', filename=hints_name, write=True) as fd:
             msgpack.pack(hints, fd)
             msgpack.pack(hints, fd)
             flush_and_sync(fd)
             flush_and_sync(fd)
-        integrity[b'hints'] = fd.integrity_data
+        integrity['hints'] = fd.integrity_data
 
 
         # Write repository index
         # Write repository index
         index_name = 'index.%d' % transaction_id
         index_name = 'index.%d' % transaction_id
@@ -656,7 +656,7 @@ class Repository:
             # XXX: Consider using SyncFile for index write-outs.
             # XXX: Consider using SyncFile for index write-outs.
             self.index.write(fd)
             self.index.write(fd)
             flush_and_sync(fd)
             flush_and_sync(fd)
-        integrity[b'index'] = fd.integrity_data
+        integrity['index'] = fd.integrity_data
 
 
         # Write integrity file, containing checksums of the hints and index files
         # Write integrity file, containing checksums of the hints and index files
         integrity_name = 'integrity.%d' % transaction_id
         integrity_name = 'integrity.%d' % transaction_id

+ 14 - 14
src/borg/testsuite/archive.py

@@ -171,7 +171,7 @@ class RobustUnpackerTestCase(BaseTestCase):
         return b''.join(msgpack.packb({'path': item}) for item in items)
         return b''.join(msgpack.packb({'path': item}) for item in items)
 
 
     def _validator(self, value):
     def _validator(self, value):
-        return isinstance(value, dict) and value.get(b'path') in (b'foo', b'bar', b'boo', b'baz')
+        return isinstance(value, dict) and value.get('path') in ('foo', 'bar', 'boo', 'baz')
 
 
     def process(self, input):
     def process(self, input):
         unpacker = RobustUnpacker(validator=self._validator, item_keys=ITEM_KEYS)
         unpacker = RobustUnpacker(validator=self._validator, item_keys=ITEM_KEYS)
@@ -186,14 +186,14 @@ class RobustUnpackerTestCase(BaseTestCase):
         return result
         return result
 
 
     def test_extra_garbage_no_sync(self):
     def test_extra_garbage_no_sync(self):
-        chunks = [(False, [self.make_chunks([b'foo', b'bar'])]),
-                  (False, [b'garbage'] + [self.make_chunks([b'boo', b'baz'])])]
+        chunks = [(False, [self.make_chunks(['foo', 'bar'])]),
+                  (False, [b'garbage'] + [self.make_chunks(['boo', 'baz'])])]
         result = self.process(chunks)
         result = self.process(chunks)
         self.assert_equal(result, [
         self.assert_equal(result, [
-            {b'path': b'foo'}, {b'path': b'bar'},
+            {'path': 'foo'}, {'path': 'bar'},
             103, 97, 114, 98, 97, 103, 101,
             103, 97, 114, 98, 97, 103, 101,
-            {b'path': b'boo'},
-            {b'path': b'baz'}])
+            {'path': 'boo'},
+            {'path': 'baz'}])
 
 
     def split(self, left, length):
     def split(self, left, length):
         parts = []
         parts = []
@@ -203,22 +203,22 @@ class RobustUnpackerTestCase(BaseTestCase):
         return parts
         return parts
 
 
     def test_correct_stream(self):
     def test_correct_stream(self):
-        chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 2)
+        chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 2)
         input = [(False, chunks)]
         input = [(False, chunks)]
         result = self.process(input)
         result = self.process(input)
-        self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'bar'}, {b'path': b'boo'}, {b'path': b'baz'}])
+        self.assert_equal(result, [{'path': 'foo'}, {'path': 'bar'}, {'path': 'boo'}, {'path': 'baz'}])
 
 
     def test_missing_chunk(self):
     def test_missing_chunk(self):
-        chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 4)
+        chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 4)
         input = [(False, chunks[:3]), (True, chunks[4:])]
         input = [(False, chunks[:3]), (True, chunks[4:])]
         result = self.process(input)
         result = self.process(input)
-        self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
+        self.assert_equal(result, [{'path': 'foo'}, {'path': 'boo'}, {'path': 'baz'}])
 
 
     def test_corrupt_chunk(self):
     def test_corrupt_chunk(self):
-        chunks = self.split(self.make_chunks([b'foo', b'bar', b'boo', b'baz']), 4)
+        chunks = self.split(self.make_chunks(['foo', 'bar', 'boo', 'baz']), 4)
         input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])]
         input = [(False, chunks[:3]), (True, [b'gar', b'bage'] + chunks[3:])]
         result = self.process(input)
         result = self.process(input)
-        self.assert_equal(result, [{b'path': b'foo'}, {b'path': b'boo'}, {b'path': b'baz'}])
+        self.assert_equal(result, [{'path': 'foo'}, {'path': 'boo'}, {'path': 'baz'}])
 
 
 
 
 @pytest.fixture
 @pytest.fixture
@@ -242,7 +242,7 @@ IK = sorted(list(ITEM_KEYS))
 
 
 @pytest.mark.parametrize('packed',
 @pytest.mark.parametrize('packed',
     [msgpack.packb(o) for o in [
     [msgpack.packb(o) for o in [
-        {b'path': b'/a/b/c'},  # small (different msgpack mapping type!)
+        {'path': b'/a/b/c'},  # small (different msgpack mapping type!)
         OrderedDict((k, b'') for k in IK),  # as big (key count) as it gets
         OrderedDict((k, b'') for k in IK),  # as big (key count) as it gets
         OrderedDict((k, b'x' * 1000) for k in IK),  # as big (key count and volume) as it gets
         OrderedDict((k, b'x' * 1000) for k in IK),  # as big (key count and volume) as it gets
     ]])
     ]])
@@ -251,7 +251,7 @@ def test_valid_msgpacked_items(packed, item_keys_serialized):
 
 
 
 
 def test_key_length_msgpacked_items():
 def test_key_length_msgpacked_items():
-    key = b'x' * 32  # 31 bytes is the limit for fixstr msgpack type
+    key = 'x' * 32  # 31 bytes is the limit for fixstr msgpack type
     data = {key: b''}
     data = {key: b''}
     item_keys_serialized = [msgpack.packb(key), ]
     item_keys_serialized = [msgpack.packb(key), ]
     assert valid_msgpacked_dict(msgpack.packb(data), item_keys_serialized)
     assert valid_msgpacked_dict(msgpack.packb(data), item_keys_serialized)

+ 10 - 10
src/borg/testsuite/archiver.py

@@ -1810,7 +1810,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
     def add_unknown_feature(self, operation):
     def add_unknown_feature(self, operation):
         with Repository(self.repository_path, exclusive=True) as repository:
         with Repository(self.repository_path, exclusive=True) as repository:
             manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
             manifest, key = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
-            manifest.config[b'feature_flags'] = {operation.value.encode(): {b'mandatory': [b'unknown-feature']}}
+            manifest.config['feature_flags'] = {operation.value: {'mandatory': ['unknown-feature']}}
             manifest.write()
             manifest.write()
             repository.commit(compact=False)
             repository.commit(compact=False)
 
 
@@ -3623,14 +3623,14 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.cmd('init', '--encryption=repokey', self.repository_location)
         with Repository(self.repository_path) as repository:
         with Repository(self.repository_path) as repository:
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
-        assert key[b'algorithm'] == b'argon2 chacha20-poly1305'
+        assert key['algorithm'] == 'argon2 chacha20-poly1305'
 
 
     def test_init_with_explicit_key_algorithm(self):
     def test_init_with_explicit_key_algorithm(self):
         """https://github.com/borgbackup/borg/issues/747#issuecomment-1076160401"""
         """https://github.com/borgbackup/borg/issues/747#issuecomment-1076160401"""
         self.cmd('init', '--encryption=repokey', '--key-algorithm=pbkdf2', self.repository_location)
         self.cmd('init', '--encryption=repokey', '--key-algorithm=pbkdf2', self.repository_location)
         with Repository(self.repository_path) as repository:
         with Repository(self.repository_path) as repository:
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
-        assert key[b'algorithm'] == b'sha256'
+        assert key['algorithm'] == 'sha256'
 
 
     def verify_change_passphrase_does_not_change_algorithm(self, given_algorithm, expected_algorithm):
     def verify_change_passphrase_does_not_change_algorithm(self, given_algorithm, expected_algorithm):
         self.cmd('init', '--encryption=repokey', '--key-algorithm', given_algorithm, self.repository_location)
         self.cmd('init', '--encryption=repokey', '--key-algorithm', given_algorithm, self.repository_location)
@@ -3640,13 +3640,13 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
 
 
         with Repository(self.repository_path) as repository:
         with Repository(self.repository_path) as repository:
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
-            assert key[b'algorithm'] == expected_algorithm
+            assert key['algorithm'] == expected_algorithm
 
 
     def test_change_passphrase_does_not_change_algorithm_argon2(self):
     def test_change_passphrase_does_not_change_algorithm_argon2(self):
-        self.verify_change_passphrase_does_not_change_algorithm('argon2', b'argon2 chacha20-poly1305')
+        self.verify_change_passphrase_does_not_change_algorithm('argon2', 'argon2 chacha20-poly1305')
 
 
     def test_change_passphrase_does_not_change_algorithm_pbkdf2(self):
     def test_change_passphrase_does_not_change_algorithm_pbkdf2(self):
-        self.verify_change_passphrase_does_not_change_algorithm('pbkdf2', b'sha256')
+        self.verify_change_passphrase_does_not_change_algorithm('pbkdf2', 'sha256')
 
 
     def verify_change_location_does_not_change_algorithm(self, given_algorithm, expected_algorithm):
     def verify_change_location_does_not_change_algorithm(self, given_algorithm, expected_algorithm):
         self.cmd('init', '--encryption=keyfile', '--key-algorithm', given_algorithm, self.repository_location)
         self.cmd('init', '--encryption=keyfile', '--key-algorithm', given_algorithm, self.repository_location)
@@ -3655,13 +3655,13 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
 
 
         with Repository(self.repository_path) as repository:
         with Repository(self.repository_path) as repository:
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
             key = msgpack.unpackb(a2b_base64(repository.load_key()))
-            assert key[b'algorithm'] == expected_algorithm
+            assert key['algorithm'] == expected_algorithm
 
 
     def test_change_location_does_not_change_algorithm_argon2(self):
     def test_change_location_does_not_change_algorithm_argon2(self):
-        self.verify_change_location_does_not_change_algorithm('argon2', b'argon2 chacha20-poly1305')
+        self.verify_change_location_does_not_change_algorithm('argon2', 'argon2 chacha20-poly1305')
 
 
     def test_change_location_does_not_change_algorithm_pbkdf2(self):
     def test_change_location_does_not_change_algorithm_pbkdf2(self):
-        self.verify_change_location_does_not_change_algorithm('pbkdf2', b'sha256')
+        self.verify_change_location_does_not_change_algorithm('pbkdf2', 'sha256')
 
 
     def test_key_change_algorithm(self):
     def test_key_change_algorithm(self):
         self.cmd('init', '--encryption=repokey', '--key-algorithm=pbkdf2', self.repository_location)
         self.cmd('init', '--encryption=repokey', '--key-algorithm=pbkdf2', self.repository_location)
@@ -3969,7 +3969,7 @@ class ManifestAuthenticationTest(ArchiverTestCaseBase):
             key.change_passphrase(key._passphrase)
             key.change_passphrase(key._passphrase)
 
 
             manifest = msgpack.unpackb(key.decrypt(Manifest.MANIFEST_ID, repository.get(Manifest.MANIFEST_ID)))
             manifest = msgpack.unpackb(key.decrypt(Manifest.MANIFEST_ID, repository.get(Manifest.MANIFEST_ID)))
-            del manifest[b'tam']
+            del manifest['tam']
             repository.put(Manifest.MANIFEST_ID, key.encrypt(Manifest.MANIFEST_ID, msgpack.packb(manifest)))
             repository.put(Manifest.MANIFEST_ID, key.encrypt(Manifest.MANIFEST_ID, msgpack.packb(manifest)))
             repository.commit(compact=False)
             repository.commit(compact=False)
         output = self.cmd('list', '--debug', self.repository_location)
         output = self.cmd('list', '--debug', self.repository_location)

+ 1 - 13
src/borg/testsuite/helpers.py

@@ -22,7 +22,7 @@ from ..helpers import get_base_dir, get_cache_dir, get_keys_dir, get_security_di
 from ..helpers import is_slow_msgpack
 from ..helpers import is_slow_msgpack
 from ..helpers import msgpack
 from ..helpers import msgpack
 from ..helpers import yes, TRUISH, FALSISH, DEFAULTISH
 from ..helpers import yes, TRUISH, FALSISH, DEFAULTISH
-from ..helpers import StableDict, int_to_bigint, bigint_to_int, bin_to_hex
+from ..helpers import StableDict, bin_to_hex
 from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams
 from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams
 from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
 from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
 from ..helpers import swidth_slice
 from ..helpers import swidth_slice
@@ -38,18 +38,6 @@ from ..helpers.passphrase import Passphrase, PasswordRetriesExceeded
 from . import BaseTestCase, FakeInputs
 from . import BaseTestCase, FakeInputs
 
 
 
 
-class BigIntTestCase(BaseTestCase):
-
-    def test_bigint(self):
-        self.assert_equal(int_to_bigint(0), 0)
-        self.assert_equal(int_to_bigint(2**63-1), 2**63-1)
-        self.assert_equal(int_to_bigint(-2**63+1), -2**63+1)
-        self.assert_equal(int_to_bigint(2**63), b'\x00\x00\x00\x00\x00\x00\x00\x80\x00')
-        self.assert_equal(int_to_bigint(-2**63), b'\x00\x00\x00\x00\x00\x00\x00\x80\xff')
-        self.assert_equal(bigint_to_int(int_to_bigint(-2**70)), -2**70)
-        self.assert_equal(bigint_to_int(int_to_bigint(2**70)), 2**70)
-
-
 def test_bin_to_hex():
 def test_bin_to_hex():
     assert bin_to_hex(b'') == ''
     assert bin_to_hex(b'') == ''
     assert bin_to_hex(b'\x00\x01\xff') == '0001ff'
     assert bin_to_hex(b'\x00\x01\xff') == '0001ff'

+ 3 - 11
src/borg/testsuite/item.py

@@ -89,20 +89,12 @@ def test_item_mptimestamp_property():
     assert item.as_dict() == {'atime': Timestamp.from_unix_nano(big)}
     assert item.as_dict() == {'atime': Timestamp.from_unix_nano(big)}
 
 
 
 
-def test_item_user_group_none():
-    item = Item()
-    item.user = None
-    assert item.user is None
-    item.group = None
-    assert item.group is None
-
-
 def test_item_se_str_property():
 def test_item_se_str_property():
     # start simple
     # start simple
     item = Item()
     item = Item()
     item.path = '/a/b/c'
     item.path = '/a/b/c'
     assert item.path == '/a/b/c'
     assert item.path == '/a/b/c'
-    assert item.as_dict() == {'path': b'/a/b/c'}
+    assert item.as_dict() == {'path': '/a/b/c'}
     del item.path
     del item.path
     assert item.as_dict() == {}
     assert item.as_dict() == {}
     with pytest.raises(TypeError):
     with pytest.raises(TypeError):
@@ -111,11 +103,11 @@ def test_item_se_str_property():
     # non-utf-8 path, needing surrogate-escaping for latin-1 u-umlaut
     # non-utf-8 path, needing surrogate-escaping for latin-1 u-umlaut
     item = Item(internal_dict={'path': b'/a/\xfc/c'})
     item = Item(internal_dict={'path': b'/a/\xfc/c'})
     assert item.path == '/a/\udcfc/c'  # getting a surrogate-escaped representation
     assert item.path == '/a/\udcfc/c'  # getting a surrogate-escaped representation
-    assert item.as_dict() == {'path': b'/a/\xfc/c'}
+    assert item.as_dict() == {'path': '/a/\udcfc/c'}
     del item.path
     del item.path
     assert 'path' not in item
     assert 'path' not in item
     item.path = '/a/\udcfc/c'  # setting using a surrogate-escaped representation
     item.path = '/a/\udcfc/c'  # setting using a surrogate-escaped representation
-    assert item.as_dict() == {'path': b'/a/\xfc/c'}
+    assert item.as_dict() == {'path': '/a/\udcfc/c'}
 
 
 
 
 def test_item_list_property():
 def test_item_list_property():

+ 8 - 8
src/borg/testsuite/key.py

@@ -360,23 +360,23 @@ class TestTAM:
         assert blob.startswith(b'\x82')
         assert blob.startswith(b'\x82')
 
 
         unpacked = msgpack.unpackb(blob)
         unpacked = msgpack.unpackb(blob)
-        assert unpacked[b'tam'][b'type'] == b'HKDF_HMAC_SHA512'
+        assert unpacked['tam']['type'] == 'HKDF_HMAC_SHA512'
 
 
         unpacked, verified = key.unpack_and_verify_manifest(blob)
         unpacked, verified = key.unpack_and_verify_manifest(blob)
         assert verified
         assert verified
-        assert unpacked[b'foo'] == b'bar'
-        assert b'tam' not in unpacked
+        assert unpacked['foo'] == 'bar'
+        assert 'tam' not in unpacked
 
 
-    @pytest.mark.parametrize('which', (b'hmac', b'salt'))
+    @pytest.mark.parametrize('which', ('hmac', 'salt'))
     def test_tampered(self, key, which):
     def test_tampered(self, key, which):
         data = {'foo': 'bar'}
         data = {'foo': 'bar'}
         blob = key.pack_and_authenticate_metadata(data)
         blob = key.pack_and_authenticate_metadata(data)
         assert blob.startswith(b'\x82')
         assert blob.startswith(b'\x82')
 
 
         unpacked = msgpack.unpackb(blob, object_hook=StableDict)
         unpacked = msgpack.unpackb(blob, object_hook=StableDict)
-        assert len(unpacked[b'tam'][which]) == 64
-        unpacked[b'tam'][which] = unpacked[b'tam'][which][0:32] + bytes(32)
-        assert len(unpacked[b'tam'][which]) == 64
+        assert len(unpacked['tam'][which]) == 64
+        unpacked['tam'][which] = unpacked['tam'][which][0:32] + bytes(32)
+        assert len(unpacked['tam'][which]) == 64
         blob = msgpack.packb(unpacked)
         blob = msgpack.packb(unpacked)
 
 
         with pytest.raises(TAMInvalid):
         with pytest.raises(TAMInvalid):
@@ -421,4 +421,4 @@ def test_key_file_roundtrip(monkeypatch, cli_argument, expected_algorithm):
     load_me = RepoKey.detect(repository, manifest_data=None)
     load_me = RepoKey.detect(repository, manifest_data=None)
 
 
     assert to_dict(load_me) == to_dict(save_me)
     assert to_dict(load_me) == to_dict(save_me)
-    assert msgpack.unpackb(a2b_base64(saved))[b'algorithm'] == expected_algorithm.encode()
+    assert msgpack.unpackb(a2b_base64(saved))['algorithm'] == expected_algorithm

+ 2 - 2
src/borg/testsuite/repository.py

@@ -655,8 +655,8 @@ class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase):
             hints = msgpack.unpack(fd)
             hints = msgpack.unpack(fd)
             fd.seek(0)
             fd.seek(0)
             # Corrupt segment refcount
             # Corrupt segment refcount
-            assert hints[b'segments'][2] == 1
-            hints[b'segments'][2] = 0
+            assert hints['segments'][2] == 1
+            hints['segments'][2] = 0
             msgpack.pack(hints, fd)
             msgpack.pack(hints, fd)
             fd.truncate()
             fd.truncate()
 
 

+ 2 - 6
src/borg/xattr.py

@@ -76,9 +76,7 @@ def get_all(path, follow_symlinks=False):
         for name in names:
         for name in names:
             try:
             try:
                 # xattr name is a bytes object, we directly use it.
                 # xattr name is a bytes object, we directly use it.
-                # if we get an empty xattr value (b''), we store None into the result dict -
-                # borg always did it like that...
-                result[name] = getxattr(path, name, follow_symlinks=follow_symlinks) or None
+                result[name] = getxattr(path, name, follow_symlinks=follow_symlinks)
             except OSError as e:
             except OSError as e:
                 name_str = name.decode()
                 name_str = name.decode()
                 if isinstance(path, int):
                 if isinstance(path, int):
@@ -122,9 +120,7 @@ def set_all(path, xattrs, follow_symlinks=False):
     warning = False
     warning = False
     for k, v in xattrs.items():
     for k, v in xattrs.items():
         try:
         try:
-            # the key k is a bytes object due to msgpack unpacking it as such.
-            # if we have a None value, it means "empty", so give b'' to setxattr in that case:
-            setxattr(path, k, v or b'', follow_symlinks=follow_symlinks)
+            setxattr(path, k, v, follow_symlinks=follow_symlinks)
         except OSError as e:
         except OSError as e:
             warning = True
             warning = True
             k_str = k.decode()
             k_str = k.decode()