
Merge pull request #1117 from ThomasWaldmann/items-refactor

refactor to use Item class
TW 9 years ago
Parent commit: 504af0206d
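
The refactor replaces the raw msgpack item dicts (with their b'...' bytes keys) by a single Item class that provides attribute access and centralizes key handling. The sketch below is illustrative only, assuming no more than what the diff itself shows; borg's actual implementation lives in src/borg/item.py and additionally validates keys and handles the bigint timestamp encoding:

    class Item:
        """Wrap a msgpack-decoded item dict, exposing its entries as attributes (sketch)."""

        def __init__(self, internal_dict=None, **kw):
            self._dict = {}  # routed through __setattr__ below
            for d in (internal_dict or {}, kw):
                for k, v in d.items():
                    # old archives carry bytes keys (b'path'); normalize to str
                    self._dict[k.decode() if isinstance(k, bytes) else k] = v

        def __setattr__(self, key, value):
            if key == '_dict':
                object.__setattr__(self, key, value)
            else:
                self._dict[key] = value

        def __getattr__(self, key):
            try:
                return self._dict[key]
            except KeyError:
                raise AttributeError(key) from None

        def __delattr__(self, key):
            del self._dict[key]

        def __contains__(self, key):
            return key in self._dict

        def get(self, key, default=None):
            return self._dict.get(key, default)

        def as_dict(self):
            return dict(self._dict)

This is what makes the mechanical substitutions below possible: item[b'path'] becomes item.path, b'chunks' in item becomes 'chunks' in item, and ChunkBuffer.add() serializes via item.as_dict() instead of StableDict(item).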

+ 115 - 118
src/borg/archive.py

@@ -33,6 +33,7 @@ from .helpers import ProgressIndicatorPercent, log_multi
 from .helpers import PathPrefixPattern, FnmatchPattern
 from .helpers import consume
 from .helpers import CompressionDecider1, CompressionDecider2, CompressionSpec
+from .item import Item
 from .key import key_factory
 from .platform import acl_get, acl_set, set_flags, get_flags, swidth
 from .remote import cache_if_remote
@@ -86,7 +87,7 @@ class Statistics:
             columns, lines = get_terminal_size()
             if not final:
                 msg = '{0.osize_fmt} O {0.csize_fmt} C {0.usize_fmt} D {0.nfiles} N '.format(self)
-                path = remove_surrogates(item[b'path']) if item else ''
+                path = remove_surrogates(item.path) if item else ''
                 space = columns - swidth(msg)
                 if space < swidth('...') + swidth(path):
                     path = '%s...%s' % (path[:(space // 2) - swidth('...')], path[-space // 2:])
@@ -106,16 +107,16 @@ class DownloadPipeline:
         unpacker = msgpack.Unpacker(use_list=False)
         for _, data in self.fetch_many(ids):
             unpacker.feed(data)
-            items = [decode_dict(item, ITEM_TEXT_KEYS) for item in unpacker]
+            items = [Item(internal_dict=item) for item in unpacker]
             if filter:
                 items = [item for item in items if filter(item)]
             for item in items:
-                if b'chunks' in item:
-                    item[b'chunks'] = [ChunkListEntry(*e) for e in item[b'chunks']]
+                if 'chunks' in item:
+                    item.chunks = [ChunkListEntry(*e) for e in item.chunks]
             if preload:
                 for item in items:
-                    if b'chunks' in item:
-                        self.repository.preload([c.id for c in item[b'chunks']])
+                    if 'chunks' in item:
+                        self.repository.preload([c.id for c in item.chunks])
             for item in items:
                 yield item

@@ -135,7 +136,7 @@ class ChunkBuffer:
         self.chunker = Chunker(self.key.chunk_seed, *chunker_params)

     def add(self, item):
-        self.buffer.write(self.packer.pack(StableDict(item)))
+        self.buffer.write(self.packer.pack(item.as_dict()))
         if self.is_full():
             self.flush()

@@ -286,9 +287,6 @@ Number of files: {0.stats.nfiles}'''.format(
             yield item

     def add_item(self, item):
-        unknown_keys = set(item) - ITEM_KEYS
-        assert not unknown_keys, ('unknown item metadata keys detected, please update constants.ITEM_KEYS: %s',
-                                  ','.join(k.decode('ascii') for k in unknown_keys))
         if self.show_progress:
             self.stats.show_progress(item=item, dt=0.2)
         self.items_buffer.add(item)
@@ -356,9 +354,10 @@ Number of files: {0.stats.nfiles}'''.format(
             _, data = self.key.decrypt(id, chunk)
             unpacker.feed(data)
             for item in unpacker:
-                if b'chunks' in item:
+                item = Item(internal_dict=item)
+                if 'chunks' in item:
                     stats.nfiles += 1
-                    add_file_chunks(item[b'chunks'])
+                    add_file_chunks(item.chunks)
         cache.rollback()
         return stats

@@ -373,22 +372,22 @@ Number of files: {0.stats.nfiles}'''.format(
         :param stdout: write extracted data to stdout
         :param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
         :param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
-        :param original_path: b'path' key as stored in archive
+        :param original_path: 'path' key as stored in archive
         """
         if dry_run or stdout:
-            if b'chunks' in item:
-                for _, data in self.pipeline.fetch_many([c.id for c in item[b'chunks']], is_preloaded=True):
+            if 'chunks' in item:
+                for _, data in self.pipeline.fetch_many([c.id for c in item.chunks], is_preloaded=True):
                     if stdout:
                         sys.stdout.buffer.write(data)
                 if stdout:
                     sys.stdout.buffer.flush()
             return

-        original_path = original_path or item[b'path']
+        original_path = original_path or item.path
         dest = self.cwd
-        if item[b'path'].startswith('/') or item[b'path'].startswith('..'):
+        if item.path.startswith(('/', '..')):
             raise Exception('Path should be relative and local')
-        path = os.path.join(dest, item[b'path'])
+        path = os.path.join(dest, item.path)
         # Attempt to remove existing files, ignore errors on failure
         try:
             st = os.lstat(path)
@@ -400,27 +399,27 @@ Number of files: {0.stats.nfiles}'''.format(
             raise self.IncompatibleFilesystemEncodingError(path, sys.getfilesystemencoding()) from None
         except OSError:
             pass
-        mode = item[b'mode']
+        mode = item.mode
         if stat.S_ISREG(mode):
             if not os.path.exists(os.path.dirname(path)):
                 os.makedirs(os.path.dirname(path))

             # Hard link?
-            if b'source' in item:
-                source = os.path.join(dest, item[b'source'])
+            if 'source' in item:
+                source = os.path.join(dest, item.source)
                 if os.path.exists(path):
                     os.unlink(path)
                 if not hardlink_masters:
                     os.link(source, path)
                     return
-                item[b'chunks'], link_target = hardlink_masters[item[b'source']]
+                item.chunks, link_target = hardlink_masters[item.source]
                 if link_target:
                     # Hard link was extracted previously, just link
                     os.link(link_target, path)
                     return
                 # Extract chunks, since the item which had the chunks was not extracted
             with open(path, 'wb') as fd:
-                ids = [c.id for c in item[b'chunks']]
+                ids = [c.id for c in item.chunks]
                 for _, data in self.pipeline.fetch_many(ids, is_preloaded=True):
                     if sparse and self.zeros.startswith(data):
                         # all-zero chunk: create a hole in a sparse file
@@ -433,7 +432,7 @@ Number of files: {0.stats.nfiles}'''.format(
                 self.restore_attrs(path, item, fd=fd.fileno())
             if hardlink_masters:
                 # Update master entry with extracted file path, so that following hardlinks don't extract twice.
-                hardlink_masters[item.get(b'source') or original_path] = (None, path)
+                hardlink_masters[item.get('source') or original_path] = (None, path)
         elif stat.S_ISDIR(mode):
             if not os.path.exists(path):
                 os.makedirs(path)
@@ -442,7 +441,7 @@ Number of files: {0.stats.nfiles}'''.format(
         elif stat.S_ISLNK(mode):
             if not os.path.exists(os.path.dirname(path)):
                 os.makedirs(os.path.dirname(path))
-            source = item[b'source']
+            source = item.source
             if os.path.exists(path):
                 os.unlink(path)
             try:
@@ -456,18 +455,18 @@ Number of files: {0.stats.nfiles}'''.format(
             os.mkfifo(path)
             self.restore_attrs(path, item)
         elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode):
-            os.mknod(path, item[b'mode'], item[b'rdev'])
+            os.mknod(path, item.mode, item.rdev)
             self.restore_attrs(path, item)
         else:
-            raise Exception('Unknown archive item type %r' % item[b'mode'])
+            raise Exception('Unknown archive item type %r' % item.mode)

     def restore_attrs(self, path, item, symlink=False, fd=None):
         uid = gid = None
         if not self.numeric_owner:
-            uid = user2uid(item[b'user'])
-            gid = group2gid(item[b'group'])
-        uid = item[b'uid'] if uid is None else uid
-        gid = item[b'gid'] if gid is None else gid
+            uid = user2uid(item.user)
+            gid = group2gid(item.group)
+        uid = item.uid if uid is None else uid
+        gid = item.gid if gid is None else gid
         # This code is a bit of a mess due to os specific differences
         try:
             if fd:
@@ -477,14 +476,14 @@ Number of files: {0.stats.nfiles}'''.format(
         except OSError:
             pass
         if fd:
-            os.fchmod(fd, item[b'mode'])
+            os.fchmod(fd, item.mode)
         elif not symlink:
-            os.chmod(path, item[b'mode'])
+            os.chmod(path, item.mode)
         elif has_lchmod:  # Not available on Linux
-            os.lchmod(path, item[b'mode'])
-        mtime = bigint_to_int(item[b'mtime'])
-        if b'atime' in item:
-            atime = bigint_to_int(item[b'atime'])
+            os.lchmod(path, item.mode)
+        mtime = item.mtime
+        if 'atime' in item:
+            atime = item.atime
         else:
             # old archives only had mtime in item metadata
             atime = mtime
@@ -493,14 +492,14 @@ Number of files: {0.stats.nfiles}'''.format(
         else:
             os.utime(path, None, ns=(atime, mtime), follow_symlinks=False)
         acl_set(path, item, self.numeric_owner)
-        if b'bsdflags' in item:
+        if 'bsdflags' in item:
             try:
-                set_flags(path, item[b'bsdflags'], fd=fd)
+                set_flags(path, item.bsdflags, fd=fd)
             except OSError:
                 pass
         # chown removes Linux capabilities, so set the extended attributes at the end, after chown, since they include
         # the Linux capabilities in the "security.capability" attribute.
-        xattrs = item.get(b'xattrs', {})
+        xattrs = item.get('xattrs', {})
         for k, v in xattrs.items():
             try:
                 xattr.setxattr(fd or path, k, v, follow_symlinks=False)
@@ -541,8 +540,9 @@ Number of files: {0.stats.nfiles}'''.format(
             unpacker.feed(data)
             self.cache.chunk_decref(items_id, stats)
             for item in unpacker:
-                if b'chunks' in item:
-                    for chunk_id, size, csize in item[b'chunks']:
+                item = Item(internal_dict=item)
+                if 'chunks' in item:
+                    for chunk_id, size, csize in item.chunks:
                         self.cache.chunk_decref(chunk_id, stats)
         if progress:
             pi.finish()
@@ -550,39 +550,39 @@ Number of files: {0.stats.nfiles}'''.format(
         del self.manifest.archives[self.name]

     def stat_attrs(self, st, path):
-        item = {
-            b'mode': st.st_mode,
-            b'uid': st.st_uid, b'user': uid2user(st.st_uid),
-            b'gid': st.st_gid, b'group': gid2group(st.st_gid),
-            b'atime': int_to_bigint(st.st_atime_ns),
-            b'ctime': int_to_bigint(st.st_ctime_ns),
-            b'mtime': int_to_bigint(st.st_mtime_ns),
-        }
+        attrs = dict(
+            mode=st.st_mode,
+            uid=st.st_uid, user=uid2user(st.st_uid),
+            gid=st.st_gid, group=gid2group(st.st_gid),
+            atime=st.st_atime_ns,
+            ctime=st.st_ctime_ns,
+            mtime=st.st_mtime_ns,
+        )
         if self.numeric_owner:
-            item[b'user'] = item[b'group'] = None
+            attrs['user'] = attrs['group'] = None
         xattrs = xattr.get_all(path, follow_symlinks=False)
         if xattrs:
-            item[b'xattrs'] = StableDict(xattrs)
+            attrs['xattrs'] = StableDict(xattrs)
         bsdflags = get_flags(path, st)
         if bsdflags:
-            item[b'bsdflags'] = bsdflags
-        acl_get(path, item, st, self.numeric_owner)
-        return item
+            attrs['bsdflags'] = bsdflags
+        acl_get(path, attrs, st, self.numeric_owner)
+        return attrs

     def process_dir(self, path, st):
-        item = {b'path': make_path_safe(path)}
+        item = Item(path=make_path_safe(path))
         item.update(self.stat_attrs(st, path))
         self.add_item(item)
         return 'd'  # directory

     def process_fifo(self, path, st):
-        item = {b'path': make_path_safe(path)}
+        item = Item(path=make_path_safe(path))
         item.update(self.stat_attrs(st, path))
         self.add_item(item)
         return 'f'  # fifo

     def process_dev(self, path, st):
-        item = {b'path': make_path_safe(path), b'rdev': st.st_rdev}
+        item = Item(path=make_path_safe(path), rdev=st.st_rdev)
         item.update(self.stat_attrs(st, path))
         self.add_item(item)
         if stat.S_ISCHR(st.st_mode):
@@ -592,7 +592,7 @@ Number of files: {0.stats.nfiles}'''.format(

     def process_symlink(self, path, st):
         source = os.readlink(path)
-        item = {b'path': make_path_safe(path), b'source': source}
+        item = Item(path=make_path_safe(path), source=source)
         item.update(self.stat_attrs(st, path))
         self.add_item(item)
         return 's'  # symlink
@@ -604,15 +604,15 @@ Number of files: {0.stats.nfiles}'''.format(
         for data in self.chunker.chunkify(fd):
             chunks.append(cache.add_chunk(self.key.id_hash(data), Chunk(data), self.stats))
         self.stats.nfiles += 1
-        t = int_to_bigint(int(time.time()) * 1000000000)
-        item = {
-            b'path': path,
-            b'chunks': chunks,
-            b'mode': 0o100660,  # regular file, ug=rw
-            b'uid': uid, b'user': uid2user(uid),
-            b'gid': gid, b'group': gid2group(gid),
-            b'mtime': t, b'atime': t, b'ctime': t,
-        }
+        t = int(time.time()) * 1000000000
+        item = Item(
+            path=path,
+            chunks=chunks,
+            mode=0o100660,  # regular file, ug=rw
+            uid=uid, user=uid2user(uid),
+            gid=gid, group=gid2group(gid),
+            mtime=t, atime=t, ctime=t,
+        )
         self.add_item(item)
         return 'i'  # stdin

@@ -623,11 +623,8 @@ Number of files: {0.stats.nfiles}'''.format(
         if st.st_nlink > 1:
             source = self.hard_links.get((st.st_ino, st.st_dev))
             if (st.st_ino, st.st_dev) in self.hard_links:
-                item = self.stat_attrs(st, path)
-                item.update({
-                    b'path': safe_path,
-                    b'source': source,
-                })
+                item = Item(path=safe_path, source=source)
+                item.update(self.stat_attrs(st, path))
                 self.add_item(item)
                 status = 'h'  # regular file, hardlink (to already seen inodes)
                 return status
@@ -649,10 +646,10 @@ Number of files: {0.stats.nfiles}'''.format(
                 status = 'U'  # regular file, unchanged
         else:
             status = 'A'  # regular file, added
-        item = {
-            b'path': safe_path,
-            b'hardlink_master': st.st_nlink > 1,  # item is a hard link and has the chunks
-        }
+        item = Item(
+            path=safe_path,
+            hardlink_master=st.st_nlink > 1,  # item is a hard link and has the chunks
+        )
         # Only chunkify the file if needed
         if chunks is None:
             compress = self.compression_decider1.decide(path)
@@ -668,7 +665,7 @@ Number of files: {0.stats.nfiles}'''.format(
                         self.stats.show_progress(item=item, dt=0.2)
             cache.memorize_file(path_hash, st, [c.id for c in chunks])
             status = status or 'M'  # regular file, modified (if not 'A' already)
-        item[b'chunks'] = chunks
+        item.chunks = chunks
         item.update(self.stat_attrs(st, path))
         self.stats.nfiles += 1
         self.add_item(item)
@@ -698,7 +695,7 @@ class RobustUnpacker:
     """
     """
     def __init__(self, validator):
     def __init__(self, validator):
         super().__init__()
         super().__init__()
-        self.item_keys = [msgpack.packb(name) for name in ITEM_KEYS]
+        self.item_keys = [msgpack.packb(name.encode()) for name in ITEM_KEYS]
         self.validator = validator
         self._buffered_data = []
         self._resync = False
@@ -894,10 +891,10 @@ class ArchiveChecker:
             """
             """
             offset = 0
             offset = 0
             chunk_list = []
             chunk_list = []
-            for chunk_id, size, csize in item[b'chunks']:
+            for chunk_id, size, csize in item.chunks:
                 if chunk_id not in self.chunks:
                     # If a file chunk is missing, create an all empty replacement chunk
-                    logger.error('{}: Missing file chunk detected (Byte {}-{})'.format(safe_decode(item[b'path']), offset, offset + size))
+                    logger.error('{}: Missing file chunk detected (Byte {}-{})'.format(item.path, offset, offset + size))
                     self.error_found = True
                     data = bytes(size)
                     chunk_id = self.key.id_hash(data)
@@ -908,14 +905,14 @@ class ArchiveChecker:
                     add_reference(chunk_id, size, csize)
                 chunk_list.append((chunk_id, size, csize))
                 offset += size
-            item[b'chunks'] = chunk_list
+            item.chunks = chunk_list

         def robust_iterator(archive):
             """Iterates through all archive items

             Missing item chunks will be skipped and the msgpack stream will be restarted
             """
-            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item)
+            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and 'path' in item)
             _state = 0

             def missing_chunk_detector(chunk_id):
@@ -946,7 +943,7 @@ class ArchiveChecker:
                     try:
                         for item in unpacker:
                             if isinstance(item, dict):
-                                yield item
+                                yield Item(internal_dict=item)
                             else:
                                 report('Did not get expected metadata dict when unpacking item metadata', chunk_id, i)
                     except Exception:
@@ -990,7 +987,7 @@ class ArchiveChecker:
                 items_buffer = ChunkBuffer(self.key)
                 items_buffer.write_chunk = add_callback
                 for item in robust_iterator(archive):
-                    if b'chunks' in item:
+                    if 'chunks' in item:
                         verify_file_chunks(item)
                     items_buffer.add(item)
                 items_buffer.flush(flush=True)
@@ -1093,38 +1090,38 @@ class ArchiveRecreater:

         def item_is_hardlink_master(item):
             return (target_is_subset and
-                    stat.S_ISREG(item[b'mode']) and
-                    item.get(b'hardlink_master', True) and
-                    b'source' not in item and
-                    not matcher.match(item[b'path']))
+                    stat.S_ISREG(item.mode) and
+                    item.get('hardlink_master', True) and
+                    'source' not in item and
+                    not matcher.match(item.path))

         for item in archive.iter_items():
             if item_is_hardlink_master(item):
                 # Re-visit all of these items in the archive even when fast-forwarding to rebuild hardlink_masters
-                hardlink_masters[item[b'path']] = (item.get(b'chunks'), None)
+                hardlink_masters[item.path] = (item.get('chunks'), None)
                 continue
             if resume_from:
                 # Fast forward to after the last processed file
-                if item[b'path'] == resume_from:
-                    logger.info('Fast-forwarded to %s', remove_surrogates(item[b'path']))
+                if item.path == resume_from:
+                    logger.info('Fast-forwarded to %s', remove_surrogates(item.path))
                     resume_from = None
                 continue
-            if not matcher.match(item[b'path']):
-                self.print_file_status('x', item[b'path'])
+            if not matcher.match(item.path):
+                self.print_file_status('x', item.path)
                 continue
-            if target_is_subset and stat.S_ISREG(item[b'mode']) and item.get(b'source') in hardlink_masters:
+            if target_is_subset and stat.S_ISREG(item.mode) and item.get('source') in hardlink_masters:
                 # master of this hard link is outside the target subset
-                chunks, new_source = hardlink_masters[item[b'source']]
+                chunks, new_source = hardlink_masters[item.source]
                 if new_source is None:
                     # First item to use this master, move the chunks
-                    item[b'chunks'] = chunks
-                    hardlink_masters[item[b'source']] = (None, item[b'path'])
-                    del item[b'source']
+                    item.chunks = chunks
+                    hardlink_masters[item.source] = (None, item.path)
+                    del item.source
                 else:
                     # Master was already moved, only update this item's source
-                    item[b'source'] = new_source
+                    item.source = new_source
             if self.dry_run:
-                self.print_file_status('-', item[b'path'])
+                self.print_file_status('-', item.path)
             else:
                 try:
                     self.process_item(archive, target, item)
@@ -1136,11 +1133,11 @@ class ArchiveRecreater:
             target.stats.show_progress(final=True)

     def process_item(self, archive, target, item):
-        if b'chunks' in item:
-            item[b'chunks'] = self.process_chunks(archive, target, item)
+        if 'chunks' in item:
+            item.chunks = self.process_chunks(archive, target, item)
             target.stats.nfiles += 1
         target.add_item(item)
-        self.print_file_status(file_status(item[b'mode']), item[b'path'])
+        self.print_file_status(file_status(item.mode), item.path)
         if self.interrupt:
             raise self.Interrupted

@@ -1148,9 +1145,9 @@ class ArchiveRecreater:
         """Return new chunk ID list for 'item'."""
         """Return new chunk ID list for 'item'."""
         # TODO: support --compression-from
         # TODO: support --compression-from
         if not self.recompress and not target.recreate_rechunkify:
         if not self.recompress and not target.recreate_rechunkify:
-            for chunk_id, size, csize in item[b'chunks']:
+            for chunk_id, size, csize in item.chunks:
                 self.cache.chunk_incref(chunk_id, target.stats)
-            return item[b'chunks']
+            return item.chunks
         new_chunks = self.process_partial_chunks(target)
         chunk_iterator = self.create_chunk_iterator(archive, target, item)
         consume(chunk_iterator, len(new_chunks))
@@ -1181,7 +1178,7 @@ class ArchiveRecreater:

     def create_chunk_iterator(self, archive, target, item):
         """Return iterator of chunks to store for 'item' from 'archive' in 'target'."""
-        chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in item[b'chunks']])
+        chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in item.chunks])
         if target.recreate_rechunkify:
             # The target.chunker will read the file contents through ChunkIteratorFileWrapper chunk-by-chunk
             # (does not load the entire file into memory)
@@ -1243,7 +1240,7 @@ class ArchiveRecreater:
         """Add excludes to the matcher created by exclude_cache and exclude_if_present."""
         """Add excludes to the matcher created by exclude_cache and exclude_if_present."""
         def exclude(dir, tag_item):
         def exclude(dir, tag_item):
             if self.keep_tag_files:
             if self.keep_tag_files:
-                tag_files.append(PathPrefixPattern(tag_item[b'path']))
+                tag_files.append(PathPrefixPattern(tag_item.path))
                 tagged_dirs.append(FnmatchPattern(dir + '/'))
             else:
                 tagged_dirs.append(PathPrefixPattern(dir))
@@ -1255,18 +1252,18 @@ class ArchiveRecreater:
         cachedir_masters = {}

         for item in archive.iter_items(
-                filter=lambda item: item[b'path'].endswith(CACHE_TAG_NAME) or matcher.match(item[b'path'])):
-            if item[b'path'].endswith(CACHE_TAG_NAME):
-                cachedir_masters[item[b'path']] = item
-            if stat.S_ISREG(item[b'mode']):
-                dir, tag_file = os.path.split(item[b'path'])
+                filter=lambda item: item.path.endswith(CACHE_TAG_NAME) or matcher.match(item.path)):
+            if item.path.endswith(CACHE_TAG_NAME):
+                cachedir_masters[item.path] = item
+            if stat.S_ISREG(item.mode):
+                dir, tag_file = os.path.split(item.path)
                 if tag_file in self.exclude_if_present:
                     exclude(dir, item)
                 if self.exclude_caches and tag_file == CACHE_TAG_NAME:
-                    if b'chunks' in item:
+                    if 'chunks' in item:
                         file = open_item(archive, item)
                     else:
-                        file = open_item(archive, cachedir_masters[item[b'source']])
+                        file = open_item(archive, cachedir_masters[item.source])
                     if file.read(len(CACHE_TAG_CONTENTS)).startswith(CACHE_TAG_CONTENTS):
                         exclude(dir, item)
         matcher.add(tag_files, True)
@@ -1307,13 +1304,13 @@ class ArchiveRecreater:
         logger.info('Replaying items from interrupted operation...')
         item = None
         for item in old_target.iter_items():
-            if b'chunks' in item:
-                for chunk in item[b'chunks']:
+            if 'chunks' in item:
+                for chunk in item.chunks:
                     self.cache.chunk_incref(chunk.id, target.stats)
                 target.stats.nfiles += 1
             target.add_item(item)
         if item:
-            resume_from = item[b'path']
+            resume_from = item.path
         else:
             resume_from = None
         if self.progress:

+ 57 - 56
src/borg/archiver.py

@@ -38,6 +38,7 @@ from .helpers import update_excludes, check_extension_modules
 from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo
 from .helpers import log_multi
 from .helpers import parse_pattern, PatternMatcher, PathPrefixPattern
+from .item import Item
 from .key import key_creator, RepoKey, PassphraseKey
 from .platform import get_flags
 from .remote import RepositoryServer, RemoteRepository, cache_if_remote
@@ -405,22 +406,22 @@ class Archiver:
         hardlink_masters = {} if partial_extract else None

         def item_is_hardlink_master(item):
-            return (partial_extract and stat.S_ISREG(item[b'mode']) and
-                    item.get(b'hardlink_master', True) and b'source' not in item)
+            return (partial_extract and stat.S_ISREG(item.mode) and
+                    item.get('hardlink_master', True) and 'source' not in item)

         for item in archive.iter_items(preload=True,
-                filter=lambda item: item_is_hardlink_master(item) or matcher.match(item[b'path'])):
-            orig_path = item[b'path']
+                filter=lambda item: item_is_hardlink_master(item) or matcher.match(item.path)):
+            orig_path = item.path
             if item_is_hardlink_master(item):
-                hardlink_masters[orig_path] = (item.get(b'chunks'), None)
-            if not matcher.match(item[b'path']):
+                hardlink_masters[orig_path] = (item.get('chunks'), None)
+            if not matcher.match(item.path):
                 continue
             if strip_components:
-                item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
-                if not item[b'path']:
+                item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
+                if not item.path:
                     continue
             if not args.dry_run:
-                while dirs and not item[b'path'].startswith(dirs[-1][b'path']):
+                while dirs and not item.path.startswith(dirs[-1].path):
                     archive.extract_item(dirs.pop(-1), stdout=stdout)
             if output_list:
                 logging.getLogger('borg.output.list').info(remove_surrogates(orig_path))
@@ -428,7 +429,7 @@ class Archiver:
                 if dry_run:
                     archive.extract_item(item, dry_run=True)
                 else:
-                    if stat.S_ISDIR(item[b'mode']):
+                    if stat.S_ISDIR(item.mode):
                         dirs.append(item)
                         archive.extract_item(item, restore_attrs=False)
                     else:
@@ -455,58 +456,58 @@ class Archiver:
             return self.compare_chunk_contents(chunks1, chunks2)

         def sum_chunk_size(item, consider_ids=None):
-            if item.get(b'deleted'):
+            if item.get('deleted'):
                 return None
             else:
-                return sum(c.size for c in item[b'chunks']
+                return sum(c.size for c in item.chunks
                            if consider_ids is None or c.id in consider_ids)

         def get_owner(item):
             if args.numeric_owner:
-                return item[b'uid'], item[b'gid']
+                return item.uid, item.gid
             else:
-                return item[b'user'], item[b'group']
+                return item.user, item.group

         def get_mode(item):
-            if b'mode' in item:
-                return stat.filemode(item[b'mode'])
+            if 'mode' in item:
+                return stat.filemode(item.mode)
             else:
                 return [None]

         def has_hardlink_master(item, hardlink_masters):
-            return stat.S_ISREG(item[b'mode']) and item.get(b'source') in hardlink_masters
+            return stat.S_ISREG(item.mode) and item.get('source') in hardlink_masters

         def compare_link(item1, item2):
             # These are the simple link cases. For special cases, e.g. if a
             # regular file is replaced with a link or vice versa, it is
             # indicated in compare_mode instead.
-            if item1.get(b'deleted'):
+            if item1.get('deleted'):
                 return 'added link'
-            elif item2.get(b'deleted'):
+            elif item2.get('deleted'):
                 return 'removed link'
                 return 'removed link'
-            elif b'source' in item1 and b'source' in item2 and item1[b'source'] != item2[b'source']:
+            elif 'source' in item1 and 'source' in item2 and item1.source != item2.source:
                 return 'changed link'

         def contents_changed(item1, item2):
             if can_compare_chunk_ids:
-                return item1[b'chunks'] != item2[b'chunks']
+                return item1.chunks != item2.chunks
             else:
                 if sum_chunk_size(item1) != sum_chunk_size(item2):
                     return True
                 else:
-                    chunk_ids1 = [c.id for c in item1[b'chunks']]
-                    chunk_ids2 = [c.id for c in item2[b'chunks']]
+                    chunk_ids1 = [c.id for c in item1.chunks]
+                    chunk_ids2 = [c.id for c in item2.chunks]
                     return not fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2)

         def compare_content(path, item1, item2):
             if contents_changed(item1, item2):
-                if item1.get(b'deleted'):
+                if item1.get('deleted'):
                     return ('added {:>13}'.format(format_file_size(sum_chunk_size(item2))))
-                elif item2.get(b'deleted'):
+                elif item2.get('deleted'):
                     return ('removed {:>11}'.format(format_file_size(sum_chunk_size(item1))))
                 else:
-                    chunk_ids1 = {c.id for c in item1[b'chunks']}
-                    chunk_ids2 = {c.id for c in item2[b'chunks']}
+                    chunk_ids1 = {c.id for c in item1.chunks}
+                    chunk_ids2 = {c.id for c in item2.chunks}
                     added_ids = chunk_ids2 - chunk_ids1
                     removed_ids = chunk_ids1 - chunk_ids2
                     added = sum_chunk_size(item2, added_ids)
@@ -515,9 +516,9 @@ class Archiver:
                                                  format_file_size(-removed, precision=1, sign=True)))

         def compare_directory(item1, item2):
-            if item2.get(b'deleted') and not item1.get(b'deleted'):
+            if item2.get('deleted') and not item1.get('deleted'):
                 return 'removed directory'
-            elif item1.get(b'deleted') and not item2.get(b'deleted'):
+            elif item1.get('deleted') and not item2.get('deleted'):
                 return 'added directory'

         def compare_owner(item1, item2):
@@ -527,7 +528,7 @@ class Archiver:
                 return '[{}:{} -> {}:{}]'.format(user1, group1, user2, group2)

         def compare_mode(item1, item2):
-            if item1[b'mode'] != item2[b'mode']:
+            if item1.mode != item2.mode:
                 return '[{} -> {}]'.format(get_mode(item1), get_mode(item2))

         def compare_items(output, path, item1, item2, hardlink_masters, deleted=False):
@@ -538,15 +539,15 @@ class Archiver:
             changes = []

             if has_hardlink_master(item1, hardlink_masters):
-                item1 = hardlink_masters[item1[b'source']][0]
+                item1 = hardlink_masters[item1.source][0]

             if has_hardlink_master(item2, hardlink_masters):
-                item2 = hardlink_masters[item2[b'source']][1]
+                item2 = hardlink_masters[item2.source][1]

             if get_mode(item1)[0] == 'l' or get_mode(item2)[0] == 'l':
                 changes.append(compare_link(item1, item2))

-            if b'chunks' in item1 and b'chunks' in item2:
+            if 'chunks' in item1 and 'chunks' in item2:
                 changes.append(compare_content(path, item1, item2))

             if get_mode(item1)[0] == 'd' or get_mode(item2)[0] == 'd':
@@ -570,21 +571,21 @@ class Archiver:

         def compare_archives(archive1, archive2, matcher):
             def hardlink_master_seen(item):
-                return b'source' not in item or not stat.S_ISREG(item[b'mode']) or item[b'source'] in hardlink_masters
+                return 'source' not in item or not stat.S_ISREG(item.mode) or item.source in hardlink_masters

             def is_hardlink_master(item):
-                return item.get(b'hardlink_master', True) and b'source' not in item
+                return item.get('hardlink_master', True) and 'source' not in item

             def update_hardlink_masters(item1, item2):
                 if is_hardlink_master(item1) or is_hardlink_master(item2):
-                    hardlink_masters[item1[b'path']] = (item1, item2)
+                    hardlink_masters[item1.path] = (item1, item2)

             def compare_or_defer(item1, item2):
                 update_hardlink_masters(item1, item2)
                 if not hardlink_master_seen(item1) or not hardlink_master_seen(item2):
                     deferred.append((item1, item2))
                 else:
-                    compare_items(output, item1[b'path'], item1, item2, hardlink_masters)
+                    compare_items(output, item1.path, item1, item2, hardlink_masters)

             orphans_archive1 = collections.OrderedDict()
             orphans_archive2 = collections.OrderedDict()
@@ -593,44 +594,44 @@ class Archiver:
             output = []

             for item1, item2 in zip_longest(
-                    archive1.iter_items(lambda item: matcher.match(item[b'path'])),
-                    archive2.iter_items(lambda item: matcher.match(item[b'path'])),
+                    archive1.iter_items(lambda item: matcher.match(item.path)),
+                    archive2.iter_items(lambda item: matcher.match(item.path)),
             ):
-                if item1 and item2 and item1[b'path'] == item2[b'path']:
+                if item1 and item2 and item1.path == item2.path:
                     compare_or_defer(item1, item2)
                     continue
                 if item1:
-                    matching_orphan = orphans_archive2.pop(item1[b'path'], None)
+                    matching_orphan = orphans_archive2.pop(item1.path, None)
                     if matching_orphan:
                         compare_or_defer(item1, matching_orphan)
                     else:
-                        orphans_archive1[item1[b'path']] = item1
+                        orphans_archive1[item1.path] = item1
                 if item2:
-                    matching_orphan = orphans_archive1.pop(item2[b'path'], None)
+                    matching_orphan = orphans_archive1.pop(item2.path, None)
                     if matching_orphan:
                         compare_or_defer(matching_orphan, item2)
                     else:
-                        orphans_archive2[item2[b'path']] = item2
+                        orphans_archive2[item2.path] = item2
             # At this point orphans_* contain items that had no matching partner in the other archive
-            deleted_item = {
-                b'deleted': True,
-                b'chunks': [],
-                b'mode': 0,
-            }
+            deleted_item = Item(
+                deleted=True,
+                chunks=[],
+                mode=0,
+            )
             for added in orphans_archive2.values():
-                path = added[b'path']
-                deleted_item[b'path'] = path
+                path = added.path
+                deleted_item.path = path
                 update_hardlink_masters(deleted_item, added)
                 compare_items(output, path, deleted_item, added, hardlink_masters, deleted=True)
             for deleted in orphans_archive1.values():
-                path = deleted[b'path']
-                deleted_item[b'path'] = path
+                path = deleted.path
+                deleted_item.path = path
                 update_hardlink_masters(deleted, deleted_item)
                 compare_items(output, path, deleted, deleted_item, hardlink_masters, deleted=True)
             for item1, item2 in deferred:
                 assert hardlink_master_seen(item1)
                 assert hardlink_master_seen(item2)
-                compare_items(output, item1[b'path'], item1, item2, hardlink_masters)
+                compare_items(output, item1.path, item1, item2, hardlink_masters)

             for line in sorted(output):
                 print_output(line)
@@ -749,7 +750,7 @@ class Archiver:
                         sys.stdout.write(bytestring.decode('utf-8', errors='replace'))
                 else:
                     write = sys.stdout.buffer.write
-                for item in archive.iter_items(lambda item: matcher.match(item[b'path'])):
+                for item in archive.iter_items(lambda item: matcher.match(item.path)):
                     write(safe_encode(formatter.format_item(item)))
                     write(safe_encode(formatter.format_item(item)))
         else:
         else:
             for archive_info in manifest.list_archive_infos(sort_by='ts'):
             for archive_info in manifest.list_archive_infos(sort_by='ts'):
@@ -2116,7 +2117,7 @@ def sig_info_handler(signum, stack):  # pragma: no cover
             logger.info("{0} {1}/{2}".format(path, format_file_size(pos), format_file_size(total)))
             break
         if func in ('extract_item', ):  # extract op
-            path = loc['item'][b'path']
+            path = loc['item'].path
             try:
                 pos = loc['fd'].tell()
             except Exception:
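
The change above is the pattern this whole commit applies: raw msgpack dicts with bytes keys (item[b'path'], item.get(b'chunks', [])) become Item objects with str-keyed attribute access. A minimal sketch of the new API, assuming the Item class from src/borg/item.py below:

    from borg.item import Item

    item = Item(path='etc/hosts', mode=0o100644)  # construct from str-keyed kwargs
    assert item.path == 'etc/hosts'               # attribute access replaces item[b'path']
    assert 'chunks' not in item                   # membership tests use str keys
    assert item.get('nlink', 1) == 1              # dict-style .get() is still available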

+ 4 - 2
src/borg/cache.py

@@ -16,6 +16,7 @@ from .helpers import get_cache_dir
 from .helpers import decode_dict, int_to_bigint, bigint_to_int, bin_to_hex
 from .helpers import format_file_size
 from .helpers import yes
+from .item import Item
 from .key import PlaintextKey
 from .locking import UpgradableLock
 from .remote import cache_if_remote
@@ -298,8 +299,9 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                     if not isinstance(item, dict):
                         logger.error('Error: Did not get expected metadata dict - archive corrupted!')
                         continue
-                    if b'chunks' in item:
-                        for chunk_id, size, csize in item[b'chunks']:
+                    item = Item(internal_dict=item)
+                    if 'chunks' in item:
+                        for chunk_id, size, csize in item.chunks:
                             chunk_idx.add(chunk_id, 1, size, csize)
             if self.do_cache:
                 fn = mkpath(archive_id)

+ 3 - 4
src/borg/constants.py

@@ -1,10 +1,9 @@
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
-ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', b'hardlink_master',
-                 b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
-                 b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
+ITEM_KEYS = set(['path', 'source', 'rdev', 'chunks', 'hardlink_master',
+                 'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime',
+                 'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended', ])
 
 ARCHIVE_TEXT_KEYS = (b'name', b'comment', b'hostname', b'username', b'time', b'time_end')
-ITEM_TEXT_KEYS = (b'path', b'source', b'user', b'group')
 
 # default umask, overriden by --umask, defaults to read/write only for owner
 UMASK_DEFAULT = 0o077
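
ITEM_KEYS now holds str keys, and src/borg/item.py below derives Item.VALID_KEYS from it plus the synthetic 'deleted' and 'nlink' keys. A small sketch of the relationship (hypothetical session; the exact exception raised for an unknown key is not shown in this diff, so it is caught broadly here):

    from borg.constants import ITEM_KEYS
    from borg.item import Item

    assert 'path' in ITEM_KEYS                    # plain str keys, no more b'path'
    assert Item.VALID_KEYS == ITEM_KEYS | {'deleted', 'nlink'}
    try:
        Item(bogus_key=1)                         # not a valid Item key
    except Exception:                             # assumed: _check_key rejects unknown keys
        pass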

+ 36 - 36
src/borg/fuse.py

@@ -15,7 +15,7 @@ logger = create_logger()
 
 
 from .archive import Archive
 from .helpers import daemonize
-from .helpers import bigint_to_int
+from .item import Item
 from .lrucache import LRUCache
 
 # Does this version of llfuse support ns precision?
@@ -38,12 +38,13 @@ class ItemCache:
 
 
     def add(self, item):
         pos = self.fd.seek(0, io.SEEK_END)
-        self.fd.write(msgpack.packb(item))
+        self.fd.write(msgpack.packb(item.as_dict()))
         return pos + self.offset
 
     def get(self, inode):
         self.fd.seek(inode - self.offset, io.SEEK_SET)
-        return next(msgpack.Unpacker(self.fd, read_size=1024))
+        item = next(msgpack.Unpacker(self.fd, read_size=1024))
+        return Item(internal_dict=item)
 
 
 class FuseOperations(llfuse.Operations):
@@ -57,7 +58,7 @@ class FuseOperations(llfuse.Operations):
         self.items = {}
         self.parent = {}
         self.contents = defaultdict(dict)
-        self.default_dir = {b'mode': 0o40755, b'mtime': int(time.time() * 1e9), b'uid': os.getuid(), b'gid': os.getgid()}
+        self.default_dir = Item(mode=0o40755, mtime=int(time.time() * 1e9), uid=os.getuid(), gid=os.getgid())
         self.pending_archives = {}
         self.accounted_chunks = {}
         self.cache = ItemCache()
@@ -86,8 +87,9 @@ class FuseOperations(llfuse.Operations):
             _, data = self.key.decrypt(key, chunk)
             unpacker.feed(data)
             for item in unpacker:
-                segments = prefix + os.fsencode(os.path.normpath(item[b'path'])).split(b'/')
-                del item[b'path']
+                item = Item(internal_dict=item)
+                segments = prefix + os.fsencode(os.path.normpath(item.path)).split(b'/')
+                del item.path
                 num_segments = len(segments)
                 parent = 1
                 for i, segment in enumerate(segments, 1):
@@ -98,10 +100,10 @@ class FuseOperations(llfuse.Operations):
                         self.parent[archive_inode] = parent
                     # Leaf segment?
                     if i == num_segments:
-                        if b'source' in item and stat.S_ISREG(item[b'mode']):
-                            inode = self._find_inode(item[b'source'], prefix)
+                        if 'source' in item and stat.S_ISREG(item.mode):
+                            inode = self._find_inode(item.source, prefix)
                             item = self.cache.get(inode)
-                            item[b'nlink'] = item.get(b'nlink', 1) + 1
+                            item.nlink = item.get('nlink', 1) + 1
                             self.items[inode] = item
                         else:
                             inode = self.cache.add(item)
@@ -151,58 +153,56 @@ class FuseOperations(llfuse.Operations):
         item = self.get_item(inode)
         size = 0
         dsize = 0
-        try:
-            for key, chunksize, _ in item[b'chunks']:
+        if 'chunks' in item:
+            for key, chunksize, _ in item.chunks:
                 size += chunksize
                 if self.accounted_chunks.get(key, inode) == inode:
                     self.accounted_chunks[key] = inode
                     dsize += chunksize
-        except KeyError:
-            pass
         entry = llfuse.EntryAttributes()
         entry.st_ino = inode
         entry.generation = 0
         entry.entry_timeout = 300
         entry.attr_timeout = 300
-        entry.st_mode = item[b'mode']
-        entry.st_nlink = item.get(b'nlink', 1)
-        entry.st_uid = item[b'uid']
-        entry.st_gid = item[b'gid']
-        entry.st_rdev = item.get(b'rdev', 0)
+        entry.st_mode = item.mode
+        entry.st_nlink = item.get('nlink', 1)
+        entry.st_uid = item.uid
+        entry.st_gid = item.gid
+        entry.st_rdev = item.get('rdev', 0)
         entry.st_size = size
         entry.st_blksize = 512
         entry.st_blocks = dsize / 512
         # note: older archives only have mtime (not atime nor ctime)
         if have_fuse_xtime_ns:
-            entry.st_mtime_ns = bigint_to_int(item[b'mtime'])
-            if b'atime' in item:
-                entry.st_atime_ns = bigint_to_int(item[b'atime'])
+            entry.st_mtime_ns = item.mtime
+            if 'atime' in item:
+                entry.st_atime_ns = item.atime
             else:
-                entry.st_atime_ns = bigint_to_int(item[b'mtime'])
-            if b'ctime' in item:
-                entry.st_ctime_ns = bigint_to_int(item[b'ctime'])
+                entry.st_atime_ns = item.mtime
+            if 'ctime' in item:
+                entry.st_ctime_ns = item.ctime
             else:
-                entry.st_ctime_ns = bigint_to_int(item[b'mtime'])
+                entry.st_ctime_ns = item.mtime
         else:
-            entry.st_mtime = bigint_to_int(item[b'mtime']) / 1e9
-            if b'atime' in item:
-                entry.st_atime = bigint_to_int(item[b'atime']) / 1e9
+            entry.st_mtime = item.mtime / 1e9
+            if 'atime' in item:
+                entry.st_atime = item.atime / 1e9
             else:
-                entry.st_atime = bigint_to_int(item[b'mtime']) / 1e9
-            if b'ctime' in item:
-                entry.st_ctime = bigint_to_int(item[b'ctime']) / 1e9
+                entry.st_atime = item.mtime / 1e9
+            if 'ctime' in item:
+                entry.st_ctime = item.ctime / 1e9
             else:
-                entry.st_ctime = bigint_to_int(item[b'mtime']) / 1e9
+                entry.st_ctime = item.mtime / 1e9
         return entry
 
     def listxattr(self, inode, ctx=None):
         item = self.get_item(inode)
-        return item.get(b'xattrs', {}).keys()
+        return item.get('xattrs', {}).keys()
 
     def getxattr(self, inode, name, ctx=None):
         item = self.get_item(inode)
         try:
-            return item.get(b'xattrs', {})[name]
+            return item.get('xattrs', {})[name]
         except KeyError:
             raise llfuse.FUSEError(errno.ENODATA) from None
 
@@ -234,7 +234,7 @@ class FuseOperations(llfuse.Operations):
     def read(self, fh, offset, size):
         parts = []
         item = self.get_item(fh)
-        for id, s, csize in item[b'chunks']:
+        for id, s, csize in item.chunks:
             if s < offset:
                 offset -= s
                 continue
@@ -264,7 +264,7 @@ class FuseOperations(llfuse.Operations):
 
 
     def readlink(self, inode, ctx=None):
         item = self.get_item(inode)
-        return os.fsencode(item[b'source'])
+        return os.fsencode(item.source)
 
     def mount(self, mountpoint, extra_options, foreground=False):
         options = ['fsname=borgfs', 'ro']
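
The ItemCache methods above serialize an item with item.as_dict() and rehydrate it with Item(internal_dict=...). A self-contained sketch of that roundtrip (assuming borg's Item class and the msgpack calls shown in add()/get(); io.BytesIO stands in for the cache's backing file):

    import io
    import msgpack
    from borg.item import Item

    fd = io.BytesIO()
    item = Item(path='some/file', mode=0o100644, uid=0, gid=0, mtime=0)
    pos = fd.seek(0, io.SEEK_END)               # add(): append, remember the offset
    fd.write(msgpack.packb(item.as_dict()))
    fd.seek(pos, io.SEEK_SET)                   # get(): seek back, unpack one item
    item2 = Item(internal_dict=next(msgpack.Unpacker(fd, read_size=1024)))
    assert item2.path == 'some/file'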

+ 26 - 28
src/borg/helpers.py

@@ -1157,10 +1157,8 @@ class ItemFormatter:
         class FakeArchive:
             fpr = name = ""
 
-        fake_item = {
-            b'mode': 0, b'path': '', b'user': '', b'group': '', b'mtime': 0,
-            b'uid': 0, b'gid': 0,
-        }
+        from .item import Item
+        fake_item = Item(mode=0, path='', user='', group='', mtime=0, uid=0, gid=0)
         formatter = cls(FakeArchive, "")
         keys = []
         keys.extend(formatter.call_keys.keys())
@@ -1196,12 +1194,12 @@ class ItemFormatter:
             'csize': self.calculate_csize,
             'num_chunks': self.calculate_num_chunks,
             'unique_chunks': self.calculate_unique_chunks,
-            'isomtime': partial(self.format_time, b'mtime'),
-            'isoctime': partial(self.format_time, b'ctime'),
-            'isoatime': partial(self.format_time, b'atime'),
-            'mtime': partial(self.time, b'mtime'),
-            'ctime': partial(self.time, b'ctime'),
-            'atime': partial(self.time, b'atime'),
+            'isomtime': partial(self.format_time, 'mtime'),
+            'isoctime': partial(self.format_time, 'ctime'),
+            'isoatime': partial(self.format_time, 'atime'),
+            'mtime': partial(self.time, 'mtime'),
+            'ctime': partial(self.time, 'ctime'),
+            'atime': partial(self.time, 'atime'),
         }
         for hash_function in hashlib.algorithms_guaranteed:
             self.add_key(hash_function, partial(self.hash_item, hash_function))
@@ -1213,11 +1211,11 @@ class ItemFormatter:
         self.used_call_keys = set(self.call_keys) & self.format_keys
 
     def get_item_data(self, item):
-        mode = stat.filemode(item[b'mode'])
+        mode = stat.filemode(item.mode)
         item_type = mode[0]
         item_data = self.item_data
 
-        source = item.get(b'source', '')
+        source = item.get('source', '')
         extra = ''
         if source:
             source = remove_surrogates(source)
@@ -1228,16 +1226,16 @@ class ItemFormatter:
                 extra = ' link to %s' % source
         item_data['type'] = item_type
         item_data['mode'] = mode
-        item_data['user'] = item[b'user'] or item[b'uid']
-        item_data['group'] = item[b'group'] or item[b'gid']
-        item_data['uid'] = item[b'uid']
-        item_data['gid'] = item[b'gid']
-        item_data['path'] = remove_surrogates(item[b'path'])
-        item_data['bpath'] = item[b'path']
+        item_data['user'] = item.user or item.uid
+        item_data['group'] = item.group or item.gid
+        item_data['uid'] = item.uid
+        item_data['gid'] = item.gid
+        item_data['path'] = remove_surrogates(item.path)
+        item_data['bpath'] = item.path
         item_data['source'] = source
         item_data['linktarget'] = source
         item_data['extra'] = extra
-        item_data['flags'] = item.get(b'bsdflags')
+        item_data['flags'] = item.get('bsdflags')
         for key in self.used_call_keys:
             item_data[key] = self.call_keys[key](item)
         return item_data
@@ -1246,31 +1244,31 @@ class ItemFormatter:
         return self.format.format_map(self.get_item_data(item))
 
     def calculate_num_chunks(self, item):
-        return len(item.get(b'chunks', []))
+        return len(item.get('chunks', []))
 
     def calculate_unique_chunks(self, item):
         chunk_index = self.archive.cache.chunks
-        return sum(1 for c in item.get(b'chunks', []) if chunk_index[c.id].refcount == 1)
+        return sum(1 for c in item.get('chunks', []) if chunk_index[c.id].refcount == 1)
 
     def calculate_size(self, item):
-        return sum(c.size for c in item.get(b'chunks', []))
+        return sum(c.size for c in item.get('chunks', []))
 
     def calculate_csize(self, item):
-        return sum(c.csize for c in item.get(b'chunks', []))
+        return sum(c.csize for c in item.get('chunks', []))
 
     def hash_item(self, hash_function, item):
-        if b'chunks' not in item:
+        if 'chunks' not in item:
             return ""
         hash = hashlib.new(hash_function)
-        for _, data in self.archive.pipeline.fetch_many([c.id for c in item[b'chunks']]):
+        for _, data in self.archive.pipeline.fetch_many([c.id for c in item.chunks]):
             hash.update(data)
         return hash.hexdigest()
 
     def format_time(self, key, item):
-        return format_time(safe_timestamp(item.get(key) or item[b'mtime']))
+        return format_time(safe_timestamp(item.get(key) or item.mtime))
 
     def time(self, key, item):
-        return safe_timestamp(item.get(key) or item[b'mtime'])
+        return safe_timestamp(item.get(key) or item.mtime)
 
 
 class ChunkIteratorFileWrapper:
@@ -1314,7 +1312,7 @@ class ChunkIteratorFileWrapper:
 
 
 def open_item(archive, item):
     """Return file-like object for archived item (with chunks)."""
-    chunk_iterator = archive.pipeline.fetch_many([c.id for c in item[b'chunks']])
+    chunk_iterator = archive.pipeline.fetch_many([c.id for c in item.chunks])
     return ChunkIteratorFileWrapper(chunk_iterator)
 
 

+ 28 - 16
src/borg/item.py

@@ -21,25 +21,34 @@ class PropDict:
 
 
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
 
-    def __init__(self, data_dict=None, **kw):
+    def __init__(self, data_dict=None, internal_dict=None, **kw):
         if data_dict is None:
             data = kw
         elif not isinstance(data_dict, dict):
             raise TypeError("data_dict must be dict")
         else:
             data = data_dict
-        # internally, we want an dict with only str-typed keys
-        _dict = {}
-        for k, v in data.items():
+        self._dict = {}
+        self.update_internal(internal_dict or {})
+        self.update(data)
+
+    def update(self, d):
+        for k, v in d.items():
+            if isinstance(k, bytes):
+                k = k.decode()
+            setattr(self, self._check_key(k), v)
+
+    def update_internal(self, d):
+        for k, v in d.items():
             if isinstance(k, bytes):
                 k = k.decode()
-            elif not isinstance(k, str):
-                raise TypeError("dict keys must be str or bytes, not %r" % k)
-            _dict[k] = v
-        unknown_keys = set(_dict) - self.VALID_KEYS
-        if unknown_keys:
-            raise ValueError("dict contains unknown keys %s" % ','.join(unknown_keys))
-        self._dict = _dict
+            self._dict[k] = v
+
+    def __eq__(self, other):
+        return self.as_dict() == other.as_dict()
+
+    def __repr__(self):
+        return '%s(internal_dict=%r)' % (self.__class__.__name__, self._dict)
 
     def as_dict(self):
         """return the internal dictionary"""
@@ -110,7 +119,7 @@ class Item(PropDict):
     If an Item shall be serialized, give as_dict() method output to msgpack packer.
     """
 
-    VALID_KEYS = set(key.decode() for key in ITEM_KEYS)  # we want str-typed keys
+    VALID_KEYS = ITEM_KEYS | {'deleted', 'nlink', }  # str-typed keys
 
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
 
@@ -118,14 +127,14 @@ class Item(PropDict):
 
 
     path = PropDict._make_property('path', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
     source = PropDict._make_property('source', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
+    user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode)
+    group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode)
+
     acl_access = PropDict._make_property('acl_access', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
     acl_default = PropDict._make_property('acl_default', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
     acl_extended = PropDict._make_property('acl_extended', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
     acl_nfs4 = PropDict._make_property('acl_nfs4', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
 
-    user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode)
-    group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode)
-
     mode = PropDict._make_property('mode', int)
     uid = PropDict._make_property('uid', int)
     gid = PropDict._make_property('gid', int)
@@ -138,6 +147,9 @@ class Item(PropDict):
 
 
     hardlink_master = PropDict._make_property('hardlink_master', bool)
 
-    chunks = PropDict._make_property('chunks', list)
+    chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None')
 
     xattrs = PropDict._make_property('xattrs', StableDict)
+
+    deleted = PropDict._make_property('deleted', bool)
+    nlink = PropDict._make_property('nlink', int)
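
Taken together, the PropDict rework gives typed, validated properties with surrogate-escape handling for non-utf-8 names. A short sketch of the resulting behavior (mirrors the testsuite changes below; that a wrongly-typed assignment raises TypeError is an assumption based on _make_property's type argument):

    from borg.item import Item

    item = Item(path='/a/b/c', mode=0o666)
    item.nlink = 2                              # 'deleted' and 'nlink' are valid keys now
    try:
        item.path = 42                          # properties type-check assigned values
    except TypeError:
        pass
    item = Item(internal_dict={'path': b'/a/\xfc/c'})   # non-utf-8 bytes roundtrip
    assert item.path == '/a/\udcfc/c'                   # surrogate-escaped on access
    assert item.as_dict() == {'path': b'/a/\xfc/c'}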

+ 12 - 14
src/borg/platform/darwin.pyx

@@ -62,9 +62,9 @@ def acl_get(path, item, st, numeric_owner=False):
         if text == NULL:
             return
         if numeric_owner:
-            item[b'acl_extended'] = _remove_non_numeric_identifier(text)
+            item['acl_extended'] = _remove_non_numeric_identifier(text)
         else:
-            item[b'acl_extended'] = text
+            item['acl_extended'] = text
     finally:
         acl_free(text)
         acl_free(acl)
@@ -72,18 +72,16 @@ def acl_get(path, item, st, numeric_owner=False):
 
 
 def acl_set(path, item, numeric_owner=False):
     cdef acl_t acl = NULL
-    try:
+    acl_text = item.get('acl_extended')
+    if acl_text is not None:
         try:
             if numeric_owner:
-                acl = acl_from_text(item[b'acl_extended'])
+                acl = acl_from_text(acl_text)
             else:
-                acl = acl_from_text(<bytes>_remove_numeric_id_if_possible(item[b'acl_extended']))
-        except KeyError:
-            return
-        if acl == NULL:
-            return
-        if acl_set_link_np(<bytes>os.fsencode(path), ACL_TYPE_EXTENDED, acl):
-            return
-    finally:
-        acl_free(acl)
-
+                acl = acl_from_text(<bytes>_remove_numeric_id_if_possible(acl_text))
+            if acl == NULL:
+                return
+            if acl_set_link_np(<bytes>os.fsencode(path), ACL_TYPE_EXTENDED, acl):
+                return
+        finally:
+            acl_free(acl)

+ 6 - 6
src/borg/platform/freebsd.pyx

@@ -57,10 +57,10 @@ def acl_get(path, item, st, numeric_owner=False):
         return
     flags |= ACL_TEXT_NUMERIC_IDS if numeric_owner else 0
     if ret > 0:
-        _get_acl(p, ACL_TYPE_NFS4, item, b'acl_nfs4', flags)
+        _get_acl(p, ACL_TYPE_NFS4, item, 'acl_nfs4', flags)
     else:
-        _get_acl(p, ACL_TYPE_ACCESS, item, b'acl_access', flags)
-        _get_acl(p, ACL_TYPE_DEFAULT, item, b'acl_default', flags)
+        _get_acl(p, ACL_TYPE_ACCESS, item, 'acl_access', flags)
+        _get_acl(p, ACL_TYPE_DEFAULT, item, 'acl_default', flags)
 
 
 cdef _set_acl(p, type, item, attribute, numeric_owner=False):
@@ -98,6 +98,6 @@ def acl_set(path, item, numeric_owner=False):
     of the user/group names
     """
     p = os.fsencode(path)
-    _set_acl(p, ACL_TYPE_NFS4, item, b'acl_nfs4', numeric_owner)
-    _set_acl(p, ACL_TYPE_ACCESS, item, b'acl_access', numeric_owner)
-    _set_acl(p, ACL_TYPE_DEFAULT, item, b'acl_default', numeric_owner)
+    _set_acl(p, ACL_TYPE_NFS4, item, 'acl_nfs4', numeric_owner)
+    _set_acl(p, ACL_TYPE_ACCESS, item, 'acl_access', numeric_owner)
+    _set_acl(p, ACL_TYPE_DEFAULT, item, 'acl_default', numeric_owner)

+ 4 - 4
src/borg/platform/linux.pyx

@@ -171,12 +171,12 @@ def acl_get(path, item, st, numeric_owner=False):
         if access_acl:
             access_text = acl_to_text(access_acl, NULL)
             if access_text:
-                item[b'acl_access'] = converter(access_text)
+                item['acl_access'] = converter(access_text)
         default_acl = acl_get_file(p, ACL_TYPE_DEFAULT)
         if default_acl:
             default_text = acl_to_text(default_acl, NULL)
             if default_text:
-                item[b'acl_default'] = converter(default_text)
+                item['acl_default'] = converter(default_text)
     finally:
         acl_free(default_text)
         acl_free(default_acl)
@@ -193,8 +193,8 @@ def acl_set(path, item, numeric_owner=False):
         converter = posix_acl_use_stored_uid_gid
     else:
         converter = acl_use_local_uid_gid
-    access_text = item.get(b'acl_access')
-    default_text = item.get(b'acl_default')
+    access_text = item.get('acl_access')
+    default_text = item.get('acl_default')
     if access_text:
         try:
             access_acl = acl_from_text(<bytes>converter(access_text))

+ 8 - 7
src/borg/testsuite/archive.py

@@ -7,6 +7,7 @@ import pytest
 import msgpack
 
 from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, Statistics
+from ..item import Item
 from ..key import PlaintextKey
 from ..helpers import Manifest
 from . import BaseTestCase
@@ -38,12 +39,12 @@ def tests_stats_progress(stats, columns=80):
 
 
     out = StringIO()
     stats.update(10**3, 0, unique=False)
-    stats.show_progress(item={b'path': 'foo'}, final=False, stream=out)
+    stats.show_progress(item=Item(path='foo'), final=False, stream=out)
     s = '1.02 kB O 10 B C 10 B D 0 N foo'
     buf = ' ' * (columns - len(s))
     assert out.getvalue() == s + buf + "\r"
     out = StringIO()
-    stats.show_progress(item={b'path': 'foo'*40}, final=False, stream=out)
+    stats.show_progress(item=Item(path='foo'*40), final=False, stream=out)
     s = '1.02 kB O 10 B C 10 B D 0 N foofoofoofoofoofoofoofo...oofoofoofoofoofoofoofoofoo'
     buf = ' ' * (columns - len(s))
     assert out.getvalue() == s + buf + "\r"
@@ -93,7 +94,7 @@ class ArchiveTimestampTestCase(BaseTestCase):
 class ChunkBufferTestCase(BaseTestCase):
 
     def test(self):
-        data = [{b'foo': 1}, {b'bar': 2}]
+        data = [Item(path='p1'), Item(path='p2')]
         cache = MockCache()
         key = PlaintextKey(None)
         chunks = CacheChunkBuffer(cache, key, None)
@@ -105,11 +106,11 @@ class ChunkBufferTestCase(BaseTestCase):
         unpacker = msgpack.Unpacker()
         for id in chunks.chunks:
             unpacker.feed(cache.objects[id])
-        self.assert_equal(data, list(unpacker))
+        self.assert_equal(data, [Item(internal_dict=d) for d in unpacker])
 
     def test_partial(self):
-        big = b"0123456789" * 10000
-        data = [{b'full': 1, b'data': big}, {b'partial': 2, b'data': big}]
+        big = "0123456789" * 10000
+        data = [Item(path='full', source=big), Item(path='partial', source=big)]
         cache = MockCache()
         key = PlaintextKey(None)
         chunks = CacheChunkBuffer(cache, key, None)
@@ -126,7 +127,7 @@ class ChunkBufferTestCase(BaseTestCase):
         unpacker = msgpack.Unpacker()
         for id in chunks.chunks:
             unpacker.feed(cache.objects[id])
-        self.assert_equal(data, list(unpacker))
+        self.assert_equal(data, [Item(internal_dict=d) for d in unpacker])
 
 
 class RobustUnpackerTestCase(BaseTestCase):

+ 4 - 4
src/borg/testsuite/archiver.py

@@ -1641,8 +1641,8 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
         archive, repository = self.open_archive('archive1')
         with repository:
             for item in archive.iter_items():
-                if item[b'path'].endswith('testsuite/archiver.py'):
-                    repository.delete(item[b'chunks'][-1].id)
+                if item.path.endswith('testsuite/archiver.py'):
+                    repository.delete(item.chunks[-1].id)
                     break
             repository.commit()
         self.cmd('check', self.repository_location, exit_code=1)
@@ -1696,8 +1696,8 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
         archive, repository = self.open_archive('archive1')
         with repository:
             for item in archive.iter_items():
-                if item[b'path'].endswith('testsuite/archiver.py'):
-                    chunk = item[b'chunks'][-1]
+                if item.path.endswith('testsuite/archiver.py'):
+                    chunk = item.chunks[-1]
                     data = repository.get(chunk.id) + b'1234'
                     repository.put(chunk.id, data)
                     break

+ 4 - 4
src/borg/testsuite/item.py

@@ -35,13 +35,13 @@ def test_item_empty():
 
 
 def test_item_from_dict():
     # does not matter whether we get str or bytes keys
-    item = Item({b'path': b'/a/b/c', b'mode': 0o666})
+    item = Item({b'path': '/a/b/c', b'mode': 0o666})
     assert item.path == '/a/b/c'
     assert item.mode == 0o666
     assert 'path' in item
 
     # does not matter whether we get str or bytes keys
-    item = Item({'path': b'/a/b/c', 'mode': 0o666})
+    item = Item({'path': '/a/b/c', 'mode': 0o666})
     assert item.path == '/a/b/c'
     assert item.mode == 0o666
     assert 'mode' in item
@@ -60,7 +60,7 @@ def test_item_from_dict():
 
 
 
 
 def test_item_from_kw():
-    item = Item(path=b'/a/b/c', mode=0o666)
+    item = Item(path='/a/b/c', mode=0o666)
     assert item.path == '/a/b/c'
     assert item.mode == 0o666
 
@@ -107,7 +107,7 @@ def test_item_se_str_property():
         item.path = 42
 
     # non-utf-8 path, needing surrogate-escaping for latin-1 u-umlaut
-    item = Item({'path': b'/a/\xfc/c'})
+    item = Item(internal_dict={'path': b'/a/\xfc/c'})
     assert item.path == '/a/\udcfc/c'  # getting a surrogate-escaped representation
     assert item.as_dict() == {'path': b'/a/\xfc/c'}
     del item.path

+ 17 - 17
src/borg/testsuite/platform.py

@@ -51,26 +51,26 @@ class PlatformLinuxTestCase(BaseTestCase):
         return item
 
     def set_acl(self, path, access=None, default=None, numeric_owner=False):
-        item = {b'acl_access': access, b'acl_default': default}
+        item = {'acl_access': access, 'acl_default': default}
         acl_set(path, item, numeric_owner=numeric_owner)
 
     def test_access_acl(self):
         file = tempfile.NamedTemporaryFile()
         self.assert_equal(self.get_acl(file.name), {})
         self.set_acl(file.name, access=b'user::rw-\ngroup::r--\nmask::rw-\nother::---\nuser:root:rw-:9999\ngroup:root:rw-:9999\n', numeric_owner=False)
-        self.assert_in(b'user:root:rw-:0', self.get_acl(file.name)[b'acl_access'])
-        self.assert_in(b'group:root:rw-:0', self.get_acl(file.name)[b'acl_access'])
-        self.assert_in(b'user:0:rw-:0', self.get_acl(file.name, numeric_owner=True)[b'acl_access'])
+        self.assert_in(b'user:root:rw-:0', self.get_acl(file.name)['acl_access'])
+        self.assert_in(b'group:root:rw-:0', self.get_acl(file.name)['acl_access'])
+        self.assert_in(b'user:0:rw-:0', self.get_acl(file.name, numeric_owner=True)['acl_access'])
         file2 = tempfile.NamedTemporaryFile()
         self.set_acl(file2.name, access=b'user::rw-\ngroup::r--\nmask::rw-\nother::---\nuser:root:rw-:9999\ngroup:root:rw-:9999\n', numeric_owner=True)
-        self.assert_in(b'user:9999:rw-:9999', self.get_acl(file2.name)[b'acl_access'])
-        self.assert_in(b'group:9999:rw-:9999', self.get_acl(file2.name)[b'acl_access'])
+        self.assert_in(b'user:9999:rw-:9999', self.get_acl(file2.name)['acl_access'])
+        self.assert_in(b'group:9999:rw-:9999', self.get_acl(file2.name)['acl_access'])
 
     def test_default_acl(self):
         self.assert_equal(self.get_acl(self.tmpdir), {})
         self.set_acl(self.tmpdir, access=ACCESS_ACL, default=DEFAULT_ACL)
-        self.assert_equal(self.get_acl(self.tmpdir)[b'acl_access'], ACCESS_ACL)
-        self.assert_equal(self.get_acl(self.tmpdir)[b'acl_default'], DEFAULT_ACL)
+        self.assert_equal(self.get_acl(self.tmpdir)['acl_access'], ACCESS_ACL)
+        self.assert_equal(self.get_acl(self.tmpdir)['acl_default'], DEFAULT_ACL)
 
     def test_non_ascii_acl(self):
         # Testing non-ascii ACL processing to see whether our code is robust.
@@ -86,18 +86,18 @@ class PlatformLinuxTestCase(BaseTestCase):
         group_entry_numeric = 'group:666:rw-:666'.encode('ascii')
         acl = b'\n'.join([nothing_special, user_entry, group_entry])
         self.set_acl(file.name, access=acl, numeric_owner=False)
-        acl_access = self.get_acl(file.name, numeric_owner=False)[b'acl_access']
+        acl_access = self.get_acl(file.name, numeric_owner=False)['acl_access']
         self.assert_in(user_entry, acl_access)
         self.assert_in(group_entry, acl_access)
-        acl_access_numeric = self.get_acl(file.name, numeric_owner=True)[b'acl_access']
+        acl_access_numeric = self.get_acl(file.name, numeric_owner=True)['acl_access']
         self.assert_in(user_entry_numeric, acl_access_numeric)
         self.assert_in(group_entry_numeric, acl_access_numeric)
         file2 = tempfile.NamedTemporaryFile()
         self.set_acl(file2.name, access=acl, numeric_owner=True)
-        acl_access = self.get_acl(file2.name, numeric_owner=False)[b'acl_access']
+        acl_access = self.get_acl(file2.name, numeric_owner=False)['acl_access']
         self.assert_in(user_entry, acl_access)
         self.assert_in(group_entry, acl_access)
-        acl_access_numeric = self.get_acl(file.name, numeric_owner=True)[b'acl_access']
+        acl_access_numeric = self.get_acl(file.name, numeric_owner=True)['acl_access']
         self.assert_in(user_entry_numeric, acl_access_numeric)
         self.assert_in(group_entry_numeric, acl_access_numeric)
 
@@ -125,7 +125,7 @@ class PlatformDarwinTestCase(BaseTestCase):
         return item
 
     def set_acl(self, path, acl, numeric_owner=False):
-        item = {b'acl_extended': acl}
+        item = {'acl_extended': acl}
         acl_set(path, item, numeric_owner=numeric_owner)
 
     def test_access_acl(self):
@@ -133,11 +133,11 @@ class PlatformDarwinTestCase(BaseTestCase):
         file2 = tempfile.NamedTemporaryFile()
         self.assert_equal(self.get_acl(file.name), {})
         self.set_acl(file.name, b'!#acl 1\ngroup:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:staff:0:allow:read\nuser:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read\n', numeric_owner=False)
-        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000014:staff:20:allow:read', self.get_acl(file.name)[b'acl_extended'])
-        self.assert_in(b'user:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read', self.get_acl(file.name)[b'acl_extended'])
+        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000014:staff:20:allow:read', self.get_acl(file.name)['acl_extended'])
+        self.assert_in(b'user:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read', self.get_acl(file.name)['acl_extended'])
         self.set_acl(file2.name, b'!#acl 1\ngroup:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:staff:0:allow:read\nuser:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read\n', numeric_owner=True)
-        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:wheel:0:allow:read', self.get_acl(file2.name)[b'acl_extended'])
-        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000::0:allow:read', self.get_acl(file2.name, numeric_owner=True)[b'acl_extended'])
+        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:wheel:0:allow:read', self.get_acl(file2.name)['acl_extended'])
+        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000::0:allow:read', self.get_acl(file2.name, numeric_owner=True)['acl_extended'])
 
 
 @unittest.skipUnless(sys.platform.startswith(('linux', 'freebsd', 'darwin')), 'POSIX only tests')