Merge pull request #1117 from ThomasWaldmann/items-refactor

refactor to use Item class
TW 9 years ago
parent
commit
504af0206d
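
The diff below replaces raw msgpack dicts (bytes keys, `item[b'path']`) with the new `Item` class (str-keyed, typed properties, `item.path`) across the codebase. A minimal sketch of the access-pattern change, assuming the `src/borg` tree is importable as `borg`:

```python
from borg.item import Item  # added by this PR; assumes src/borg on sys.path

# before: plain dicts straight out of msgpack, with bytes keys
old_item = {b'path': b'etc/hostname', b'mode': 0o100644}
assert b'chunks' not in old_item and old_item[b'path'] == b'etc/hostname'

# after: Item wraps that dict; keys become str, values get typed properties
new_item = Item(internal_dict=old_item)
assert 'chunks' not in new_item          # membership tests keep working
assert new_item.path == 'etc/hostname'   # decoded to str on access
assert new_item.mode == 0o100644
```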

+ 115 - 118
src/borg/archive.py

@@ -33,6 +33,7 @@ from .helpers import ProgressIndicatorPercent, log_multi
 from .helpers import PathPrefixPattern, FnmatchPattern
 from .helpers import consume
 from .helpers import CompressionDecider1, CompressionDecider2, CompressionSpec
+from .item import Item
 from .key import key_factory
 from .platform import acl_get, acl_set, set_flags, get_flags, swidth
 from .remote import cache_if_remote
@@ -86,7 +87,7 @@ class Statistics:
             columns, lines = get_terminal_size()
             if not final:
                 msg = '{0.osize_fmt} O {0.csize_fmt} C {0.usize_fmt} D {0.nfiles} N '.format(self)
-                path = remove_surrogates(item[b'path']) if item else ''
+                path = remove_surrogates(item.path) if item else ''
                 space = columns - swidth(msg)
                 if space < swidth('...') + swidth(path):
                     path = '%s...%s' % (path[:(space // 2) - swidth('...')], path[-space // 2:])
@@ -106,16 +107,16 @@ class DownloadPipeline:
         unpacker = msgpack.Unpacker(use_list=False)
         for _, data in self.fetch_many(ids):
             unpacker.feed(data)
-            items = [decode_dict(item, ITEM_TEXT_KEYS) for item in unpacker]
+            items = [Item(internal_dict=item) for item in unpacker]
             if filter:
                 items = [item for item in items if filter(item)]
             for item in items:
-                if b'chunks' in item:
-                    item[b'chunks'] = [ChunkListEntry(*e) for e in item[b'chunks']]
+                if 'chunks' in item:
+                    item.chunks = [ChunkListEntry(*e) for e in item.chunks]
             if preload:
                 for item in items:
-                    if b'chunks' in item:
-                        self.repository.preload([c.id for c in item[b'chunks']])
+                    if 'chunks' in item:
+                        self.repository.preload([c.id for c in item.chunks])
             for item in items:
                 yield item
 
@@ -135,7 +136,7 @@ class ChunkBuffer:
         self.chunker = Chunker(self.key.chunk_seed, *chunker_params)
 
     def add(self, item):
-        self.buffer.write(self.packer.pack(StableDict(item)))
+        self.buffer.write(self.packer.pack(item.as_dict()))
         if self.is_full():
             self.flush()
 
@@ -286,9 +287,6 @@ Number of files: {0.stats.nfiles}'''.format(
             yield item
 
     def add_item(self, item):
-        unknown_keys = set(item) - ITEM_KEYS
-        assert not unknown_keys, ('unknown item metadata keys detected, please update constants.ITEM_KEYS: %s',
-                                  ','.join(k.decode('ascii') for k in unknown_keys))
         if self.show_progress:
             self.stats.show_progress(item=item, dt=0.2)
         self.items_buffer.add(item)
@@ -356,9 +354,10 @@ Number of files: {0.stats.nfiles}'''.format(
             _, data = self.key.decrypt(id, chunk)
             unpacker.feed(data)
             for item in unpacker:
-                if b'chunks' in item:
+                item = Item(internal_dict=item)
+                if 'chunks' in item:
                     stats.nfiles += 1
-                    add_file_chunks(item[b'chunks'])
+                    add_file_chunks(item.chunks)
         cache.rollback()
         return stats
 
@@ -373,22 +372,22 @@ Number of files: {0.stats.nfiles}'''.format(
         :param stdout: write extracted data to stdout
         :param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
         :param hardlink_masters: maps paths to (chunks, link_target) for extracting subtrees with hardlinks correctly
-        :param original_path: b'path' key as stored in archive
+        :param original_path: 'path' key as stored in archive
         """
         if dry_run or stdout:
-            if b'chunks' in item:
-                for _, data in self.pipeline.fetch_many([c.id for c in item[b'chunks']], is_preloaded=True):
+            if 'chunks' in item:
+                for _, data in self.pipeline.fetch_many([c.id for c in item.chunks], is_preloaded=True):
                     if stdout:
                         sys.stdout.buffer.write(data)
                 if stdout:
                     sys.stdout.buffer.flush()
             return
 
-        original_path = original_path or item[b'path']
+        original_path = original_path or item.path
         dest = self.cwd
-        if item[b'path'].startswith('/') or item[b'path'].startswith('..'):
+        if item.path.startswith(('/', '..')):
             raise Exception('Path should be relative and local')
-        path = os.path.join(dest, item[b'path'])
+        path = os.path.join(dest, item.path)
         # Attempt to remove existing files, ignore errors on failure
         try:
             st = os.lstat(path)
@@ -400,27 +399,27 @@ Number of files: {0.stats.nfiles}'''.format(
             raise self.IncompatibleFilesystemEncodingError(path, sys.getfilesystemencoding()) from None
         except OSError:
             pass
-        mode = item[b'mode']
+        mode = item.mode
         if stat.S_ISREG(mode):
             if not os.path.exists(os.path.dirname(path)):
                 os.makedirs(os.path.dirname(path))
 
             # Hard link?
-            if b'source' in item:
-                source = os.path.join(dest, item[b'source'])
+            if 'source' in item:
+                source = os.path.join(dest, item.source)
                 if os.path.exists(path):
                     os.unlink(path)
                 if not hardlink_masters:
                     os.link(source, path)
                     return
-                item[b'chunks'], link_target = hardlink_masters[item[b'source']]
+                item.chunks, link_target = hardlink_masters[item.source]
                 if link_target:
                     # Hard link was extracted previously, just link
                     os.link(link_target, path)
                     return
                 # Extract chunks, since the item which had the chunks was not extracted
             with open(path, 'wb') as fd:
-                ids = [c.id for c in item[b'chunks']]
+                ids = [c.id for c in item.chunks]
                 for _, data in self.pipeline.fetch_many(ids, is_preloaded=True):
                     if sparse and self.zeros.startswith(data):
                         # all-zero chunk: create a hole in a sparse file
@@ -433,7 +432,7 @@ Number of files: {0.stats.nfiles}'''.format(
                 self.restore_attrs(path, item, fd=fd.fileno())
             if hardlink_masters:
                 # Update master entry with extracted file path, so that following hardlinks don't extract twice.
-                hardlink_masters[item.get(b'source') or original_path] = (None, path)
+                hardlink_masters[item.get('source') or original_path] = (None, path)
         elif stat.S_ISDIR(mode):
             if not os.path.exists(path):
                 os.makedirs(path)
@@ -442,7 +441,7 @@ Number of files: {0.stats.nfiles}'''.format(
         elif stat.S_ISLNK(mode):
             if not os.path.exists(os.path.dirname(path)):
                 os.makedirs(os.path.dirname(path))
-            source = item[b'source']
+            source = item.source
             if os.path.exists(path):
                 os.unlink(path)
             try:
@@ -456,18 +455,18 @@ Number of files: {0.stats.nfiles}'''.format(
             os.mkfifo(path)
             self.restore_attrs(path, item)
         elif stat.S_ISCHR(mode) or stat.S_ISBLK(mode):
-            os.mknod(path, item[b'mode'], item[b'rdev'])
+            os.mknod(path, item.mode, item.rdev)
             self.restore_attrs(path, item)
         else:
-            raise Exception('Unknown archive item type %r' % item[b'mode'])
+            raise Exception('Unknown archive item type %r' % item.mode)
 
     def restore_attrs(self, path, item, symlink=False, fd=None):
         uid = gid = None
         if not self.numeric_owner:
-            uid = user2uid(item[b'user'])
-            gid = group2gid(item[b'group'])
-        uid = item[b'uid'] if uid is None else uid
-        gid = item[b'gid'] if gid is None else gid
+            uid = user2uid(item.user)
+            gid = group2gid(item.group)
+        uid = item.uid if uid is None else uid
+        gid = item.gid if gid is None else gid
         # This code is a bit of a mess due to os specific differences
         try:
             if fd:
@@ -477,14 +476,14 @@ Number of files: {0.stats.nfiles}'''.format(
         except OSError:
             pass
         if fd:
-            os.fchmod(fd, item[b'mode'])
+            os.fchmod(fd, item.mode)
         elif not symlink:
-            os.chmod(path, item[b'mode'])
+            os.chmod(path, item.mode)
         elif has_lchmod:  # Not available on Linux
-            os.lchmod(path, item[b'mode'])
-        mtime = bigint_to_int(item[b'mtime'])
-        if b'atime' in item:
-            atime = bigint_to_int(item[b'atime'])
+            os.lchmod(path, item.mode)
+        mtime = item.mtime
+        if 'atime' in item:
+            atime = item.atime
         else:
             # old archives only had mtime in item metadata
             atime = mtime
@@ -493,14 +492,14 @@ Number of files: {0.stats.nfiles}'''.format(
         else:
             os.utime(path, None, ns=(atime, mtime), follow_symlinks=False)
         acl_set(path, item, self.numeric_owner)
-        if b'bsdflags' in item:
+        if 'bsdflags' in item:
             try:
-                set_flags(path, item[b'bsdflags'], fd=fd)
+                set_flags(path, item.bsdflags, fd=fd)
             except OSError:
                 pass
         # chown removes Linux capabilities, so set the extended attributes at the end, after chown, since they include
         # the Linux capabilities in the "security.capability" attribute.
-        xattrs = item.get(b'xattrs', {})
+        xattrs = item.get('xattrs', {})
         for k, v in xattrs.items():
             try:
                 xattr.setxattr(fd or path, k, v, follow_symlinks=False)
@@ -541,8 +540,9 @@ Number of files: {0.stats.nfiles}'''.format(
             unpacker.feed(data)
             self.cache.chunk_decref(items_id, stats)
             for item in unpacker:
-                if b'chunks' in item:
-                    for chunk_id, size, csize in item[b'chunks']:
+                item = Item(internal_dict=item)
+                if 'chunks' in item:
+                    for chunk_id, size, csize in item.chunks:
                         self.cache.chunk_decref(chunk_id, stats)
         if progress:
             pi.finish()
@@ -550,39 +550,39 @@ Number of files: {0.stats.nfiles}'''.format(
         del self.manifest.archives[self.name]
 
     def stat_attrs(self, st, path):
-        item = {
-            b'mode': st.st_mode,
-            b'uid': st.st_uid, b'user': uid2user(st.st_uid),
-            b'gid': st.st_gid, b'group': gid2group(st.st_gid),
-            b'atime': int_to_bigint(st.st_atime_ns),
-            b'ctime': int_to_bigint(st.st_ctime_ns),
-            b'mtime': int_to_bigint(st.st_mtime_ns),
-        }
+        attrs = dict(
+            mode=st.st_mode,
+            uid=st.st_uid, user=uid2user(st.st_uid),
+            gid=st.st_gid, group=gid2group(st.st_gid),
+            atime=st.st_atime_ns,
+            ctime=st.st_ctime_ns,
+            mtime=st.st_mtime_ns,
+        )
         if self.numeric_owner:
-            item[b'user'] = item[b'group'] = None
+            attrs['user'] = attrs['group'] = None
         xattrs = xattr.get_all(path, follow_symlinks=False)
         if xattrs:
-            item[b'xattrs'] = StableDict(xattrs)
+            attrs['xattrs'] = StableDict(xattrs)
         bsdflags = get_flags(path, st)
         if bsdflags:
-            item[b'bsdflags'] = bsdflags
-        acl_get(path, item, st, self.numeric_owner)
-        return item
+            attrs['bsdflags'] = bsdflags
+        acl_get(path, attrs, st, self.numeric_owner)
+        return attrs
 
     def process_dir(self, path, st):
-        item = {b'path': make_path_safe(path)}
+        item = Item(path=make_path_safe(path))
         item.update(self.stat_attrs(st, path))
         self.add_item(item)
         return 'd'  # directory
 
     def process_fifo(self, path, st):
-        item = {b'path': make_path_safe(path)}
+        item = Item(path=make_path_safe(path))
         item.update(self.stat_attrs(st, path))
         self.add_item(item)
         return 'f'  # fifo
 
     def process_dev(self, path, st):
-        item = {b'path': make_path_safe(path), b'rdev': st.st_rdev}
+        item = Item(path=make_path_safe(path), rdev=st.st_rdev)
         item.update(self.stat_attrs(st, path))
         self.add_item(item)
         if stat.S_ISCHR(st.st_mode):
@@ -592,7 +592,7 @@ Number of files: {0.stats.nfiles}'''.format(
 
     def process_symlink(self, path, st):
         source = os.readlink(path)
-        item = {b'path': make_path_safe(path), b'source': source}
+        item = Item(path=make_path_safe(path), source=source)
         item.update(self.stat_attrs(st, path))
         self.add_item(item)
         return 's'  # symlink
@@ -604,15 +604,15 @@ Number of files: {0.stats.nfiles}'''.format(
         for data in self.chunker.chunkify(fd):
             chunks.append(cache.add_chunk(self.key.id_hash(data), Chunk(data), self.stats))
         self.stats.nfiles += 1
-        t = int_to_bigint(int(time.time()) * 1000000000)
-        item = {
-            b'path': path,
-            b'chunks': chunks,
-            b'mode': 0o100660,  # regular file, ug=rw
-            b'uid': uid, b'user': uid2user(uid),
-            b'gid': gid, b'group': gid2group(gid),
-            b'mtime': t, b'atime': t, b'ctime': t,
-        }
+        t = int(time.time()) * 1000000000
+        item = Item(
+            path=path,
+            chunks=chunks,
+            mode=0o100660,  # regular file, ug=rw
+            uid=uid, user=uid2user(uid),
+            gid=gid, group=gid2group(gid),
+            mtime=t, atime=t, ctime=t,
+        )
         self.add_item(item)
         return 'i'  # stdin
 
@@ -623,11 +623,8 @@ Number of files: {0.stats.nfiles}'''.format(
         if st.st_nlink > 1:
             source = self.hard_links.get((st.st_ino, st.st_dev))
             if (st.st_ino, st.st_dev) in self.hard_links:
-                item = self.stat_attrs(st, path)
-                item.update({
-                    b'path': safe_path,
-                    b'source': source,
-                })
+                item = Item(path=safe_path, source=source)
+                item.update(self.stat_attrs(st, path))
                 self.add_item(item)
                 status = 'h'  # regular file, hardlink (to already seen inodes)
                 return status
@@ -649,10 +646,10 @@ Number of files: {0.stats.nfiles}'''.format(
                 status = 'U'  # regular file, unchanged
         else:
             status = 'A'  # regular file, added
-        item = {
-            b'path': safe_path,
-            b'hardlink_master': st.st_nlink > 1,  # item is a hard link and has the chunks
-        }
+        item = Item(
+            path=safe_path,
+            hardlink_master=st.st_nlink > 1,  # item is a hard link and has the chunks
+        )
         # Only chunkify the file if needed
         if chunks is None:
             compress = self.compression_decider1.decide(path)
@@ -668,7 +665,7 @@ Number of files: {0.stats.nfiles}'''.format(
                         self.stats.show_progress(item=item, dt=0.2)
             cache.memorize_file(path_hash, st, [c.id for c in chunks])
             status = status or 'M'  # regular file, modified (if not 'A' already)
-        item[b'chunks'] = chunks
+        item.chunks = chunks
         item.update(self.stat_attrs(st, path))
         self.stats.nfiles += 1
         self.add_item(item)
@@ -698,7 +695,7 @@ class RobustUnpacker:
     """
     def __init__(self, validator):
         super().__init__()
-        self.item_keys = [msgpack.packb(name) for name in ITEM_KEYS]
+        self.item_keys = [msgpack.packb(name.encode()) for name in ITEM_KEYS]
         self.validator = validator
         self._buffered_data = []
         self._resync = False
@@ -894,10 +891,10 @@ class ArchiveChecker:
             """
             offset = 0
             chunk_list = []
-            for chunk_id, size, csize in item[b'chunks']:
+            for chunk_id, size, csize in item.chunks:
                 if chunk_id not in self.chunks:
                     # If a file chunk is missing, create an all empty replacement chunk
-                    logger.error('{}: Missing file chunk detected (Byte {}-{})'.format(safe_decode(item[b'path']), offset, offset + size))
+                    logger.error('{}: Missing file chunk detected (Byte {}-{})'.format(item.path, offset, offset + size))
                     self.error_found = True
                     data = bytes(size)
                     chunk_id = self.key.id_hash(data)
@@ -908,14 +905,14 @@ class ArchiveChecker:
                     add_reference(chunk_id, size, csize)
                 chunk_list.append((chunk_id, size, csize))
                 offset += size
-            item[b'chunks'] = chunk_list
+            item.chunks = chunk_list
 
         def robust_iterator(archive):
             """Iterates through all archive items
 
             Missing item chunks will be skipped and the msgpack stream will be restarted
             """
-            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and b'path' in item)
+            unpacker = RobustUnpacker(lambda item: isinstance(item, dict) and 'path' in item)
             _state = 0
 
             def missing_chunk_detector(chunk_id):
@@ -946,7 +943,7 @@ class ArchiveChecker:
                     try:
                         for item in unpacker:
                             if isinstance(item, dict):
-                                yield item
+                                yield Item(internal_dict=item)
                             else:
                                 report('Did not get expected metadata dict when unpacking item metadata', chunk_id, i)
                     except Exception:
@@ -990,7 +987,7 @@ class ArchiveChecker:
                 items_buffer = ChunkBuffer(self.key)
                 items_buffer.write_chunk = add_callback
                 for item in robust_iterator(archive):
-                    if b'chunks' in item:
+                    if 'chunks' in item:
                         verify_file_chunks(item)
                     items_buffer.add(item)
                 items_buffer.flush(flush=True)
@@ -1093,38 +1090,38 @@ class ArchiveRecreater:
 
         def item_is_hardlink_master(item):
             return (target_is_subset and
-                    stat.S_ISREG(item[b'mode']) and
-                    item.get(b'hardlink_master', True) and
-                    b'source' not in item and
-                    not matcher.match(item[b'path']))
+                    stat.S_ISREG(item.mode) and
+                    item.get('hardlink_master', True) and
+                    'source' not in item and
+                    not matcher.match(item.path))
 
         for item in archive.iter_items():
             if item_is_hardlink_master(item):
                 # Re-visit all of these items in the archive even when fast-forwarding to rebuild hardlink_masters
-                hardlink_masters[item[b'path']] = (item.get(b'chunks'), None)
+                hardlink_masters[item.path] = (item.get('chunks'), None)
                 continue
             if resume_from:
                 # Fast forward to after the last processed file
-                if item[b'path'] == resume_from:
-                    logger.info('Fast-forwarded to %s', remove_surrogates(item[b'path']))
+                if item.path == resume_from:
+                    logger.info('Fast-forwarded to %s', remove_surrogates(item.path))
                     resume_from = None
                 continue
-            if not matcher.match(item[b'path']):
-                self.print_file_status('x', item[b'path'])
+            if not matcher.match(item.path):
+                self.print_file_status('x', item.path)
                 continue
-            if target_is_subset and stat.S_ISREG(item[b'mode']) and item.get(b'source') in hardlink_masters:
+            if target_is_subset and stat.S_ISREG(item.mode) and item.get('source') in hardlink_masters:
                 # master of this hard link is outside the target subset
-                chunks, new_source = hardlink_masters[item[b'source']]
+                chunks, new_source = hardlink_masters[item.source]
                 if new_source is None:
                     # First item to use this master, move the chunks
-                    item[b'chunks'] = chunks
-                    hardlink_masters[item[b'source']] = (None, item[b'path'])
-                    del item[b'source']
+                    item.chunks = chunks
+                    hardlink_masters[item.source] = (None, item.path)
+                    del item.source
                 else:
                     # Master was already moved, only update this item's source
-                    item[b'source'] = new_source
+                    item.source = new_source
             if self.dry_run:
-                self.print_file_status('-', item[b'path'])
+                self.print_file_status('-', item.path)
             else:
                 try:
                     self.process_item(archive, target, item)
@@ -1136,11 +1133,11 @@ class ArchiveRecreater:
             target.stats.show_progress(final=True)
 
     def process_item(self, archive, target, item):
-        if b'chunks' in item:
-            item[b'chunks'] = self.process_chunks(archive, target, item)
+        if 'chunks' in item:
+            item.chunks = self.process_chunks(archive, target, item)
             target.stats.nfiles += 1
         target.add_item(item)
-        self.print_file_status(file_status(item[b'mode']), item[b'path'])
+        self.print_file_status(file_status(item.mode), item.path)
         if self.interrupt:
             raise self.Interrupted
 
@@ -1148,9 +1145,9 @@ class ArchiveRecreater:
         """Return new chunk ID list for 'item'."""
         # TODO: support --compression-from
         if not self.recompress and not target.recreate_rechunkify:
-            for chunk_id, size, csize in item[b'chunks']:
+            for chunk_id, size, csize in item.chunks:
                 self.cache.chunk_incref(chunk_id, target.stats)
-            return item[b'chunks']
+            return item.chunks
         new_chunks = self.process_partial_chunks(target)
         chunk_iterator = self.create_chunk_iterator(archive, target, item)
         consume(chunk_iterator, len(new_chunks))
@@ -1181,7 +1178,7 @@ class ArchiveRecreater:
 
     def create_chunk_iterator(self, archive, target, item):
         """Return iterator of chunks to store for 'item' from 'archive' in 'target'."""
-        chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in item[b'chunks']])
+        chunk_iterator = archive.pipeline.fetch_many([chunk_id for chunk_id, _, _ in item.chunks])
         if target.recreate_rechunkify:
             # The target.chunker will read the file contents through ChunkIteratorFileWrapper chunk-by-chunk
             # (does not load the entire file into memory)
@@ -1243,7 +1240,7 @@ class ArchiveRecreater:
         """Add excludes to the matcher created by exclude_cache and exclude_if_present."""
         def exclude(dir, tag_item):
             if self.keep_tag_files:
-                tag_files.append(PathPrefixPattern(tag_item[b'path']))
+                tag_files.append(PathPrefixPattern(tag_item.path))
                 tagged_dirs.append(FnmatchPattern(dir + '/'))
             else:
                 tagged_dirs.append(PathPrefixPattern(dir))
@@ -1255,18 +1252,18 @@ class ArchiveRecreater:
         cachedir_masters = {}
 
         for item in archive.iter_items(
-                filter=lambda item: item[b'path'].endswith(CACHE_TAG_NAME) or matcher.match(item[b'path'])):
-            if item[b'path'].endswith(CACHE_TAG_NAME):
-                cachedir_masters[item[b'path']] = item
-            if stat.S_ISREG(item[b'mode']):
-                dir, tag_file = os.path.split(item[b'path'])
+                filter=lambda item: item.path.endswith(CACHE_TAG_NAME) or matcher.match(item.path)):
+            if item.path.endswith(CACHE_TAG_NAME):
+                cachedir_masters[item.path] = item
+            if stat.S_ISREG(item.mode):
+                dir, tag_file = os.path.split(item.path)
                 if tag_file in self.exclude_if_present:
                     exclude(dir, item)
                 if self.exclude_caches and tag_file == CACHE_TAG_NAME:
-                    if b'chunks' in item:
+                    if 'chunks' in item:
                         file = open_item(archive, item)
                     else:
-                        file = open_item(archive, cachedir_masters[item[b'source']])
+                        file = open_item(archive, cachedir_masters[item.source])
                     if file.read(len(CACHE_TAG_CONTENTS)).startswith(CACHE_TAG_CONTENTS):
                         exclude(dir, item)
         matcher.add(tag_files, True)
@@ -1307,13 +1304,13 @@ class ArchiveRecreater:
         logger.info('Replaying items from interrupted operation...')
         item = None
         for item in old_target.iter_items():
-            if b'chunks' in item:
-                for chunk in item[b'chunks']:
+            if 'chunks' in item:
+                for chunk in item.chunks:
                     self.cache.chunk_incref(chunk.id, target.stats)
                 target.stats.nfiles += 1
             target.add_item(item)
         if item:
-            resume_from = item[b'path']
+            resume_from = item.path
         else:
             resume_from = None
         if self.progress:
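
In archive.py the serialization boundary becomes explicit: `ChunkBuffer.add()` packs `item.as_dict()` and the unpack paths wrap each raw dict in `Item(internal_dict=...)`. A round-trip sketch; the exact msgpack flags depend on the msgpack-python version (borg of this era relied on str packing to raw bytes and bytes-keyed unpacking):

```python
import msgpack
from borg.item import Item

item = Item(path='var/log/syslog', mode=0o100640, uid=0, gid=4)

# pack side, as in ChunkBuffer.add():
packed = msgpack.packb(item.as_dict())

# unpack side, as in DownloadPipeline.unpack_many() / calc_stats():
raw = msgpack.unpackb(packed, use_list=False)   # bytes keys again
restored = Item(internal_dict=raw)              # keys decoded internally
assert restored == item and restored.path == 'var/log/syslog'
```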

+ 57 - 56
src/borg/archiver.py

@@ -38,6 +38,7 @@ from .helpers import update_excludes, check_extension_modules
 from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo
 from .helpers import log_multi
 from .helpers import parse_pattern, PatternMatcher, PathPrefixPattern
+from .item import Item
 from .key import key_creator, RepoKey, PassphraseKey
 from .platform import get_flags
 from .remote import RepositoryServer, RemoteRepository, cache_if_remote
@@ -405,22 +406,22 @@ class Archiver:
         hardlink_masters = {} if partial_extract else None
 
         def item_is_hardlink_master(item):
-            return (partial_extract and stat.S_ISREG(item[b'mode']) and
-                    item.get(b'hardlink_master', True) and b'source' not in item)
+            return (partial_extract and stat.S_ISREG(item.mode) and
+                    item.get('hardlink_master', True) and 'source' not in item)
 
         for item in archive.iter_items(preload=True,
-                filter=lambda item: item_is_hardlink_master(item) or matcher.match(item[b'path'])):
-            orig_path = item[b'path']
+                filter=lambda item: item_is_hardlink_master(item) or matcher.match(item.path)):
+            orig_path = item.path
             if item_is_hardlink_master(item):
-                hardlink_masters[orig_path] = (item.get(b'chunks'), None)
-            if not matcher.match(item[b'path']):
+                hardlink_masters[orig_path] = (item.get('chunks'), None)
+            if not matcher.match(item.path):
                 continue
             if strip_components:
-                item[b'path'] = os.sep.join(orig_path.split(os.sep)[strip_components:])
-                if not item[b'path']:
+                item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
+                if not item.path:
                     continue
             if not args.dry_run:
-                while dirs and not item[b'path'].startswith(dirs[-1][b'path']):
+                while dirs and not item.path.startswith(dirs[-1].path):
                     archive.extract_item(dirs.pop(-1), stdout=stdout)
             if output_list:
                 logging.getLogger('borg.output.list').info(remove_surrogates(orig_path))
@@ -428,7 +429,7 @@ class Archiver:
                 if dry_run:
                     archive.extract_item(item, dry_run=True)
                 else:
-                    if stat.S_ISDIR(item[b'mode']):
+                    if stat.S_ISDIR(item.mode):
                         dirs.append(item)
                         archive.extract_item(item, restore_attrs=False)
                     else:
@@ -455,58 +456,58 @@ class Archiver:
             return self.compare_chunk_contents(chunks1, chunks2)
 
         def sum_chunk_size(item, consider_ids=None):
-            if item.get(b'deleted'):
+            if item.get('deleted'):
                 return None
             else:
-                return sum(c.size for c in item[b'chunks']
+                return sum(c.size for c in item.chunks
                            if consider_ids is None or c.id in consider_ids)
 
         def get_owner(item):
             if args.numeric_owner:
-                return item[b'uid'], item[b'gid']
+                return item.uid, item.gid
             else:
-                return item[b'user'], item[b'group']
+                return item.user, item.group
 
         def get_mode(item):
-            if b'mode' in item:
-                return stat.filemode(item[b'mode'])
+            if 'mode' in item:
+                return stat.filemode(item.mode)
             else:
                 return [None]
 
         def has_hardlink_master(item, hardlink_masters):
-            return stat.S_ISREG(item[b'mode']) and item.get(b'source') in hardlink_masters
+            return stat.S_ISREG(item.mode) and item.get('source') in hardlink_masters
 
         def compare_link(item1, item2):
             # These are the simple link cases. For special cases, e.g. if a
             # regular file is replaced with a link or vice versa, it is
             # indicated in compare_mode instead.
-            if item1.get(b'deleted'):
+            if item1.get('deleted'):
                 return 'added link'
-            elif item2.get(b'deleted'):
+            elif item2.get('deleted'):
                 return 'removed link'
-            elif b'source' in item1 and b'source' in item2 and item1[b'source'] != item2[b'source']:
+            elif 'source' in item1 and 'source' in item2 and item1.source != item2.source:
                 return 'changed link'
 
         def contents_changed(item1, item2):
             if can_compare_chunk_ids:
-                return item1[b'chunks'] != item2[b'chunks']
+                return item1.chunks != item2.chunks
             else:
                 if sum_chunk_size(item1) != sum_chunk_size(item2):
                     return True
                 else:
-                    chunk_ids1 = [c.id for c in item1[b'chunks']]
-                    chunk_ids2 = [c.id for c in item2[b'chunks']]
+                    chunk_ids1 = [c.id for c in item1.chunks]
+                    chunk_ids2 = [c.id for c in item2.chunks]
                     return not fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2)
 
         def compare_content(path, item1, item2):
             if contents_changed(item1, item2):
-                if item1.get(b'deleted'):
+                if item1.get('deleted'):
                     return ('added {:>13}'.format(format_file_size(sum_chunk_size(item2))))
-                elif item2.get(b'deleted'):
+                elif item2.get('deleted'):
                     return ('removed {:>11}'.format(format_file_size(sum_chunk_size(item1))))
                 else:
-                    chunk_ids1 = {c.id for c in item1[b'chunks']}
-                    chunk_ids2 = {c.id for c in item2[b'chunks']}
+                    chunk_ids1 = {c.id for c in item1.chunks}
+                    chunk_ids2 = {c.id for c in item2.chunks}
                     added_ids = chunk_ids2 - chunk_ids1
                     removed_ids = chunk_ids1 - chunk_ids2
                     added = sum_chunk_size(item2, added_ids)
@@ -515,9 +516,9 @@ class Archiver:
                                                  format_file_size(-removed, precision=1, sign=True)))
 
         def compare_directory(item1, item2):
-            if item2.get(b'deleted') and not item1.get(b'deleted'):
+            if item2.get('deleted') and not item1.get('deleted'):
                 return 'removed directory'
-            elif item1.get(b'deleted') and not item2.get(b'deleted'):
+            elif item1.get('deleted') and not item2.get('deleted'):
                 return 'added directory'
 
         def compare_owner(item1, item2):
@@ -527,7 +528,7 @@ class Archiver:
                 return '[{}:{} -> {}:{}]'.format(user1, group1, user2, group2)
 
         def compare_mode(item1, item2):
-            if item1[b'mode'] != item2[b'mode']:
+            if item1.mode != item2.mode:
                 return '[{} -> {}]'.format(get_mode(item1), get_mode(item2))
 
         def compare_items(output, path, item1, item2, hardlink_masters, deleted=False):
@@ -538,15 +539,15 @@ class Archiver:
             changes = []
 
             if has_hardlink_master(item1, hardlink_masters):
-                item1 = hardlink_masters[item1[b'source']][0]
+                item1 = hardlink_masters[item1.source][0]
 
             if has_hardlink_master(item2, hardlink_masters):
-                item2 = hardlink_masters[item2[b'source']][1]
+                item2 = hardlink_masters[item2.source][1]
 
             if get_mode(item1)[0] == 'l' or get_mode(item2)[0] == 'l':
                 changes.append(compare_link(item1, item2))
 
-            if b'chunks' in item1 and b'chunks' in item2:
+            if 'chunks' in item1 and 'chunks' in item2:
                 changes.append(compare_content(path, item1, item2))
 
             if get_mode(item1)[0] == 'd' or get_mode(item2)[0] == 'd':
@@ -570,21 +571,21 @@ class Archiver:
 
         def compare_archives(archive1, archive2, matcher):
             def hardlink_master_seen(item):
-                return b'source' not in item or not stat.S_ISREG(item[b'mode']) or item[b'source'] in hardlink_masters
+                return 'source' not in item or not stat.S_ISREG(item.mode) or item.source in hardlink_masters
 
             def is_hardlink_master(item):
-                return item.get(b'hardlink_master', True) and b'source' not in item
+                return item.get('hardlink_master', True) and 'source' not in item
 
             def update_hardlink_masters(item1, item2):
                 if is_hardlink_master(item1) or is_hardlink_master(item2):
-                    hardlink_masters[item1[b'path']] = (item1, item2)
+                    hardlink_masters[item1.path] = (item1, item2)
 
             def compare_or_defer(item1, item2):
                 update_hardlink_masters(item1, item2)
                 if not hardlink_master_seen(item1) or not hardlink_master_seen(item2):
                     deferred.append((item1, item2))
                 else:
-                    compare_items(output, item1[b'path'], item1, item2, hardlink_masters)
+                    compare_items(output, item1.path, item1, item2, hardlink_masters)
 
             orphans_archive1 = collections.OrderedDict()
             orphans_archive2 = collections.OrderedDict()
@@ -593,44 +594,44 @@ class Archiver:
             output = []
 
             for item1, item2 in zip_longest(
-                    archive1.iter_items(lambda item: matcher.match(item[b'path'])),
-                    archive2.iter_items(lambda item: matcher.match(item[b'path'])),
+                    archive1.iter_items(lambda item: matcher.match(item.path)),
+                    archive2.iter_items(lambda item: matcher.match(item.path)),
             ):
-                if item1 and item2 and item1[b'path'] == item2[b'path']:
+                if item1 and item2 and item1.path == item2.path:
                     compare_or_defer(item1, item2)
                     continue
                 if item1:
-                    matching_orphan = orphans_archive2.pop(item1[b'path'], None)
+                    matching_orphan = orphans_archive2.pop(item1.path, None)
                     if matching_orphan:
                         compare_or_defer(item1, matching_orphan)
                     else:
-                        orphans_archive1[item1[b'path']] = item1
+                        orphans_archive1[item1.path] = item1
                 if item2:
-                    matching_orphan = orphans_archive1.pop(item2[b'path'], None)
+                    matching_orphan = orphans_archive1.pop(item2.path, None)
                     if matching_orphan:
                         compare_or_defer(matching_orphan, item2)
                     else:
-                        orphans_archive2[item2[b'path']] = item2
+                        orphans_archive2[item2.path] = item2
             # At this point orphans_* contain items that had no matching partner in the other archive
-            deleted_item = {
-                b'deleted': True,
-                b'chunks': [],
-                b'mode': 0,
-            }
+            deleted_item = Item(
+                deleted=True,
+                chunks=[],
+                mode=0,
+            )
             for added in orphans_archive2.values():
-                path = added[b'path']
-                deleted_item[b'path'] = path
+                path = added.path
+                deleted_item.path = path
                 update_hardlink_masters(deleted_item, added)
                 compare_items(output, path, deleted_item, added, hardlink_masters, deleted=True)
             for deleted in orphans_archive1.values():
-                path = deleted[b'path']
-                deleted_item[b'path'] = path
+                path = deleted.path
+                deleted_item.path = path
                 update_hardlink_masters(deleted, deleted_item)
                 compare_items(output, path, deleted, deleted_item, hardlink_masters, deleted=True)
             for item1, item2 in deferred:
                 assert hardlink_master_seen(item1)
                 assert hardlink_master_seen(item2)
-                compare_items(output, item1[b'path'], item1, item2, hardlink_masters)
+                compare_items(output, item1.path, item1, item2, hardlink_masters)
 
             for line in sorted(output):
                 print_output(line)
@@ -749,7 +750,7 @@ class Archiver:
                         sys.stdout.write(bytestring.decode('utf-8', errors='replace'))
                 else:
                     write = sys.stdout.buffer.write
-                for item in archive.iter_items(lambda item: matcher.match(item[b'path'])):
+                for item in archive.iter_items(lambda item: matcher.match(item.path)):
                     write(safe_encode(formatter.format_item(item)))
         else:
             for archive_info in manifest.list_archive_infos(sort_by='ts'):
@@ -2116,7 +2117,7 @@ def sig_info_handler(signum, stack):  # pragma: no cover
             logger.info("{0} {1}/{2}".format(path, format_file_size(pos), format_file_size(total)))
             break
         if func in ('extract_item', ):  # extract op
-            path = loc['item'][b'path']
+            path = loc['item'].path
             try:
                 pos = loc['fd'].tell()
             except Exception:
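
archiver.py relies on `Item` keeping dict-style `get()` semantics, so optional keys read exactly as before. A sketch of the hardlink-master predicate from `do_extract` above (sample paths are hypothetical):

```python
import stat
from borg.item import Item

def item_is_hardlink_master(item, partial_extract=True):
    # the predicate from do_extract above, now with str keys
    return (partial_extract and stat.S_ISREG(item.mode) and
            item.get('hardlink_master', True) and 'source' not in item)

master = Item(path='data/a', mode=0o100644, hardlink_master=True)
slave = Item(path='data/b', mode=0o100644, source='data/a')
assert item_is_hardlink_master(master)
assert not item_is_hardlink_master(slave)   # has a 'source' -> not a master
```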

+ 4 - 2
src/borg/cache.py

@@ -16,6 +16,7 @@ from .helpers import get_cache_dir
 from .helpers import decode_dict, int_to_bigint, bigint_to_int, bin_to_hex
 from .helpers import format_file_size
 from .helpers import yes
+from .item import Item
 from .key import PlaintextKey
 from .locking import UpgradableLock
 from .remote import cache_if_remote
@@ -298,8 +299,9 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                     if not isinstance(item, dict):
                         logger.error('Error: Did not get expected metadata dict - archive corrupted!')
                         continue
-                    if b'chunks' in item:
-                        for chunk_id, size, csize in item[b'chunks']:
+                    item = Item(internal_dict=item)
+                    if 'chunks' in item:
+                        for chunk_id, size, csize in item.chunks:
                             chunk_idx.add(chunk_id, 1, size, csize)
             if self.do_cache:
                 fn = mkpath(archive_id)
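
cache.py's archive-index rebuild keeps the raw `(chunk_id, size, csize)` triples; only the wrapper changes. A self-contained sketch of that accounting loop, with a hypothetical stand-in for borg's `ChunkIndex`:

```python
from collections import defaultdict
from borg.item import Item

class FakeChunkIndex:
    """Stand-in for borg's ChunkIndex: just counts references (a sketch)."""
    def __init__(self):
        self.refcounts = defaultdict(int)
    def add(self, chunk_id, refcount, size, csize):
        self.refcounts[chunk_id] += refcount

chunk_idx = FakeChunkIndex()
raw_items = [{b'path': b'f', b'chunks': ((b'\x01' * 32, 100, 60),)}]
for raw in raw_items:
    item = Item(internal_dict=raw)
    if 'chunks' in item:
        for chunk_id, size, csize in item.chunks:
            chunk_idx.add(chunk_id, 1, size, csize)
assert chunk_idx.refcounts[b'\x01' * 32] == 1
```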

+ 3 - 4
src/borg/constants.py

@@ -1,10 +1,9 @@
 # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
-ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', b'hardlink_master',
-                 b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
-                 b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
+ITEM_KEYS = set(['path', 'source', 'rdev', 'chunks', 'hardlink_master',
+                 'mode', 'user', 'group', 'uid', 'gid', 'mtime', 'atime', 'ctime',
+                 'xattrs', 'bsdflags', 'acl_nfs4', 'acl_access', 'acl_default', 'acl_extended', ])
 
 ARCHIVE_TEXT_KEYS = (b'name', b'comment', b'hostname', b'username', b'time', b'time_end')
-ITEM_TEXT_KEYS = (b'path', b'source', b'user', b'group')
 
 # default umask, overridden by --umask, defaults to read/write only for owner
 UMASK_DEFAULT = 0o077
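
With `ITEM_KEYS` now str, per-key validation moves out of `Archive.add_item()` (see the deleted assert in archive.py) and into the `Item` properties; `ITEM_TEXT_KEYS` disappears because text decoding is handled per-property. The invariant tying this file to item.py below:

```python
from borg.constants import ITEM_KEYS
from borg.item import Item

# every archivable key is a valid Item property; 'deleted' and 'nlink'
# are runtime-only extras (diff output, FUSE) not written to archives
assert ITEM_KEYS <= Item.VALID_KEYS
assert Item.VALID_KEYS - ITEM_KEYS == {'deleted', 'nlink'}
```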

+ 36 - 36
src/borg/fuse.py

@@ -15,7 +15,7 @@ logger = create_logger()
 
 from .archive import Archive
 from .helpers import daemonize
-from .helpers import bigint_to_int
+from .item import Item
 from .lrucache import LRUCache
 
 # Does this version of llfuse support ns precision?
@@ -38,12 +38,13 @@ class ItemCache:
 
     def add(self, item):
         pos = self.fd.seek(0, io.SEEK_END)
-        self.fd.write(msgpack.packb(item))
+        self.fd.write(msgpack.packb(item.as_dict()))
         return pos + self.offset
 
     def get(self, inode):
         self.fd.seek(inode - self.offset, io.SEEK_SET)
-        return next(msgpack.Unpacker(self.fd, read_size=1024))
+        item = next(msgpack.Unpacker(self.fd, read_size=1024))
+        return Item(internal_dict=item)
 
 
 class FuseOperations(llfuse.Operations):
@@ -57,7 +58,7 @@ class FuseOperations(llfuse.Operations):
         self.items = {}
         self.parent = {}
         self.contents = defaultdict(dict)
-        self.default_dir = {b'mode': 0o40755, b'mtime': int(time.time() * 1e9), b'uid': os.getuid(), b'gid': os.getgid()}
+        self.default_dir = Item(mode=0o40755, mtime=int(time.time() * 1e9), uid=os.getuid(), gid=os.getgid())
         self.pending_archives = {}
         self.accounted_chunks = {}
         self.cache = ItemCache()
@@ -86,8 +87,9 @@ class FuseOperations(llfuse.Operations):
             _, data = self.key.decrypt(key, chunk)
             unpacker.feed(data)
             for item in unpacker:
-                segments = prefix + os.fsencode(os.path.normpath(item[b'path'])).split(b'/')
-                del item[b'path']
+                item = Item(internal_dict=item)
+                segments = prefix + os.fsencode(os.path.normpath(item.path)).split(b'/')
+                del item.path
                 num_segments = len(segments)
                 parent = 1
                 for i, segment in enumerate(segments, 1):
@@ -98,10 +100,10 @@ class FuseOperations(llfuse.Operations):
                         self.parent[archive_inode] = parent
                     # Leaf segment?
                     if i == num_segments:
-                        if b'source' in item and stat.S_ISREG(item[b'mode']):
-                            inode = self._find_inode(item[b'source'], prefix)
+                        if 'source' in item and stat.S_ISREG(item.mode):
+                            inode = self._find_inode(item.source, prefix)
                             item = self.cache.get(inode)
-                            item[b'nlink'] = item.get(b'nlink', 1) + 1
+                            item.nlink = item.get('nlink', 1) + 1
                             self.items[inode] = item
                         else:
                             inode = self.cache.add(item)
@@ -151,58 +153,56 @@ class FuseOperations(llfuse.Operations):
         item = self.get_item(inode)
         size = 0
         dsize = 0
-        try:
-            for key, chunksize, _ in item[b'chunks']:
+        if 'chunks' in item:
+            for key, chunksize, _ in item.chunks:
                 size += chunksize
                 if self.accounted_chunks.get(key, inode) == inode:
                     self.accounted_chunks[key] = inode
                     dsize += chunksize
-        except KeyError:
-            pass
         entry = llfuse.EntryAttributes()
         entry.st_ino = inode
         entry.generation = 0
         entry.entry_timeout = 300
         entry.attr_timeout = 300
-        entry.st_mode = item[b'mode']
-        entry.st_nlink = item.get(b'nlink', 1)
-        entry.st_uid = item[b'uid']
-        entry.st_gid = item[b'gid']
-        entry.st_rdev = item.get(b'rdev', 0)
+        entry.st_mode = item.mode
+        entry.st_nlink = item.get('nlink', 1)
+        entry.st_uid = item.uid
+        entry.st_gid = item.gid
+        entry.st_rdev = item.get('rdev', 0)
         entry.st_size = size
         entry.st_blksize = 512
         entry.st_blocks = dsize / 512
         # note: older archives only have mtime (not atime nor ctime)
         if have_fuse_xtime_ns:
-            entry.st_mtime_ns = bigint_to_int(item[b'mtime'])
-            if b'atime' in item:
-                entry.st_atime_ns = bigint_to_int(item[b'atime'])
+            entry.st_mtime_ns = item.mtime
+            if 'atime' in item:
+                entry.st_atime_ns = item.atime
             else:
-                entry.st_atime_ns = bigint_to_int(item[b'mtime'])
-            if b'ctime' in item:
-                entry.st_ctime_ns = bigint_to_int(item[b'ctime'])
+                entry.st_atime_ns = item.mtime
+            if 'ctime' in item:
+                entry.st_ctime_ns = item.ctime
             else:
-                entry.st_ctime_ns = bigint_to_int(item[b'mtime'])
+                entry.st_ctime_ns = item.mtime
         else:
-            entry.st_mtime = bigint_to_int(item[b'mtime']) / 1e9
-            if b'atime' in item:
-                entry.st_atime = bigint_to_int(item[b'atime']) / 1e9
+            entry.st_mtime = item.mtime / 1e9
+            if 'atime' in item:
+                entry.st_atime = item.atime / 1e9
             else:
-                entry.st_atime = bigint_to_int(item[b'mtime']) / 1e9
-            if b'ctime' in item:
-                entry.st_ctime = bigint_to_int(item[b'ctime']) / 1e9
+                entry.st_atime = item.mtime / 1e9
+            if 'ctime' in item:
+                entry.st_ctime = item.ctime / 1e9
             else:
-                entry.st_ctime = bigint_to_int(item[b'mtime']) / 1e9
+                entry.st_ctime = item.mtime / 1e9
         return entry
 
     def listxattr(self, inode, ctx=None):
         item = self.get_item(inode)
-        return item.get(b'xattrs', {}).keys()
+        return item.get('xattrs', {}).keys()
 
     def getxattr(self, inode, name, ctx=None):
         item = self.get_item(inode)
         try:
-            return item.get(b'xattrs', {})[name]
+            return item.get('xattrs', {})[name]
         except KeyError:
             raise llfuse.FUSEError(errno.ENODATA) from None
 
@@ -234,7 +234,7 @@ class FuseOperations(llfuse.Operations):
     def read(self, fh, offset, size):
         parts = []
         item = self.get_item(fh)
-        for id, s, csize in item[b'chunks']:
+        for id, s, csize in item.chunks:
             if s < offset:
                 offset -= s
                 continue
@@ -264,7 +264,7 @@ class FuseOperations(llfuse.Operations):
 
     def readlink(self, inode, ctx=None):
         item = self.get_item(inode)
-        return os.fsencode(item[b'source'])
+        return os.fsencode(item.source)
 
     def mount(self, mountpoint, extra_options, foreground=False):
         options = ['fsname=borgfs', 'ro']
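
fuse.py can drop `bigint_to_int` because the timestamp properties on `Item` now return plain nanosecond ints (the bigint conversion lives inside the property); only the old-archive fallback remains. A sketch of that fallback, with a hypothetical mtime value:

```python
from borg.item import Item

def timestamps_ns(item):
    """(atime, mtime, ctime) in ns; old archives only stored mtime."""
    mtime = item.mtime
    atime = item.atime if 'atime' in item else mtime
    ctime = item.ctime if 'ctime' in item else mtime
    return atime, mtime, ctime

item = Item(mode=0o100644, mtime=1467072000 * 10**9)  # hypothetical mtime
assert timestamps_ns(item) == (item.mtime, item.mtime, item.mtime)
```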

+ 26 - 28
src/borg/helpers.py

@@ -1157,10 +1157,8 @@ class ItemFormatter:
         class FakeArchive:
             fpr = name = ""
 
-        fake_item = {
-            b'mode': 0, b'path': '', b'user': '', b'group': '', b'mtime': 0,
-            b'uid': 0, b'gid': 0,
-        }
+        from .item import Item
+        fake_item = Item(mode=0, path='', user='', group='', mtime=0, uid=0, gid=0)
         formatter = cls(FakeArchive, "")
         keys = []
         keys.extend(formatter.call_keys.keys())
@@ -1196,12 +1194,12 @@ class ItemFormatter:
             'csize': self.calculate_csize,
             'num_chunks': self.calculate_num_chunks,
             'unique_chunks': self.calculate_unique_chunks,
-            'isomtime': partial(self.format_time, b'mtime'),
-            'isoctime': partial(self.format_time, b'ctime'),
-            'isoatime': partial(self.format_time, b'atime'),
-            'mtime': partial(self.time, b'mtime'),
-            'ctime': partial(self.time, b'ctime'),
-            'atime': partial(self.time, b'atime'),
+            'isomtime': partial(self.format_time, 'mtime'),
+            'isoctime': partial(self.format_time, 'ctime'),
+            'isoatime': partial(self.format_time, 'atime'),
+            'mtime': partial(self.time, 'mtime'),
+            'ctime': partial(self.time, 'ctime'),
+            'atime': partial(self.time, 'atime'),
         }
         for hash_function in hashlib.algorithms_guaranteed:
             self.add_key(hash_function, partial(self.hash_item, hash_function))
@@ -1213,11 +1211,11 @@ class ItemFormatter:
         self.used_call_keys = set(self.call_keys) & self.format_keys
 
     def get_item_data(self, item):
-        mode = stat.filemode(item[b'mode'])
+        mode = stat.filemode(item.mode)
         item_type = mode[0]
         item_data = self.item_data
 
-        source = item.get(b'source', '')
+        source = item.get('source', '')
         extra = ''
         if source:
             source = remove_surrogates(source)
@@ -1228,16 +1226,16 @@ class ItemFormatter:
                 extra = ' link to %s' % source
         item_data['type'] = item_type
         item_data['mode'] = mode
-        item_data['user'] = item[b'user'] or item[b'uid']
-        item_data['group'] = item[b'group'] or item[b'gid']
-        item_data['uid'] = item[b'uid']
-        item_data['gid'] = item[b'gid']
-        item_data['path'] = remove_surrogates(item[b'path'])
-        item_data['bpath'] = item[b'path']
+        item_data['user'] = item.user or item.uid
+        item_data['group'] = item.group or item.gid
+        item_data['uid'] = item.uid
+        item_data['gid'] = item.gid
+        item_data['path'] = remove_surrogates(item.path)
+        item_data['bpath'] = item.path
         item_data['source'] = source
         item_data['linktarget'] = source
         item_data['extra'] = extra
-        item_data['flags'] = item.get(b'bsdflags')
+        item_data['flags'] = item.get('bsdflags')
         for key in self.used_call_keys:
             item_data[key] = self.call_keys[key](item)
         return item_data
@@ -1246,31 +1244,31 @@ class ItemFormatter:
         return self.format.format_map(self.get_item_data(item))
 
     def calculate_num_chunks(self, item):
-        return len(item.get(b'chunks', []))
+        return len(item.get('chunks', []))
 
     def calculate_unique_chunks(self, item):
         chunk_index = self.archive.cache.chunks
-        return sum(1 for c in item.get(b'chunks', []) if chunk_index[c.id].refcount == 1)
+        return sum(1 for c in item.get('chunks', []) if chunk_index[c.id].refcount == 1)
 
     def calculate_size(self, item):
-        return sum(c.size for c in item.get(b'chunks', []))
+        return sum(c.size for c in item.get('chunks', []))
 
     def calculate_csize(self, item):
-        return sum(c.csize for c in item.get(b'chunks', []))
+        return sum(c.csize for c in item.get('chunks', []))
 
     def hash_item(self, hash_function, item):
-        if b'chunks' not in item:
+        if 'chunks' not in item:
             return ""
         hash = hashlib.new(hash_function)
-        for _, data in self.archive.pipeline.fetch_many([c.id for c in item[b'chunks']]):
+        for _, data in self.archive.pipeline.fetch_many([c.id for c in item.chunks]):
             hash.update(data)
         return hash.hexdigest()
 
     def format_time(self, key, item):
-        return format_time(safe_timestamp(item.get(key) or item[b'mtime']))
+        return format_time(safe_timestamp(item.get(key) or item.mtime))
 
     def time(self, key, item):
-        return safe_timestamp(item.get(key) or item[b'mtime'])
+        return safe_timestamp(item.get(key) or item.mtime)
 
 
 class ChunkIteratorFileWrapper:
@@ -1314,7 +1312,7 @@ class ChunkIteratorFileWrapper:
 
 def open_item(archive, item):
     """Return file-like object for archived item (with chunks)."""
-    chunk_iterator = archive.pipeline.fetch_many([c.id for c in item[b'chunks']])
+    chunk_iterator = archive.pipeline.fetch_many([c.id for c in item.chunks])
     return ChunkIteratorFileWrapper(chunk_iterator)
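
The `ItemFormatter` calculators reduce to sums over the chunk list; `item.get('chunks', [])` keeps them total even for chunk-less items (directories, symlinks). A sketch using borg's `ChunkListEntry` named-tuple layout (id, size, csize):

```python
from collections import namedtuple
from borg.item import Item

ChunkListEntry = namedtuple('ChunkListEntry', 'id size csize')  # as in borg

item = Item(path='var/data.bin', mode=0o100644,
            chunks=[ChunkListEntry(b'\x02' * 32, 4096, 1234)])
empty = Item(path='etc', mode=0o040755)  # a directory: no chunks key

def calc_size(it):
    return sum(c.size for c in it.get('chunks', []))

def calc_csize(it):
    return sum(c.csize for c in it.get('chunks', []))

assert calc_size(item) == 4096 and calc_csize(item) == 1234
assert calc_size(empty) == 0     # .get() default keeps it total
```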
 
 

+ 28 - 16
src/borg/item.py

@@ -21,25 +21,34 @@ class PropDict:
 
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
 
-    def __init__(self, data_dict=None, **kw):
+    def __init__(self, data_dict=None, internal_dict=None, **kw):
         if data_dict is None:
             data = kw
         elif not isinstance(data_dict, dict):
             raise TypeError("data_dict must be dict")
         else:
             data = data_dict
-        # internally, we want an dict with only str-typed keys
-        _dict = {}
-        for k, v in data.items():
+        self._dict = {}
+        self.update_internal(internal_dict or {})
+        self.update(data)
+
+    def update(self, d):
+        for k, v in d.items():
+            if isinstance(k, bytes):
+                k = k.decode()
+            setattr(self, self._check_key(k), v)
+
+    def update_internal(self, d):
+        for k, v in d.items():
             if isinstance(k, bytes):
                 k = k.decode()
-            elif not isinstance(k, str):
-                raise TypeError("dict keys must be str or bytes, not %r" % k)
-            _dict[k] = v
-        unknown_keys = set(_dict) - self.VALID_KEYS
-        if unknown_keys:
-            raise ValueError("dict contains unknown keys %s" % ','.join(unknown_keys))
-        self._dict = _dict
+            self._dict[k] = v
+
+    def __eq__(self, other):
+        return self.as_dict() == other.as_dict()
+
+    def __repr__(self):
+        return '%s(internal_dict=%r)' % (self.__class__.__name__, self._dict)
 
     def as_dict(self):
         """return the internal dictionary"""
@@ -110,7 +119,7 @@ class Item(PropDict):
     If an Item shall be serialized, give as_dict() method output to msgpack packer.
     """
 
-    VALID_KEYS = set(key.decode() for key in ITEM_KEYS)  # we want str-typed keys
+    VALID_KEYS = ITEM_KEYS | {'deleted', 'nlink', }  # str-typed keys
 
     __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
 
@@ -118,14 +127,14 @@ class Item(PropDict):
 
     path = PropDict._make_property('path', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
     source = PropDict._make_property('source', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
+    user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode)
+    group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode)
+
     acl_access = PropDict._make_property('acl_access', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
     acl_default = PropDict._make_property('acl_default', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
     acl_extended = PropDict._make_property('acl_extended', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
     acl_nfs4 = PropDict._make_property('acl_nfs4', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
 
-    user = PropDict._make_property('user', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode)
-    group = PropDict._make_property('group', (str, type(None)), 'surrogate-escaped str or None', encode=safe_encode, decode=safe_decode)
-
     mode = PropDict._make_property('mode', int)
     uid = PropDict._make_property('uid', int)
     gid = PropDict._make_property('gid', int)
@@ -138,6 +147,9 @@ class Item(PropDict):
 
     hardlink_master = PropDict._make_property('hardlink_master', bool)
 
-    chunks = PropDict._make_property('chunks', list)
+    chunks = PropDict._make_property('chunks', (list, type(None)), 'list or None')
 
     xattrs = PropDict._make_property('xattrs', StableDict)
+
+    deleted = PropDict._make_property('deleted', bool)
+    nlink = PropDict._make_property('nlink', int)
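
The two extra keys and the relaxed chunks type are exercised like this (a sketch; the values are hypothetical):

    from borg.item import Item

    item = Item(path='/tmp/x', nlink=2, deleted=False)
    item.chunks = None         # now valid: "chunks known to be absent"
    assert 'deleted' in item   # accepted via the widened VALID_KEYS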

+ 12 - 14
src/borg/platform/darwin.pyx

@@ -62,9 +62,9 @@ def acl_get(path, item, st, numeric_owner=False):
         if text == NULL:
             return
         if numeric_owner:
-            item[b'acl_extended'] = _remove_non_numeric_identifier(text)
+            item['acl_extended'] = _remove_non_numeric_identifier(text)
         else:
-            item[b'acl_extended'] = text
+            item['acl_extended'] = text
     finally:
         acl_free(text)
         acl_free(acl)
@@ -72,18 +72,16 @@ def acl_get(path, item, st, numeric_owner=False):
 
 def acl_set(path, item, numeric_owner=False):
     cdef acl_t acl = NULL
-    try:
+    acl_text = item.get('acl_extended')
+    if acl_text is not None:
         try:
             if numeric_owner:
-                acl = acl_from_text(item[b'acl_extended'])
+                acl = acl_from_text(acl_text)
             else:
-                acl = acl_from_text(<bytes>_remove_numeric_id_if_possible(item[b'acl_extended']))
-        except KeyError:
-            return
-        if acl == NULL:
-            return
-        if acl_set_link_np(<bytes>os.fsencode(path), ACL_TYPE_EXTENDED, acl):
-            return
-    finally:
-        acl_free(acl)
-
+                acl = acl_from_text(<bytes>_remove_numeric_id_if_possible(acl_text))
+            if acl == NULL:
+                return
+            if acl_set_link_np(<bytes>os.fsencode(path), ACL_TYPE_EXTENDED, acl):
+                return
+        finally:
+            acl_free(acl)
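
For reference, a round trip through the reshaped function (macOS only; 'somefile' is a placeholder, and acl_get only sets the key when the file actually carries an extended ACL):

    import os
    from borg.platform import acl_get, acl_set

    item = {}
    acl_get('somefile', item, os.stat('somefile'))  # may set item['acl_extended']
    acl_set('somefile', item)                       # now silently a no-op if the key is absent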

+ 6 - 6
src/borg/platform/freebsd.pyx

@@ -57,10 +57,10 @@ def acl_get(path, item, st, numeric_owner=False):
         return
     flags |= ACL_TEXT_NUMERIC_IDS if numeric_owner else 0
     if ret > 0:
-        _get_acl(p, ACL_TYPE_NFS4, item, b'acl_nfs4', flags)
+        _get_acl(p, ACL_TYPE_NFS4, item, 'acl_nfs4', flags)
     else:
-        _get_acl(p, ACL_TYPE_ACCESS, item, b'acl_access', flags)
-        _get_acl(p, ACL_TYPE_DEFAULT, item, b'acl_default', flags)
+        _get_acl(p, ACL_TYPE_ACCESS, item, 'acl_access', flags)
+        _get_acl(p, ACL_TYPE_DEFAULT, item, 'acl_default', flags)
 
 
 cdef _set_acl(p, type, item, attribute, numeric_owner=False):
@@ -98,6 +98,6 @@ def acl_set(path, item, numeric_owner=False):
     of the user/group names
     """
     p = os.fsencode(path)
-    _set_acl(p, ACL_TYPE_NFS4, item, b'acl_nfs4', numeric_owner)
-    _set_acl(p, ACL_TYPE_ACCESS, item, b'acl_access', numeric_owner)
-    _set_acl(p, ACL_TYPE_DEFAULT, item, b'acl_default', numeric_owner)
+    _set_acl(p, ACL_TYPE_NFS4, item, 'acl_nfs4', numeric_owner)
+    _set_acl(p, ACL_TYPE_ACCESS, item, 'acl_access', numeric_owner)
+    _set_acl(p, ACL_TYPE_DEFAULT, item, 'acl_default', numeric_owner)

+ 4 - 4
src/borg/platform/linux.pyx

@@ -171,12 +171,12 @@ def acl_get(path, item, st, numeric_owner=False):
         if access_acl:
             access_text = acl_to_text(access_acl, NULL)
             if access_text:
-                item[b'acl_access'] = converter(access_text)
+                item['acl_access'] = converter(access_text)
         default_acl = acl_get_file(p, ACL_TYPE_DEFAULT)
         if default_acl:
             default_text = acl_to_text(default_acl, NULL)
             if default_text:
-                item[b'acl_default'] = converter(default_text)
+                item['acl_default'] = converter(default_text)
     finally:
         acl_free(default_text)
         acl_free(default_acl)
@@ -193,8 +193,8 @@ def acl_set(path, item, numeric_owner=False):
         converter = posix_acl_use_stored_uid_gid
     else:
         converter = acl_use_local_uid_gid
-    access_text = item.get(b'acl_access')
-    default_text = item.get(b'acl_default')
+    access_text = item.get('acl_access')
+    default_text = item.get('acl_default')
     if access_text:
         try:
             access_acl = acl_from_text(<bytes>converter(access_text))
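
Since the key migration is the same across the platform modules, one pattern covers them all: item may be a plain dict (as in the tests) or an Item, and .get() with str keys works for both (an assumption here: Item exposes the same dict-style .get() the code above relies on). A small sketch with a hypothetical helper:

    def has_posix_acls(item):
        """True if the item carries POSIX ACL text (Linux keys only)."""
        return bool(item.get('acl_access') or item.get('acl_default'))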

+ 8 - 7
src/borg/testsuite/archive.py

@@ -7,6 +7,7 @@ import pytest
 import msgpack
 
 from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, Statistics
+from ..item import Item
 from ..key import PlaintextKey
 from ..helpers import Manifest
 from . import BaseTestCase
@@ -38,12 +39,12 @@ def tests_stats_progress(stats, columns=80):
 
     out = StringIO()
     stats.update(10**3, 0, unique=False)
-    stats.show_progress(item={b'path': 'foo'}, final=False, stream=out)
+    stats.show_progress(item=Item(path='foo'), final=False, stream=out)
     s = '1.02 kB O 10 B C 10 B D 0 N foo'
     buf = ' ' * (columns - len(s))
     assert out.getvalue() == s + buf + "\r"
     out = StringIO()
-    stats.show_progress(item={b'path': 'foo'*40}, final=False, stream=out)
+    stats.show_progress(item=Item(path='foo'*40), final=False, stream=out)
     s = '1.02 kB O 10 B C 10 B D 0 N foofoofoofoofoofoofoofo...oofoofoofoofoofoofoofoofoo'
     buf = ' ' * (columns - len(s))
     assert out.getvalue() == s + buf + "\r"
@@ -93,7 +94,7 @@ class ArchiveTimestampTestCase(BaseTestCase):
 class ChunkBufferTestCase(BaseTestCase):
 
     def test(self):
-        data = [{b'foo': 1}, {b'bar': 2}]
+        data = [Item(path='p1'), Item(path='p2')]
         cache = MockCache()
         key = PlaintextKey(None)
         chunks = CacheChunkBuffer(cache, key, None)
@@ -105,11 +106,11 @@ class ChunkBufferTestCase(BaseTestCase):
         unpacker = msgpack.Unpacker()
         for id in chunks.chunks:
             unpacker.feed(cache.objects[id])
-        self.assert_equal(data, list(unpacker))
+        self.assert_equal(data, [Item(internal_dict=d) for d in unpacker])
 
     def test_partial(self):
-        big = b"0123456789" * 10000
-        data = [{b'full': 1, b'data': big}, {b'partial': 2, b'data': big}]
+        big = "0123456789" * 10000
+        data = [Item(path='full', source=big), Item(path='partial', source=big)]
         cache = MockCache()
         key = PlaintextKey(None)
         chunks = CacheChunkBuffer(cache, key, None)
@@ -126,7 +127,7 @@ class ChunkBufferTestCase(BaseTestCase):
         unpacker = msgpack.Unpacker()
         for id in chunks.chunks:
             unpacker.feed(cache.objects[id])
-        self.assert_equal(data, list(unpacker))
+        self.assert_equal(data, [Item(internal_dict=d) for d in unpacker])
 
 
 class RobustUnpackerTestCase(BaseTestCase):
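
The round trip these buffer tests rely on, in isolation (a sketch; msgpack may hand back bytes keys, which Item(internal_dict=...) decodes):

    import msgpack
    from borg.item import Item

    original = Item(path='p1', mode=0o600)
    packed = msgpack.packb(original.as_dict())        # what the chunk buffer writes
    restored = Item(internal_dict=msgpack.unpackb(packed))
    assert original == restored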

+ 4 - 4
src/borg/testsuite/archiver.py

@@ -1641,8 +1641,8 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
         archive, repository = self.open_archive('archive1')
         with repository:
             for item in archive.iter_items():
-                if item[b'path'].endswith('testsuite/archiver.py'):
-                    repository.delete(item[b'chunks'][-1].id)
+                if item.path.endswith('testsuite/archiver.py'):
+                    repository.delete(item.chunks[-1].id)
                     break
             repository.commit()
         self.cmd('check', self.repository_location, exit_code=1)
@@ -1696,8 +1696,8 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
         archive, repository = self.open_archive('archive1')
         with repository:
             for item in archive.iter_items():
-                if item[b'path'].endswith('testsuite/archiver.py'):
-                    chunk = item[b'chunks'][-1]
+                if item.path.endswith('testsuite/archiver.py'):
+                    chunk = item.chunks[-1]
                     data = repository.get(chunk.id) + b'1234'
                     repository.put(chunk.id, data)
                     break
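
The attribute-style access used in these checks composes naturally with iter_items() and its filter argument (find_items below is a hypothetical helper):

    def find_items(archive, suffix):
        """Yield archived items whose path ends with suffix."""
        yield from archive.iter_items(filter=lambda item: item.path.endswith(suffix))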

+ 4 - 4
src/borg/testsuite/item.py

@@ -35,13 +35,13 @@ def test_item_empty():
 
 def test_item_from_dict():
     # does not matter whether we get str or bytes keys
-    item = Item({b'path': b'/a/b/c', b'mode': 0o666})
+    item = Item({b'path': '/a/b/c', b'mode': 0o666})
     assert item.path == '/a/b/c'
     assert item.mode == 0o666
     assert 'path' in item
 
     # does not matter whether we get str or bytes keys
-    item = Item({'path': b'/a/b/c', 'mode': 0o666})
+    item = Item({'path': '/a/b/c', 'mode': 0o666})
     assert item.path == '/a/b/c'
     assert item.mode == 0o666
     assert 'mode' in item
@@ -60,7 +60,7 @@ def test_item_from_dict():
 
 
 def test_item_from_kw():
-    item = Item(path=b'/a/b/c', mode=0o666)
+    item = Item(path='/a/b/c', mode=0o666)
     assert item.path == '/a/b/c'
     assert item.mode == 0o666
 
@@ -107,7 +107,7 @@ def test_item_se_str_property():
         item.path = 42
 
     # non-utf-8 path, needing surrogate-escaping for latin-1 u-umlaut
-    item = Item({'path': b'/a/\xfc/c'})
+    item = Item(internal_dict={'path': b'/a/\xfc/c'})
     assert item.path == '/a/\udcfc/c'  # getting a surrogate-escaped representation
     assert item.as_dict() == {'path': b'/a/\xfc/c'}
     del item.path
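
What the surrogate-escape test pins down, expressed with plain codecs (this should be the behavior safe_encode/safe_decode wrap):

    raw = b'/a/\xfc/c'  # latin-1 u-umlaut, not valid utf-8
    s = raw.decode('utf-8', 'surrogateescape')
    assert s == '/a/\udcfc/c'
    assert s.encode('utf-8', 'surrogateescape') == raw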

+ 17 - 17
src/borg/testsuite/platform.py

@@ -51,26 +51,26 @@ class PlatformLinuxTestCase(BaseTestCase):
         return item
 
     def set_acl(self, path, access=None, default=None, numeric_owner=False):
-        item = {b'acl_access': access, b'acl_default': default}
+        item = {'acl_access': access, 'acl_default': default}
         acl_set(path, item, numeric_owner=numeric_owner)
 
     def test_access_acl(self):
         file = tempfile.NamedTemporaryFile()
         self.assert_equal(self.get_acl(file.name), {})
         self.set_acl(file.name, access=b'user::rw-\ngroup::r--\nmask::rw-\nother::---\nuser:root:rw-:9999\ngroup:root:rw-:9999\n', numeric_owner=False)
-        self.assert_in(b'user:root:rw-:0', self.get_acl(file.name)[b'acl_access'])
-        self.assert_in(b'group:root:rw-:0', self.get_acl(file.name)[b'acl_access'])
-        self.assert_in(b'user:0:rw-:0', self.get_acl(file.name, numeric_owner=True)[b'acl_access'])
+        self.assert_in(b'user:root:rw-:0', self.get_acl(file.name)['acl_access'])
+        self.assert_in(b'group:root:rw-:0', self.get_acl(file.name)['acl_access'])
+        self.assert_in(b'user:0:rw-:0', self.get_acl(file.name, numeric_owner=True)['acl_access'])
         file2 = tempfile.NamedTemporaryFile()
         self.set_acl(file2.name, access=b'user::rw-\ngroup::r--\nmask::rw-\nother::---\nuser:root:rw-:9999\ngroup:root:rw-:9999\n', numeric_owner=True)
-        self.assert_in(b'user:9999:rw-:9999', self.get_acl(file2.name)[b'acl_access'])
-        self.assert_in(b'group:9999:rw-:9999', self.get_acl(file2.name)[b'acl_access'])
+        self.assert_in(b'user:9999:rw-:9999', self.get_acl(file2.name)['acl_access'])
+        self.assert_in(b'group:9999:rw-:9999', self.get_acl(file2.name)['acl_access'])
 
     def test_default_acl(self):
         self.assert_equal(self.get_acl(self.tmpdir), {})
         self.set_acl(self.tmpdir, access=ACCESS_ACL, default=DEFAULT_ACL)
-        self.assert_equal(self.get_acl(self.tmpdir)[b'acl_access'], ACCESS_ACL)
-        self.assert_equal(self.get_acl(self.tmpdir)[b'acl_default'], DEFAULT_ACL)
+        self.assert_equal(self.get_acl(self.tmpdir)['acl_access'], ACCESS_ACL)
+        self.assert_equal(self.get_acl(self.tmpdir)['acl_default'], DEFAULT_ACL)
 
     def test_non_ascii_acl(self):
         # Testing non-ascii ACL processing to see whether our code is robust.
@@ -86,18 +86,18 @@ class PlatformLinuxTestCase(BaseTestCase):
         group_entry_numeric = 'group:666:rw-:666'.encode('ascii')
         acl = b'\n'.join([nothing_special, user_entry, group_entry])
         self.set_acl(file.name, access=acl, numeric_owner=False)
-        acl_access = self.get_acl(file.name, numeric_owner=False)[b'acl_access']
+        acl_access = self.get_acl(file.name, numeric_owner=False)['acl_access']
         self.assert_in(user_entry, acl_access)
         self.assert_in(group_entry, acl_access)
-        acl_access_numeric = self.get_acl(file.name, numeric_owner=True)[b'acl_access']
+        acl_access_numeric = self.get_acl(file.name, numeric_owner=True)['acl_access']
         self.assert_in(user_entry_numeric, acl_access_numeric)
         self.assert_in(group_entry_numeric, acl_access_numeric)
         file2 = tempfile.NamedTemporaryFile()
         self.set_acl(file2.name, access=acl, numeric_owner=True)
-        acl_access = self.get_acl(file2.name, numeric_owner=False)[b'acl_access']
+        acl_access = self.get_acl(file2.name, numeric_owner=False)['acl_access']
         self.assert_in(user_entry, acl_access)
         self.assert_in(group_entry, acl_access)
-        acl_access_numeric = self.get_acl(file.name, numeric_owner=True)[b'acl_access']
+        acl_access_numeric = self.get_acl(file.name, numeric_owner=True)['acl_access']
         self.assert_in(user_entry_numeric, acl_access_numeric)
         self.assert_in(group_entry_numeric, acl_access_numeric)
 
@@ -125,7 +125,7 @@ class PlatformDarwinTestCase(BaseTestCase):
         return item
 
     def set_acl(self, path, acl, numeric_owner=False):
-        item = {b'acl_extended': acl}
+        item = {'acl_extended': acl}
         acl_set(path, item, numeric_owner=numeric_owner)
 
     def test_access_acl(self):
@@ -133,11 +133,11 @@ class PlatformDarwinTestCase(BaseTestCase):
         file2 = tempfile.NamedTemporaryFile()
         self.assert_equal(self.get_acl(file.name), {})
         self.set_acl(file.name, b'!#acl 1\ngroup:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:staff:0:allow:read\nuser:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read\n', numeric_owner=False)
-        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000014:staff:20:allow:read', self.get_acl(file.name)[b'acl_extended'])
-        self.assert_in(b'user:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read', self.get_acl(file.name)[b'acl_extended'])
+        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000014:staff:20:allow:read', self.get_acl(file.name)['acl_extended'])
+        self.assert_in(b'user:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read', self.get_acl(file.name)['acl_extended'])
         self.set_acl(file2.name, b'!#acl 1\ngroup:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:staff:0:allow:read\nuser:FFFFEEEE-DDDD-CCCC-BBBB-AAAA00000000:root:0:allow:read\n', numeric_owner=True)
-        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:wheel:0:allow:read', self.get_acl(file2.name)[b'acl_extended'])
-        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000::0:allow:read', self.get_acl(file2.name, numeric_owner=True)[b'acl_extended'])
+        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000:wheel:0:allow:read', self.get_acl(file2.name)['acl_extended'])
+        self.assert_in(b'group:ABCDEFAB-CDEF-ABCD-EFAB-CDEF00000000::0:allow:read', self.get_acl(file2.name, numeric_owner=True)['acl_extended'])
 
 
 @unittest.skipUnless(sys.platform.startswith(('linux', 'freebsd', 'darwin')), 'POSIX only tests')