
Merge pull request #1161 from ThomasWaldmann/pretty-archive

make source more pretty
enkore (commit d5edaa502c)

src/borg/archive.py  (+52 -50)

@@ -35,7 +35,7 @@ from .helpers import ProgressIndicatorPercent, log_multi
 from .helpers import PathPrefixPattern, FnmatchPattern
 from .helpers import consume
 from .helpers import CompressionDecider1, CompressionDecider2, CompressionSpec
-from .item import Item
+from .item import Item, ArchiveItem
 from .key import key_factory
 from .platform import acl_get, acl_set, set_flags, get_flags, swidth
 from .remote import cache_if_remote
@@ -269,37 +269,36 @@ class Archive:
                     break
                 i += 1
         else:
-            if name not in self.manifest.archives:
+            info = self.manifest.archives.get(name)
+            if info is None:
                 raise self.DoesNotExist(name)
-            info = self.manifest.archives[name]
-            self.load(info[b'id'])
+            self.load(info.id)
             self.zeros = b'\0' * (1 << chunker_params[1])

     def _load_meta(self, id):
         _, data = self.key.decrypt(id, self.repository.get(id))
-        metadata = msgpack.unpackb(data)
-        if metadata[b'version'] != 1:
+        metadata = ArchiveItem(internal_dict=msgpack.unpackb(data))
+        if metadata.version != 1:
             raise Exception('Unknown archive metadata version')
         return metadata

     def load(self, id):
         self.id = id
         self.metadata = self._load_meta(self.id)
-        decode_dict(self.metadata, ARCHIVE_TEXT_KEYS)
-        self.metadata[b'cmdline'] = [safe_decode(arg) for arg in self.metadata[b'cmdline']]
-        self.name = self.metadata[b'name']
+        self.metadata.cmdline = [safe_decode(arg) for arg in self.metadata.cmdline]
+        self.name = self.metadata.name

     @property
     def ts(self):
         """Timestamp of archive creation (start) in UTC"""
-        ts = self.metadata[b'time']
+        ts = self.metadata.time
         return parse_timestamp(ts)

     @property
     def ts_end(self):
         """Timestamp of archive creation (end) in UTC"""
         # fall back to time if there is no time_end present in metadata
-        ts = self.metadata.get(b'time_end') or self.metadata[b'time']
+        ts = self.metadata.get('time_end') or self.metadata.time
         return parse_timestamp(ts)

     @property
@@ -336,7 +335,7 @@ Number of files: {0.stats.nfiles}'''.format(
         return filter(item) if filter else True

     def iter_items(self, filter=None, preload=False):
-        for item in self.pipeline.unpack_many(self.metadata[b'items'], preload=preload,
+        for item in self.pipeline.unpack_many(self.metadata.items, preload=preload,
                                               filter=lambda item: self.item_filter(item, filter)):
             yield item

@@ -366,7 +365,7 @@ Number of files: {0.stats.nfiles}'''.format(
         metadata = {
             'version': 1,
             'name': name,
-            'comment': comment,
+            'comment': comment or '',
             'items': self.items_buffer.chunks,
             'cmdline': sys.argv,
             'hostname': socket.gethostname(),
@@ -376,10 +375,11 @@ Number of files: {0.stats.nfiles}'''.format(
             'chunker_params': self.chunker_params,
         }
         metadata.update(additional_metadata or {})
-        data = msgpack.packb(StableDict(metadata), unicode_errors='surrogateescape')
+        metadata = ArchiveItem(metadata)
+        data = msgpack.packb(metadata.as_dict(), unicode_errors='surrogateescape')
         self.id = self.key.id_hash(data)
         self.cache.add_chunk(self.id, Chunk(data), self.stats)
-        self.manifest.archives[name] = {'id': self.id, 'time': metadata['time']}
+        self.manifest.archives[name] = (self.id, metadata.time)
         self.manifest.write()
         self.repository.commit()
         self.cache.commit()
@@ -400,7 +400,7 @@ Number of files: {0.stats.nfiles}'''.format(
         cache.begin_txn()
         stats = Statistics()
         add(self.id)
-        for id, chunk in zip(self.metadata[b'items'], self.repository.get_many(self.metadata[b'items'])):
+        for id, chunk in zip(self.metadata.items, self.repository.get_many(self.metadata.items)):
             add(id)
             _, data = self.key.decrypt(id, chunk)
             unpacker.feed(data)
@@ -588,12 +588,12 @@ Number of files: {0.stats.nfiles}'''.format(
                     raise

     def set_meta(self, key, value):
-        metadata = StableDict(self._load_meta(self.id))
-        metadata[key] = value
-        data = msgpack.packb(metadata, unicode_errors='surrogateescape')
+        metadata = self._load_meta(self.id)
+        setattr(metadata, key, value)
+        data = msgpack.packb(metadata.as_dict(), unicode_errors='surrogateescape')
         new_id = self.key.id_hash(data)
         self.cache.add_chunk(new_id, Chunk(data), self.stats)
-        self.manifest.archives[self.name] = {'id': new_id, 'time': metadata[b'time']}
+        self.manifest.archives[self.name] = (new_id, metadata.time)
         self.cache.chunk_decref(self.id, self.stats)
         self.id = new_id

@@ -602,7 +602,7 @@ Number of files: {0.stats.nfiles}'''.format(
             raise self.AlreadyExists(name)
         oldname = self.name
         self.name = name
-        self.set_meta(b'name', name)
+        self.set_meta('name', name)
         del self.manifest.archives[oldname]

     def delete(self, stats, progress=False, forced=False):
@@ -625,7 +625,7 @@ Number of files: {0.stats.nfiles}'''.format(
         error = False
         try:
             unpacker = msgpack.Unpacker(use_list=False)
-            items_ids = self.metadata[b'items']
+            items_ids = self.metadata.items
             pi = ProgressIndicatorPercent(total=len(items_ids), msg="Decrementing references %3.0f%%", same_line=True)
             for (i, (items_id, data)) in enumerate(zip(items_ids, self.repository.get_many(items_ids))):
                 if progress:
@@ -844,7 +844,7 @@ Number of files: {0.stats.nfiles}'''.format(
     @staticmethod
     def list_archives(repository, key, manifest, cache=None):
         # expensive! see also Manifest.list_archive_infos.
-        for name, info in manifest.archives.items():
+        for name in manifest.archives:
             yield Archive(repository, key, manifest, name, cache=cache)

     @staticmethod
@@ -1075,8 +1075,9 @@ class ArchiveChecker:
             except (TypeError, ValueError, StopIteration):
                 continue
             if valid_archive(archive):
-                logger.info('Found archive %s', archive[b'name'].decode('utf-8'))
-                manifest.archives[archive[b'name'].decode('utf-8')] = {b'id': chunk_id, b'time': archive[b'time']}
+                archive = ArchiveItem(internal_dict=archive)
+                logger.info('Found archive %s', archive.name)
+                manifest.archives[archive.name] = (chunk_id, archive.time)
         logger.info('Manifest rebuild complete.')
         return manifest

@@ -1187,7 +1188,7 @@ class ArchiveChecker:
                 return required_item_keys.issubset(keys) and keys.issubset(item_keys)

             i = 0
-            for state, items in groupby(archive[b'items'], missing_chunk_detector):
+            for state, items in groupby(archive.items, missing_chunk_detector):
                 items = list(items)
                 if state % 2:
                     for chunk_id in items:
@@ -1215,37 +1216,38 @@ class ArchiveChecker:

         if archive is None:
             # we need last N or all archives
-            archive_items = sorted(self.manifest.archives.items(), reverse=True,
-                                   key=lambda name_info: name_info[1][b'time'])
+            archive_infos = self.manifest.archives.list(sort_by='ts', reverse=True)
             if prefix is not None:
-                archive_items = [item for item in archive_items if item[0].startswith(prefix)]
-            num_archives = len(archive_items)
+                archive_infos = [info for info in archive_infos if info.name.startswith(prefix)]
+            num_archives = len(archive_infos)
             end = None if last is None else min(num_archives, last)
         else:
             # we only want one specific archive
-            archive_items = [item for item in self.manifest.archives.items() if item[0] == archive]
-            if not archive_items:
+            info = self.manifest.archives.get(archive)
+            if info is None:
                 logger.error("Archive '%s' not found.", archive)
+                archive_infos = []
+            else:
+                archive_infos = [info]
             num_archives = 1
             end = 1

         with cache_if_remote(self.repository) as repository:
-            for i, (name, info) in enumerate(archive_items[:end]):
-                logger.info('Analyzing archive {} ({}/{})'.format(name, num_archives - i, num_archives))
-                archive_id = info[b'id']
+            for i, info in enumerate(archive_infos[:end]):
+                logger.info('Analyzing archive {} ({}/{})'.format(info.name, num_archives - i, num_archives))
+                archive_id = info.id
                 if archive_id not in self.chunks:
                     logger.error('Archive metadata block is missing!')
                     self.error_found = True
-                    del self.manifest.archives[name]
+                    del self.manifest.archives[info.name]
                     continue
                 mark_as_possibly_superseded(archive_id)
                 cdata = self.repository.get(archive_id)
                 _, data = self.key.decrypt(archive_id, cdata)
-                archive = StableDict(msgpack.unpackb(data))
-                if archive[b'version'] != 1:
+                archive = ArchiveItem(internal_dict=msgpack.unpackb(data))
+                if archive.version != 1:
                     raise Exception('Unknown archive metadata version')
-                decode_dict(archive, ARCHIVE_TEXT_KEYS)
-                archive[b'cmdline'] = [safe_decode(arg) for arg in archive[b'cmdline']]
+                archive.cmdline = [safe_decode(arg) for arg in archive.cmdline]
                 items_buffer = ChunkBuffer(self.key)
                 items_buffer.write_chunk = add_callback
                 for item in robust_iterator(archive):
@@ -1253,14 +1255,14 @@ class ArchiveChecker:
                         verify_file_chunks(item)
                     items_buffer.add(item)
                 items_buffer.flush(flush=True)
-                for previous_item_id in archive[b'items']:
+                for previous_item_id in archive.items:
                     mark_as_possibly_superseded(previous_item_id)
-                archive[b'items'] = items_buffer.chunks
-                data = msgpack.packb(archive, unicode_errors='surrogateescape')
+                archive.items = items_buffer.chunks
+                data = msgpack.packb(archive.as_dict(), unicode_errors='surrogateescape')
                 new_archive_id = self.key.id_hash(data)
                 cdata = self.key.encrypt(Chunk(data))
                 add_reference(new_archive_id, len(data), len(cdata), cdata)
-                info[b'id'] = new_archive_id
+                self.manifest.archives[info.name] = (new_archive_id, info.ts)

     def orphan_chunks_check(self):
         if self.check_all:
@@ -1483,9 +1485,9 @@ class ArchiveRecreater:
         if completed:
             timestamp = archive.ts.replace(tzinfo=None)
             if comment is None:
-                comment = archive.metadata.get(b'comment', '')
+                comment = archive.metadata.get('comment', '')
             target.save(timestamp=timestamp, comment=comment, additional_metadata={
-                'cmdline': archive.metadata[b'cmdline'],
+                'cmdline': archive.metadata.cmdline,
                 'recreate_cmdline': sys.argv,
             })
             if replace_original:
@@ -1554,7 +1556,7 @@ class ArchiveRecreater:
         if not target:
             target = self.create_target_archive(target_name)
         # If the archives use the same chunker params, then don't rechunkify
-        target.recreate_rechunkify = tuple(archive.metadata.get(b'chunker_params')) != self.chunker_params
+        target.recreate_rechunkify = tuple(archive.metadata.get('chunker_params')) != self.chunker_params
         return target, resume_from

     def try_resume(self, archive, target_name):
@@ -1573,7 +1575,7 @@ class ArchiveRecreater:
         return target, resume_from

     def incref_partial_chunks(self, source_archive, target_archive):
-        target_archive.recreate_partial_chunks = source_archive.metadata.get(b'recreate_partial_chunks', [])
+        target_archive.recreate_partial_chunks = source_archive.metadata.get('recreate_partial_chunks', [])
         for chunk_id, size, csize in target_archive.recreate_partial_chunks:
             if not self.cache.seen_chunk(chunk_id):
                 try:
@@ -1606,8 +1608,8 @@ class ArchiveRecreater:
         return item

     def can_resume(self, archive, old_target, target_name):
-        resume_id = old_target.metadata[b'recreate_source_id']
-        resume_args = [safe_decode(arg) for arg in old_target.metadata[b'recreate_args']]
+        resume_id = old_target.metadata.recreate_source_id
+        resume_args = [safe_decode(arg) for arg in old_target.metadata.recreate_args]
        if resume_id != archive.id:
             logger.warning('Source archive changed, will discard %s and start over', target_name)
             logger.warning('Saved fingerprint:   %s', bin_to_hex(resume_id))

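The archive.py changes above all follow one pattern: bytes-keyed lookups into the raw msgpack dict are replaced by attribute access on an ArchiveItem, and manifest entries become (id, timestamp) tuples handled by the new Archives wrapper. A rough before/after sketch (the surrounding setup is assumed, not part of the diff):

    # before: raw dict straight from msgpack, bytes keys, manual decoding
    metadata = msgpack.unpackb(data)
    name = metadata[b'name'].decode('utf-8')
    item_ids = metadata[b'items']

    # after: ArchiveItem decodes and type-checks on attribute access
    metadata = ArchiveItem(internal_dict=msgpack.unpackb(data))
    name = metadata.name           # str
    item_ids = metadata.items      # list of chunk ids
    manifest.archives[name] = (archive_id, metadata.time)  # Archives takes an (id, ts) tuple
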
src/borg/archiver.py  (+11 -11)

@@ -679,8 +679,8 @@ class Archiver:
         archive2 = Archive(repository, key, manifest, args.archive2,
                            consider_part_files=args.consider_part_files)

-        can_compare_chunk_ids = archive1.metadata.get(b'chunker_params', False) == archive2.metadata.get(
-            b'chunker_params', True) or args.same_chunker_params
+        can_compare_chunk_ids = archive1.metadata.get('chunker_params', False) == archive2.metadata.get(
+            'chunker_params', True) or args.same_chunker_params
         if not can_compare_chunk_ids:
             self.print_warning('--chunker-params might be different between archives, diff will be slow.\n'
                                'If you know for certain that they are the same, pass --same-chunker-params '
@@ -734,7 +734,7 @@ class Archiver:
                     msg.append("This repository seems to have no manifest, so we can't tell anything about its contents.")
                     msg.append("This repository seems to have no manifest, so we can't tell anything about its contents.")
                 else:
                 else:
                     msg.append("You requested to completely DELETE the repository *including* all archives it contains:")
                     msg.append("You requested to completely DELETE the repository *including* all archives it contains:")
-                    for archive_info in manifest.list_archive_infos(sort_by='ts'):
+                    for archive_info in manifest.archives.list(sort_by='ts'):
                         msg.append(format_archive(archive_info))
                         msg.append(format_archive(archive_info))
                 msg.append("Type 'YES' if you understand this and want to continue: ")
                 msg.append("Type 'YES' if you understand this and want to continue: ")
                 msg = '\n'.join(msg)
                 msg = '\n'.join(msg)
@@ -812,7 +812,7 @@ class Archiver:
                 format = "{archive:<36} {time} [{id}]{NL}"
                 format = "{archive:<36} {time} [{id}]{NL}"
             formatter = ArchiveFormatter(format)
             formatter = ArchiveFormatter(format)
 
 
-            for archive_info in manifest.list_archive_infos(sort_by='ts'):
+            for archive_info in manifest.archives.list(sort_by='ts'):
                 if args.prefix and not archive_info.name.startswith(args.prefix):
                 if args.prefix and not archive_info.name.startswith(args.prefix):
                     continue
                     continue
                 write(safe_encode(formatter.format_item(archive_info)))
                 write(safe_encode(formatter.format_item(archive_info)))
@@ -831,14 +831,14 @@ class Archiver:
             stats = archive.calc_stats(cache)
             print('Archive name: %s' % archive.name)
             print('Archive fingerprint: %s' % archive.fpr)
-            print('Comment: %s' % archive.metadata.get(b'comment', ''))
-            print('Hostname: %s' % archive.metadata[b'hostname'])
-            print('Username: %s' % archive.metadata[b'username'])
+            print('Comment: %s' % archive.metadata.get('comment', ''))
+            print('Hostname: %s' % archive.metadata.hostname)
+            print('Username: %s' % archive.metadata.username)
             print('Time (start): %s' % format_time(to_localtime(archive.ts)))
             print('Time (end):   %s' % format_time(to_localtime(archive.ts_end)))
             print('Duration: %s' % archive.duration_from_meta)
             print('Number of files: %d' % stats.nfiles)
-            print('Command line: %s' % format_cmdline(archive.metadata[b'cmdline']))
+            print('Command line: %s' % format_cmdline(archive.metadata.cmdline))
             print(DASHES)
             print(STATS_HEADER)
             print(str(stats))
@@ -857,7 +857,7 @@ class Archiver:
                              '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
                              '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
                              '"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.')
                              '"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.')
             return self.exit_code
             return self.exit_code
-        archives_checkpoints = manifest.list_archive_infos(sort_by='ts', reverse=True)  # just a ArchiveInfo list
+        archives_checkpoints = manifest.archives.list(sort_by='ts', reverse=True)  # just a ArchiveInfo list
         if args.prefix:
         if args.prefix:
             archives_checkpoints = [arch for arch in archives_checkpoints if arch.name.startswith(args.prefix)]
             archives_checkpoints = [arch for arch in archives_checkpoints if arch.name.startswith(args.prefix)]
         is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search
         is_checkpoint = re.compile(r'\.checkpoint(\.\d+)?$').search
@@ -974,7 +974,7 @@ class Archiver:
                 if args.target is not None:
                     self.print_error('--target: Need to specify single archive')
                     return self.exit_code
-                for archive in manifest.list_archive_infos(sort_by='ts'):
+                for archive in manifest.archives.list(sort_by='ts'):
                     name = archive.name
                     if recreater.is_temporary_archive(name):
                         continue
@@ -1009,7 +1009,7 @@ class Archiver:
         """dump (decrypted, decompressed) archive items metadata (not: data)"""
         """dump (decrypted, decompressed) archive items metadata (not: data)"""
         archive = Archive(repository, key, manifest, args.location.archive,
         archive = Archive(repository, key, manifest, args.location.archive,
                           consider_part_files=args.consider_part_files)
                           consider_part_files=args.consider_part_files)
-        for i, item_id in enumerate(archive.metadata[b'items']):
+        for i, item_id in enumerate(archive.metadata.items):
             _, data = key.decrypt(item_id, repository.get(item_id))
             _, data = key.decrypt(item_id, repository.get(item_id))
             filename = '%06d_%s.items' % (i, bin_to_hex(item_id))
             filename = '%06d_%s.items' % (i, bin_to_hex(item_id))
             print('Dumping', filename)
             print('Dumping', filename)

src/borg/cache.py  (+8 -9)

@@ -16,7 +16,7 @@ from .helpers import get_cache_dir
 from .helpers import decode_dict, int_to_bigint, bigint_to_int, bin_to_hex
 from .helpers import format_file_size
 from .helpers import yes
-from .item import Item
+from .item import Item, ArchiveItem
 from .key import PlaintextKey
 from .locking import Lock
 from .platform import SaveFile
@@ -279,7 +279,7 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
                 return set()

         def repo_archives():
-            return set(info[b'id'] for info in self.manifest.archives.values())
+            return set(info.id for info in self.manifest.archives.list())

         def cleanup_outdated(ids):
             for id in ids:
@@ -290,12 +290,11 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
             cdata = repository.get(archive_id)
             _, data = key.decrypt(archive_id, cdata)
             chunk_idx.add(archive_id, 1, len(data), len(cdata))
-            archive = msgpack.unpackb(data)
-            if archive[b'version'] != 1:
+            archive = ArchiveItem(internal_dict=msgpack.unpackb(data))
+            if archive.version != 1:
                 raise Exception('Unknown archive metadata version')
-            decode_dict(archive, (b'name',))
             unpacker = msgpack.Unpacker()
-            for item_id, chunk in zip(archive[b'items'], repository.get_many(archive[b'items'])):
+            for item_id, chunk in zip(archive.items, repository.get_many(archive.items)):
                 _, data = key.decrypt(item_id, chunk)
                 chunk_idx.add(item_id, 1, len(data), len(chunk))
                 unpacker.feed(data)
@@ -319,9 +318,9 @@ Chunk index:    {0.total_unique_chunks:20d} {0.total_chunks:20d}"""
             return chunk_idx

         def lookup_name(archive_id):
-            for name, info in self.manifest.archives.items():
-                if info[b'id'] == archive_id:
-                    return name
+            for info in self.manifest.archives.list():
+                if info.id == archive_id:
+                    return info.name

         def create_master_idx(chunk_idx):
             logger.info('Synchronizing chunks cache...')

src/borg/constants.py  (+0 -2)

@@ -15,8 +15,6 @@ ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'hostname', 'us
 # this is the set of keys that are always present in archives:
 REQUIRED_ARCHIVE_KEYS = frozenset(['version', 'name', 'items', 'cmdline', 'time', ])

-ARCHIVE_TEXT_KEYS = (b'name', b'comment', b'hostname', b'username', b'time', b'time_end')
-
 # default umask, overriden by --umask, defaults to read/write only for owner
 UMASK_DEFAULT = 0o077


src/borg/fuse.py  (+4 -4)

@@ -73,11 +73,11 @@ class FuseOperations(llfuse.Operations):
         if archive:
             self.process_archive(archive)
         else:
-            for archive_name in manifest.archives:
+            for name in manifest.archives:
                 # Create archive placeholder inode
                 archive_inode = self._create_dir(parent=1)
-                self.contents[1][os.fsencode(archive_name)] = archive_inode
-                self.pending_archives[archive_inode] = Archive(repository, key, manifest, archive_name)
+                self.contents[1][os.fsencode(name)] = archive_inode
+                self.pending_archives[archive_inode] = Archive(repository, key, manifest, name)

     def mount(self, mountpoint, mount_options, foreground=False):
         """Mount filesystem on *mountpoint* with *mount_options*."""
@@ -117,7 +117,7 @@ class FuseOperations(llfuse.Operations):
         """Build fuse inode hierarchy from archive metadata
         """Build fuse inode hierarchy from archive metadata
         """
         """
         unpacker = msgpack.Unpacker()
         unpacker = msgpack.Unpacker()
-        for key, chunk in zip(archive.metadata[b'items'], self.repository.get_many(archive.metadata[b'items'])):
+        for key, chunk in zip(archive.metadata.items, self.repository.get_many(archive.metadata.items)):
             _, data = self.key.decrypt(key, chunk)
             _, data = self.key.decrypt(key, chunk)
             unpacker.feed(data)
             unpacker.feed(data)
             for item in unpacker:
             for item in unpacker:

src/borg/helpers.py  (+82 -29)

@@ -18,7 +18,7 @@ import time
 import unicodedata
 import uuid
 from binascii import hexlify
-from collections import namedtuple, deque
+from collections import namedtuple, deque, abc
 from contextlib import contextmanager
 from datetime import datetime, timezone, timedelta
 from fnmatch import translate
@@ -97,12 +97,76 @@ def check_extension_modules():
         raise ExtensionModuleError


+ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts')
+
+
+class Archives(abc.MutableMapping):
+    """
+    Nice wrapper around the archives dict, making sure only valid types/values get in
+    and we can deal with str keys (and it internally encodes to byte keys) and either
+    str timestamps or datetime timestamps.
+    """
+    def __init__(self):
+        # key: encoded archive name, value: dict(b'id': bytes_id, b'time': bytes_iso_ts)
+        self._archives = {}
+
+    def __len__(self):
+        return len(self._archives)
+
+    def __iter__(self):
+        return iter(safe_decode(name) for name in self._archives)
+
+    def __getitem__(self, name):
+        assert isinstance(name, str)
+        _name = safe_encode(name)
+        values = self._archives.get(_name)
+        if values is None:
+            raise KeyError
+        ts = parse_timestamp(values[b'time'].decode('utf-8'))
+        return ArchiveInfo(name=name, id=values[b'id'], ts=ts)
+
+    def __setitem__(self, name, info):
+        assert isinstance(name, str)
+        name = safe_encode(name)
+        assert isinstance(info, tuple)
+        id, ts = info
+        assert isinstance(id, bytes)
+        if isinstance(ts, datetime):
+            ts = ts.replace(tzinfo=None).isoformat()
+        assert isinstance(ts, str)
+        ts = ts.encode()
+        self._archives[name] = {b'id': id, b'time': ts}
+
+    def __delitem__(self, name):
+        assert isinstance(name, str)
+        name = safe_encode(name)
+        del self._archives[name]
+
+    def list(self, sort_by=None, reverse=False):
+        # inexpensive Archive.list_archives replacement if we just need .name, .id, .ts
+        archives = self.values()  # [self[name] for name in self]
+        if sort_by is not None:
+            archives = sorted(archives, key=attrgetter(sort_by), reverse=reverse)
+        return archives
+
+    def set_raw_dict(self, d):
+        """set the dict we get from the msgpack unpacker"""
+        for k, v in d.items():
+            assert isinstance(k, bytes)
+            assert isinstance(v, dict) and b'id' in v and b'time' in v
+            self._archives[k] = v
+
+    def get_raw_dict(self):
+        """get the dict we can give to the msgpack packer"""
+        return self._archives
+
+
 class Manifest:

     MANIFEST_ID = b'\0' * 32

     def __init__(self, key, repository, item_keys=None):
-        self.archives = {}
+        self.archives = Archives()
         self.config = {}
         self.key = key
         self.repository = repository
@@ -114,6 +178,7 @@ class Manifest:

     @classmethod
     def load(cls, repository, key=None):
+        from .item import ManifestItem
         from .key import key_factory
         from .repository import Repository
         try:
@@ -125,42 +190,30 @@ class Manifest:
         manifest = cls(key, repository)
         _, data = key.decrypt(None, cdata)
         manifest.id = key.id_hash(data)
-        m = msgpack.unpackb(data)
-        if not m.get(b'version') == 1:
+        m = ManifestItem(internal_dict=msgpack.unpackb(data))
+        if m.get('version') != 1:
             raise ValueError('Invalid manifest version')
-        manifest.archives = dict((k.decode('utf-8'), v) for k, v in m[b'archives'].items())
-        manifest.timestamp = m.get(b'timestamp')
-        if manifest.timestamp:
-            manifest.timestamp = manifest.timestamp.decode('ascii')
-        manifest.config = m[b'config']
+        manifest.archives.set_raw_dict(m.archives)
+        manifest.timestamp = m.get('timestamp')
+        manifest.config = m.config
         # valid item keys are whatever is known in the repo or every key we know
-        manifest.item_keys = ITEM_KEYS | frozenset(key.decode() for key in m.get(b'item_keys', []))
+        manifest.item_keys = ITEM_KEYS | frozenset(key.decode() for key in m.get('item_keys', []))
         return manifest, key

     def write(self):
+        from .item import ManifestItem
         self.timestamp = datetime.utcnow().isoformat()
-        data = msgpack.packb(StableDict({
-            'version': 1,
-            'archives': self.archives,
-            'timestamp': self.timestamp,
-            'config': self.config,
-            'item_keys': tuple(self.item_keys),
-        }))
+        manifest = ManifestItem(
+            version=1,
+            archives=self.archives.get_raw_dict(),
+            timestamp=self.timestamp,
+            config=self.config,
+            item_keys=tuple(self.item_keys),
+        )
+        data = msgpack.packb(manifest.as_dict())
         self.id = self.key.id_hash(data)
         self.repository.put(self.MANIFEST_ID, self.key.encrypt(Chunk(data)))

-    def list_archive_infos(self, sort_by=None, reverse=False):
-        # inexpensive Archive.list_archives replacement if we just need .name, .id, .ts
-        ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts')
-        archives = []
-        for name, values in self.archives.items():
-            ts = parse_timestamp(values[b'time'].decode('utf-8'))
-            id = values[b'id']
-            archives.append(ArchiveInfo(name=name, id=id, ts=ts))
-        if sort_by is not None:
-            archives = sorted(archives, key=attrgetter(sort_by), reverse=reverse)
-        return archives
-

 def prune_within(archives, within):
     multiplier = {'H': 1, 'd': 24, 'w': 24 * 7, 'm': 24 * 31, 'y': 24 * 365}

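The new Archives class keeps the serialized form unchanged (bytes keys mapping to {b'id': ..., b'time': ...} dicts) while callers see str names and ArchiveInfo named tuples. A short usage sketch, assuming a loaded manifest; the archive name here is made up:

    archives = manifest.archives
    archives['host-2016-06-19'] = (archive_id, datetime.utcnow())  # stored as {b'id': ..., b'time': ...}
    info = archives.get('host-2016-06-19')                         # ArchiveInfo(name=..., id=..., ts=datetime)
    for info in archives.list(sort_by='ts', reverse=True):
        print(info.name, info.ts)
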
src/borg/item.py  (+58 -0)

@@ -204,3 +204,61 @@ class Key(PropDict):
     enc_hmac_key = PropDict._make_property('enc_hmac_key', bytes)
     id_key = PropDict._make_property('id_key', bytes)
     chunk_seed = PropDict._make_property('chunk_seed', int)
+
+
+class ArchiveItem(PropDict):
+    """
+    ArchiveItem abstraction that deals with validation and the low-level details internally:
+
+    An ArchiveItem is created either from msgpack unpacker output, from another dict, from kwargs or
+    built step-by-step by setting attributes.
+
+    msgpack gives us a dict with bytes-typed keys, just give it to ArchiveItem(d) and use arch.xxx later.
+
+    If a ArchiveItem shall be serialized, give as_dict() method output to msgpack packer.
+    """
+
+    VALID_KEYS = {'version', 'name', 'items', 'cmdline', 'hostname', 'username', 'time', 'time_end',
+                  'comment', 'chunker_params',
+                  'recreate_cmdline', 'recreate_source_id', 'recreate_args', 'recreate_partial_chunks',
+                  }  # str-typed keys
+
+    __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
+
+    version = PropDict._make_property('version', int)
+    name = PropDict._make_property('name', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
+    items = PropDict._make_property('items', list)
+    cmdline = PropDict._make_property('cmdline', list)  # list of s-e-str
+    hostname = PropDict._make_property('hostname', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
+    username = PropDict._make_property('username', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
+    time = PropDict._make_property('time', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
+    time_end = PropDict._make_property('time_end', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
+    comment = PropDict._make_property('comment', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
+    chunker_params = PropDict._make_property('chunker_params', tuple)
+    recreate_source_id = PropDict._make_property('recreate_source_id', bytes)
+    recreate_cmdline = PropDict._make_property('recreate_cmdline', list)  # list of s-e-str
+    recreate_args = PropDict._make_property('recreate_args', list)  # list of s-e-str
+    recreate_partial_chunks = PropDict._make_property('recreate_partial_chunks', list)  # list of tuples
+
+
+class ManifestItem(PropDict):
+    """
+    ManifestItem abstraction that deals with validation and the low-level details internally:
+
+    A ManifestItem is created either from msgpack unpacker output, from another dict, from kwargs or
+    built step-by-step by setting attributes.
+
+    msgpack gives us a dict with bytes-typed keys, just give it to ManifestItem(d) and use manifest.xxx later.
+
+    If a ManifestItem shall be serialized, give as_dict() method output to msgpack packer.
+    """
+
+    VALID_KEYS = {'version', 'archives', 'timestamp', 'config', 'item_keys', }  # str-typed keys
+
+    __slots__ = ("_dict", )  # avoid setting attributes not supported by properties
+
+    version = PropDict._make_property('version', int)
+    archives = PropDict._make_property('archives', dict)  # name -> dict
+    timestamp = PropDict._make_property('time', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
+    config = PropDict._make_property('config', dict)
+    item_keys = PropDict._make_property('item_keys', tuple)

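ArchiveItem and ManifestItem reuse the PropDict machinery already in item.py: typed properties that decode surrogate-escaped strings on read, plus as_dict() for the msgpack packer. A minimal round-trip sketch based only on the calls visible in this diff:

    archive = ArchiveItem(version=1, name='test', items=[], cmdline=['borg', 'create'],
                          time='2016-06-19T09:00:00')
    data = msgpack.packb(archive.as_dict(), unicode_errors='surrogateescape')
    archive2 = ArchiveItem(internal_dict=msgpack.unpackb(data))
    assert archive2.name == 'test'
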
src/borg/remote.py  (+9 -8)

@@ -283,22 +283,23 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
             return msgid

         def handle_error(error, res):
-            if error == b'DoesNotExist':
+            error = error.decode('utf-8')
+            if error == 'DoesNotExist':
                 raise Repository.DoesNotExist(self.location.orig)
-            elif error == b'AlreadyExists':
+            elif error == 'AlreadyExists':
                 raise Repository.AlreadyExists(self.location.orig)
-            elif error == b'CheckNeeded':
+            elif error == 'CheckNeeded':
                 raise Repository.CheckNeeded(self.location.orig)
-            elif error == b'IntegrityError':
+            elif error == 'IntegrityError':
                 raise IntegrityError(res)
-            elif error == b'PathNotAllowed':
+            elif error == 'PathNotAllowed':
                 raise PathNotAllowed(*res)
-            elif error == b'ObjectNotFound':
+            elif error == 'ObjectNotFound':
                 raise Repository.ObjectNotFound(res[0], self.location.orig)
-            elif error == b'InvalidRPCMethod':
+            elif error == 'InvalidRPCMethod':
                 raise InvalidRPCMethod(*res)
             else:
-                raise self.RPCError(res.decode('utf-8'), error.decode('utf-8'))
+                raise self.RPCError(res.decode('utf-8'), error)

         calls = list(calls)
         waiting_for = []

src/borg/testsuite/archive.py  (+2 -2)

@@ -8,7 +8,7 @@ import msgpack

 from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_dict, ITEM_KEYS, Statistics
 from ..archive import BackupOSError, backup_io, backup_io_iter
-from ..item import Item
+from ..item import Item, ArchiveItem
 from ..key import PlaintextKey
 from ..helpers import Manifest
 from . import BaseTestCase
@@ -77,7 +77,7 @@ class ArchiveTimestampTestCase(BaseTestCase):
         key = PlaintextKey(repository)
         manifest = Manifest(repository, key)
         a = Archive(repository, key, manifest, 'test', create=True)
-        a.metadata = {b'time': isoformat}
+        a.metadata = ArchiveItem(time=isoformat)
         self.assert_equal(a.ts, expected)

     def test_with_microseconds(self):

src/borg/testsuite/archiver.py  (+1 -1)

@@ -1859,7 +1859,7 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
     def test_missing_archive_item_chunk(self):
         archive, repository = self.open_archive('archive1')
         with repository:
-            repository.delete(archive.metadata[b'items'][-5])
+            repository.delete(archive.metadata.items[-5])
             repository.commit()
         self.cmd('check', self.repository_location, exit_code=1)
         self.cmd('check', '--repair', self.repository_location, exit_code=0)

src/borg/testsuite/key.py  (+5 -5)

@@ -69,9 +69,9 @@ class TestKey:
         monkeypatch.setenv('BORG_PASSPHRASE', 'test')
         key = KeyfileKey.create(self.MockRepository(), self.MockArgs())
         assert bytes_to_long(key.enc_cipher.iv, 8) == 0
-        manifest = key.encrypt(Chunk(b'XXX'))
+        manifest = key.encrypt(Chunk(b'ABC'))
         assert key.extract_nonce(manifest) == 0
-        manifest2 = key.encrypt(Chunk(b'XXX'))
+        manifest2 = key.encrypt(Chunk(b'ABC'))
         assert manifest != manifest2
         assert key.decrypt(None, manifest) == key.decrypt(None, manifest2)
         assert key.extract_nonce(manifest2) == 1
@@ -91,7 +91,7 @@ class TestKey:
         assert not keyfile.exists()
         key = KeyfileKey.create(self.MockRepository(), self.MockArgs())
         assert keyfile.exists()
-        chunk = Chunk(b'XXX')
+        chunk = Chunk(b'ABC')
         chunk_id = key.id_hash(chunk.data)
         chunk_cdata = key.encrypt(chunk)
         key = KeyfileKey.detect(self.MockRepository(), chunk_cdata)
@@ -124,9 +124,9 @@ class TestKey:
         assert hexlify(key.enc_hmac_key) == b'b885a05d329a086627412a6142aaeb9f6c54ab7950f996dd65587251f6bc0901'
         assert hexlify(key.enc_key) == b'2ff3654c6daf7381dbbe718d2b20b4f1ea1e34caa6cc65f6bb3ac376b93fed2a'
         assert key.chunk_seed == -775740477
-        manifest = key.encrypt(Chunk(b'XXX'))
+        manifest = key.encrypt(Chunk(b'ABC'))
         assert key.extract_nonce(manifest) == 0
-        manifest2 = key.encrypt(Chunk(b'XXX'))
+        manifest2 = key.encrypt(Chunk(b'ABC'))
         assert manifest != manifest2
         assert key.decrypt(None, manifest) == key.decrypt(None, manifest2)
         assert key.extract_nonce(manifest2) == 1