Просмотр исходного кода

archiver: Add 'debug dump-manifest' and 'debug dump-archive' commands.

Martin Hostettler 8 лет назад
Родитель
Коммит
e8335dba0f
3 измененных файлов с 177 добавлено и 1 удалено
  1. 103 1
      src/borg/archiver.py
  2. 44 0
      src/borg/helpers.py
  3. 30 0
      src/borg/testsuite/archiver.py

+ 103 - 1
src/borg/archiver.py

@@ -4,6 +4,7 @@ import faulthandler
 import functools
 import hashlib
 import inspect
+import json
 import logging
 import os
 import re
@@ -22,6 +23,8 @@ from itertools import zip_longest
 from .logger import create_logger, setup_logging
 logger = create_logger()
 
+import msgpack
+
 from . import __version__
 from . import helpers
 from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics, is_special
@@ -34,11 +37,12 @@ from .helpers import Error, NoManifestError
 from .helpers import location_validator, archivename_validator, ChunkerParams, CompressionSpec
 from .helpers import PrefixSpec, SortBySpec, HUMAN_SORT_KEYS
 from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter, format_time, format_file_size, format_archive
-from .helpers import safe_encode, remove_surrogates, bin_to_hex
+from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict
 from .helpers import prune_within, prune_split
 from .helpers import to_localtime, timestamp
 from .helpers import get_cache_dir
 from .helpers import Manifest
+from .helpers import StableDict
 from .helpers import update_excludes, check_extension_modules
 from .helpers import dir_is_tagged, is_slow_msgpack, yes, sysinfo
 from .helpers import log_multi
@@ -1226,6 +1230,74 @@ class Archiver:
         print('Done.')
         return EXIT_SUCCESS
 
+    @with_repository()
+    def do_debug_dump_archive(self, args, repository, manifest, key):
+        """dump decoded archive metadata (not: data)"""
+        # NOTE: the docstring above is also used as the argparse description of 'debug dump-archive'.
+        try:  # look up this archive's raw manifest entry, keyed by the encoded archive name
+            archive_meta_orig = manifest.archives.get_raw_dict()[safe_encode(args.location.archive)]
+        except KeyError:
+            raise Archive.DoesNotExist(args.location.archive)
+
+        indent = 4  # spaces per nesting level in the JSON output
+
+        def do_indent(d):  # JSON-encode d, then shift every line right by one nesting level
+            return textwrap.indent(json.dumps(d, indent=indent), prefix=' ' * indent)
+
+        def output(fd):  # write the complete dump to the text stream fd
+            # this outputs megabytes of data for a modest sized archive, so the outer JSON is written by hand and streamed
+            fd.write('{\n')
+            fd.write('    "_name": ' + json.dumps(args.location.archive) + ",\n")
+            fd.write('    "_manifest_entry":\n')
+            fd.write(do_indent(prepare_dump_dict(archive_meta_orig)))
+            fd.write(',\n')
+            # fetch and decrypt the archive metadata object named by the manifest entry's b'id'
+            _, data = key.decrypt(archive_meta_orig[b'id'], repository.get(archive_meta_orig[b'id']))
+            archive_org_dict = msgpack.unpackb(data, object_hook=StableDict, unicode_errors='surrogateescape')
+
+            fd.write('    "_meta":\n')
+            fd.write(do_indent(prepare_dump_dict(archive_org_dict)))
+            fd.write(',\n')
+            fd.write('    "_items": [\n')
+            # each id in b'items' names a repository object; all of them are fed to one streaming unpacker
+            unpacker = msgpack.Unpacker(use_list=False, object_hook=StableDict)
+            first = True  # no separating comma is written before the first item
+            for item_id in archive_org_dict[b'items']:
+                _, data = key.decrypt(item_id, repository.get(item_id))
+                unpacker.feed(data)
+                for item in unpacker:
+                    item = prepare_dump_dict(item)
+                    if first:
+                        first = False
+                    else:
+                        fd.write(',\n')
+                    fd.write(do_indent(item))  # items are written one by one, never collected in a list
+
+            fd.write('\n')
+            fd.write('    ]\n}\n')  # close the "_items" array and the top-level object
+
+        if args.path == '-':  # '-' selects stdout instead of a file
+            output(sys.stdout)
+        else:
+            with open(args.path, 'w') as fd:
+                output(fd)
+        return EXIT_SUCCESS
+
+    @with_repository()
+    def do_debug_dump_manifest(self, args, repository, manifest, key):
+        """dump decoded repository manifest"""
+        # NOTE: the docstring above is also used as the argparse description of 'debug dump-manifest'.
+        _, data = key.decrypt(None, repository.get(manifest.MANIFEST_ID))  # fetch and decrypt the manifest object
+        # NOTE(review): uses msgpack.fallback.unpackb while do_debug_dump_archive uses msgpack.unpackb - confirm intended
+        meta = prepare_dump_dict(msgpack.fallback.unpackb(data, object_hook=StableDict, unicode_errors='surrogateescape'))
+
+        if args.path == '-':  # '-' selects stdout instead of a file
+            json.dump(meta, sys.stdout, indent=4)
+        else:
+            with open(args.path, 'w') as fd:
+                json.dump(meta, fd, indent=4)
+        return EXIT_SUCCESS
+
     @with_repository()
     def do_debug_dump_repo_objs(self, args, repository, manifest, key):
         """dump (decrypted, decompressed) repo objects"""
@@ -2716,6 +2788,36 @@ class Archiver:
                                type=location_validator(archive=True),
                                help='archive to dump')
 
+        debug_dump_archive_epilog = textwrap.dedent("""
+        This command dumps all metadata of an archive in a decoded form to a file.
+        """)
+        subparser = debug_parsers.add_parser('dump-archive', parents=[common_parser], add_help=False,
+                                          description=self.do_debug_dump_archive.__doc__,
+                                          epilog=debug_dump_archive_epilog,
+                                          formatter_class=argparse.RawDescriptionHelpFormatter,
+                                          help='dump decoded archive metadata (debug)')
+        subparser.set_defaults(func=self.do_debug_dump_archive)
+        subparser.add_argument('location', metavar='ARCHIVE',
+                               type=location_validator(archive=True),
+                               help='archive to dump')
+        subparser.add_argument('path', metavar='PATH', type=str,
+                               help='file to dump data into')
+
+        debug_dump_manifest_epilog = textwrap.dedent("""
+        This command dumps manifest metadata of a repository in a decoded form to a file.
+        """)
+        subparser = debug_parsers.add_parser('dump-manifest', parents=[common_parser], add_help=False,
+                                          description=self.do_debug_dump_manifest.__doc__,
+                                          epilog=debug_dump_manifest_epilog,
+                                          formatter_class=argparse.RawDescriptionHelpFormatter,
+                                          help='dump decoded repository metadata (debug)')
+        subparser.set_defaults(func=self.do_debug_dump_manifest)
+        subparser.add_argument('location', metavar='REPOSITORY',
+                               type=location_validator(archive=False),
+                               help='repository to dump')
+        subparser.add_argument('path', metavar='PATH', type=str,
+                               help='file to dump data into')
+
         debug_dump_repo_objs_epilog = textwrap.dedent("""
         This command dumps raw (but decrypted and decompressed) repo objects to files.
         """)

+ 44 - 0
src/borg/helpers.py

@@ -1,5 +1,6 @@
 import argparse
 import contextlib
+import collections
 import grp
 import hashlib
 import logging
@@ -1093,6 +1094,49 @@ def decode_dict(d, keys, encoding='utf-8', errors='surrogateescape'):
     return d
 
 
+def prepare_dump_dict(d):  # return a converted copy of dict d in which nested bytes values are replaced by str
+    def decode_bytes(value):  # bytes -> str, either decoded text or DEL-prefixed hex
+        # this should somehow be reversible later, but usual strings should
+        # look nice and chunk ids should mostly show in hex. Use a special
+        # inband signaling character (ASCII DEL) to distinguish between
+        # decoded and hex mode.
+        if not value.startswith(b'\x7f'):  # values that already start with DEL go to hex, so decoded text never starts with DEL
+            try:
+                value = value.decode()
+                return value
+            except UnicodeDecodeError:  # not decodable as text, fall through to the hex form
+                pass
+        return '\u007f' + bin_to_hex(value)  # DEL marker followed by bin_to_hex(value)
+
+    def decode_tuple(t):  # convert a tuple/list element by element; the result is always a list
+        res = []
+        for value in t:
+            if isinstance(value, dict):
+                value = decode(value)
+            elif isinstance(value, tuple) or isinstance(value, list):
+                value = decode_tuple(value)
+            elif isinstance(value, bytes):
+                value = decode_bytes(value)
+            res.append(value)  # values of any other type are kept unchanged
+        return res
+
+    def decode(d):  # convert a dict into an OrderedDict, keeping the iteration order of d
+        res = collections.OrderedDict()
+        for key, value in d.items():
+            if isinstance(value, dict):
+                value = decode(value)
+            elif isinstance(value, (tuple, list)):
+                value = decode_tuple(value)
+            elif isinstance(value, bytes):
+                value = decode_bytes(value)
+            if isinstance(key, bytes):  # bytes keys are decoded directly, without the hex fallback used for values
+                key = key.decode()
+            res[key] = value
+        return res
+
+    return decode(d)
+
+
 def remove_surrogates(s, errors='replace'):
     """Replace surrogates generated by fsdecode with '?'
     """

+ 30 - 0
src/borg/testsuite/archiver.py

@@ -3,6 +3,7 @@ from configparser import ConfigParser
 import errno
 import os
 import inspect
+import json
 from datetime import datetime
 from datetime import timedelta
 from io import StringIO
@@ -2020,6 +2021,35 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
  2: 737475 - 88
 """
 
+    def test_debug_dump_manifest(self):  # the dump is valid JSON with the expected top-level keys
+        self.create_regular_file('file1', size=1024 * 80)
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        dump_file = self.output_path + '/dump'
+        output = self.cmd('debug', 'dump-manifest', self.repository_location, dump_file)
+        assert output == ""  # dumping to a file must not print anything
+        with open(dump_file, "r") as f:
+            result = json.load(f)  # raises if the dump is not valid JSON
+        assert 'archives' in result
+        assert 'config' in result
+        assert 'item_keys' in result
+        assert 'timestamp' in result
+        assert 'version' in result
+
+    def test_debug_dump_archive(self):  # the dump is valid JSON with the four top-level sections
+        self.create_regular_file('file1', size=1024 * 80)
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        self.cmd('create', self.repository_location + '::test', 'input')
+        dump_file = self.output_path + '/dump'
+        output = self.cmd('debug', 'dump-archive', self.repository_location + "::test", dump_file)
+        assert output == ""  # dumping to a file must not print anything
+        with open(dump_file, "r") as f:
+            result = json.load(f)  # raises if the hand-written streaming JSON is malformed
+        assert '_name' in result
+        assert '_manifest_entry' in result
+        assert '_meta' in result
+        assert '_items' in result
+
 
 @unittest.skipUnless('binary' in BORG_EXES, 'no borg.exe available')
 class ArchiverTestCaseBinary(ArchiverTestCase):