Sfoglia il codice sorgente

Merge pull request #2488 from enkore/issue/2439

list: --json-lines for archive contents
enkore 8 anni fa
parent
commit
badc79c323
4 file modificati con 67 aggiunte e 70 eliminazioni
  1. 16 34
      docs/internals/frontends.rst
  2. 18 6
      src/borg/archiver.py
  3. 5 24
      src/borg/helpers.py
  4. 28 6
      src/borg/testsuite/archiver.py

+ 16 - 34
docs/internals/frontends.rst

@@ -270,10 +270,12 @@ Example *borg info* output::
             "last_modified": "Mon, 2017-02-27 21:21:58",
             "last_modified": "Mon, 2017-02-27 21:21:58",
             "location": "/home/user/testrepo"
             "location": "/home/user/testrepo"
         },
         },
-        "security_dir": "/home/user/.config/borg/security/0cbe6166b46627fd26b97f8831e2ca97584280a46714ef84d2b668daf8271a23"
+        "security_dir": "/home/user/.config/borg/security/0cbe6166b46627fd26b97f8831e2ca97584280a46714ef84d2b668daf8271a23",
+        "archives": []
     }
     }
 
 
-.. rubric:: Archive formats
+Archive formats
++++++++++++++++
 
 
 :ref:`borg_info` uses an extended format for archives, which is more expensive to retrieve, while
 :ref:`borg_info` uses an extended format for archives, which is more expensive to retrieve, while
 :ref:`borg_list` uses a simpler format that is faster to retrieve. Either return archives in an
 :ref:`borg_list` uses a simpler format that is faster to retrieve. Either return archives in an
@@ -390,41 +392,21 @@ The same archive with more information (``borg info --last 1 --json``)::
         }
         }
     }
     }
 
 
-.. rubric:: File listings
+File listings
++++++++++++++
 
 
-Listing the contents of an archive can produce *a lot* of JSON. Each item (file, directory, ...) is described
-by one object in the *items* array of the :ref:`borg_list` output. Refer to the *borg list* documentation for
-the available keys and their meaning.
+Listing the contents of an archive can produce *a lot* of JSON. Since many JSON implementations
+don't support a streaming mode of operation, which is pretty much required to deal with this amount of
+JSON, output is generated in the `JSON lines <http://jsonlines.org/>`_ format, which is simply
+a number of JSON objects separated by newlines.
 
 
-Example (excerpt)::
+Each item (file, directory, ...) is described by one object in the :ref:`borg_list` output.
+Refer to the *borg list* documentation for the available keys and their meaning.
 
 
-    {
-        "encryption": {
-            "mode": "repokey"
-        },
-        "repository": {
-            "id": "0cbe6166b46627fd26b97f8831e2ca97584280a46714ef84d2b668daf8271a23",
-            "last_modified": "Mon, 2017-02-27 21:21:58",
-            "location": "/home/user/repository"
-        },
-        "items": [
-            {
-                "type": "d",
-                "mode": "drwxr-xr-x",
-                "user": "user",
-                "group": "user",
-                "uid": 1000,
-                "gid": 1000,
-                "path": "linux",
-                "healthy": true,
-                "source": "",
-                "linktarget": "",
-                "flags": null,
-                "isomtime": "Sat, 2016-05-07 19:46:01",
-                "size": 0
-            }
-        ]
-    }
+Example (excerpt) of ``borg list --json-lines``::
+
+    {"type": "d", "mode": "drwxr-xr-x", "user": "user", "group": "user", "uid": 1000, "gid": 1000, "path": "linux", "healthy": true, "source": "", "linktarget": "", "flags": null, "isomtime": "Sat, 2016-05-07 19:46:01", "size": 0}
+    {"type": "d", "mode": "drwxr-xr-x", "user": "user", "group": "user", "uid": 1000, "gid": 1000, "path": "linux/baz", "healthy": true, "source": "", "linktarget": "", "flags": null, "isomtime": "Sat, 2016-05-07 19:46:01", "size": 0}
 
 
 .. _msgid:
 .. _msgid:
 
 

+ 18 - 6
src/borg/archiver.py

@@ -1048,8 +1048,14 @@ class Archiver:
             write = sys.stdout.buffer.write
             write = sys.stdout.buffer.write
 
 
         if args.location.archive:
         if args.location.archive:
+            if args.json:
+                self.print_error('The --json option is only valid for listing archives, not archive contents.')
+                return self.exit_code
             return self._list_archive(args, repository, manifest, key, write)
             return self._list_archive(args, repository, manifest, key, write)
         else:
         else:
+            if args.json_lines:
+                self.print_error('The --json-lines option is only valid for listing archive contents, not archives.')
+                return self.exit_code
             return self._list_repository(args, manifest, write)
             return self._list_repository(args, manifest, write)
 
 
     def _list_archive(self, args, repository, manifest, key, write):
     def _list_archive(self, args, repository, manifest, key, write):
@@ -1065,11 +1071,9 @@ class Archiver:
             archive = Archive(repository, key, manifest, args.location.archive, cache=cache,
             archive = Archive(repository, key, manifest, args.location.archive, cache=cache,
                               consider_part_files=args.consider_part_files)
                               consider_part_files=args.consider_part_files)
 
 
-            formatter = ItemFormatter(archive, format, json=args.json)
-            write(safe_encode(formatter.begin()))
+            formatter = ItemFormatter(archive, format, json_lines=args.json_lines)
             for item in archive.iter_items(lambda item: matcher.match(item.path)):
             for item in archive.iter_items(lambda item: matcher.match(item.path)):
                 write(safe_encode(formatter.format_item(item)))
                 write(safe_encode(formatter.format_item(item)))
-            write(safe_encode(formatter.end()))
 
 
         # Only load the cache if it will be used
         # Only load the cache if it will be used
         if ItemFormatter.format_needs_cache(format):
         if ItemFormatter.format_needs_cache(format):
@@ -2611,9 +2615,17 @@ class Archiver:
                                help="""specify format for file listing
                                help="""specify format for file listing
                                 (default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NL}")""")
                                 (default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NL}")""")
         subparser.add_argument('--json', action='store_true',
         subparser.add_argument('--json', action='store_true',
-                               help='format output as JSON. The form of --format is ignored, but keys used in it '
-                                    'are added to the JSON output. Some keys are always present. Note: JSON can only '
-                                    'represent text. A "bpath" key is therefore not available.')
+                               help='Only valid for listing repository contents. Format output as JSON. '
+                                    'The form of --format is ignored, '
+                                    'but keys used in it are added to the JSON output. '
+                                    'Some keys are always present. Note: JSON can only represent text. '
+                                    'A "barchive" key is therefore not available.')
+        subparser.add_argument('--json-lines', action='store_true',
+                               help='Only valid for listing archive contents. Format output as JSON Lines. '
+                                    'The form of --format is ignored, '
+                                    'but keys used in it are added to the JSON output. '
+                                    'Some keys are always present. Note: JSON can only represent text. '
+                                    'A "bpath" key is therefore not available.')
         subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
         subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
                                type=location_validator(),
                                type=location_validator(),
                                help='repository/archive to list contents of')
                                help='repository/archive to list contents of')

+ 5 - 24
src/borg/helpers.py

@@ -1505,9 +1505,9 @@ class ItemFormatter(BaseFormatter):
         format_keys = {f[1] for f in Formatter().parse(format)}
         format_keys = {f[1] for f in Formatter().parse(format)}
         return any(key in cls.KEYS_REQUIRING_CACHE for key in format_keys)
         return any(key in cls.KEYS_REQUIRING_CACHE for key in format_keys)
 
 
-    def __init__(self, archive, format, *, json=False):
+    def __init__(self, archive, format, *, json_lines=False):
         self.archive = archive
         self.archive = archive
-        self.json = json
+        self.json_lines = json_lines
         static_keys = {
         static_keys = {
             'archivename': archive.name,
             'archivename': archive.name,
             'archiveid': archive.fpr,
             'archiveid': archive.fpr,
@@ -1532,33 +1532,14 @@ class ItemFormatter(BaseFormatter):
         for hash_function in hashlib.algorithms_guaranteed:
         for hash_function in hashlib.algorithms_guaranteed:
             self.add_key(hash_function, partial(self.hash_item, hash_function))
             self.add_key(hash_function, partial(self.hash_item, hash_function))
         self.used_call_keys = set(self.call_keys) & self.format_keys
         self.used_call_keys = set(self.call_keys) & self.format_keys
-        if self.json:
+        if self.json_lines:
             self.item_data = {}
             self.item_data = {}
             self.format_item = self.format_item_json
             self.format_item = self.format_item_json
-            self.first = True
         else:
         else:
             self.item_data = static_keys
             self.item_data = static_keys
 
 
-    def begin(self):
-        if not self.json:
-            return ''
-        begin = json_dump(basic_json_data(self.archive.manifest))
-        begin, _, _ = begin.rpartition('\n}')  # remove last closing brace, we want to extend the object
-        begin += ',\n'
-        begin += '    "items": [\n'
-        return begin
-
-    def end(self):
-        if not self.json:
-            return ''
-        return "]}"
-
     def format_item_json(self, item):
     def format_item_json(self, item):
-        if self.first:
-            self.first = False
-            return json.dumps(self.get_item_data(item))
-        else:
-            return ',' + json.dumps(self.get_item_data(item))
+        return json.dumps(self.get_item_data(item)) + '\n'
 
 
     def add_key(self, key, callable_with_item):
     def add_key(self, key, callable_with_item):
         self.call_keys[key] = callable_with_item
         self.call_keys[key] = callable_with_item
@@ -1585,7 +1566,7 @@ class ItemFormatter(BaseFormatter):
         item_data['uid'] = item.uid
         item_data['uid'] = item.uid
         item_data['gid'] = item.gid
         item_data['gid'] = item.gid
         item_data['path'] = remove_surrogates(item.path)
         item_data['path'] = remove_surrogates(item.path)
-        if self.json:
+        if self.json_lines:
             item_data['healthy'] = 'chunks_healthy' not in item
             item_data['healthy'] = 'chunks_healthy' not in item
         else:
         else:
             item_data['bpath'] = item.path
             item_data['bpath'] = item.path

+ 28 - 6
src/borg/testsuite/archiver.py

@@ -1615,22 +1615,44 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         assert list_repo['encryption']['mode'] == 'repokey'
         assert list_repo['encryption']['mode'] == 'repokey'
         assert 'keyfile' not in list_repo['encryption']
         assert 'keyfile' not in list_repo['encryption']
 
 
-        list_archive = json.loads(self.cmd('list', '--json', self.repository_location + '::test'))
-        assert list_repo['repository'] == list_archive['repository']
-        items = list_archive['items']
+        list_archive = self.cmd('list', '--json-lines', self.repository_location + '::test')
+        items = [json.loads(s) for s in list_archive.splitlines()]
         assert len(items) == 2
         assert len(items) == 2
         file1 = items[1]
         file1 = items[1]
         assert file1['path'] == 'input/file1'
         assert file1['path'] == 'input/file1'
         assert file1['size'] == 81920
         assert file1['size'] == 81920
 
 
-        list_archive = json.loads(self.cmd('list', '--json', '--format={sha256}', self.repository_location + '::test'))
-        assert list_repo['repository'] == list_archive['repository']
-        items = list_archive['items']
+        list_archive = self.cmd('list', '--json-lines', '--format={sha256}', self.repository_location + '::test')
+        items = [json.loads(s) for s in list_archive.splitlines()]
         assert len(items) == 2
         assert len(items) == 2
         file1 = items[1]
         file1 = items[1]
         assert file1['path'] == 'input/file1'
         assert file1['path'] == 'input/file1'
         assert file1['sha256'] == 'b2915eb69f260d8d3c25249195f2c8f4f716ea82ec760ae929732c0262442b2b'
         assert file1['sha256'] == 'b2915eb69f260d8d3c25249195f2c8f4f716ea82ec760ae929732c0262442b2b'
 
 
+    def test_list_json_args(self):
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        self.cmd('list', '--json-lines', self.repository_location, exit_code=2)
+        self.cmd('list', '--json', self.repository_location + '::archive', exit_code=2)
+
+    def test_log_json(self):
+        self.create_test_files()
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        log = self.cmd('create', '--log-json', self.repository_location + '::test', 'input', '--list', '--debug')
+        messages = {}  # type -> message, one of each kind
+        for line in log.splitlines():
+            msg = json.loads(line)
+            messages[msg['type']] = msg
+
+        file_status = messages['file_status']
+        assert 'status' in file_status
+        assert file_status['path'].startswith('input')
+
+        log_message = messages['log_message']
+        assert isinstance(log_message['time'], float)
+        assert log_message['levelname'] == 'DEBUG'  # there should only be DEBUG messages
+        assert log_message['name'].startswith('borg.')
+        assert isinstance(log_message['message'], str)
+
     def _get_sizes(self, compression, compressible, size=10000):
     def _get_sizes(self, compression, compressible, size=10000):
         if compressible:
         if compressible:
             contents = b'X' * size
             contents = b'X' * size