Răsfoiți Sursa

list: JSON lines output for archive contents

Marian Beermann acum 8 ani
părinte
comite
2a22f93e44
4 fișiere modificate, cu 36 de adăugiri și 66 de ștergeri
  1. 9 30
      docs/internals/frontends.rst
  2. 18 6
      src/borg/archiver.py
  3. 5 24
      src/borg/helpers.py
  4. 4 6
      src/borg/testsuite/archiver.py

+ 9 - 30
docs/internals/frontends.rst

@@ -392,39 +392,18 @@ The same archive with more information (``borg info --last 1 --json``)::
 
 
 .. rubric:: File listings
 .. rubric:: File listings
 
 
-Listing the contents of an archive can produce *a lot* of JSON. Each item (file, directory, ...) is described
-by one object in the *items* array of the :ref:`borg_list` output. Refer to the *borg list* documentation for
-the available keys and their meaning.
+Listing the contents of an archive can produce *a lot* of JSON. Since many JSON implementations
+don't support a streaming mode of operation, which is pretty much required to deal with this amount of
+JSON, output is generated in the `JSON lines <http://jsonlines.org/>`_ format, which is simply
+a number of JSON objects separated by new lines.
+
+Each item (file, directory, ...) is described by one object in the :ref:`borg_list` output.
+Refer to the *borg list* documentation for the available keys and their meaning.
 
 
 Example (excerpt)::
 Example (excerpt)::
 
 
-    {
-        "encryption": {
-            "mode": "repokey"
-        },
-        "repository": {
-            "id": "0cbe6166b46627fd26b97f8831e2ca97584280a46714ef84d2b668daf8271a23",
-            "last_modified": "Mon, 2017-02-27 21:21:58",
-            "location": "/home/user/repository"
-        },
-        "items": [
-            {
-                "type": "d",
-                "mode": "drwxr-xr-x",
-                "user": "user",
-                "group": "user",
-                "uid": 1000,
-                "gid": 1000,
-                "path": "linux",
-                "healthy": true,
-                "source": "",
-                "linktarget": "",
-                "flags": null,
-                "isomtime": "Sat, 2016-05-07 19:46:01",
-                "size": 0
-            }
-        ]
-    }
+    {"type": "d", "mode": "drwxr-xr-x", "user": "user", "group": "user", "uid": 1000, "gid": 1000, "path": "linux", "healthy": true, "source": "", "linktarget": "", "flags": null, "isomtime": "Sat, 2016-05-07 19:46:01", "size": 0}
+    {"type": "d", "mode": "drwxr-xr-x", "user": "user", "group": "user", "uid": 1000, "gid": 1000, "path": "linux/baz", "healthy": true, "source": "", "linktarget": "", "flags": null, "isomtime": "Sat, 2016-05-07 19:46:01", "size": 0}
 
 
 .. _msgid:
 .. _msgid:
 
 

+ 18 - 6
src/borg/archiver.py

@@ -1048,8 +1048,14 @@ class Archiver:
             write = sys.stdout.buffer.write
             write = sys.stdout.buffer.write
 
 
         if args.location.archive:
         if args.location.archive:
+            if args.json:
+                self.print_error('The --json option is only valid for listing archives, not archive contents.')
+                return self.exit_code
             return self._list_archive(args, repository, manifest, key, write)
             return self._list_archive(args, repository, manifest, key, write)
         else:
         else:
+            if args.json_lines:
+                self.print_error('The --json-lines option is only valid for listing archive contents, not archives.')
+                return self.exit_code
             return self._list_repository(args, manifest, write)
             return self._list_repository(args, manifest, write)
 
 
     def _list_archive(self, args, repository, manifest, key, write):
     def _list_archive(self, args, repository, manifest, key, write):
@@ -1065,11 +1071,9 @@ class Archiver:
             archive = Archive(repository, key, manifest, args.location.archive, cache=cache,
             archive = Archive(repository, key, manifest, args.location.archive, cache=cache,
                               consider_part_files=args.consider_part_files)
                               consider_part_files=args.consider_part_files)
 
 
-            formatter = ItemFormatter(archive, format, json=args.json)
-            write(safe_encode(formatter.begin()))
+            formatter = ItemFormatter(archive, format, json_lines=args.json_lines)
             for item in archive.iter_items(lambda item: matcher.match(item.path)):
             for item in archive.iter_items(lambda item: matcher.match(item.path)):
                 write(safe_encode(formatter.format_item(item)))
                 write(safe_encode(formatter.format_item(item)))
-            write(safe_encode(formatter.end()))
 
 
         # Only load the cache if it will be used
         # Only load the cache if it will be used
         if ItemFormatter.format_needs_cache(format):
         if ItemFormatter.format_needs_cache(format):
@@ -2616,9 +2620,17 @@ class Archiver:
                                help="""specify format for file listing
                                help="""specify format for file listing
                                 (default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NL}")""")
                                 (default: "{mode} {user:6} {group:6} {size:8d} {isomtime} {path}{extra}{NL}")""")
         subparser.add_argument('--json', action='store_true',
         subparser.add_argument('--json', action='store_true',
-                               help='format output as JSON. The form of --format is ignored, but keys used in it '
-                                    'are added to the JSON output. Some keys are always present. Note: JSON can only '
-                                    'represent text. A "bpath" key is therefore not available.')
+                               help='Only valid for listing archives. Format output as JSON. '
+                                    'The form of --format is ignored, '
+                                    'but keys used in it are added to the JSON output. '
+                                    'Some keys are always present. Note: JSON can only represent text. '
+                                    'A "barchive" key is therefore not available.')
+        subparser.add_argument('--json-lines', action='store_true',
+                               help='Only valid for listing archive contents. Format output as JSON Lines. '
+                                    'The form of --format is ignored, '
+                                    'but keys used in it are added to the JSON output. '
+                                    'Some keys are always present. Note: JSON can only represent text. '
+                                    'A "bpath" key is therefore not available.')
         subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
         subparser.add_argument('location', metavar='REPOSITORY_OR_ARCHIVE', nargs='?', default='',
                                type=location_validator(),
                                type=location_validator(),
                                help='repository/archive to list contents of')
                                help='repository/archive to list contents of')

+ 5 - 24
src/borg/helpers.py

@@ -1505,9 +1505,9 @@ class ItemFormatter(BaseFormatter):
         format_keys = {f[1] for f in Formatter().parse(format)}
         format_keys = {f[1] for f in Formatter().parse(format)}
         return any(key in cls.KEYS_REQUIRING_CACHE for key in format_keys)
         return any(key in cls.KEYS_REQUIRING_CACHE for key in format_keys)
 
 
-    def __init__(self, archive, format, *, json=False):
+    def __init__(self, archive, format, *, json_lines=False):
         self.archive = archive
         self.archive = archive
-        self.json = json
+        self.json_lines = json_lines
         static_keys = {
         static_keys = {
             'archivename': archive.name,
             'archivename': archive.name,
             'archiveid': archive.fpr,
             'archiveid': archive.fpr,
@@ -1532,33 +1532,14 @@ class ItemFormatter(BaseFormatter):
         for hash_function in hashlib.algorithms_guaranteed:
         for hash_function in hashlib.algorithms_guaranteed:
             self.add_key(hash_function, partial(self.hash_item, hash_function))
             self.add_key(hash_function, partial(self.hash_item, hash_function))
         self.used_call_keys = set(self.call_keys) & self.format_keys
         self.used_call_keys = set(self.call_keys) & self.format_keys
-        if self.json:
+        if self.json_lines:
             self.item_data = {}
             self.item_data = {}
             self.format_item = self.format_item_json
             self.format_item = self.format_item_json
-            self.first = True
         else:
         else:
             self.item_data = static_keys
             self.item_data = static_keys
 
 
-    def begin(self):
-        if not self.json:
-            return ''
-        begin = json_dump(basic_json_data(self.archive.manifest))
-        begin, _, _ = begin.rpartition('\n}')  # remove last closing brace, we want to extend the object
-        begin += ',\n'
-        begin += '    "items": [\n'
-        return begin
-
-    def end(self):
-        if not self.json:
-            return ''
-        return "]}"
-
     def format_item_json(self, item):
     def format_item_json(self, item):
-        if self.first:
-            self.first = False
-            return json.dumps(self.get_item_data(item))
-        else:
-            return ',' + json.dumps(self.get_item_data(item))
+        return json.dumps(self.get_item_data(item)) + '\n'
 
 
     def add_key(self, key, callable_with_item):
     def add_key(self, key, callable_with_item):
         self.call_keys[key] = callable_with_item
         self.call_keys[key] = callable_with_item
@@ -1585,7 +1566,7 @@ class ItemFormatter(BaseFormatter):
         item_data['uid'] = item.uid
         item_data['uid'] = item.uid
         item_data['gid'] = item.gid
         item_data['gid'] = item.gid
         item_data['path'] = remove_surrogates(item.path)
         item_data['path'] = remove_surrogates(item.path)
-        if self.json:
+        if self.json_lines:
             item_data['healthy'] = 'chunks_healthy' not in item
             item_data['healthy'] = 'chunks_healthy' not in item
         else:
         else:
             item_data['bpath'] = item.path
             item_data['bpath'] = item.path

+ 4 - 6
src/borg/testsuite/archiver.py

@@ -1615,17 +1615,15 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         assert list_repo['encryption']['mode'] == 'repokey'
         assert list_repo['encryption']['mode'] == 'repokey'
         assert 'keyfile' not in list_repo['encryption']
         assert 'keyfile' not in list_repo['encryption']
 
 
-        list_archive = json.loads(self.cmd('list', '--json', self.repository_location + '::test'))
-        assert list_repo['repository'] == list_archive['repository']
-        items = list_archive['items']
+        list_archive = self.cmd('list', '--json-lines', self.repository_location + '::test')
+        items = [json.loads(s) for s in list_archive.splitlines()]
         assert len(items) == 2
         assert len(items) == 2
         file1 = items[1]
         file1 = items[1]
         assert file1['path'] == 'input/file1'
         assert file1['path'] == 'input/file1'
         assert file1['size'] == 81920
         assert file1['size'] == 81920
 
 
-        list_archive = json.loads(self.cmd('list', '--json', '--format={sha256}', self.repository_location + '::test'))
-        assert list_repo['repository'] == list_archive['repository']
-        items = list_archive['items']
+        list_archive = self.cmd('list', '--json-lines', '--format={sha256}', self.repository_location + '::test')
+        items = [json.loads(s) for s in list_archive.splitlines()]
         assert len(items) == 2
         assert len(items) == 2
         file1 = items[1]
         file1 = items[1]
         assert file1['path'] == 'input/file1'
         assert file1['path'] == 'input/file1'