Sfoglia il codice sorgente

Add --format option to `borg diff`, resolve issue #4634 (#7534)

diff: add --format option

also: refactoring/improvements of BaseFormatter
Tarrailt 2 anni fa
parent
commit
616d5e7330

+ 34 - 37
src/borg/archive.py

@@ -11,6 +11,7 @@ from functools import partial
 from getpass import getuser
 from io import BytesIO
 from itertools import groupby, zip_longest
+from typing import Iterator
 from shutil import get_terminal_size
 
 from .platformflags import is_win32
@@ -297,31 +298,24 @@ class DownloadPipeline:
         unpacker = msgpack.Unpacker(use_list=False)
         for data in self.fetch_many(ids):
             unpacker.feed(data)
-            items = [Item(internal_dict=item) for item in unpacker]
-            for item in items:
+            for _item in unpacker:
+                item = Item(internal_dict=_item)
                 if "chunks" in item:
                     item.chunks = [ChunkListEntry(*e) for e in item.chunks]
-
-            if filter:
-                items = [item for item in items if filter(item)]
-
-            if preload:
-                for item in items:
-                    if "chunks" in item:
-                        hlid = item.get("hlid", None)
-                        if hlid is None:
-                            preload_chunks = True
-                        else:
-                            if hlid in hlids_preloaded:
-                                preload_chunks = False
-                            else:
-                                # not having the hardlink's chunks already preloaded for other hardlink to same inode
-                                preload_chunks = True
-                                hlids_preloaded.add(hlid)
-                        if preload_chunks:
-                            self.repository.preload([c.id for c in item.chunks])
-
-            for item in items:
+                if filter and not filter(item):
+                    continue
+                if preload and "chunks" in item:
+                    hlid = item.get("hlid", None)
+                    if hlid is None:
+                        preload_chunks = True
+                    elif hlid in hlids_preloaded:
+                        preload_chunks = False
+                    else:
+                        # not having the hardlink's chunks already preloaded for other hardlink to same inode
+                        preload_chunks = True
+                        hlids_preloaded.add(hlid)
+                    if preload_chunks:
+                        self.repository.preload([c.id for c in item.chunks])
                 yield item
 
     def fetch_many(self, ids, is_preloaded=False):
@@ -631,10 +625,9 @@ Duration: {0.duration}
     def iter_items(self, filter=None, preload=False):
         # note: when calling this with preload=True, later fetch_many() must be called with
         # is_preloaded=True or the RemoteRepository code will leak memory!
-        for item in self.pipeline.unpack_many(
+        yield from self.pipeline.unpack_many(
             self.metadata.items, preload=preload, filter=lambda item: self.item_filter(item, filter)
-        ):
-            yield item
+        )
 
     def add_item(self, item, show_progress=True, stats=None):
         if show_progress and self.show_progress:
@@ -1123,55 +1116,59 @@ Duration: {0.duration}
             logger.warning("borg check --repair is required to free all space.")
 
     @staticmethod
-    def compare_archives_iter(archive1, archive2, matcher=None, can_compare_chunk_ids=False, content_only=False):
+    def compare_archives_iter(
+        archive1: "Archive", archive2: "Archive", matcher=None, can_compare_chunk_ids=False
+    ) -> Iterator[ItemDiff]:
         """
-        Yields tuples with a path and an ItemDiff instance describing changes/indicating equality.
+        Yields an ItemDiff instance describing changes/indicating equality.
 
         :param matcher: PatternMatcher class to restrict results to only matching paths.
         :param can_compare_chunk_ids: Whether --chunker-params are the same for both archives.
         """
 
-        def compare_items(item1, item2):
+        def compare_items(path: str, item1: Item, item2: Item):
             return ItemDiff(
+                path,
                 item1,
                 item2,
                 archive1.pipeline.fetch_many([c.id for c in item1.get("chunks", [])]),
                 archive2.pipeline.fetch_many([c.id for c in item2.get("chunks", [])]),
                 can_compare_chunk_ids=can_compare_chunk_ids,
-                content_only=content_only,
             )
 
-        orphans_archive1 = OrderedDict()
-        orphans_archive2 = OrderedDict()
+        orphans_archive1: OrderedDict[str, Item] = OrderedDict()
+        orphans_archive2: OrderedDict[str, Item] = OrderedDict()
+
+        assert matcher is not None, "matcher must be set"
 
         for item1, item2 in zip_longest(
             archive1.iter_items(lambda item: matcher.match(item.path)),
             archive2.iter_items(lambda item: matcher.match(item.path)),
         ):
             if item1 and item2 and item1.path == item2.path:
-                yield (item1.path, compare_items(item1, item2))
+                yield compare_items(item1.path, item1, item2)
                 continue
             if item1:
                 matching_orphan = orphans_archive2.pop(item1.path, None)
                 if matching_orphan:
-                    yield (item1.path, compare_items(item1, matching_orphan))
+                    yield compare_items(item1.path, item1, matching_orphan)
                 else:
                     orphans_archive1[item1.path] = item1
             if item2:
                 matching_orphan = orphans_archive1.pop(item2.path, None)
                 if matching_orphan:
-                    yield (matching_orphan.path, compare_items(matching_orphan, item2))
+                    yield compare_items(matching_orphan.path, matching_orphan, item2)
                 else:
                     orphans_archive2[item2.path] = item2
         # At this point orphans_* contain items that had no matching partner in the other archive
         for added in orphans_archive2.values():
             path = added.path
             deleted_item = Item.create_deleted(path)
-            yield (path, compare_items(deleted_item, added))
+            yield compare_items(path, deleted_item, added)
         for deleted in orphans_archive1.values():
             path = deleted.path
             deleted_item = Item.create_deleted(path)
-            yield (path, compare_items(deleted, deleted_item))
+            yield compare_items(path, deleted, deleted_item)
 
 
 class MetadataCollector:

+ 82 - 34
src/borg/archiver/diff_cmd.py

@@ -1,13 +1,14 @@
 import argparse
+import textwrap
 import json
+import sys
+import os
 
-from ._common import with_repository, with_archive, build_matcher
+from ._common import with_repository, with_archive, build_matcher, Highlander
 from ..archive import Archive
 from ..constants import *  # NOQA
-from ..helpers import archivename_validator
+from ..helpers import BaseFormatter, DiffFormatter, archivename_validator, BorgJsonEncoder
 from ..manifest import Manifest
-from ..helpers.parseformat import BorgJsonEncoder
-
 from ..logger import create_logger
 
 logger = create_logger()
@@ -18,14 +19,12 @@ class DiffMixIn:
     @with_archive
     def do_diff(self, args, repository, manifest, archive):
         """Diff contents of two archives"""
-
-        def print_json_output(diff, path):
-            print(json.dumps({"path": path, "changes": [j for j, str in diff]}, sort_keys=True, cls=BorgJsonEncoder))
-
-        def print_text_output(diff, path):
-            print("{:<19} {}".format(" ".join([str for j, str in diff]), path))
-
-        print_output = print_json_output if args.json_lines else print_text_output
+        if args.format is not None:
+            format = args.format
+        elif args.content_only:
+            format = "{content}{link}{directory}{blkdev}{chrdev}{fifo} {path}{NL}"
+        else:
+            format = os.environ.get("BORG_DIFF_FORMAT", "{change} {path}{NL}")
 
         archive1 = archive
         archive2 = Archive(manifest, args.other_name)
@@ -43,17 +42,36 @@ class DiffMixIn:
 
         matcher = build_matcher(args.patterns, args.paths)
 
-        diffs = Archive.compare_archives_iter(
-            archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids, content_only=args.content_only
+        diffs_iter = Archive.compare_archives_iter(
+            archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids
         )
         # Conversion to string and filtering for diff.equal to save memory if sorting
-        diffs = ((path, diff.changes()) for path, diff in diffs if not diff.equal)
+        diffs = (diff for diff in diffs_iter if not diff.equal(args.content_only))
 
         if args.sort:
-            diffs = sorted(diffs)
-
-        for path, diff in diffs:
-            print_output(diff, path)
+            diffs = sorted(diffs, key=lambda diff: diff.path)
+
+        formatter = DiffFormatter(format, args.content_only)
+        for diff in diffs:
+            if args.json_lines:
+                print(
+                    json.dumps(
+                        {
+                            "path": diff.path,
+                            "changes": [
+                                change.to_dict()
+                                for name, change in diff.changes().items()
+                                if not args.content_only or (name not in DiffFormatter.METADATA)
+                            ],
+                        },
+                        sort_keys=True,
+                        cls=BorgJsonEncoder,
+                    )
+                )
+            else:
+                res: str = formatter.format_item(diff)
+                if res.strip():
+                    sys.stdout.write(res)
 
         for pattern in matcher.get_unmatched_include_patterns():
             self.print_warning("Include pattern '%s' never matched.", pattern)
@@ -64,25 +82,48 @@ class DiffMixIn:
         from ._common import process_epilog
         from ._common import define_exclusion_group
 
-        diff_epilog = process_epilog(
-            """
-            This command finds differences (file contents, user/group/mode) between archives.
+        diff_epilog = (
+            process_epilog(
+                """
+        This command finds differences (file contents, metadata) between ARCHIVE1 and ARCHIVE2.
+
+        For more help on include/exclude patterns, see the :ref:`borg_patterns` command output.
+
+        .. man NOTES
+
+        The FORMAT specifier syntax
+        +++++++++++++++++++++++++++
+
+        The ``--format`` option uses python's `format string syntax
+        <https://docs.python.org/3.9/library/string.html#formatstrings>`_.
 
-            A repository location and an archive name must be specified for REPO::ARCHIVE1.
-            ARCHIVE2 is just another archive name in same repository (no repository location
-            allowed).
+        Examples:
+        ::
 
-            For archives created with Borg 1.1 or newer diff automatically detects whether
-            the archives are created with the same chunker params. If so, only chunk IDs
-            are compared, which is very fast.
+            $ borg diff --format '{content:30} {path}{NL}' ArchiveFoo ArchiveBar
+            modified:  +4.1 kB  -1.0 kB    file-diff
+            ...
 
-            For archives prior to Borg 1.1 chunk contents are compared by default.
-            If you did not create the archives with different chunker params,
-            pass ``--same-chunker-params``.
-            Note that the chunker params changed from Borg 0.xx to 1.0.
+            # {VAR:<NUMBER} - pad to NUMBER columns left-aligned.
+            # {VAR:>NUMBER} - pad to NUMBER columns right-aligned.
+            $ borg diff --format '{content:>30} {path}{NL}' ArchiveFoo ArchiveBar
+               modified:  +4.1 kB  -1.0 kB file-diff
+            ...
 
-            For more help on include/exclude patterns, see the :ref:`borg_patterns` command output.
-            """
+        The following keys are always available:
+
+
+        """
+            )
+            + BaseFormatter.keys_help()
+            + textwrap.dedent(
+                """
+
+        Keys available only when showing differences between archives:
+
+        """
+            )
+            + DiffFormatter.keys_help()
         )
         subparser = subparsers.add_parser(
             "diff",
@@ -107,6 +148,13 @@ class DiffMixIn:
             help="Override check of chunker parameters.",
         )
         subparser.add_argument("--sort", dest="sort", action="store_true", help="Sort the output lines by file path.")
+        subparser.add_argument(
+            "--format",
+            metavar="FORMAT",
+            dest="format",
+            action=Highlander,
+            help='specify format for differences between archives (default: "{change} {path}{NL}")',
+        )
         subparser.add_argument("--json-lines", action="store_true", help="Format output as JSON Lines. ")
         subparser.add_argument(
             "--content-only",

+ 2 - 3
src/borg/archiver/list_cmd.py

@@ -29,10 +29,9 @@ class ListMixIn:
 
         def _list_inner(cache):
             archive = Archive(manifest, args.name, cache=cache)
-
-            formatter = ItemFormatter(archive, format, json_lines=args.json_lines)
+            formatter = ItemFormatter(archive, format)
             for item in archive.iter_items(lambda item: matcher.match(item.path)):
-                sys.stdout.write(formatter.format_item(item))
+                sys.stdout.write(formatter.format_item(item, args.json_lines, sort=True))
 
         # Only load the cache if it will be used
         if ItemFormatter.format_needs_cache(format):

+ 2 - 2
src/borg/archiver/prune_cmd.py

@@ -89,7 +89,7 @@ class PruneMixIn:
             format = "{archive}"
         else:
             format = os.environ.get("BORG_PRUNE_FORMAT", "{archive:<36} {time} [{id}]")
-        formatter = ArchiveFormatter(format, repository, manifest, manifest.key, json=False, iec=args.iec)
+        formatter = ArchiveFormatter(format, repository, manifest, manifest.key, iec=args.iec)
 
         checkpoint_re = r"\.checkpoint(\.\d+)?"
         archives_checkpoints = manifest.archives.list(
@@ -169,7 +169,7 @@ class PruneMixIn:
                     or (args.list_pruned and archive in to_delete)
                     or (args.list_kept and archive not in to_delete)
                 ):
-                    list_logger.info(f"{log_message:<40} {formatter.format_item(archive)}")
+                    list_logger.info(f"{log_message:<40} {formatter.format_item(archive, jsonline=False)}")
             pi.finish()
             if sig_int:
                 # Ctrl-C / SIGINT: do not checkpoint (commit) again, we already have a checkpoint in this case.

+ 3 - 3
src/borg/archiver/rlist_cmd.py

@@ -23,15 +23,15 @@ class RListMixIn:
             format = "{archive}{NL}"
         else:
             format = os.environ.get("BORG_RLIST_FORMAT", "{archive:<36} {time} [{id}]{NL}")
-        formatter = ArchiveFormatter(format, repository, manifest, manifest.key, json=args.json, iec=args.iec)
+        formatter = ArchiveFormatter(format, repository, manifest, manifest.key, iec=args.iec)
 
         output_data = []
 
         for archive_info in manifest.archives.list_considering(args):
             if args.json:
-                output_data.append(formatter.get_item_data(archive_info))
+                output_data.append(formatter.get_item_data(archive_info, args.json))
             else:
-                sys.stdout.write(formatter.format_item(archive_info))
+                sys.stdout.write(formatter.format_item(archive_info, args.json))
 
         if args.json:
             json_print(basic_json_data(manifest, extra={"archives": output_data}))

+ 1 - 1
src/borg/helpers/__init__.py

@@ -28,7 +28,7 @@ from .parseformat import sizeof_fmt, sizeof_fmt_iec, sizeof_fmt_decimal, Locatio
 from .parseformat import format_line, replace_placeholders, PlaceholderError, relative_time_marker_validator
 from .parseformat import format_archive, parse_stringified_list, clean_lines
 from .parseformat import location_validator, archivename_validator, comment_validator
-from .parseformat import BaseFormatter, ArchiveFormatter, ItemFormatter, file_status
+from .parseformat import BaseFormatter, ArchiveFormatter, ItemFormatter, DiffFormatter, file_status
 from .parseformat import swidth_slice, ellipsis_truncate
 from .parseformat import BorgJsonEncoder, basic_json_data, json_print, json_dump, prepare_dump_dict
 from .parseformat import Highlander, MakePathSafeAction

+ 205 - 109
src/borg/helpers/parseformat.py

@@ -1,3 +1,4 @@
+import abc
 import argparse
 import base64
 import hashlib
@@ -8,6 +9,7 @@ import re
 import shlex
 import stat
 import uuid
+from typing import List, Dict, Set, Tuple, ClassVar, Any, TYPE_CHECKING, Literal
 from binascii import hexlify
 from collections import Counter, OrderedDict
 from datetime import datetime, timezone
@@ -27,6 +29,9 @@ from .. import __version_tuple__ as borg_version_tuple
 from ..constants import *  # NOQA
 from ..platformflags import is_win32
 
+if TYPE_CHECKING:
+    from ..item import ItemDiff
+
 
 def bin_to_hex(binary):
     return hexlify(binary).decode("ascii")
@@ -649,8 +654,10 @@ def archivename_validator(text):
     return validate_text(text)
 
 
-class BaseFormatter:
-    FIXED_KEYS = {
+class BaseFormatter(metaclass=abc.ABCMeta):
+    format: str
+    static_data: Dict[str, Any]
+    FIXED_KEYS: ClassVar[Dict[str, str]] = {
         # Formatting aids
         "LF": "\n",
         "SPACE": " ",
@@ -660,25 +667,49 @@ class BaseFormatter:
         "NEWLINE": "\n",
         "NL": "\n",  # \n is automatically converted to os.linesep on write
     }
+    KEY_DESCRIPTIONS: ClassVar[Dict[str, str]] = {
+        "NEWLINE": "OS dependent line separator",
+        "NL": "alias of NEWLINE",
+        "NUL": "NUL character for creating print0 / xargs -0 like output",
+        "SPACE": "space character",
+        "TAB": "tab character",
+        "CR": "carriage return character",
+        "LF": "line feed character",
+    }
+    KEY_GROUPS: ClassVar[Tuple[Tuple[str, ...], ...]] = (("NEWLINE", "NL", "NUL", "SPACE", "TAB", "CR", "LF"),)
 
-    def get_item_data(self, item):
-        raise NotImplementedError
+    def __init__(self, format: str, static: Dict[str, Any]) -> None:
+        self.format = partial_format(format, static)
+        self.static_data = static
 
-    def format_item(self, item):
-        return self.format.format_map(self.get_item_data(item))
+    @abc.abstractmethod
+    def get_item_data(self, item, jsonline=False) -> dict:
+        raise NotImplementedError
 
-    @staticmethod
-    def keys_help():
+    def format_item(self, item, jsonline=False, sort=False):
+        data = self.get_item_data(item, jsonline)
         return (
-            "- NEWLINE: OS dependent line separator\n"
-            "- NL: alias of NEWLINE\n"
-            "- NUL: NUL character for creating print0 / xargs -0 like output\n"
-            "- SPACE\n"
-            "- TAB\n"
-            "- CR\n"
-            "- LF"
+            f"{json.dumps(data, cls=BorgJsonEncoder, sort_keys=sort)}\n" if jsonline else self.format.format_map(data)
         )
 
+    @classmethod
+    def keys_help(cls):
+        help = []
+        keys: Set[str] = set()
+        keys.update(cls.KEY_DESCRIPTIONS.keys())
+        keys.update(key for group in cls.KEY_GROUPS for key in group)
+
+        for group in cls.KEY_GROUPS:
+            for key in group:
+                keys.remove(key)
+                text = "- " + key
+                if key in cls.KEY_DESCRIPTIONS:
+                    text += ": " + cls.KEY_DESCRIPTIONS[key]
+                help.append(text)
+            help.append("")
+        assert not keys, str(keys)
+        return "\n".join(help)
+
 
 class ArchiveFormatter(BaseFormatter):
     KEY_DESCRIPTIONS = {
@@ -703,47 +734,17 @@ class ArchiveFormatter(BaseFormatter):
         ("size", "nfiles"),
     )
 
-    @classmethod
-    def available_keys(cls):
-        from ..manifest import ArchiveInfo
-
-        fake_archive_info = ArchiveInfo("archivename", b"\1" * 32, datetime(1970, 1, 1, tzinfo=timezone.utc))
-        formatter = cls("", None, None, None)
-        keys = []
-        keys.extend(formatter.call_keys.keys())
-        keys.extend(formatter.get_item_data(fake_archive_info).keys())
-        return keys
-
-    @classmethod
-    def keys_help(cls):
-        help = []
-        keys = cls.available_keys()
-        for key in cls.FIXED_KEYS:
-            keys.remove(key)
-
-        for group in cls.KEY_GROUPS:
-            for key in group:
-                keys.remove(key)
-                text = "- " + key
-                if key in cls.KEY_DESCRIPTIONS:
-                    text += ": " + cls.KEY_DESCRIPTIONS[key]
-                help.append(text)
-            help.append("")
-        assert not keys, str(keys)
-        return "\n".join(help)
-
-    def __init__(self, format, repository, manifest, key, *, json=False, iec=False):
+    def __init__(self, format, repository, manifest, key, *, iec=False):
+        static_data = {}  # here could be stuff on repo level, above archive level
+        static_data.update(self.FIXED_KEYS)
+        super().__init__(format, static_data)
         self.repository = repository
         self.manifest = manifest
         self.key = key
         self.name = None
         self.id = None
         self._archive = None
-        self.json = json
         self.iec = iec
-        static_keys = {}  # here could be stuff on repo level, above archive level
-        static_keys.update(self.FIXED_KEYS)
-        self.format = partial_format(format, static_keys)
         self.format_keys = {f[1] for f in Formatter().parse(format)}
         self.call_keys = {
             "hostname": partial(self.get_meta, "hostname", ""),
@@ -755,20 +756,12 @@ class ArchiveFormatter(BaseFormatter):
             "end": self.get_ts_end,
         }
         self.used_call_keys = set(self.call_keys) & self.format_keys
-        if self.json:
-            self.item_data = {}
-            self.format_item = self.format_item_json
-        else:
-            self.item_data = static_keys
-
-    def format_item_json(self, item):
-        return json.dumps(self.get_item_data(item), cls=BorgJsonEncoder) + "\n"
 
-    def get_item_data(self, archive_info):
+    def get_item_data(self, archive_info, jsonline=False):
         self.name = archive_info.name
         self.id = archive_info.id
         item_data = {}
-        item_data.update(self.item_data)
+        item_data.update({} if jsonline else self.static_data)
         item_data.update(
             {
                 "name": archive_info.name,
@@ -812,15 +805,31 @@ class ItemFormatter(BaseFormatter):
     # shake_* is not provided because it uses an incompatible .digest() method to support variable length.
     hash_algorithms = set(hashlib.algorithms_guaranteed).union({"xxh64"}).difference({"shake_128", "shake_256"})
     KEY_DESCRIPTIONS = {
+        "type": "file type (file, dir, symlink, ...)",
+        "mode": "file mode (as in stat)",
+        "uid": "user id of file owner",
+        "gid": "group id of file owner",
+        "user": "user name of file owner",
+        "group": "group name of file owner",
         "path": "file path",
         "target": "link target for symlinks",
         "hlid": "hard link identity (same if hardlinking same fs object)",
+        "flags": "file flags",
         "extra": 'prepends {target} with " -> " for soft links and " link to " for hard links',
+        "size": "file size",
         "dsize": "deduplicated size",
         "num_chunks": "number of chunks in this file",
         "unique_chunks": "number of unique chunks in this file",
+        "mtime": "file modification time",
+        "ctime": "file change time",
+        "atime": "file access time",
+        "isomtime": "file modification time (ISO 8601 format)",
+        "isoctime": "file change time (ISO 8601 format)",
+        "isoatime": "file access time (ISO 8601 format)",
         "xxh64": "XXH64 checksum of this file (note: this is NOT a cryptographic hash!)",
         "health": 'either "healthy" (file ok) or "broken" (if file has all-zero replacement chunks)',
+        "archiveid": "internal ID of the archive",
+        "archivename": "name of the archive",
     }
     KEY_GROUPS = (
         ("type", "mode", "uid", "gid", "user", "group", "path", "target", "hlid", "flags"),
@@ -833,57 +842,19 @@ class ItemFormatter(BaseFormatter):
 
     KEYS_REQUIRING_CACHE = ("dsize", "unique_chunks")
 
-    @classmethod
-    def available_keys(cls):
-        class FakeArchive:
-            fpr = name = ""
-
-        from ..item import Item
-
-        fake_item = Item(mode=0, path="foo", user="", group="", mtime=0, uid=0, gid=0)
-        formatter = cls(FakeArchive, "")
-        keys = []
-        keys.extend(formatter.call_keys.keys())
-        keys.extend(formatter.get_item_data(fake_item).keys())
-        return keys
-
-    @classmethod
-    def keys_help(cls):
-        help = []
-        keys = cls.available_keys()
-        for key in cls.FIXED_KEYS:
-            keys.remove(key)
-
-        for group in cls.KEY_GROUPS:
-            for key in group:
-                keys.remove(key)
-                text = "- " + key
-                if key in cls.KEY_DESCRIPTIONS:
-                    text += ": " + cls.KEY_DESCRIPTIONS[key]
-                help.append(text)
-            help.append("")
-        assert not keys, str(keys)
-        return "\n".join(help)
-
     @classmethod
     def format_needs_cache(cls, format):
         format_keys = {f[1] for f in Formatter().parse(format)}
         return any(key in cls.KEYS_REQUIRING_CACHE for key in format_keys)
 
-    def __init__(self, archive, format, *, json_lines=False):
+    def __init__(self, archive, format):
         from ..checksums import StreamingXXH64
 
+        static_data = {"archivename": archive.name, "archiveid": archive.fpr}
+        static_data.update(self.FIXED_KEYS)
+        super().__init__(format, static_data)
         self.xxh64 = StreamingXXH64
         self.archive = archive
-        self.json_lines = json_lines
-        static_keys = {"archivename": archive.name, "archiveid": archive.fpr}
-        static_keys.update(self.FIXED_KEYS)
-        if self.json_lines:
-            self.item_data = {}
-            self.format_item = self.format_item_json
-        else:
-            self.item_data = static_keys
-        self.format = partial_format(format, static_keys)
         self.format_keys = {f[1] for f in Formatter().parse(format)}
         self.call_keys = {
             "size": self.calculate_size,
@@ -901,17 +872,14 @@ class ItemFormatter(BaseFormatter):
             self.call_keys[hash_function] = partial(self.hash_item, hash_function)
         self.used_call_keys = set(self.call_keys) & self.format_keys
 
-    def format_item_json(self, item):
-        return json.dumps(self.get_item_data(item), cls=BorgJsonEncoder, sort_keys=True) + "\n"
-
-    def get_item_data(self, item):
+    def get_item_data(self, item, jsonline=False):
         item_data = {}
-        item_data.update(self.item_data)
+        item_data.update({} if jsonline else self.static_data)
 
         item_data.update(text_to_json("path", item.path))
         target = item.get("target", "")
         item_data.update(text_to_json("target", target))
-        if not self.json_lines:
+        if not jsonline:
             item_data["extra"] = "" if not target else f" -> {item_data['target']}"
 
         hlid = item.get("hlid")
@@ -928,7 +896,7 @@ class ItemFormatter(BaseFormatter):
         item_data.update(text_to_json("user", item.get("user", str(item_data["uid"]))))
         item_data.update(text_to_json("group", item.get("group", str(item_data["gid"]))))
 
-        if self.json_lines:
+        if jsonline:
             item_data["healthy"] = "chunks_healthy" not in item
         else:
             item_data["health"] = "broken" if "chunks_healthy" in item else "healthy"
@@ -944,7 +912,7 @@ class ItemFormatter(BaseFormatter):
 
         item: The item to sum its unique chunks' metadata
         metadata_func: A function that takes a parameter of type ChunkIndexEntry and returns a number, used to return
-                       the metadata needed from the chunk
+        the metadata needed from the chunk
         """
         chunk_index = self.archive.cache.chunks
         chunks = item.get("chunks", [])
@@ -976,6 +944,134 @@ class ItemFormatter(BaseFormatter):
         return self.format_time(key, item).isoformat()
 
 
+class DiffFormatter(BaseFormatter):
+    KEY_DESCRIPTIONS = {
+        "path": "archived file path",
+        "change": "all available changes",
+        "content": "file content change",
+        "mode": "file mode change",
+        "type": "file type change",
+        "owner": "file owner (user/group) change",
+        "user": "file user change",
+        "group": "file group change",
+        "link": "file link change",
+        "directory": "file directory change",
+        "blkdev": "file block device change",
+        "chrdev": "file character device change",
+        "fifo": "file fifo change",
+        "mtime": "file modification time change",
+        "ctime": "file change time change",
+        "isomtime": "file modification time change (ISO 8601)",
+        "isoctime": "file creation time change (ISO 8601)",
+    }
+    KEY_GROUPS = (
+        ("path", "change"),
+        ("content", "mode", "type", "owner", "group", "user"),
+        ("link", "directory", "blkdev", "chrdev", "fifo"),
+        ("mtime", "ctime", "isomtime", "isoctime"),
+    )
+    METADATA = ("mode", "type", "owner", "group", "user", "mtime", "ctime")
+
+    def __init__(self, format, content_only=False):
+        static_data = {}
+        static_data.update(self.FIXED_KEYS)
+        super().__init__(format or "{content}{link}{directory}{blkdev}{chrdev}{fifo} {path}{NL}", static_data)
+        self.content_only = content_only
+        self.format_keys = {f[1] for f in Formatter().parse(format)}
+        self.call_keys = {
+            "content": self.format_content,
+            "mode": self.format_mode,
+            "type": partial(self.format_mode, filetype=True),
+            "owner": partial(self.format_owner),
+            "group": partial(self.format_owner, spec="group"),
+            "user": partial(self.format_owner, spec="user"),
+            "link": partial(self.format_other, "link"),
+            "directory": partial(self.format_other, "directory"),
+            "blkdev": partial(self.format_other, "blkdev"),
+            "chrdev": partial(self.format_other, "chrdev"),
+            "fifo": partial(self.format_other, "fifo"),
+            "mtime": partial(self.format_time, "mtime"),
+            "ctime": partial(self.format_time, "ctime"),
+            "isomtime": partial(self.format_iso_time, "mtime"),
+            "isoctime": partial(self.format_iso_time, "ctime"),
+        }
+        self.used_call_keys = set(self.call_keys) & self.format_keys
+        if self.content_only:
+            self.used_call_keys -= set(self.METADATA)
+
+    def get_item_data(self, item: "ItemDiff", jsonline=False) -> dict:
+        diff_data = {}
+        for key in self.used_call_keys:
+            diff_data[key] = self.call_keys[key](item)
+
+        change = []
+        for key in self.call_keys:
+            if key in ("isomtime", "isoctime"):
+                continue
+            if self.content_only and key in self.METADATA:
+                continue
+            change.append(self.call_keys[key](item))
+        diff_data["change"] = " ".join([v for v in change if v])
+        diff_data["path"] = item.path
+        diff_data.update({} if jsonline else self.static_data)
+        return diff_data
+
+    def format_other(self, key, diff: "ItemDiff"):
+        change = diff.changes().get(key)
+        return f"{change.diff_type}".ljust(27) if change else ""  # 27 is the length of the content change
+
+    def format_mode(self, diff: "ItemDiff", filetype=False):
+        change = diff.type() if filetype else diff.mode()
+        return f"[{change.diff_data['item1']} -> {change.diff_data['item2']}]" if change else ""
+
+    def format_owner(self, diff: "ItemDiff", spec: Literal["owner", "user", "group"] = "owner"):
+        if spec == "user":
+            change = diff.user()
+            return f"[{change.diff_data['item1']} -> {change.diff_data['item2']}]" if change else ""
+        if spec == "group":
+            change = diff.group()
+            return f"[{change.diff_data['item1']} -> {change.diff_data['item2']}]" if change else ""
+        if spec != "owner":
+            raise ValueError(f"Invalid owner spec: {spec}")
+        change = diff.owner()
+        if change:
+            return "[{}:{} -> {}:{}]".format(
+                change.diff_data["item1"][0],
+                change.diff_data["item1"][1],
+                change.diff_data["item2"][0],
+                change.diff_data["item2"][1],
+            )
+        return ""
+
+    def format_content(self, diff: "ItemDiff"):
+        change = diff.content()
+        if change:
+            if change.diff_type == "added":
+                return "{}: {:>20}".format(change.diff_type, format_file_size(change.diff_data["added"]))
+            if change.diff_type == "removed":
+                return "{}: {:>18}".format(change.diff_type, format_file_size(change.diff_data["removed"]))
+            if "added" not in change.diff_data and "removed" not in change.diff_data:
+                return "modified:  (can't get size)"
+            return "{}: {:>8} {:>8}".format(
+                change.diff_type,
+                format_file_size(change.diff_data["added"], precision=1, sign=True),
+                format_file_size(-change.diff_data["removed"], precision=1, sign=True),
+            )
+        return ""
+
+    def format_time(self, key, diff: "ItemDiff"):
+        change = diff.changes().get(key)
+        return f"[{key}: {change.diff_data['item1']} -> {change.diff_data['item2']}]" if change else ""
+
+    def format_iso_time(self, key, diff: "ItemDiff"):
+        change = diff.changes().get(key)
+        return (
+            f"[{key}: {change.diff_data['item1'].isoformat()} -> {change.diff_data['item2'].isoformat()}]"
+            if change
+            else ""
+        )
+
+
 def file_status(mode):
     if stat.S_ISREG(mode):
         return "A"

+ 30 - 3
src/borg/item.pyi

@@ -1,4 +1,4 @@
-from typing import FrozenSet, Set, NamedTuple, Tuple, Mapping, Dict, List, Iterator, Callable, Any
+from typing import FrozenSet, Set, NamedTuple, Tuple, Mapping, Dict, List, Iterator, Callable, Any, Optional
 
 from .helpers import StableDict
 
@@ -247,9 +247,36 @@ class ManifestItem(PropDict):
     @item_keys.setter
     def item_keys(self, val: Tuple) -> None: ...
 
+class DiffChange:
+    diff_type: str
+    diff_data: Dict[str, Any]
+    def __init__(self, diff_type: str, diff_data: Optional[Dict[str, Any]] = ...) -> None: ...
+    def to_dict(self) -> Dict[str, Any]: ...
+
 class ItemDiff:
-    def __init__(self, *args, **kwargs) -> None: ...
-    def _chunk_content_equal(self, c1: Iterator, c2: Iterator) -> bool: ...
+    path: str
+    def __init__(
+        self,
+        path: str,
+        item1: Item,
+        item2: Item,
+        chunk_1: Iterator,
+        chunk_2: Iterator,
+        numeric_ids: bool = ...,
+        can_compare_chunk_ids: bool = ...,
+    ) -> None: ...
+    def changes(self) -> Dict[str, DiffChange]: ...
+    def equal(self, content_only: bool = ...) -> bool: ...
+    def content(self) -> Optional[DiffChange]: ...
+    def ctime(self) -> Optional[DiffChange]: ...
+    def mtime(self) -> Optional[DiffChange]: ...
+    def mode(self) -> Optional[DiffChange]: ...
+    def type(self) -> Optional[DiffChange]: ...
+    def owner(self) -> Optional[DiffChange]: ...
+    def user(self) -> Optional[DiffChange]: ...
+    def group(self) -> Optional[DiffChange]: ...
+
+def chunk_content_equal(chunks_a: Iterator, chunks_b: Iterator) -> bool: ...
 
 class Key(PropDict):
     @property

+ 91 - 48
src/borg/item.pyx

@@ -620,66 +620,76 @@ cpdef _init_names():
 _init_names()
 
 
+class DiffChange:
+    """
+    Stores a change in a diff.
+
+    The diff_type denotes the type of change, e.g. "added", "removed", "modified".
+    The diff_data contains additional information about the change, e.g. the old and new mode.
+    """
+    def __init__(self, diff_type, diff_data=None):
+        self.diff_type = diff_type
+        self.diff_data = diff_data or {}
+
+    def to_dict(self):
+        return {"type": self.diff_type, **self.diff_data}
+
+
 class ItemDiff:
     """
     Comparison of two items from different archives.
 
     The items may have different paths and still be considered equal (e.g. for renames).
-    It does not include extended or time attributes in the comparison.
     """
 
-    def __init__(self, item1, item2, chunk_iterator1, chunk_iterator2, numeric_ids=False, can_compare_chunk_ids=False, content_only=False):
+    def __init__(self, path, item1, item2, chunk_1, chunk_2, numeric_ids=False, can_compare_chunk_ids=False):
+        self.path = path
         self._item1 = item1
         self._item2 = item2
-        self._content_only = content_only
         self._numeric_ids = numeric_ids
         self._can_compare_chunk_ids = can_compare_chunk_ids
-        self.equal = self._equal(chunk_iterator1, chunk_iterator2)
-        changes = []
+        self._chunk_1 = chunk_1
+        self._chunk_2 = chunk_2
+
+        self._changes = {}
 
         if self._item1.is_link() or self._item2.is_link():
-            changes.append(self._link_diff())
+            self._link_diff()
 
         if 'chunks' in self._item1 and 'chunks' in self._item2:
-            changes.append(self._content_diff())
+            self._content_diff()
 
         if self._item1.is_dir() or self._item2.is_dir():
-            changes.append(self._presence_diff('directory'))
+            self._presence_diff('directory')
 
         if self._item1.is_blk() or self._item2.is_blk():
-            changes.append(self._presence_diff('blkdev'))
+            self._presence_diff('blkdev')
 
         if self._item1.is_chr() or self._item2.is_chr():
-            changes.append(self._presence_diff('chrdev'))
+            self._presence_diff('chrdev')
 
         if self._item1.is_fifo() or self._item2.is_fifo():
-            changes.append(self._presence_diff('fifo'))
+            self._presence_diff('fifo')
 
-        if not self._content_only:
-            if not (self._item1.get('deleted') or self._item2.get('deleted')):
-                changes.append(self._owner_diff())
-                changes.append(self._mode_diff())
-                changes.extend(self._time_diffs())
+        if not (self._item1.get('deleted') or self._item2.get('deleted')):
+            self._owner_diff()
+            self._mode_diff()
+            self._time_diffs()
 
-        # filter out empty changes
-        self._changes = [ch for ch in changes if ch]
 
     def changes(self):
         return self._changes
 
     def __repr__(self):
-        if self.equal:
-            return 'equal'
-        return ' '.join(str for d, str in self._changes)
+        return (' '.join(self._changes.keys())) or 'equal'
 
-    def _equal(self, chunk_iterator1, chunk_iterator2):
+    def equal(self, content_only=False):
         # if both are deleted, there is nothing at path regardless of what was deleted
         if self._item1.get('deleted') and self._item2.get('deleted'):
             return True
 
         attr_list = ['deleted', 'target']
-
-        if not self._content_only:
+        if not content_only:
             attr_list += ['mode', 'ctime', 'mtime']
             attr_list += ['uid', 'gid'] if self._numeric_ids else ['user', 'group']
 
@@ -693,74 +703,107 @@ class ItemDiff:
                 return False
 
         if 'chunks' in self._item1 and 'chunks' in self._item2:
-            return self._content_equal(chunk_iterator1, chunk_iterator2)
+            return self._content_equal()
 
         return True
 
     def _presence_diff(self, item_type):
         if not self._item1.get('deleted') and self._item2.get('deleted'):
-            chg = 'removed ' + item_type
-            return ({"type": chg}, chg)
+            self._changes[item_type] = DiffChange(f"removed {item_type}")
+            return True
         if self._item1.get('deleted') and not self._item2.get('deleted'):
-            chg = 'added ' + item_type
-            return ({"type": chg}, chg)
+            self._changes[item_type] = DiffChange(f"added {item_type}")
+            return True
 
     def _link_diff(self):
-        pd = self._presence_diff('link')
-        if pd is not None:
-            return pd
+        if self._presence_diff('link'):
+            return True
         if 'target' in self._item1 and 'target' in self._item2 and self._item1.target != self._item2.target:
-            return ({"type": 'changed link'}, 'changed link')
+            self._changes['link'] = DiffChange('changed link')
+            return True
 
     def _content_diff(self):
         if self._item1.get('deleted'):
             sz = self._item2.get_size()
-            return ({"type": "added", "size": sz}, 'added {:>13}'.format(format_file_size(sz)))
+            self._changes['content'] = DiffChange("added", {"added": sz, "removed": 0})
+            return True
         if self._item2.get('deleted'):
             sz = self._item1.get_size()
-            return ({"type": "removed", "size": sz}, 'removed {:>11}'.format(format_file_size(sz)))
+            self._changes['content'] = DiffChange("removed", {"added": 0, "removed": sz})
+            return True
         if not self._can_compare_chunk_ids:
-            return ({"type": "modified"}, "modified")
+            self._changes['content'] = DiffChange("modified")
+            return True
         chunk_ids1 = {c.id for c in self._item1.chunks}
         chunk_ids2 = {c.id for c in self._item2.chunks}
         added_ids = chunk_ids2 - chunk_ids1
         removed_ids = chunk_ids1 - chunk_ids2
         added = self._item2.get_size(consider_ids=added_ids)
         removed = self._item1.get_size(consider_ids=removed_ids)
-        return ({"type": "modified", "added": added, "removed": removed},
-            '{:>9} {:>9}'.format(format_file_size(added, precision=1, sign=True),
-            format_file_size(-removed, precision=1, sign=True)))
+        self._changes['content'] = DiffChange("modified", {"added": added, "removed": removed})
+        return True
+
 
     def _owner_diff(self):
         u_attr, g_attr = ('uid', 'gid') if self._numeric_ids else ('user', 'group')
         u1, g1 = self._item1.get(u_attr), self._item1.get(g_attr)
         u2, g2 = self._item2.get(u_attr), self._item2.get(g_attr)
-        if (u1, g1) != (u2, g2):
-            return ({"type": "owner", "old_user": u1, "old_group": g1, "new_user": u2, "new_group": g2},
-                    '[{}:{} -> {}:{}]'.format(u1, g1, u2, g2))
+        if (u1, g1) == (u2, g2):
+            return False
+        self._changes['owner'] = DiffChange("changed owner", {"item1": (u1, g1), "item2": (u2, g2)})
+        if u1 != u2:
+            self._changes['user'] = DiffChange("changed user", {"item1": u1, "item2": u2})
+        if g1 != g2:
+            self._changes['group'] = DiffChange("changed group", {"item1": g1, "item2": g2})
+        return True
 
     def _mode_diff(self):
         if 'mode' in self._item1 and 'mode' in self._item2 and self._item1.mode != self._item2.mode:
             mode1 = stat.filemode(self._item1.mode)
             mode2 = stat.filemode(self._item2.mode)
-            return ({"type": "mode", "old_mode": mode1, "new_mode": mode2}, '[{} -> {}]'.format(mode1, mode2))
+            self._changes['mode'] = DiffChange("changed mode", {"item1": mode1, "item2": mode2})
+            if mode1[0] != mode2[0]:
+                self._changes['type'] = DiffChange("changed type", {"item1": mode1[0], "item2": mode2[0]})
 
     def _time_diffs(self):
-        changes = []
         attrs = ["ctime", "mtime"]
         for attr in attrs:
             if attr in self._item1 and attr in self._item2 and self._item1.get(attr) != self._item2.get(attr):
                 ts1 = OutputTimestamp(safe_timestamp(self._item1.get(attr)))
                 ts2 = OutputTimestamp(safe_timestamp(self._item2.get(attr)))
-                changes.append(({"type": attr, f"old_{attr}": ts1, f"new_{attr}": ts2}, '[{}: {} -> {}]'.format(attr, ts1, ts2)))
-        return changes
+                self._changes[attr] = DiffChange(attr, {"item1": ts1, "item2": ts2})
+        return True
+
+    def content(self):
+        return self._changes.get('content')
+
+    def ctime(self):
+        return self._changes.get('ctime')
+
+    def mtime(self):
+        return self._changes.get('mtime')
+
+    def mode(self):
+        return self._changes.get('mode')
+
+    def type(self):
+        return self._changes.get('type')
+
+    def owner(self):
+        return self._changes.get('owner')
+
+    def user(self):
+        return self._changes.get('user')
+
+    def group(self):
+        return self._changes.get('group')
 
-    def _content_equal(self, chunk_iterator1, chunk_iterator2):
+    def _content_equal(self):
         if self._can_compare_chunk_ids:
             return self._item1.chunks == self._item2.chunks
         if self._item1.get_size() != self._item2.get_size():
             return False
-        return chunks_contents_equal(chunk_iterator1, chunk_iterator2)
+        return chunks_contents_equal(self._chunk_1, self._chunk_2)
 
 
 def chunks_contents_equal(chunks_a, chunks_b):

+ 43 - 27
src/borg/testsuite/archiver/diff_cmd.py

@@ -72,22 +72,20 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd(f"--repo={self.repository_location}", "create", "test1b", "input", "--chunker-params", "16,18,17,4095")
 
         def do_asserts(output, can_compare_ids, content_only=False):
-            # File contents changed (deleted and replaced with a new file)
-            change = "B" if can_compare_ids else "{:<19}".format("modified")
-            lines = output.splitlines()
+            lines: list = output.splitlines()
             assert "file_replaced" in output  # added to debug #3494
+            change = "modified.*B" if can_compare_ids else r"modified:  \(can't get size\)"
             self.assert_line_exists(lines, f"{change}.*input/file_replaced")
-
             # File unchanged
             assert "input/file_unchanged" not in output
 
             # Directory replaced with a regular file
             if "BORG_TESTS_IGNORE_MODES" not in os.environ and not is_win32 and not content_only:
-                self.assert_line_exists(lines, "drwxr-xr-x -> -rwxr-xr-x.*input/dir_replaced_with_file")
+                self.assert_line_exists(lines, "[drwxr-xr-x -> -rwxr-xr-x].*input/dir_replaced_with_file")
 
             # Basic directory cases
-            assert "added directory     input/dir_added" in output
-            assert "removed directory   input/dir_removed" in output
+            assert "added directory             input/dir_added" in output
+            assert "removed directory           input/dir_removed" in output
 
             if are_symlinks_supported():
                 # Basic symlink cases
@@ -96,8 +94,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
                 self.assert_line_exists(lines, "removed link.*input/link_removed")
 
                 # Symlink replacing or being replaced
-                assert "input/dir_replaced_with_link" in output
-                assert "input/link_replaced_by_file" in output
+                if not content_only:
+                    assert "input/dir_replaced_with_link" in output
+                    assert "input/link_replaced_by_file" in output
 
                 # Symlink target removed. Should not affect the symlink at all.
                 assert "input/link_target_removed" not in output
@@ -105,7 +104,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             # The inode has two links and the file contents changed. Borg
             # should notice the changes in both links. However, the symlink
             # pointing to the file is not changed.
-            change = "0 B" if can_compare_ids else "{:<19}".format("modified")
+            change = "modified.*0 B" if can_compare_ids else r"modified:  \(can't get size\)"
             self.assert_line_exists(lines, f"{change}.*input/empty")
             if are_hardlinks_supported():
                 self.assert_line_exists(lines, f"{change}.*input/hardlink_contents_changed")
@@ -114,18 +113,18 @@ class ArchiverTestCase(ArchiverTestCaseBase):
 
             # Added a new file and a hard link to it. Both links to the same
             # inode should appear as separate files.
-            assert "added       2.05 kB input/file_added" in output
+            assert "added:              2.05 kB input/file_added" in output
             if are_hardlinks_supported():
-                assert "added       2.05 kB input/hardlink_added" in output
+                assert "added:              2.05 kB input/hardlink_added" in output
 
             # check if a diff between nonexistent and empty new file is found
-            assert "added           0 B input/file_empty_added" in output
+            assert "added:                  0 B input/file_empty_added" in output
 
             # The inode has two links and both of them are deleted. They should
             # appear as two deleted files.
-            assert "removed       256 B input/file_removed" in output
+            assert "removed:              256 B input/file_removed" in output
             if are_hardlinks_supported():
-                assert "removed       256 B input/hardlink_removed" in output
+                assert "removed:              256 B input/hardlink_removed" in output
 
             if are_hardlinks_supported() and content_only:
                 # Another link (marked previously as the source in borg) to the
@@ -143,7 +142,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
                 chgsets = [j["changes"] for j in data if j["path"] == filename]
                 assert len(chgsets) < 2
                 # return a flattened list of changes for given filename
-                return [chg for chgset in chgsets for chg in chgset]
+                return sum(chgsets, [])
 
             # convert output to list of dicts
             joutput = [json.loads(line) for line in output.split("\n") if line]
@@ -157,7 +156,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
 
             # Directory replaced with a regular file
             if "BORG_TESTS_IGNORE_MODES" not in os.environ and not is_win32 and not content_only:
-                assert {"type": "mode", "old_mode": "drwxr-xr-x", "new_mode": "-rwxr-xr-x"} in get_changes(
+                assert {"type": "changed mode", "item1": "drwxr-xr-x", "item2": "-rwxr-xr-x"} in get_changes(
                     "input/dir_replaced_with_file", joutput
                 )
 
@@ -175,11 +174,11 @@ class ArchiverTestCase(ArchiverTestCaseBase):
 
                 if not content_only:
                     assert any(
-                        chg["type"] == "mode" and chg["new_mode"].startswith("l")
+                        chg["type"] == "changed mode" and chg["item1"].startswith("d") and chg["item2"].startswith("l")
                         for chg in get_changes("input/dir_replaced_with_link", joutput)
                     ), get_changes("input/dir_replaced_with_link", joutput)
                     assert any(
-                        chg["type"] == "mode" and chg["old_mode"].startswith("l")
+                        chg["type"] == "changed mode" and chg["item1"].startswith("l") and chg["item2"].startswith("-")
                         for chg in get_changes("input/link_replaced_by_file", joutput)
                     ), get_changes("input/link_replaced_by_file", joutput)
 
@@ -198,18 +197,18 @@ class ArchiverTestCase(ArchiverTestCaseBase):
 
             # Added a new file and a hard link to it. Both links to the same
             # inode should appear as separate files.
-            assert {"type": "added", "size": 2048} in get_changes("input/file_added", joutput)
+            assert {"added": 2048, "removed": 0, "type": "added"} in get_changes("input/file_added", joutput)
             if are_hardlinks_supported():
-                assert {"type": "added", "size": 2048} in get_changes("input/hardlink_added", joutput)
+                assert {"added": 2048, "removed": 0, "type": "added"} in get_changes("input/hardlink_added", joutput)
 
             # check if a diff between nonexistent and empty new file is found
-            assert {"type": "added", "size": 0} in get_changes("input/file_empty_added", joutput)
+            assert {"added": 0, "removed": 0, "type": "added"} in get_changes("input/file_empty_added", joutput)
 
             # The inode has two links and both of them are deleted. They should
             # appear as two deleted files.
-            assert {"type": "removed", "size": 256} in get_changes("input/file_removed", joutput)
+            assert {"added": 0, "removed": 256, "type": "removed"} in get_changes("input/file_removed", joutput)
             if are_hardlinks_supported():
-                assert {"type": "removed", "size": 256} in get_changes("input/hardlink_removed", joutput)
+                assert {"added": 0, "removed": 256, "type": "removed"} in get_changes("input/hardlink_removed", joutput)
 
             if are_hardlinks_supported() and content_only:
                 # Another link (marked previously as the source in borg) to the
@@ -251,14 +250,28 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             time.sleep(1)  # HFS has a 1s timestamp granularity
         self.create_regular_file("test_file", size=15)
         self.cmd(f"--repo={self.repository_location}", "create", "archive2", "input")
-        output = self.cmd(f"--repo={self.repository_location}", "diff", "archive1", "archive2")
+        output = self.cmd(
+            f"--repo={self.repository_location}",
+            "diff",
+            "archive1",
+            "archive2",
+            "--format",
+            "'{mtime}{ctime} {path}{NL}'",
+        )
         self.assert_in("mtime", output)
         self.assert_in("ctime", output)  # Should show up on windows as well since it is a new file.
         if is_darwin:
             time.sleep(1)  # HFS has a 1s timestamp granularity
         os.chmod("input/test_file", 0o777)
         self.cmd(f"--repo={self.repository_location}", "create", "archive3", "input")
-        output = self.cmd(f"--repo={self.repository_location}", "diff", "archive2", "archive3")
+        output = self.cmd(
+            f"--repo={self.repository_location}",
+            "diff",
+            "archive2",
+            "archive3",
+            "--format",
+            "'{mtime}{ctime} {path}{NL}'",
+        )
         self.assert_not_in("mtime", output)
         # Checking platform because ctime should not be shown on windows since it wasn't recreated.
         if not is_win32:
@@ -294,7 +307,10 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             "e_file_changed",
             "f_file_removed",
         ]
-        assert all(x in line for x, line in zip(expected, output.splitlines()))
+        assert isinstance(output, str)
+        outputs = output.splitlines()
+        assert len(outputs) == len(expected)
+        assert all(x in line for x, line in zip(expected, outputs))
 
 
 class RemoteArchiverTestCase(RemoteArchiverTestCaseBase, ArchiverTestCase):