Browse code

Add --format option to `borg diff`, resolve issue #4634 (#7534)

diff: add --format option

also: refactoring/improvements of BaseFormatter
Tarrailt 2 years ago
parent
commit
616d5e7330

+ 34 - 37
src/borg/archive.py

@@ -11,6 +11,7 @@ from functools import partial
 from getpass import getuser
 from getpass import getuser
 from io import BytesIO
 from io import BytesIO
 from itertools import groupby, zip_longest
 from itertools import groupby, zip_longest
+from typing import Iterator
 from shutil import get_terminal_size
 from shutil import get_terminal_size
 
 
 from .platformflags import is_win32
 from .platformflags import is_win32
@@ -297,31 +298,24 @@ class DownloadPipeline:
         unpacker = msgpack.Unpacker(use_list=False)
         unpacker = msgpack.Unpacker(use_list=False)
         for data in self.fetch_many(ids):
         for data in self.fetch_many(ids):
             unpacker.feed(data)
             unpacker.feed(data)
-            items = [Item(internal_dict=item) for item in unpacker]
-            for item in items:
+            for _item in unpacker:
+                item = Item(internal_dict=_item)
                 if "chunks" in item:
                 if "chunks" in item:
                     item.chunks = [ChunkListEntry(*e) for e in item.chunks]
                     item.chunks = [ChunkListEntry(*e) for e in item.chunks]
-
-            if filter:
-                items = [item for item in items if filter(item)]
-
-            if preload:
-                for item in items:
-                    if "chunks" in item:
-                        hlid = item.get("hlid", None)
-                        if hlid is None:
-                            preload_chunks = True
-                        else:
-                            if hlid in hlids_preloaded:
-                                preload_chunks = False
-                            else:
-                                # not having the hardlink's chunks already preloaded for other hardlink to same inode
-                                preload_chunks = True
-                                hlids_preloaded.add(hlid)
-                        if preload_chunks:
-                            self.repository.preload([c.id for c in item.chunks])
-
-            for item in items:
+                if filter and not filter(item):
+                    continue
+                if preload and "chunks" in item:
+                    hlid = item.get("hlid", None)
+                    if hlid is None:
+                        preload_chunks = True
+                    elif hlid in hlids_preloaded:
+                        preload_chunks = False
+                    else:
+                        # not having the hardlink's chunks already preloaded for other hardlink to same inode
+                        preload_chunks = True
+                        hlids_preloaded.add(hlid)
+                    if preload_chunks:
+                        self.repository.preload([c.id for c in item.chunks])
                 yield item
                 yield item
 
 
     def fetch_many(self, ids, is_preloaded=False):
     def fetch_many(self, ids, is_preloaded=False):
@@ -631,10 +625,9 @@ Duration: {0.duration}
     def iter_items(self, filter=None, preload=False):
     def iter_items(self, filter=None, preload=False):
         # note: when calling this with preload=True, later fetch_many() must be called with
         # note: when calling this with preload=True, later fetch_many() must be called with
         # is_preloaded=True or the RemoteRepository code will leak memory!
         # is_preloaded=True or the RemoteRepository code will leak memory!
-        for item in self.pipeline.unpack_many(
+        yield from self.pipeline.unpack_many(
             self.metadata.items, preload=preload, filter=lambda item: self.item_filter(item, filter)
             self.metadata.items, preload=preload, filter=lambda item: self.item_filter(item, filter)
-        ):
-            yield item
+        )
 
 
     def add_item(self, item, show_progress=True, stats=None):
     def add_item(self, item, show_progress=True, stats=None):
         if show_progress and self.show_progress:
         if show_progress and self.show_progress:
@@ -1123,55 +1116,59 @@ Duration: {0.duration}
             logger.warning("borg check --repair is required to free all space.")
             logger.warning("borg check --repair is required to free all space.")
 
 
     @staticmethod
     @staticmethod
-    def compare_archives_iter(archive1, archive2, matcher=None, can_compare_chunk_ids=False, content_only=False):
+    def compare_archives_iter(
+        archive1: "Archive", archive2: "Archive", matcher=None, can_compare_chunk_ids=False
+    ) -> Iterator[ItemDiff]:
         """
         """
-        Yields tuples with a path and an ItemDiff instance describing changes/indicating equality.
+        Yields an ItemDiff instance describing changes/indicating equality.
 
 
         :param matcher: PatternMatcher class to restrict results to only matching paths.
         :param matcher: PatternMatcher class to restrict results to only matching paths.
         :param can_compare_chunk_ids: Whether --chunker-params are the same for both archives.
         :param can_compare_chunk_ids: Whether --chunker-params are the same for both archives.
         """
         """
 
 
-        def compare_items(item1, item2):
+        def compare_items(path: str, item1: Item, item2: Item):
             return ItemDiff(
             return ItemDiff(
+                path,
                 item1,
                 item1,
                 item2,
                 item2,
                 archive1.pipeline.fetch_many([c.id for c in item1.get("chunks", [])]),
                 archive1.pipeline.fetch_many([c.id for c in item1.get("chunks", [])]),
                 archive2.pipeline.fetch_many([c.id for c in item2.get("chunks", [])]),
                 archive2.pipeline.fetch_many([c.id for c in item2.get("chunks", [])]),
                 can_compare_chunk_ids=can_compare_chunk_ids,
                 can_compare_chunk_ids=can_compare_chunk_ids,
-                content_only=content_only,
             )
             )
 
 
-        orphans_archive1 = OrderedDict()
-        orphans_archive2 = OrderedDict()
+        orphans_archive1: OrderedDict[str, Item] = OrderedDict()
+        orphans_archive2: OrderedDict[str, Item] = OrderedDict()
+
+        assert matcher is not None, "matcher must be set"
 
 
         for item1, item2 in zip_longest(
         for item1, item2 in zip_longest(
             archive1.iter_items(lambda item: matcher.match(item.path)),
             archive1.iter_items(lambda item: matcher.match(item.path)),
             archive2.iter_items(lambda item: matcher.match(item.path)),
             archive2.iter_items(lambda item: matcher.match(item.path)),
         ):
         ):
             if item1 and item2 and item1.path == item2.path:
             if item1 and item2 and item1.path == item2.path:
-                yield (item1.path, compare_items(item1, item2))
+                yield compare_items(item1.path, item1, item2)
                 continue
                 continue
             if item1:
             if item1:
                 matching_orphan = orphans_archive2.pop(item1.path, None)
                 matching_orphan = orphans_archive2.pop(item1.path, None)
                 if matching_orphan:
                 if matching_orphan:
-                    yield (item1.path, compare_items(item1, matching_orphan))
+                    yield compare_items(item1.path, item1, matching_orphan)
                 else:
                 else:
                     orphans_archive1[item1.path] = item1
                     orphans_archive1[item1.path] = item1
             if item2:
             if item2:
                 matching_orphan = orphans_archive1.pop(item2.path, None)
                 matching_orphan = orphans_archive1.pop(item2.path, None)
                 if matching_orphan:
                 if matching_orphan:
-                    yield (matching_orphan.path, compare_items(matching_orphan, item2))
+                    yield compare_items(matching_orphan.path, matching_orphan, item2)
                 else:
                 else:
                     orphans_archive2[item2.path] = item2
                     orphans_archive2[item2.path] = item2
         # At this point orphans_* contain items that had no matching partner in the other archive
         # At this point orphans_* contain items that had no matching partner in the other archive
         for added in orphans_archive2.values():
         for added in orphans_archive2.values():
             path = added.path
             path = added.path
             deleted_item = Item.create_deleted(path)
             deleted_item = Item.create_deleted(path)
-            yield (path, compare_items(deleted_item, added))
+            yield compare_items(path, deleted_item, added)
         for deleted in orphans_archive1.values():
         for deleted in orphans_archive1.values():
             path = deleted.path
             path = deleted.path
             deleted_item = Item.create_deleted(path)
             deleted_item = Item.create_deleted(path)
-            yield (path, compare_items(deleted, deleted_item))
+            yield compare_items(path, deleted, deleted_item)
 
 
 
 
 class MetadataCollector:
 class MetadataCollector:

+ 82 - 34
src/borg/archiver/diff_cmd.py

@@ -1,13 +1,14 @@
 import argparse
 import argparse
+import textwrap
 import json
 import json
+import sys
+import os
 
 
-from ._common import with_repository, with_archive, build_matcher
+from ._common import with_repository, with_archive, build_matcher, Highlander
 from ..archive import Archive
 from ..archive import Archive
 from ..constants import *  # NOQA
 from ..constants import *  # NOQA
-from ..helpers import archivename_validator
+from ..helpers import BaseFormatter, DiffFormatter, archivename_validator, BorgJsonEncoder
 from ..manifest import Manifest
 from ..manifest import Manifest
-from ..helpers.parseformat import BorgJsonEncoder
-
 from ..logger import create_logger
 from ..logger import create_logger
 
 
 logger = create_logger()
 logger = create_logger()
@@ -18,14 +19,12 @@ class DiffMixIn:
     @with_archive
     @with_archive
     def do_diff(self, args, repository, manifest, archive):
     def do_diff(self, args, repository, manifest, archive):
         """Diff contents of two archives"""
         """Diff contents of two archives"""
-
-        def print_json_output(diff, path):
-            print(json.dumps({"path": path, "changes": [j for j, str in diff]}, sort_keys=True, cls=BorgJsonEncoder))
-
-        def print_text_output(diff, path):
-            print("{:<19} {}".format(" ".join([str for j, str in diff]), path))
-
-        print_output = print_json_output if args.json_lines else print_text_output
+        if args.format is not None:
+            format = args.format
+        elif args.content_only:
+            format = "{content}{link}{directory}{blkdev}{chrdev}{fifo} {path}{NL}"
+        else:
+            format = os.environ.get("BORG_DIFF_FORMAT", "{change} {path}{NL}")
 
 
         archive1 = archive
         archive1 = archive
         archive2 = Archive(manifest, args.other_name)
         archive2 = Archive(manifest, args.other_name)
@@ -43,17 +42,36 @@ class DiffMixIn:
 
 
         matcher = build_matcher(args.patterns, args.paths)
         matcher = build_matcher(args.patterns, args.paths)
 
 
-        diffs = Archive.compare_archives_iter(
-            archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids, content_only=args.content_only
+        diffs_iter = Archive.compare_archives_iter(
+            archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids
         )
         )
         # Conversion to string and filtering for diff.equal to save memory if sorting
         # Conversion to string and filtering for diff.equal to save memory if sorting
-        diffs = ((path, diff.changes()) for path, diff in diffs if not diff.equal)
+        diffs = (diff for diff in diffs_iter if not diff.equal(args.content_only))
 
 
         if args.sort:
         if args.sort:
-            diffs = sorted(diffs)
-
-        for path, diff in diffs:
-            print_output(diff, path)
+            diffs = sorted(diffs, key=lambda diff: diff.path)
+
+        formatter = DiffFormatter(format, args.content_only)
+        for diff in diffs:
+            if args.json_lines:
+                print(
+                    json.dumps(
+                        {
+                            "path": diff.path,
+                            "changes": [
+                                change.to_dict()
+                                for name, change in diff.changes().items()
+                                if not args.content_only or (name not in DiffFormatter.METADATA)
+                            ],
+                        },
+                        sort_keys=True,
+                        cls=BorgJsonEncoder,
+                    )
+                )
+            else:
+                res: str = formatter.format_item(diff)
+                if res.strip():
+                    sys.stdout.write(res)
 
 
         for pattern in matcher.get_unmatched_include_patterns():
         for pattern in matcher.get_unmatched_include_patterns():
             self.print_warning("Include pattern '%s' never matched.", pattern)
             self.print_warning("Include pattern '%s' never matched.", pattern)
@@ -64,25 +82,48 @@ class DiffMixIn:
         from ._common import process_epilog
         from ._common import process_epilog
         from ._common import define_exclusion_group
         from ._common import define_exclusion_group
 
 
-        diff_epilog = process_epilog(
-            """
-            This command finds differences (file contents, user/group/mode) between archives.
+        diff_epilog = (
+            process_epilog(
+                """
+        This command finds differences (file contents, metadata) between ARCHIVE1 and ARCHIVE2.
+
+        For more help on include/exclude patterns, see the :ref:`borg_patterns` command output.
+
+        .. man NOTES
+
+        The FORMAT specifier syntax
+        +++++++++++++++++++++++++++
+
+        The ``--format`` option uses python's `format string syntax
+        <https://docs.python.org/3/library/string.html#formatstrings>`_.
 
 
-            A repository location and an archive name must be specified for REPO::ARCHIVE1.
-            ARCHIVE2 is just another archive name in same repository (no repository location
-            allowed).
+        Examples:
+        ::
 
 
-            For archives created with Borg 1.1 or newer diff automatically detects whether
-            the archives are created with the same chunker params. If so, only chunk IDs
-            are compared, which is very fast.
+            $ borg diff --format '{content:30} {path}{NL}' ArchiveFoo ArchiveBar
+            modified:  +4.1 kB  -1.0 kB    file-diff
+            ...
 
 
-            For archives prior to Borg 1.1 chunk contents are compared by default.
-            If you did not create the archives with different chunker params,
-            pass ``--same-chunker-params``.
-            Note that the chunker params changed from Borg 0.xx to 1.0.
+            # {VAR:<NUMBER} - pad to NUMBER columns left-aligned.
+            # {VAR:>NUMBER} - pad to NUMBER columns right-aligned.
+            $ borg diff --format '{content:>30} {path}{NL}' ArchiveFoo ArchiveBar
+               modified:  +4.1 kB  -1.0 kB file-diff
+            ...
 
 
-            For more help on include/exclude patterns, see the :ref:`borg_patterns` command output.
-            """
+        The following keys are always available:
+
+
+        """
+            )
+            + BaseFormatter.keys_help()
+            + textwrap.dedent(
+                """
+
+        Keys available only when showing differences between archives:
+
+        """
+            )
+            + DiffFormatter.keys_help()
         )
         )
         subparser = subparsers.add_parser(
         subparser = subparsers.add_parser(
             "diff",
             "diff",
@@ -107,6 +148,13 @@ class DiffMixIn:
             help="Override check of chunker parameters.",
             help="Override check of chunker parameters.",
         )
         )
         subparser.add_argument("--sort", dest="sort", action="store_true", help="Sort the output lines by file path.")
         subparser.add_argument("--sort", dest="sort", action="store_true", help="Sort the output lines by file path.")
+        subparser.add_argument(
+            "--format",
+            metavar="FORMAT",
+            dest="format",
+            action=Highlander,
+            help='specify format for differences between archives (default: "{change} {path}{NL}")',
+        )
         subparser.add_argument("--json-lines", action="store_true", help="Format output as JSON Lines. ")
         subparser.add_argument("--json-lines", action="store_true", help="Format output as JSON Lines. ")
         subparser.add_argument(
         subparser.add_argument(
             "--content-only",
             "--content-only",

+ 2 - 3
src/borg/archiver/list_cmd.py

@@ -29,10 +29,9 @@ class ListMixIn:
 
 
         def _list_inner(cache):
         def _list_inner(cache):
             archive = Archive(manifest, args.name, cache=cache)
             archive = Archive(manifest, args.name, cache=cache)
-
-            formatter = ItemFormatter(archive, format, json_lines=args.json_lines)
+            formatter = ItemFormatter(archive, format)
             for item in archive.iter_items(lambda item: matcher.match(item.path)):
             for item in archive.iter_items(lambda item: matcher.match(item.path)):
-                sys.stdout.write(formatter.format_item(item))
+                sys.stdout.write(formatter.format_item(item, args.json_lines, sort=True))
 
 
         # Only load the cache if it will be used
         # Only load the cache if it will be used
         if ItemFormatter.format_needs_cache(format):
         if ItemFormatter.format_needs_cache(format):

+ 2 - 2
src/borg/archiver/prune_cmd.py

@@ -89,7 +89,7 @@ class PruneMixIn:
             format = "{archive}"
             format = "{archive}"
         else:
         else:
             format = os.environ.get("BORG_PRUNE_FORMAT", "{archive:<36} {time} [{id}]")
             format = os.environ.get("BORG_PRUNE_FORMAT", "{archive:<36} {time} [{id}]")
-        formatter = ArchiveFormatter(format, repository, manifest, manifest.key, json=False, iec=args.iec)
+        formatter = ArchiveFormatter(format, repository, manifest, manifest.key, iec=args.iec)
 
 
         checkpoint_re = r"\.checkpoint(\.\d+)?"
         checkpoint_re = r"\.checkpoint(\.\d+)?"
         archives_checkpoints = manifest.archives.list(
         archives_checkpoints = manifest.archives.list(
@@ -169,7 +169,7 @@ class PruneMixIn:
                     or (args.list_pruned and archive in to_delete)
                     or (args.list_pruned and archive in to_delete)
                     or (args.list_kept and archive not in to_delete)
                     or (args.list_kept and archive not in to_delete)
                 ):
                 ):
-                    list_logger.info(f"{log_message:<40} {formatter.format_item(archive)}")
+                    list_logger.info(f"{log_message:<40} {formatter.format_item(archive, jsonline=False)}")
             pi.finish()
             pi.finish()
             if sig_int:
             if sig_int:
                 # Ctrl-C / SIGINT: do not checkpoint (commit) again, we already have a checkpoint in this case.
                 # Ctrl-C / SIGINT: do not checkpoint (commit) again, we already have a checkpoint in this case.

+ 3 - 3
src/borg/archiver/rlist_cmd.py

@@ -23,15 +23,15 @@ class RListMixIn:
             format = "{archive}{NL}"
             format = "{archive}{NL}"
         else:
         else:
             format = os.environ.get("BORG_RLIST_FORMAT", "{archive:<36} {time} [{id}]{NL}")
             format = os.environ.get("BORG_RLIST_FORMAT", "{archive:<36} {time} [{id}]{NL}")
-        formatter = ArchiveFormatter(format, repository, manifest, manifest.key, json=args.json, iec=args.iec)
+        formatter = ArchiveFormatter(format, repository, manifest, manifest.key, iec=args.iec)
 
 
         output_data = []
         output_data = []
 
 
         for archive_info in manifest.archives.list_considering(args):
         for archive_info in manifest.archives.list_considering(args):
             if args.json:
             if args.json:
-                output_data.append(formatter.get_item_data(archive_info))
+                output_data.append(formatter.get_item_data(archive_info, args.json))
             else:
             else:
-                sys.stdout.write(formatter.format_item(archive_info))
+                sys.stdout.write(formatter.format_item(archive_info, args.json))
 
 
         if args.json:
         if args.json:
             json_print(basic_json_data(manifest, extra={"archives": output_data}))
             json_print(basic_json_data(manifest, extra={"archives": output_data}))

+ 1 - 1
src/borg/helpers/__init__.py

@@ -28,7 +28,7 @@ from .parseformat import sizeof_fmt, sizeof_fmt_iec, sizeof_fmt_decimal, Locatio
 from .parseformat import format_line, replace_placeholders, PlaceholderError, relative_time_marker_validator
 from .parseformat import format_line, replace_placeholders, PlaceholderError, relative_time_marker_validator
 from .parseformat import format_archive, parse_stringified_list, clean_lines
 from .parseformat import format_archive, parse_stringified_list, clean_lines
 from .parseformat import location_validator, archivename_validator, comment_validator
 from .parseformat import location_validator, archivename_validator, comment_validator
-from .parseformat import BaseFormatter, ArchiveFormatter, ItemFormatter, file_status
+from .parseformat import BaseFormatter, ArchiveFormatter, ItemFormatter, DiffFormatter, file_status
 from .parseformat import swidth_slice, ellipsis_truncate
 from .parseformat import swidth_slice, ellipsis_truncate
 from .parseformat import BorgJsonEncoder, basic_json_data, json_print, json_dump, prepare_dump_dict
 from .parseformat import BorgJsonEncoder, basic_json_data, json_print, json_dump, prepare_dump_dict
 from .parseformat import Highlander, MakePathSafeAction
 from .parseformat import Highlander, MakePathSafeAction

+ 205 - 109
src/borg/helpers/parseformat.py

@@ -1,3 +1,4 @@
+import abc
 import argparse
 import argparse
 import base64
 import base64
 import hashlib
 import hashlib
@@ -8,6 +9,7 @@ import re
 import shlex
 import shlex
 import stat
 import stat
 import uuid
 import uuid
+from typing import List, Dict, Set, Tuple, ClassVar, Any, TYPE_CHECKING, Literal
 from binascii import hexlify
 from binascii import hexlify
 from collections import Counter, OrderedDict
 from collections import Counter, OrderedDict
 from datetime import datetime, timezone
 from datetime import datetime, timezone
@@ -27,6 +29,9 @@ from .. import __version_tuple__ as borg_version_tuple
 from ..constants import *  # NOQA
 from ..constants import *  # NOQA
 from ..platformflags import is_win32
 from ..platformflags import is_win32
 
 
+if TYPE_CHECKING:
+    from ..item import ItemDiff
+
 
 
 def bin_to_hex(binary):
 def bin_to_hex(binary):
     return hexlify(binary).decode("ascii")
     return hexlify(binary).decode("ascii")
@@ -649,8 +654,10 @@ def archivename_validator(text):
     return validate_text(text)
     return validate_text(text)
 
 
 
 
-class BaseFormatter:
-    FIXED_KEYS = {
+class BaseFormatter(metaclass=abc.ABCMeta):
+    format: str
+    static_data: Dict[str, Any]
+    FIXED_KEYS: ClassVar[Dict[str, str]] = {
         # Formatting aids
         # Formatting aids
         "LF": "\n",
         "LF": "\n",
         "SPACE": " ",
         "SPACE": " ",
@@ -660,25 +667,49 @@ class BaseFormatter:
         "NEWLINE": "\n",
         "NEWLINE": "\n",
         "NL": "\n",  # \n is automatically converted to os.linesep on write
         "NL": "\n",  # \n is automatically converted to os.linesep on write
     }
     }
+    KEY_DESCRIPTIONS: ClassVar[Dict[str, str]] = {
+        "NEWLINE": "OS dependent line separator",
+        "NL": "alias of NEWLINE",
+        "NUL": "NUL character for creating print0 / xargs -0 like output",
+        "SPACE": "space character",
+        "TAB": "tab character",
+        "CR": "carriage return character",
+        "LF": "line feed character",
+    }
+    KEY_GROUPS: ClassVar[Tuple[Tuple[str, ...], ...]] = (("NEWLINE", "NL", "NUL", "SPACE", "TAB", "CR", "LF"),)
 
 
-    def get_item_data(self, item):
-        raise NotImplementedError
+    def __init__(self, format: str, static: Dict[str, Any]) -> None:
+        self.format = partial_format(format, static)
+        self.static_data = static
 
 
-    def format_item(self, item):
-        return self.format.format_map(self.get_item_data(item))
+    @abc.abstractmethod
+    def get_item_data(self, item, jsonline=False) -> dict:
+        raise NotImplementedError
 
 
-    @staticmethod
-    def keys_help():
+    def format_item(self, item, jsonline=False, sort=False):
+        data = self.get_item_data(item, jsonline)
         return (
         return (
-            "- NEWLINE: OS dependent line separator\n"
-            "- NL: alias of NEWLINE\n"
-            "- NUL: NUL character for creating print0 / xargs -0 like output\n"
-            "- SPACE\n"
-            "- TAB\n"
-            "- CR\n"
-            "- LF"
+            f"{json.dumps(data, cls=BorgJsonEncoder, sort_keys=sort)}\n" if jsonline else self.format.format_map(data)
         )
         )
 
 
+    @classmethod
+    def keys_help(cls):
+        help = []
+        keys: Set[str] = set()
+        keys.update(cls.KEY_DESCRIPTIONS.keys())
+        keys.update(key for group in cls.KEY_GROUPS for key in group)
+
+        for group in cls.KEY_GROUPS:
+            for key in group:
+                keys.remove(key)
+                text = "- " + key
+                if key in cls.KEY_DESCRIPTIONS:
+                    text += ": " + cls.KEY_DESCRIPTIONS[key]
+                help.append(text)
+            help.append("")
+        assert not keys, str(keys)
+        return "\n".join(help)
+
 
 
 class ArchiveFormatter(BaseFormatter):
 class ArchiveFormatter(BaseFormatter):
     KEY_DESCRIPTIONS = {
     KEY_DESCRIPTIONS = {
@@ -703,47 +734,17 @@ class ArchiveFormatter(BaseFormatter):
         ("size", "nfiles"),
         ("size", "nfiles"),
     )
     )
 
 
-    @classmethod
-    def available_keys(cls):
-        from ..manifest import ArchiveInfo
-
-        fake_archive_info = ArchiveInfo("archivename", b"\1" * 32, datetime(1970, 1, 1, tzinfo=timezone.utc))
-        formatter = cls("", None, None, None)
-        keys = []
-        keys.extend(formatter.call_keys.keys())
-        keys.extend(formatter.get_item_data(fake_archive_info).keys())
-        return keys
-
-    @classmethod
-    def keys_help(cls):
-        help = []
-        keys = cls.available_keys()
-        for key in cls.FIXED_KEYS:
-            keys.remove(key)
-
-        for group in cls.KEY_GROUPS:
-            for key in group:
-                keys.remove(key)
-                text = "- " + key
-                if key in cls.KEY_DESCRIPTIONS:
-                    text += ": " + cls.KEY_DESCRIPTIONS[key]
-                help.append(text)
-            help.append("")
-        assert not keys, str(keys)
-        return "\n".join(help)
-
-    def __init__(self, format, repository, manifest, key, *, json=False, iec=False):
+    def __init__(self, format, repository, manifest, key, *, iec=False):
+        static_data = {}  # here could be stuff on repo level, above archive level
+        static_data.update(self.FIXED_KEYS)
+        super().__init__(format, static_data)
         self.repository = repository
         self.repository = repository
         self.manifest = manifest
         self.manifest = manifest
         self.key = key
         self.key = key
         self.name = None
         self.name = None
         self.id = None
         self.id = None
         self._archive = None
         self._archive = None
-        self.json = json
         self.iec = iec
         self.iec = iec
-        static_keys = {}  # here could be stuff on repo level, above archive level
-        static_keys.update(self.FIXED_KEYS)
-        self.format = partial_format(format, static_keys)
         self.format_keys = {f[1] for f in Formatter().parse(format)}
         self.format_keys = {f[1] for f in Formatter().parse(format)}
         self.call_keys = {
         self.call_keys = {
             "hostname": partial(self.get_meta, "hostname", ""),
             "hostname": partial(self.get_meta, "hostname", ""),
@@ -755,20 +756,12 @@ class ArchiveFormatter(BaseFormatter):
             "end": self.get_ts_end,
             "end": self.get_ts_end,
         }
         }
         self.used_call_keys = set(self.call_keys) & self.format_keys
         self.used_call_keys = set(self.call_keys) & self.format_keys
-        if self.json:
-            self.item_data = {}
-            self.format_item = self.format_item_json
-        else:
-            self.item_data = static_keys
-
-    def format_item_json(self, item):
-        return json.dumps(self.get_item_data(item), cls=BorgJsonEncoder) + "\n"
 
 
-    def get_item_data(self, archive_info):
+    def get_item_data(self, archive_info, jsonline=False):
         self.name = archive_info.name
         self.name = archive_info.name
         self.id = archive_info.id
         self.id = archive_info.id
         item_data = {}
         item_data = {}
-        item_data.update(self.item_data)
+        item_data.update({} if jsonline else self.static_data)
         item_data.update(
         item_data.update(
             {
             {
                 "name": archive_info.name,
                 "name": archive_info.name,
@@ -812,15 +805,31 @@ class ItemFormatter(BaseFormatter):
     # shake_* is not provided because it uses an incompatible .digest() method to support variable length.
     # shake_* is not provided because it uses an incompatible .digest() method to support variable length.
     hash_algorithms = set(hashlib.algorithms_guaranteed).union({"xxh64"}).difference({"shake_128", "shake_256"})
     hash_algorithms = set(hashlib.algorithms_guaranteed).union({"xxh64"}).difference({"shake_128", "shake_256"})
     KEY_DESCRIPTIONS = {
     KEY_DESCRIPTIONS = {
+        "type": "file type (file, dir, symlink, ...)",
+        "mode": "file mode (as in stat)",
+        "uid": "user id of file owner",
+        "gid": "group id of file owner",
+        "user": "user name of file owner",
+        "group": "group name of file owner",
         "path": "file path",
         "path": "file path",
         "target": "link target for symlinks",
         "target": "link target for symlinks",
         "hlid": "hard link identity (same if hardlinking same fs object)",
         "hlid": "hard link identity (same if hardlinking same fs object)",
+        "flags": "file flags",
         "extra": 'prepends {target} with " -> " for soft links and " link to " for hard links',
         "extra": 'prepends {target} with " -> " for soft links and " link to " for hard links',
+        "size": "file size",
         "dsize": "deduplicated size",
         "dsize": "deduplicated size",
         "num_chunks": "number of chunks in this file",
         "num_chunks": "number of chunks in this file",
         "unique_chunks": "number of unique chunks in this file",
         "unique_chunks": "number of unique chunks in this file",
+        "mtime": "file modification time",
+        "ctime": "file change time",
+        "atime": "file access time",
+        "isomtime": "file modification time (ISO 8601 format)",
+        "isoctime": "file change time (ISO 8601 format)",
+        "isoatime": "file access time (ISO 8601 format)",
         "xxh64": "XXH64 checksum of this file (note: this is NOT a cryptographic hash!)",
         "xxh64": "XXH64 checksum of this file (note: this is NOT a cryptographic hash!)",
         "health": 'either "healthy" (file ok) or "broken" (if file has all-zero replacement chunks)',
         "health": 'either "healthy" (file ok) or "broken" (if file has all-zero replacement chunks)',
+        "archiveid": "internal ID of the archive",
+        "archivename": "name of the archive",
     }
     }
     KEY_GROUPS = (
     KEY_GROUPS = (
         ("type", "mode", "uid", "gid", "user", "group", "path", "target", "hlid", "flags"),
         ("type", "mode", "uid", "gid", "user", "group", "path", "target", "hlid", "flags"),
@@ -833,57 +842,19 @@ class ItemFormatter(BaseFormatter):
 
 
     KEYS_REQUIRING_CACHE = ("dsize", "unique_chunks")
     KEYS_REQUIRING_CACHE = ("dsize", "unique_chunks")
 
 
-    @classmethod
-    def available_keys(cls):
-        class FakeArchive:
-            fpr = name = ""
-
-        from ..item import Item
-
-        fake_item = Item(mode=0, path="foo", user="", group="", mtime=0, uid=0, gid=0)
-        formatter = cls(FakeArchive, "")
-        keys = []
-        keys.extend(formatter.call_keys.keys())
-        keys.extend(formatter.get_item_data(fake_item).keys())
-        return keys
-
-    @classmethod
-    def keys_help(cls):
-        help = []
-        keys = cls.available_keys()
-        for key in cls.FIXED_KEYS:
-            keys.remove(key)
-
-        for group in cls.KEY_GROUPS:
-            for key in group:
-                keys.remove(key)
-                text = "- " + key
-                if key in cls.KEY_DESCRIPTIONS:
-                    text += ": " + cls.KEY_DESCRIPTIONS[key]
-                help.append(text)
-            help.append("")
-        assert not keys, str(keys)
-        return "\n".join(help)
-
     @classmethod
     @classmethod
     def format_needs_cache(cls, format):
     def format_needs_cache(cls, format):
         format_keys = {f[1] for f in Formatter().parse(format)}
         format_keys = {f[1] for f in Formatter().parse(format)}
         return any(key in cls.KEYS_REQUIRING_CACHE for key in format_keys)
         return any(key in cls.KEYS_REQUIRING_CACHE for key in format_keys)
 
 
-    def __init__(self, archive, format, *, json_lines=False):
+    def __init__(self, archive, format):
         from ..checksums import StreamingXXH64
         from ..checksums import StreamingXXH64
 
 
+        static_data = {"archivename": archive.name, "archiveid": archive.fpr}
+        static_data.update(self.FIXED_KEYS)
+        super().__init__(format, static_data)
         self.xxh64 = StreamingXXH64
         self.xxh64 = StreamingXXH64
         self.archive = archive
         self.archive = archive
-        self.json_lines = json_lines
-        static_keys = {"archivename": archive.name, "archiveid": archive.fpr}
-        static_keys.update(self.FIXED_KEYS)
-        if self.json_lines:
-            self.item_data = {}
-            self.format_item = self.format_item_json
-        else:
-            self.item_data = static_keys
-        self.format = partial_format(format, static_keys)
         self.format_keys = {f[1] for f in Formatter().parse(format)}
         self.format_keys = {f[1] for f in Formatter().parse(format)}
         self.call_keys = {
         self.call_keys = {
             "size": self.calculate_size,
             "size": self.calculate_size,
@@ -901,17 +872,14 @@ class ItemFormatter(BaseFormatter):
             self.call_keys[hash_function] = partial(self.hash_item, hash_function)
             self.call_keys[hash_function] = partial(self.hash_item, hash_function)
         self.used_call_keys = set(self.call_keys) & self.format_keys
         self.used_call_keys = set(self.call_keys) & self.format_keys
 
 
-    def format_item_json(self, item):
-        return json.dumps(self.get_item_data(item), cls=BorgJsonEncoder, sort_keys=True) + "\n"
-
-    def get_item_data(self, item):
+    def get_item_data(self, item, jsonline=False):
         item_data = {}
         item_data = {}
-        item_data.update(self.item_data)
+        item_data.update({} if jsonline else self.static_data)
 
 
         item_data.update(text_to_json("path", item.path))
         item_data.update(text_to_json("path", item.path))
         target = item.get("target", "")
         target = item.get("target", "")
         item_data.update(text_to_json("target", target))
         item_data.update(text_to_json("target", target))
-        if not self.json_lines:
+        if not jsonline:
             item_data["extra"] = "" if not target else f" -> {item_data['target']}"
             item_data["extra"] = "" if not target else f" -> {item_data['target']}"
 
 
         hlid = item.get("hlid")
         hlid = item.get("hlid")
@@ -928,7 +896,7 @@ class ItemFormatter(BaseFormatter):
         item_data.update(text_to_json("user", item.get("user", str(item_data["uid"]))))
         item_data.update(text_to_json("user", item.get("user", str(item_data["uid"]))))
         item_data.update(text_to_json("group", item.get("group", str(item_data["gid"]))))
         item_data.update(text_to_json("group", item.get("group", str(item_data["gid"]))))
 
 
-        if self.json_lines:
+        if jsonline:
             item_data["healthy"] = "chunks_healthy" not in item
             item_data["healthy"] = "chunks_healthy" not in item
         else:
         else:
             item_data["health"] = "broken" if "chunks_healthy" in item else "healthy"
             item_data["health"] = "broken" if "chunks_healthy" in item else "healthy"
@@ -944,7 +912,7 @@ class ItemFormatter(BaseFormatter):
 
 
         item: The item to sum its unique chunks' metadata
         item: The item to sum its unique chunks' metadata
         metadata_func: A function that takes a parameter of type ChunkIndexEntry and returns a number, used to return
         metadata_func: A function that takes a parameter of type ChunkIndexEntry and returns a number, used to return
-                       the metadata needed from the chunk
+        the metadata needed from the chunk
         """
         """
         chunk_index = self.archive.cache.chunks
         chunk_index = self.archive.cache.chunks
         chunks = item.get("chunks", [])
         chunks = item.get("chunks", [])
@@ -976,6 +944,134 @@ class ItemFormatter(BaseFormatter):
         return self.format_time(key, item).isoformat()
         return self.format_time(key, item).isoformat()
 
 
 
 
+class DiffFormatter(BaseFormatter):
+    KEY_DESCRIPTIONS = {
+        "path": "archived file path",
+        "change": "all available changes",
+        "content": "file content change",
+        "mode": "file mode change",
+        "type": "file type change",
+        "owner": "file owner (user/group) change",
+        "user": "file user change",
+        "group": "file group change",
+        "link": "file link change",
+        "directory": "file directory change",
+        "blkdev": "file block device change",
+        "chrdev": "file character device change",
+        "fifo": "file fifo change",
+        "mtime": "file modification time change",
+        "ctime": "file change time change",
+        "isomtime": "file modification time change (ISO 8601)",
+        "isoctime": "file creation time change (ISO 8601)",
+    }
+    KEY_GROUPS = (
+        ("path", "change"),
+        ("content", "mode", "type", "owner", "group", "user"),
+        ("link", "directory", "blkdev", "chrdev", "fifo"),
+        ("mtime", "ctime", "isomtime", "isoctime"),
+    )
+    METADATA = ("mode", "type", "owner", "group", "user", "mtime", "ctime")
+
+    def __init__(self, format, content_only=False):
+        static_data = {}
+        static_data.update(self.FIXED_KEYS)
+        super().__init__(format or "{content}{link}{directory}{blkdev}{chrdev}{fifo} {path}{NL}", static_data)
+        self.content_only = content_only
+        self.format_keys = {f[1] for f in Formatter().parse(format)}
+        self.call_keys = {
+            "content": self.format_content,
+            "mode": self.format_mode,
+            "type": partial(self.format_mode, filetype=True),
+            "owner": partial(self.format_owner),
+            "group": partial(self.format_owner, spec="group"),
+            "user": partial(self.format_owner, spec="user"),
+            "link": partial(self.format_other, "link"),
+            "directory": partial(self.format_other, "directory"),
+            "blkdev": partial(self.format_other, "blkdev"),
+            "chrdev": partial(self.format_other, "chrdev"),
+            "fifo": partial(self.format_other, "fifo"),
+            "mtime": partial(self.format_time, "mtime"),
+            "ctime": partial(self.format_time, "ctime"),
+            "isomtime": partial(self.format_iso_time, "mtime"),
+            "isoctime": partial(self.format_iso_time, "ctime"),
+        }
+        self.used_call_keys = set(self.call_keys) & self.format_keys
+        if self.content_only:
+            self.used_call_keys -= set(self.METADATA)
+
+    def get_item_data(self, item: "ItemDiff", jsonline=False) -> dict:
+        diff_data = {}
+        for key in self.used_call_keys:
+            diff_data[key] = self.call_keys[key](item)
+
+        change = []
+        for key in self.call_keys:
+            if key in ("isomtime", "isoctime"):
+                continue
+            if self.content_only and key in self.METADATA:
+                continue
+            change.append(self.call_keys[key](item))
+        diff_data["change"] = " ".join([v for v in change if v])
+        diff_data["path"] = item.path
+        diff_data.update({} if jsonline else self.static_data)
+        return diff_data
+
+    def format_other(self, key, diff: "ItemDiff"):
+        change = diff.changes().get(key)
+        return f"{change.diff_type}".ljust(27) if change else ""  # 27 is the length of the content change
+
+    def format_mode(self, diff: "ItemDiff", filetype=False):
+        change = diff.type() if filetype else diff.mode()
+        return f"[{change.diff_data['item1']} -> {change.diff_data['item2']}]" if change else ""
+
+    def format_owner(self, diff: "ItemDiff", spec: Literal["owner", "user", "group"] = "owner"):
+        if spec == "user":
+            change = diff.user()
+            return f"[{change.diff_data['item1']} -> {change.diff_data['item2']}]" if change else ""
+        if spec == "group":
+            change = diff.group()
+            return f"[{change.diff_data['item1']} -> {change.diff_data['item2']}]" if change else ""
+        if spec != "owner":
+            raise ValueError(f"Invalid owner spec: {spec}")
+        change = diff.owner()
+        if change:
+            return "[{}:{} -> {}:{}]".format(
+                change.diff_data["item1"][0],
+                change.diff_data["item1"][1],
+                change.diff_data["item2"][0],
+                change.diff_data["item2"][1],
+            )
+        return ""
+
+    def format_content(self, diff: "ItemDiff"):
+        change = diff.content()
+        if change:
+            if change.diff_type == "added":
+                return "{}: {:>20}".format(change.diff_type, format_file_size(change.diff_data["added"]))
+            if change.diff_type == "removed":
+                return "{}: {:>18}".format(change.diff_type, format_file_size(change.diff_data["removed"]))
+            if "added" not in change.diff_data and "removed" not in change.diff_data:
+                return "modified:  (can't get size)"
+            return "{}: {:>8} {:>8}".format(
+                change.diff_type,
+                format_file_size(change.diff_data["added"], precision=1, sign=True),
+                format_file_size(-change.diff_data["removed"], precision=1, sign=True),
+            )
+        return ""
+
+    def format_time(self, key, diff: "ItemDiff"):
+        change = diff.changes().get(key)
+        return f"[{key}: {change.diff_data['item1']} -> {change.diff_data['item2']}]" if change else ""
+
+    def format_iso_time(self, key, diff: "ItemDiff"):
+        change = diff.changes().get(key)
+        return (
+            f"[{key}: {change.diff_data['item1'].isoformat()} -> {change.diff_data['item2'].isoformat()}]"
+            if change
+            else ""
+        )
+
+
 def file_status(mode):
 def file_status(mode):
     if stat.S_ISREG(mode):
     if stat.S_ISREG(mode):
         return "A"
         return "A"

+ 30 - 3
src/borg/item.pyi

@@ -1,4 +1,4 @@
-from typing import FrozenSet, Set, NamedTuple, Tuple, Mapping, Dict, List, Iterator, Callable, Any
+from typing import FrozenSet, Set, NamedTuple, Tuple, Mapping, Dict, List, Iterator, Callable, Any, Optional
 
 
 from .helpers import StableDict
 from .helpers import StableDict
 
 
@@ -247,9 +247,36 @@ class ManifestItem(PropDict):
     @item_keys.setter
     @item_keys.setter
     def item_keys(self, val: Tuple) -> None: ...
     def item_keys(self, val: Tuple) -> None: ...
 
 
+class DiffChange:
+    diff_type: str
+    diff_data: Dict[str, Any]
+    def __init__(self, diff_type: str, diff_data: Optional[Dict[str, Any]] = ...) -> None: ...
+    def to_dict(self) -> Dict[str, Any]: ...
+
 class ItemDiff:
 class ItemDiff:
-    def __init__(self, *args, **kwargs) -> None: ...
-    def _chunk_content_equal(self, c1: Iterator, c2: Iterator) -> bool: ...
+    path: str
+    def __init__(
+        self,
+        path: str,
+        item1: Item,
+        item2: Item,
+        chunk_1: Iterator,
+        chunk_2: Iterator,
+        numeric_ids: bool = ...,
+        can_compare_chunk_ids: bool = ...,
+    ) -> None: ...
+    def changes(self) -> Dict[str, DiffChange]: ...
+    def equal(self, content_only: bool = ...) -> bool: ...
+    def content(self) -> Optional[DiffChange]: ...
+    def ctime(self) -> Optional[DiffChange]: ...
+    def mtime(self) -> Optional[DiffChange]: ...
+    def mode(self) -> Optional[DiffChange]: ...
+    def type(self) -> Optional[DiffChange]: ...
+    def owner(self) -> Optional[DiffChange]: ...
+    def user(self) -> Optional[DiffChange]: ...
+    def group(self) -> Optional[DiffChange]: ...
+
+def chunk_content_equal(chunks_a: Iterator, chunks_b: Iterator) -> bool: ...
 
 
 class Key(PropDict):
 class Key(PropDict):
     @property
     @property

+ 91 - 48
src/borg/item.pyx

@@ -620,66 +620,76 @@ cpdef _init_names():
 _init_names()
 _init_names()
 
 
 
 
+class DiffChange:
+    """
+    Stores a change in a diff.
+
+    The diff_type denotes the type of change, e.g. "added", "removed", "modified".
+    The diff_data contains additional information about the change, e.g. the old and new mode.
+    """
+    def __init__(self, diff_type, diff_data=None):
+        self.diff_type = diff_type
+        self.diff_data = diff_data or {}
+
+    def to_dict(self):
+        return {"type": self.diff_type, **self.diff_data}
+
+
 class ItemDiff:
 class ItemDiff:
     """
     """
     Comparison of two items from different archives.
     Comparison of two items from different archives.
 
 
     The items may have different paths and still be considered equal (e.g. for renames).
     The items may have different paths and still be considered equal (e.g. for renames).
-    It does not include extended or time attributes in the comparison.
     """
     """
 
 
-    def __init__(self, item1, item2, chunk_iterator1, chunk_iterator2, numeric_ids=False, can_compare_chunk_ids=False, content_only=False):
+    def __init__(self, path, item1, item2, chunk_1, chunk_2, numeric_ids=False, can_compare_chunk_ids=False):
+        self.path = path
         self._item1 = item1
         self._item1 = item1
         self._item2 = item2
         self._item2 = item2
-        self._content_only = content_only
         self._numeric_ids = numeric_ids
         self._numeric_ids = numeric_ids
         self._can_compare_chunk_ids = can_compare_chunk_ids
         self._can_compare_chunk_ids = can_compare_chunk_ids
-        self.equal = self._equal(chunk_iterator1, chunk_iterator2)
-        changes = []
+        self._chunk_1 = chunk_1
+        self._chunk_2 = chunk_2
+        
+        self._changes = {}
 
 
         if self._item1.is_link() or self._item2.is_link():
         if self._item1.is_link() or self._item2.is_link():
-            changes.append(self._link_diff())
+            self._link_diff()
 
 
         if 'chunks' in self._item1 and 'chunks' in self._item2:
         if 'chunks' in self._item1 and 'chunks' in self._item2:
-            changes.append(self._content_diff())
+            self._content_diff()
 
 
         if self._item1.is_dir() or self._item2.is_dir():
         if self._item1.is_dir() or self._item2.is_dir():
-            changes.append(self._presence_diff('directory'))
+            self._presence_diff('directory')
 
 
         if self._item1.is_blk() or self._item2.is_blk():
         if self._item1.is_blk() or self._item2.is_blk():
-            changes.append(self._presence_diff('blkdev'))
+            self._presence_diff('blkdev')
 
 
         if self._item1.is_chr() or self._item2.is_chr():
         if self._item1.is_chr() or self._item2.is_chr():
-            changes.append(self._presence_diff('chrdev'))
+            self._presence_diff('chrdev')
 
 
         if self._item1.is_fifo() or self._item2.is_fifo():
         if self._item1.is_fifo() or self._item2.is_fifo():
-            changes.append(self._presence_diff('fifo'))
+            self._presence_diff('fifo')
 
 
-        if not self._content_only:
-            if not (self._item1.get('deleted') or self._item2.get('deleted')):
-                changes.append(self._owner_diff())
-                changes.append(self._mode_diff())
-                changes.extend(self._time_diffs())
+        if not (self._item1.get('deleted') or self._item2.get('deleted')):
+            self._owner_diff()
+            self._mode_diff()
+            self._time_diffs()
 
 
-        # filter out empty changes
-        self._changes = [ch for ch in changes if ch]
 
 
     def changes(self):
     def changes(self):
         return self._changes
         return self._changes
 
 
     def __repr__(self):
     def __repr__(self):
-        if self.equal:
-            return 'equal'
-        return ' '.join(str for d, str in self._changes)
+        return (' '.join(self._changes.keys())) or 'equal'
 
 
-    def _equal(self, chunk_iterator1, chunk_iterator2):
+    def equal(self, content_only=False):
         # if both are deleted, there is nothing at path regardless of what was deleted
         # if both are deleted, there is nothing at path regardless of what was deleted
         if self._item1.get('deleted') and self._item2.get('deleted'):
         if self._item1.get('deleted') and self._item2.get('deleted'):
             return True
             return True
 
 
         attr_list = ['deleted', 'target']
         attr_list = ['deleted', 'target']
-
-        if not self._content_only:
+        if not content_only:
             attr_list += ['mode', 'ctime', 'mtime']
             attr_list += ['mode', 'ctime', 'mtime']
             attr_list += ['uid', 'gid'] if self._numeric_ids else ['user', 'group']
             attr_list += ['uid', 'gid'] if self._numeric_ids else ['user', 'group']
 
 
@@ -693,74 +703,107 @@ class ItemDiff:
                 return False
                 return False
 
 
         if 'chunks' in self._item1 and 'chunks' in self._item2:
         if 'chunks' in self._item1 and 'chunks' in self._item2:
-            return self._content_equal(chunk_iterator1, chunk_iterator2)
+            return self._content_equal()
 
 
         return True
         return True
 
 
     def _presence_diff(self, item_type):
     def _presence_diff(self, item_type):
         if not self._item1.get('deleted') and self._item2.get('deleted'):
         if not self._item1.get('deleted') and self._item2.get('deleted'):
-            chg = 'removed ' + item_type
-            return ({"type": chg}, chg)
+            self._changes[item_type] = DiffChange(f"removed {item_type}")
+            return True
         if self._item1.get('deleted') and not self._item2.get('deleted'):
         if self._item1.get('deleted') and not self._item2.get('deleted'):
-            chg = 'added ' + item_type
-            return ({"type": chg}, chg)
+            self._changes[item_type] = DiffChange(f"added {item_type}")
+            return True
 
 
     def _link_diff(self):
     def _link_diff(self):
-        pd = self._presence_diff('link')
-        if pd is not None:
-            return pd
+        if self._presence_diff('link'):
+            return True
         if 'target' in self._item1 and 'target' in self._item2 and self._item1.target != self._item2.target:
         if 'target' in self._item1 and 'target' in self._item2 and self._item1.target != self._item2.target:
-            return ({"type": 'changed link'}, 'changed link')
+            self._changes['link'] = DiffChange('changed link')
+            return True
 
 
     def _content_diff(self):
     def _content_diff(self):
         if self._item1.get('deleted'):
         if self._item1.get('deleted'):
             sz = self._item2.get_size()
             sz = self._item2.get_size()
-            return ({"type": "added", "size": sz}, 'added {:>13}'.format(format_file_size(sz)))
+            self._changes['content'] = DiffChange("added", {"added": sz, "removed": 0})
+            return True
         if self._item2.get('deleted'):
         if self._item2.get('deleted'):
             sz = self._item1.get_size()
             sz = self._item1.get_size()
-            return ({"type": "removed", "size": sz}, 'removed {:>11}'.format(format_file_size(sz)))
+            self._changes['content'] = DiffChange("removed", {"added": 0, "removed": sz})
+            return True
         if not self._can_compare_chunk_ids:
         if not self._can_compare_chunk_ids:
-            return ({"type": "modified"}, "modified")
+            self._changes['content'] = DiffChange("modified")
+            return True
         chunk_ids1 = {c.id for c in self._item1.chunks}
         chunk_ids1 = {c.id for c in self._item1.chunks}
         chunk_ids2 = {c.id for c in self._item2.chunks}
         chunk_ids2 = {c.id for c in self._item2.chunks}
         added_ids = chunk_ids2 - chunk_ids1
         added_ids = chunk_ids2 - chunk_ids1
         removed_ids = chunk_ids1 - chunk_ids2
         removed_ids = chunk_ids1 - chunk_ids2
         added = self._item2.get_size(consider_ids=added_ids)
         added = self._item2.get_size(consider_ids=added_ids)
         removed = self._item1.get_size(consider_ids=removed_ids)
         removed = self._item1.get_size(consider_ids=removed_ids)
-        return ({"type": "modified", "added": added, "removed": removed},
-            '{:>9} {:>9}'.format(format_file_size(added, precision=1, sign=True),
-            format_file_size(-removed, precision=1, sign=True)))
+        self._changes['content'] = DiffChange("modified", {"added": added, "removed": removed})
+        return True
+
 
 
     def _owner_diff(self):
     def _owner_diff(self):
         u_attr, g_attr = ('uid', 'gid') if self._numeric_ids else ('user', 'group')
         u_attr, g_attr = ('uid', 'gid') if self._numeric_ids else ('user', 'group')
         u1, g1 = self._item1.get(u_attr), self._item1.get(g_attr)
         u1, g1 = self._item1.get(u_attr), self._item1.get(g_attr)
         u2, g2 = self._item2.get(u_attr), self._item2.get(g_attr)
         u2, g2 = self._item2.get(u_attr), self._item2.get(g_attr)
-        if (u1, g1) != (u2, g2):
-            return ({"type": "owner", "old_user": u1, "old_group": g1, "new_user": u2, "new_group": g2},
-                    '[{}:{} -> {}:{}]'.format(u1, g1, u2, g2))
+        if (u1, g1) == (u2, g2):
+            return False
+        self._changes['owner'] = DiffChange("changed owner", {"item1": (u1, g1), "item2": (u2, g2)})
+        if u1 != u2:
+            self._changes['user'] = DiffChange("changed user", {"item1": u1, "item2": u2})
+        if g1 != g2:
+            self._changes['group'] = DiffChange("changed group", {"item1": g1, "item2": g2})
+        return True
 
 
     def _mode_diff(self):
     def _mode_diff(self):
         if 'mode' in self._item1 and 'mode' in self._item2 and self._item1.mode != self._item2.mode:
         if 'mode' in self._item1 and 'mode' in self._item2 and self._item1.mode != self._item2.mode:
             mode1 = stat.filemode(self._item1.mode)
             mode1 = stat.filemode(self._item1.mode)
             mode2 = stat.filemode(self._item2.mode)
             mode2 = stat.filemode(self._item2.mode)
-            return ({"type": "mode", "old_mode": mode1, "new_mode": mode2}, '[{} -> {}]'.format(mode1, mode2))
+            self._changes['mode'] = DiffChange("changed mode", {"item1": mode1, "item2": mode2})
+            if mode1[0] != mode2[0]:
+                self._changes['type'] = DiffChange("changed type", {"item1": mode1[0], "item2": mode2[0]})
 
 
     def _time_diffs(self):
     def _time_diffs(self):
-        changes = []
         attrs = ["ctime", "mtime"]
         attrs = ["ctime", "mtime"]
         for attr in attrs:
         for attr in attrs:
             if attr in self._item1 and attr in self._item2 and self._item1.get(attr) != self._item2.get(attr):
             if attr in self._item1 and attr in self._item2 and self._item1.get(attr) != self._item2.get(attr):
                 ts1 = OutputTimestamp(safe_timestamp(self._item1.get(attr)))
                 ts1 = OutputTimestamp(safe_timestamp(self._item1.get(attr)))
                 ts2 = OutputTimestamp(safe_timestamp(self._item2.get(attr)))
                 ts2 = OutputTimestamp(safe_timestamp(self._item2.get(attr)))
-                changes.append(({"type": attr, f"old_{attr}": ts1, f"new_{attr}": ts2}, '[{}: {} -> {}]'.format(attr, ts1, ts2)))
-        return changes
+                self._changes[attr] = DiffChange(attr, {"item1": ts1, "item2": ts2},)
+        return True
+
+    def content(self):
+        return self._changes.get('content')
+
+    def ctime(self):
+        return self._changes.get('ctime')
+
+    def mtime(self):
+        return self._changes.get('mtime')
+
+    def mode(self):
+        return self._changes.get('mode')
+
+    def type(self):
+        return self._changes.get('type')
+
+    def owner(self):
+        return self._changes.get('owner')
+
+    def user(self):
+        return self._changes.get('user')
+
+    def group(self):
+        return self._changes.get('group')
 
 
-    def _content_equal(self, chunk_iterator1, chunk_iterator2):
+    def _content_equal(self):
         if self._can_compare_chunk_ids:
         if self._can_compare_chunk_ids:
             return self._item1.chunks == self._item2.chunks
             return self._item1.chunks == self._item2.chunks
         if self._item1.get_size() != self._item2.get_size():
         if self._item1.get_size() != self._item2.get_size():
             return False
             return False
-        return chunks_contents_equal(chunk_iterator1, chunk_iterator2)
+        return chunks_contents_equal(self._chunk_1, self._chunk_2)
 
 
 
 
 def chunks_contents_equal(chunks_a, chunks_b):
 def chunks_contents_equal(chunks_a, chunks_b):

+ 43 - 27
src/borg/testsuite/archiver/diff_cmd.py

@@ -72,22 +72,20 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd(f"--repo={self.repository_location}", "create", "test1b", "input", "--chunker-params", "16,18,17,4095")
         self.cmd(f"--repo={self.repository_location}", "create", "test1b", "input", "--chunker-params", "16,18,17,4095")
 
 
         def do_asserts(output, can_compare_ids, content_only=False):
         def do_asserts(output, can_compare_ids, content_only=False):
-            # File contents changed (deleted and replaced with a new file)
-            change = "B" if can_compare_ids else "{:<19}".format("modified")
-            lines = output.splitlines()
+            lines: list = output.splitlines()
             assert "file_replaced" in output  # added to debug #3494
             assert "file_replaced" in output  # added to debug #3494
+            change = "modified.*B" if can_compare_ids else r"modified:  \(can't get size\)"
             self.assert_line_exists(lines, f"{change}.*input/file_replaced")
             self.assert_line_exists(lines, f"{change}.*input/file_replaced")
-
             # File unchanged
             # File unchanged
             assert "input/file_unchanged" not in output
             assert "input/file_unchanged" not in output
 
 
             # Directory replaced with a regular file
             # Directory replaced with a regular file
             if "BORG_TESTS_IGNORE_MODES" not in os.environ and not is_win32 and not content_only:
             if "BORG_TESTS_IGNORE_MODES" not in os.environ and not is_win32 and not content_only:
-                self.assert_line_exists(lines, "drwxr-xr-x -> -rwxr-xr-x.*input/dir_replaced_with_file")
+                self.assert_line_exists(lines, "[drwxr-xr-x -> -rwxr-xr-x].*input/dir_replaced_with_file")
 
 
             # Basic directory cases
             # Basic directory cases
-            assert "added directory     input/dir_added" in output
-            assert "removed directory   input/dir_removed" in output
+            assert "added directory             input/dir_added" in output
+            assert "removed directory           input/dir_removed" in output
 
 
             if are_symlinks_supported():
             if are_symlinks_supported():
                 # Basic symlink cases
                 # Basic symlink cases
@@ -96,8 +94,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
                 self.assert_line_exists(lines, "removed link.*input/link_removed")
                 self.assert_line_exists(lines, "removed link.*input/link_removed")
 
 
                 # Symlink replacing or being replaced
                 # Symlink replacing or being replaced
-                assert "input/dir_replaced_with_link" in output
-                assert "input/link_replaced_by_file" in output
+                if not content_only:
+                    assert "input/dir_replaced_with_link" in output
+                    assert "input/link_replaced_by_file" in output
 
 
                 # Symlink target removed. Should not affect the symlink at all.
                 # Symlink target removed. Should not affect the symlink at all.
                 assert "input/link_target_removed" not in output
                 assert "input/link_target_removed" not in output
@@ -105,7 +104,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             # The inode has two links and the file contents changed. Borg
             # The inode has two links and the file contents changed. Borg
             # should notice the changes in both links. However, the symlink
             # should notice the changes in both links. However, the symlink
             # pointing to the file is not changed.
             # pointing to the file is not changed.
-            change = "0 B" if can_compare_ids else "{:<19}".format("modified")
+            change = "modified.*0 B" if can_compare_ids else r"modified:  \(can't get size\)"
             self.assert_line_exists(lines, f"{change}.*input/empty")
             self.assert_line_exists(lines, f"{change}.*input/empty")
             if are_hardlinks_supported():
             if are_hardlinks_supported():
                 self.assert_line_exists(lines, f"{change}.*input/hardlink_contents_changed")
                 self.assert_line_exists(lines, f"{change}.*input/hardlink_contents_changed")
@@ -114,18 +113,18 @@ class ArchiverTestCase(ArchiverTestCaseBase):
 
 
             # Added a new file and a hard link to it. Both links to the same
             # Added a new file and a hard link to it. Both links to the same
             # inode should appear as separate files.
             # inode should appear as separate files.
-            assert "added       2.05 kB input/file_added" in output
+            assert "added:              2.05 kB input/file_added" in output
             if are_hardlinks_supported():
             if are_hardlinks_supported():
-                assert "added       2.05 kB input/hardlink_added" in output
+                assert "added:              2.05 kB input/hardlink_added" in output
 
 
             # check if a diff between nonexistent and empty new file is found
             # check if a diff between nonexistent and empty new file is found
-            assert "added           0 B input/file_empty_added" in output
+            assert "added:                  0 B input/file_empty_added" in output
 
 
             # The inode has two links and both of them are deleted. They should
             # The inode has two links and both of them are deleted. They should
             # appear as two deleted files.
             # appear as two deleted files.
-            assert "removed       256 B input/file_removed" in output
+            assert "removed:              256 B input/file_removed" in output
             if are_hardlinks_supported():
             if are_hardlinks_supported():
-                assert "removed       256 B input/hardlink_removed" in output
+                assert "removed:              256 B input/hardlink_removed" in output
 
 
             if are_hardlinks_supported() and content_only:
             if are_hardlinks_supported() and content_only:
                 # Another link (marked previously as the source in borg) to the
                 # Another link (marked previously as the source in borg) to the
@@ -143,7 +142,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
                 chgsets = [j["changes"] for j in data if j["path"] == filename]
                 chgsets = [j["changes"] for j in data if j["path"] == filename]
                 assert len(chgsets) < 2
                 assert len(chgsets) < 2
                 # return a flattened list of changes for given filename
                 # return a flattened list of changes for given filename
-                return [chg for chgset in chgsets for chg in chgset]
+                return sum(chgsets, [])
 
 
             # convert output to list of dicts
             # convert output to list of dicts
             joutput = [json.loads(line) for line in output.split("\n") if line]
             joutput = [json.loads(line) for line in output.split("\n") if line]
@@ -157,7 +156,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
 
 
             # Directory replaced with a regular file
             # Directory replaced with a regular file
             if "BORG_TESTS_IGNORE_MODES" not in os.environ and not is_win32 and not content_only:
             if "BORG_TESTS_IGNORE_MODES" not in os.environ and not is_win32 and not content_only:
-                assert {"type": "mode", "old_mode": "drwxr-xr-x", "new_mode": "-rwxr-xr-x"} in get_changes(
+                assert {"type": "changed mode", "item1": "drwxr-xr-x", "item2": "-rwxr-xr-x"} in get_changes(
                     "input/dir_replaced_with_file", joutput
                     "input/dir_replaced_with_file", joutput
                 )
                 )
 
 
@@ -175,11 +174,11 @@ class ArchiverTestCase(ArchiverTestCaseBase):
 
 
                 if not content_only:
                 if not content_only:
                     assert any(
                     assert any(
-                        chg["type"] == "mode" and chg["new_mode"].startswith("l")
+                        chg["type"] == "changed mode" and chg["item1"].startswith("d") and chg["item2"].startswith("l")
                         for chg in get_changes("input/dir_replaced_with_link", joutput)
                         for chg in get_changes("input/dir_replaced_with_link", joutput)
                     ), get_changes("input/dir_replaced_with_link", joutput)
                     ), get_changes("input/dir_replaced_with_link", joutput)
                     assert any(
                     assert any(
-                        chg["type"] == "mode" and chg["old_mode"].startswith("l")
+                        chg["type"] == "changed mode" and chg["item1"].startswith("l") and chg["item2"].startswith("-")
                         for chg in get_changes("input/link_replaced_by_file", joutput)
                         for chg in get_changes("input/link_replaced_by_file", joutput)
                     ), get_changes("input/link_replaced_by_file", joutput)
                     ), get_changes("input/link_replaced_by_file", joutput)
 
 
@@ -198,18 +197,18 @@ class ArchiverTestCase(ArchiverTestCaseBase):
 
 
             # Added a new file and a hard link to it. Both links to the same
             # Added a new file and a hard link to it. Both links to the same
             # inode should appear as separate files.
             # inode should appear as separate files.
-            assert {"type": "added", "size": 2048} in get_changes("input/file_added", joutput)
+            assert {"added": 2048, "removed": 0, "type": "added"} in get_changes("input/file_added", joutput)
             if are_hardlinks_supported():
             if are_hardlinks_supported():
-                assert {"type": "added", "size": 2048} in get_changes("input/hardlink_added", joutput)
+                assert {"added": 2048, "removed": 0, "type": "added"} in get_changes("input/hardlink_added", joutput)
 
 
             # check if a diff between nonexistent and empty new file is found
             # check if a diff between nonexistent and empty new file is found
-            assert {"type": "added", "size": 0} in get_changes("input/file_empty_added", joutput)
+            assert {"added": 0, "removed": 0, "type": "added"} in get_changes("input/file_empty_added", joutput)
 
 
             # The inode has two links and both of them are deleted. They should
             # The inode has two links and both of them are deleted. They should
             # appear as two deleted files.
             # appear as two deleted files.
-            assert {"type": "removed", "size": 256} in get_changes("input/file_removed", joutput)
+            assert {"added": 0, "removed": 256, "type": "removed"} in get_changes("input/file_removed", joutput)
             if are_hardlinks_supported():
             if are_hardlinks_supported():
-                assert {"type": "removed", "size": 256} in get_changes("input/hardlink_removed", joutput)
+                assert {"added": 0, "removed": 256, "type": "removed"} in get_changes("input/hardlink_removed", joutput)
 
 
             if are_hardlinks_supported() and content_only:
             if are_hardlinks_supported() and content_only:
                 # Another link (marked previously as the source in borg) to the
                 # Another link (marked previously as the source in borg) to the
@@ -251,14 +250,28 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             time.sleep(1)  # HFS has a 1s timestamp granularity
             time.sleep(1)  # HFS has a 1s timestamp granularity
         self.create_regular_file("test_file", size=15)
         self.create_regular_file("test_file", size=15)
         self.cmd(f"--repo={self.repository_location}", "create", "archive2", "input")
         self.cmd(f"--repo={self.repository_location}", "create", "archive2", "input")
-        output = self.cmd(f"--repo={self.repository_location}", "diff", "archive1", "archive2")
+        output = self.cmd(
+            f"--repo={self.repository_location}",
+            "diff",
+            "archive1",
+            "archive2",
+            "--format",
+            "'{mtime}{ctime} {path}{NL}'",
+        )
         self.assert_in("mtime", output)
         self.assert_in("mtime", output)
         self.assert_in("ctime", output)  # Should show up on windows as well since it is a new file.
         self.assert_in("ctime", output)  # Should show up on windows as well since it is a new file.
         if is_darwin:
         if is_darwin:
             time.sleep(1)  # HFS has a 1s timestamp granularity
             time.sleep(1)  # HFS has a 1s timestamp granularity
         os.chmod("input/test_file", 0o777)
         os.chmod("input/test_file", 0o777)
         self.cmd(f"--repo={self.repository_location}", "create", "archive3", "input")
         self.cmd(f"--repo={self.repository_location}", "create", "archive3", "input")
-        output = self.cmd(f"--repo={self.repository_location}", "diff", "archive2", "archive3")
+        output = self.cmd(
+            f"--repo={self.repository_location}",
+            "diff",
+            "archive2",
+            "archive3",
+            "--format",
+            "'{mtime}{ctime} {path}{NL}'",
+        )
         self.assert_not_in("mtime", output)
         self.assert_not_in("mtime", output)
         # Checking platform because ctime should not be shown on windows since it wasn't recreated.
         # Checking platform because ctime should not be shown on windows since it wasn't recreated.
         if not is_win32:
         if not is_win32:
@@ -294,7 +307,10 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             "e_file_changed",
             "e_file_changed",
             "f_file_removed",
             "f_file_removed",
         ]
         ]
-        assert all(x in line for x, line in zip(expected, output.splitlines()))
+        assert isinstance(output, str)
+        outputs = output.splitlines()
+        assert len(outputs) == len(expected)
+        assert all(x in line for x, line in zip(expected, outputs))
 
 
 
 
 class RemoteArchiverTestCase(RemoteArchiverTestCaseBase, ArchiverTestCase):
 class RemoteArchiverTestCase(RemoteArchiverTestCaseBase, ArchiverTestCase):