فهرست منبع

Merge pull request #2842 from imsodin/diffmount-diff_refactor

Refactor diff functionality
enkore 8 سال پیش
والد
کامیت
5fb2b4b0a6
4 فایل تغییر یافته به همراه 267 افزوده شده و 229 حذف شده
  1. 86 2
      src/borg/archive.py
  2. 11 216
      src/borg/archiver.py
  3. 166 7
      src/borg/item.pyx
  4. 4 4
      src/borg/testsuite/archiver.py

+ 86 - 2
src/borg/archive.py

@@ -5,12 +5,13 @@ import socket
 import stat
 import stat
 import sys
 import sys
 import time
 import time
+from collections import OrderedDict
 from contextlib import contextmanager
 from contextlib import contextmanager
 from datetime import datetime, timezone, timedelta
 from datetime import datetime, timezone, timedelta
 from functools import partial
 from functools import partial
 from getpass import getuser
 from getpass import getuser
 from io import BytesIO
 from io import BytesIO
-from itertools import groupby
+from itertools import groupby, zip_longest
 from shutil import get_terminal_size
 from shutil import get_terminal_size
 
 
 import msgpack
 import msgpack
@@ -40,7 +41,7 @@ from .helpers import bin_to_hex
 from .helpers import safe_ns
 from .helpers import safe_ns
 from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi
 from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi
 from .patterns import PathPrefixPattern, FnmatchPattern, IECommand
 from .patterns import PathPrefixPattern, FnmatchPattern, IECommand
-from .item import Item, ArchiveItem
+from .item import Item, ArchiveItem, ItemDiff
 from .platform import acl_get, acl_set, set_flags, get_flags, swidth
 from .platform import acl_get, acl_set, set_flags, get_flags, swidth
 from .remote import cache_if_remote
 from .remote import cache_if_remote
 from .repository import Repository, LIST_SCAN_LIMIT
 from .repository import Repository, LIST_SCAN_LIMIT
@@ -819,6 +820,89 @@ Utilization of max. archive size: {csize_max:.0%}
             # Was this EPERM due to the O_NOATIME flag? Try again without it:
             # Was this EPERM due to the O_NOATIME flag? Try again without it:
             return os.open(path, flags_normal)
             return os.open(path, flags_normal)
 
 
@staticmethod
def compare_archives_iter(archive1, archive2, matcher=None, can_compare_chunk_ids=False):
    """
    Yields tuples with a path and an ItemDiff instance describing changes/indicating equality.

    :param matcher: PatternMatcher class to restrict results to only matching paths.
    :param can_compare_chunk_ids: Whether --chunker-params are the same for both archives.
    """

    def hardlink_master_seen(item):
        # A hardlink slave can only be compared after its master was seen,
        # because only the master carries the chunk list.
        return 'source' not in item or not hardlinkable(item.mode) or item.source in hardlink_masters

    def is_hardlink_master(item):
        return item.get('hardlink_master', True) and 'source' not in item

    def update_hardlink_masters(item1, item2):
        if is_hardlink_master(item1) or is_hardlink_master(item2):
            hardlink_masters[item1.path] = (item1, item2)

    def has_hardlink_master(item, hardlink_masters):
        return hardlinkable(item.mode) and item.get('source') in hardlink_masters

    def compare_items(item1, item2):
        # Substitute the master item for hardlink slaves, so the comparison
        # operates on the actual chunk lists.
        if has_hardlink_master(item1, hardlink_masters):
            item1 = hardlink_masters[item1.source][0]
        if has_hardlink_master(item2, hardlink_masters):
            item2 = hardlink_masters[item2.source][1]
        return ItemDiff(item1, item2,
                        archive1.pipeline.fetch_many([c.id for c in item1.get('chunks', [])]),
                        archive2.pipeline.fetch_many([c.id for c in item2.get('chunks', [])]),
                        can_compare_chunk_ids=can_compare_chunk_ids)

    def defer_if_necessary(item1, item2):
        """Adds item tuple to deferred if necessary and returns True, if items were deferred"""
        update_hardlink_masters(item1, item2)
        defer = not hardlink_master_seen(item1) or not hardlink_master_seen(item2)
        if defer:
            deferred.append((item1, item2))
        return defer

    orphans_archive1 = OrderedDict()
    orphans_archive2 = OrderedDict()
    deferred = []
    hardlink_masters = {}

    for item1, item2 in zip_longest(
            archive1.iter_items(lambda item: matcher.match(item.path)),
            archive2.iter_items(lambda item: matcher.match(item.path)),
    ):
        if item1 and item2 and item1.path == item2.path:
            if not defer_if_necessary(item1, item2):
                yield (item1.path, compare_items(item1, item2))
            continue
        if item1:
            matching_orphan = orphans_archive2.pop(item1.path, None)
            if matching_orphan:
                if not defer_if_necessary(item1, matching_orphan):
                    yield (item1.path, compare_items(item1, matching_orphan))
            else:
                orphans_archive1[item1.path] = item1
        if item2:
            matching_orphan = orphans_archive1.pop(item2.path, None)
            if matching_orphan:
                if not defer_if_necessary(matching_orphan, item2):
                    yield (matching_orphan.path, compare_items(matching_orphan, item2))
            else:
                orphans_archive2[item2.path] = item2
    # At this point orphans_* contain items that had no matching partner in the other archive
    for added in orphans_archive2.values():
        path = added.path
        deleted_item = Item.create_deleted(path)
        update_hardlink_masters(deleted_item, added)
        yield (path, compare_items(deleted_item, added))
    for deleted in orphans_archive1.values():
        path = deleted.path
        deleted_item = Item.create_deleted(path)
        update_hardlink_masters(deleted, deleted_item)
        yield (path, compare_items(deleted, deleted_item))
    for item1, item2 in deferred:
        assert hardlink_master_seen(item1)
        assert hardlink_master_seen(item2)
        # Bug fix: yield the deferred item's own path. The previous code reused
        # the stale local 'path' left over from the orphan loops above, which
        # mislabeled every deferred diff (or raised NameError when there were
        # no orphans at all).
        yield (item1.path, compare_items(item1, item2))
+
 
 
 class MetadataCollector:
 class MetadataCollector:
     def __init__(self, *, noatime, noctime, numeric_owner):
     def __init__(self, *, noatime, noctime, numeric_owner):

+ 11 - 216
src/borg/archiver.py

@@ -195,33 +195,6 @@ class Archiver:
             else:
             else:
                 logging.getLogger('borg.output.list').info("%1s %s", status, remove_surrogates(path))
                 logging.getLogger('borg.output.list').info("%1s %s", status, remove_surrogates(path))
 
 
-    @staticmethod
-    def compare_chunk_contents(chunks1, chunks2):
-        """Compare two chunk iterators (like returned by :meth:`.DownloadPipeline.fetch_many`)"""
-        end = object()
-        alen = ai = 0
-        blen = bi = 0
-        while True:
-            if not alen - ai:
-                a = next(chunks1, end)
-                if a is end:
-                    return not blen - bi and next(chunks2, end) is end
-                a = memoryview(a)
-                alen = len(a)
-                ai = 0
-            if not blen - bi:
-                b = next(chunks2, end)
-                if b is end:
-                    return not alen - ai and next(chunks1, end) is end
-                b = memoryview(b)
-                blen = len(b)
-                bi = 0
-            slicelen = min(alen - ai, blen - bi)
-            if a[ai:ai + slicelen] != b[bi:bi + slicelen]:
-                return False
-            ai += slicelen
-            bi += slicelen
-
     @staticmethod
     @staticmethod
     def build_matcher(inclexcl_patterns, include_paths):
     def build_matcher(inclexcl_patterns, include_paths):
         matcher = PatternMatcher()
         matcher = PatternMatcher()
@@ -967,195 +940,9 @@ class Archiver:
     @with_archive
     @with_archive
     def do_diff(self, args, repository, manifest, key, archive):
     def do_diff(self, args, repository, manifest, key, archive):
         """Diff contents of two archives"""
         """Diff contents of two archives"""
-        def fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2):
-            chunks1 = archive1.pipeline.fetch_many(chunk_ids1)
-            chunks2 = archive2.pipeline.fetch_many(chunk_ids2)
-            return self.compare_chunk_contents(chunks1, chunks2)
-
-        def sum_chunk_size(item, consider_ids=None):
-            if item.get('deleted'):
-                size = None
-            else:
-                if consider_ids is not None:  # consider only specific chunks
-                    size = sum(chunk.size for chunk in item.chunks if chunk.id in consider_ids)
-                else:  # consider all chunks
-                    size = item.get_size()
-            return size
-
-        def get_owner(item):
-            if args.numeric_owner:
-                return item.uid, item.gid
-            else:
-                return item.user, item.group
-
-        def get_mode(item):
-            if 'mode' in item:
-                return stat.filemode(item.mode)
-            else:
-                return [None]
-
-        def has_hardlink_master(item, hardlink_masters):
-            return hardlinkable(item.mode) and item.get('source') in hardlink_masters
-
-        def compare_link(item1, item2):
-            # These are the simple link cases. For special cases, e.g. if a
-            # regular file is replaced with a link or vice versa, it is
-            # indicated in compare_mode instead.
-            if item1.get('deleted'):
-                return 'added link'
-            elif item2.get('deleted'):
-                return 'removed link'
-            elif 'source' in item1 and 'source' in item2 and item1.source != item2.source:
-                return 'changed link'
-
-        def contents_changed(item1, item2):
-            if can_compare_chunk_ids:
-                return item1.chunks != item2.chunks
-            else:
-                if sum_chunk_size(item1) != sum_chunk_size(item2):
-                    return True
-                else:
-                    chunk_ids1 = [c.id for c in item1.chunks]
-                    chunk_ids2 = [c.id for c in item2.chunks]
-                    return not fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2)
-
-        def compare_content(path, item1, item2):
-            if contents_changed(item1, item2):
-                if item1.get('deleted'):
-                    return 'added {:>13}'.format(format_file_size(sum_chunk_size(item2)))
-                if item2.get('deleted'):
-                    return 'removed {:>11}'.format(format_file_size(sum_chunk_size(item1)))
-                if not can_compare_chunk_ids:
-                    return 'modified'
-                chunk_ids1 = {c.id for c in item1.chunks}
-                chunk_ids2 = {c.id for c in item2.chunks}
-                added_ids = chunk_ids2 - chunk_ids1
-                removed_ids = chunk_ids1 - chunk_ids2
-                added = sum_chunk_size(item2, added_ids)
-                removed = sum_chunk_size(item1, removed_ids)
-                return '{:>9} {:>9}'.format(format_file_size(added, precision=1, sign=True),
-                                            format_file_size(-removed, precision=1, sign=True))
-
-        def compare_directory(item1, item2):
-            if item2.get('deleted') and not item1.get('deleted'):
-                return 'removed directory'
-            elif item1.get('deleted') and not item2.get('deleted'):
-                return 'added directory'
-
-        def compare_owner(item1, item2):
-            user1, group1 = get_owner(item1)
-            user2, group2 = get_owner(item2)
-            if user1 != user2 or group1 != group2:
-                return '[{}:{} -> {}:{}]'.format(user1, group1, user2, group2)
-
-        def compare_mode(item1, item2):
-            if item1.mode != item2.mode:
-                return '[{} -> {}]'.format(get_mode(item1), get_mode(item2))
-
-        def compare_items(output, path, item1, item2, hardlink_masters, deleted=False):
-            """
-            Compare two items with identical paths.
-            :param deleted: Whether one of the items has been deleted
-            """
-            changes = []
-
-            if has_hardlink_master(item1, hardlink_masters):
-                item1 = hardlink_masters[item1.source][0]
-
-            if has_hardlink_master(item2, hardlink_masters):
-                item2 = hardlink_masters[item2.source][1]
-
-            if get_mode(item1)[0] == 'l' or get_mode(item2)[0] == 'l':
-                changes.append(compare_link(item1, item2))
-
-            if 'chunks' in item1 and 'chunks' in item2:
-                changes.append(compare_content(path, item1, item2))
 
 
-            if get_mode(item1)[0] == 'd' or get_mode(item2)[0] == 'd':
-                changes.append(compare_directory(item1, item2))
-
-            if not deleted:
-                changes.append(compare_owner(item1, item2))
-                changes.append(compare_mode(item1, item2))
-
-            changes = [x for x in changes if x]
-            if changes:
-                output_line = (remove_surrogates(path), ' '.join(changes))
-
-                if args.sort:
-                    output.append(output_line)
-                else:
-                    print_output(output_line)
-
-        def print_output(line):
-            print("{:<19} {}".format(line[1], line[0]))
-
-        def compare_archives(archive1, archive2, matcher):
-            def hardlink_master_seen(item):
-                return 'source' not in item or not hardlinkable(item.mode) or item.source in hardlink_masters
-
-            def is_hardlink_master(item):
-                return item.get('hardlink_master', True) and 'source' not in item
-
-            def update_hardlink_masters(item1, item2):
-                if is_hardlink_master(item1) or is_hardlink_master(item2):
-                    hardlink_masters[item1.path] = (item1, item2)
-
-            def compare_or_defer(item1, item2):
-                update_hardlink_masters(item1, item2)
-                if not hardlink_master_seen(item1) or not hardlink_master_seen(item2):
-                    deferred.append((item1, item2))
-                else:
-                    compare_items(output, item1.path, item1, item2, hardlink_masters)
-
-            orphans_archive1 = collections.OrderedDict()
-            orphans_archive2 = collections.OrderedDict()
-            deferred = []
-            hardlink_masters = {}
-            output = []
-
-            for item1, item2 in zip_longest(
-                    archive1.iter_items(lambda item: matcher.match(item.path)),
-                    archive2.iter_items(lambda item: matcher.match(item.path)),
-            ):
-                if item1 and item2 and item1.path == item2.path:
-                    compare_or_defer(item1, item2)
-                    continue
-                if item1:
-                    matching_orphan = orphans_archive2.pop(item1.path, None)
-                    if matching_orphan:
-                        compare_or_defer(item1, matching_orphan)
-                    else:
-                        orphans_archive1[item1.path] = item1
-                if item2:
-                    matching_orphan = orphans_archive1.pop(item2.path, None)
-                    if matching_orphan:
-                        compare_or_defer(matching_orphan, item2)
-                    else:
-                        orphans_archive2[item2.path] = item2
-            # At this point orphans_* contain items that had no matching partner in the other archive
-            deleted_item = Item(
-                deleted=True,
-                chunks=[],
-                mode=0,
-            )
-            for added in orphans_archive2.values():
-                path = added.path
-                deleted_item.path = path
-                update_hardlink_masters(deleted_item, added)
-                compare_items(output, path, deleted_item, added, hardlink_masters, deleted=True)
-            for deleted in orphans_archive1.values():
-                path = deleted.path
-                deleted_item.path = path
-                update_hardlink_masters(deleted, deleted_item)
-                compare_items(output, path, deleted, deleted_item, hardlink_masters, deleted=True)
-            for item1, item2 in deferred:
-                assert hardlink_master_seen(item1)
-                assert hardlink_master_seen(item2)
-                compare_items(output, item1.path, item1, item2, hardlink_masters)
-
-            for line in sorted(output):
-                print_output(line)
def print_output(diff, path):
    """Print one diff line: the change summary padded to 19 columns, then the path."""
    print(f'{diff:<19} {path}')
 
 
         archive1 = archive
         archive1 = archive
         archive2 = Archive(repository, key, manifest, args.archive2,
         archive2 = Archive(repository, key, manifest, args.archive2,
@@ -1170,7 +957,15 @@ class Archiver:
 
 
         matcher = self.build_matcher(args.patterns, args.paths)
         matcher = self.build_matcher(args.patterns, args.paths)
 
 
-        compare_archives(archive1, archive2, matcher)
+        diffs = Archive.compare_archives_iter(archive1, archive2, matcher, can_compare_chunk_ids=can_compare_chunk_ids)
+        # Conversion to string and filtering for diff.equal to save memory if sorting
+        diffs = ((path, str(diff)) for path, diff in diffs if not diff.equal)
+
+        if args.sort:
+            diffs = sorted(diffs)
+
+        for path, diff in diffs:
+            print_output(diff, path)
 
 
         for pattern in matcher.get_unmatched_include_patterns():
         for pattern in matcher.get_unmatched_include_patterns():
             self.print_warning("Include pattern '%s' never matched.", pattern)
             self.print_warning("Include pattern '%s' never matched.", pattern)

+ 166 - 7
src/borg/item.pyx

@@ -5,6 +5,7 @@ from .constants import ITEM_KEYS
 from .helpers import safe_encode, safe_decode
 from .helpers import safe_encode, safe_decode
 from .helpers import bigint_to_int, int_to_bigint
 from .helpers import bigint_to_int, int_to_bigint
 from .helpers import StableDict
 from .helpers import StableDict
+from .helpers import format_file_size
 
 
 cdef extern from "_item.c":
 cdef extern from "_item.c":
     object _object_to_optr(object obj)
     object _object_to_optr(object obj)
@@ -184,19 +185,22 @@ class Item(PropDict):
 
 
     part = PropDict._make_property('part', int)
     part = PropDict._make_property('part', int)
 
 
-    def get_size(self, hardlink_masters=None, memorize=False, compressed=False, from_chunks=False):
+    def get_size(self, hardlink_masters=None, memorize=False, compressed=False, from_chunks=False, consider_ids=None):
         """
         """
         Determine the (uncompressed or compressed) size of this item.
         Determine the (uncompressed or compressed) size of this item.
 
 
-        For hardlink slaves, the size is computed via the hardlink master's
-        chunk list, if available (otherwise size will be returned as 0).
-
-        If memorize is True, the computed size value will be stored into the item.
+        :param hardlink_masters: If given, the size of hardlink slaves is computed via the hardlink master's chunk list,
+        otherwise size will be returned as 0.
+        :param memorize: Whether the computed size value will be stored into the item.
+        :param compressed: Whether the compressed or uncompressed size will be returned.
+        :param from_chunks: If true, size is computed from chunks even if a precomputed value is available.
+        :param consider_ids: Returns the size of the given ids only.
         """
         """
         attr = 'csize' if compressed else 'size'
         attr = 'csize' if compressed else 'size'
         assert not (compressed and memorize), 'Item does not have a csize field.'
         assert not (compressed and memorize), 'Item does not have a csize field.'
+        assert not (consider_ids is not None and memorize), "Can't store size when considering only certain ids"
         try:
         try:
-            if from_chunks:
+            if from_chunks or consider_ids is not None:
                 raise AttributeError
                 raise AttributeError
             size = getattr(self, attr)
             size = getattr(self, attr)
         except AttributeError:
         except AttributeError:
@@ -226,7 +230,10 @@ class Item(PropDict):
                         chunks, _ = hardlink_masters.get(master, (None, None))
                         chunks, _ = hardlink_masters.get(master, (None, None))
                 if chunks is None:
                 if chunks is None:
                     return 0
                     return 0
-            size = sum(getattr(ChunkListEntry(*chunk), attr) for chunk in chunks)
+            if consider_ids is not None:
+                size = sum(getattr(ChunkListEntry(*chunk), attr) for chunk in chunks if chunk.id in consider_ids)
+            else:
+                size = sum(getattr(ChunkListEntry(*chunk), attr) for chunk in chunks)
             # if requested, memorize the precomputed (c)size for items that have an own chunks list:
             # if requested, memorize the precomputed (c)size for items that have an own chunks list:
             if memorize and having_chunks:
             if memorize and having_chunks:
                 setattr(self, attr, size)
                 setattr(self, attr, size)
@@ -251,6 +258,21 @@ class Item(PropDict):
     def from_optr(self, optr):
     def from_optr(self, optr):
         return _optr_to_object(optr)
         return _optr_to_object(optr)
 
 
@classmethod
def create_deleted(cls, path):
    """Return a placeholder item representing a deleted entry at *path*."""
    return cls(deleted=True, chunks=[], mode=0, path=path)

def is_link(self):
    """Whether this item's mode marks it as a symbolic link."""
    return self._is_type(stat.S_ISLNK)

def is_dir(self):
    """Whether this item's mode marks it as a directory."""
    return self._is_type(stat.S_ISDIR)

def _is_type(self, typetest):
    # Items without a mode attribute match no file type at all.
    try:
        mode = self.mode
    except AttributeError:
        return False
    return typetest(mode)
 
 
 
 
 class EncryptedKey(PropDict):
 class EncryptedKey(PropDict):
@@ -358,3 +380,140 @@ class ManifestItem(PropDict):
     timestamp = PropDict._make_property('timestamp', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
     timestamp = PropDict._make_property('timestamp', str, 'surrogate-escaped str', encode=safe_encode, decode=safe_decode)
     config = PropDict._make_property('config', dict)
     config = PropDict._make_property('config', dict)
     item_keys = PropDict._make_property('item_keys', tuple)
     item_keys = PropDict._make_property('item_keys', tuple)
+
class ItemDiff:
    """
    Comparison of two items from different archives.

    The items may have different paths and still be considered equal (e.g. for renames).
    It does not include extended or time attributes in the comparison.
    """

    def __init__(self, item1, item2, chunk_iterator1, chunk_iterator2, numeric_owner=False, can_compare_chunk_ids=False):
        # The chunk iterators are consumed once, right here in _equal(), so
        # equality is computed eagerly and cached on self.equal.
        # numeric_owner: compare uid/gid instead of user/group names.
        # can_compare_chunk_ids: chunker params match, so identical chunk id
        # lists imply identical content — no chunk data needs to be fetched.
        self._item1 = item1
        self._item2 = item2
        self._numeric_owner = numeric_owner
        self._can_compare_chunk_ids = can_compare_chunk_ids
        self.equal = self._equal(chunk_iterator1, chunk_iterator2)

    def __repr__(self):
        # Human-readable change summary, e.g. "added 1.2 kB" or
        # "[user:group -> user2:group2]". Empty string pieces (helpers return
        # None for "no change of this kind") are filtered out.
        if self.equal:
            return 'equal'

        changes = []

        if self._item1.is_link() or self._item2.is_link():
            changes.append(self._link_string())

        if 'chunks' in self._item1 and 'chunks' in self._item2:
            changes.append(self._content_string())

        if self._item1.is_dir() or self._item2.is_dir():
            changes.append(self._dir_string())

        if not (self._item1.get('deleted') or self._item2.get('deleted')):
            changes.append(self._owner_string())
            changes.append(self._mode_string())

        return ' '.join((x for x in changes if x))

    def _equal(self, chunk_iterator1, chunk_iterator2):
        """Return True if the two items are considered equal (metadata and content)."""
        # if both are deleted, there is nothing at path regardless of what was deleted
        if self._item1.get('deleted') and self._item2.get('deleted'):
            return True

        attr_list = ['deleted', 'mode', 'source']
        attr_list += ['uid', 'gid'] if self._numeric_owner else ['user', 'group']
        for attr in attr_list:
            if self._item1.get(attr) != self._item2.get(attr):
                return False

        if 'mode' in self._item1:     # mode of item1 and item2 is equal
            if (self._item1.is_link() and 'source' in self._item1 and 'source' in self._item2
                and self._item1.source != self._item2.source):
                return False

        if 'chunks' in self._item1 and 'chunks' in self._item2:
            return self._content_equal(chunk_iterator1, chunk_iterator2)

        return True

    def _link_string(self):
        # Simple link cases only; regular-file-to-link transitions show up
        # via the mode change instead. Returns None when links are unchanged.
        if self._item1.get('deleted'):
            return 'added link'
        if self._item2.get('deleted'):
            return 'removed link'
        if 'source' in self._item1 and 'source' in self._item2 and self._item1.source != self._item2.source:
            return 'changed link'

    def _content_string(self):
        # Describe the content change: added/removed with total size, or the
        # per-chunk added/removed byte counts when chunk ids are comparable.
        if self._item1.get('deleted'):
            return ('added {:>13}'.format(format_file_size(self._item2.get_size())))
        if self._item2.get('deleted'):
            return ('removed {:>11}'.format(format_file_size(self._item1.get_size())))
        if not self._can_compare_chunk_ids:
            return 'modified'
        chunk_ids1 = {c.id for c in self._item1.chunks}
        chunk_ids2 = {c.id for c in self._item2.chunks}
        added_ids = chunk_ids2 - chunk_ids1
        removed_ids = chunk_ids1 - chunk_ids2
        added = self._item2.get_size(consider_ids=added_ids)
        removed = self._item1.get_size(consider_ids=removed_ids)
        return ('{:>9} {:>9}'.format(format_file_size(added, precision=1, sign=True),
                                     format_file_size(-removed, precision=1, sign=True)))

    def _dir_string(self):
        # Returns None when directory-ness did not change.
        if self._item2.get('deleted') and not self._item1.get('deleted'):
            return 'removed directory'
        if self._item1.get('deleted') and not self._item2.get('deleted'):
            return 'added directory'

    def _owner_string(self):
        # Compares numeric ids or names depending on numeric_owner.
        u_attr, g_attr = ('uid', 'gid') if self._numeric_owner else ('user', 'group')
        u1, g1 = self._item1.get(u_attr), self._item1.get(g_attr)
        u2, g2 = self._item2.get(u_attr), self._item2.get(g_attr)
        if (u1, g1) != (u2, g2):
            return '[{}:{} -> {}:{}]'.format(u1, g1, u2, g2)

    def _mode_string(self):
        if 'mode' in self._item1 and 'mode' in self._item2 and self._item1.mode != self._item2.mode:
            return '[{} -> {}]'.format(stat.filemode(self._item1.mode), stat.filemode(self._item2.mode))

    def _content_equal(self, chunk_iterator1, chunk_iterator2):
        if self._can_compare_chunk_ids:
            return self._item1.chunks == self._item2.chunks
        # Cheap size check first; only fetch and compare chunk data on a match.
        if self._item1.get_size() != self._item2.get_size():
            return False
        return ItemDiff._chunk_content_equal(chunk_iterator1, chunk_iterator2)

    @staticmethod
    def _chunk_content_equal(chunks1, chunks2):
        """
        Compare chunk content and return True if they are identical.

        The chunks must be given as chunk iterators (like returned by :meth:`.DownloadPipeline.fetch_many`).
        """
        # The two streams may be chunked differently, so compare through
        # sliding windows: (a, ai, alen) and (b, bi, blen) track the current
        # chunk of each stream and how far into it we have matched.
        end = object()
        alen = ai = 0
        blen = bi = 0
        while True:
            if not alen - ai:
                # current chunk of stream 1 exhausted - fetch the next one
                a = next(chunks1, end)
                if a is end:
                    # stream 1 done: equal iff stream 2 is also fully consumed
                    return not blen - bi and next(chunks2, end) is end
                a = memoryview(a)
                alen = len(a)
                ai = 0
            if not blen - bi:
                b = next(chunks2, end)
                if b is end:
                    return not alen - ai and next(chunks1, end) is end
                b = memoryview(b)
                blen = len(b)
                bi = 0
            # compare the overlap of both windows; memoryview slicing is zero-copy
            slicelen = min(alen - ai, blen - bi)
            if a[ai:ai + slicelen] != b[bi:bi + slicelen]:
                return False
            ai += slicelen
            bi += slicelen

+ 4 - 4
src/borg/testsuite/archiver.py

@@ -47,7 +47,7 @@ from ..helpers import bin_to_hex
 from ..helpers import MAX_S
 from ..helpers import MAX_S
 from ..nanorst import RstToTextLazy, rst_to_terminal
 from ..nanorst import RstToTextLazy, rst_to_terminal
 from ..patterns import IECommand, PatternMatcher, parse_pattern
 from ..patterns import IECommand, PatternMatcher, parse_pattern
-from ..item import Item
+from ..item import Item, ItemDiff
 from ..logger import setup_logging
 from ..logger import setup_logging
 from ..remote import RemoteRepository, PathNotAllowed
 from ..remote import RemoteRepository, PathNotAllowed
 from ..repository import Repository
 from ..repository import Repository
@@ -3430,12 +3430,12 @@ def test_get_args():
     assert args.func == archiver.do_serve
     assert args.func == archiver.do_serve
 
 
 
 
-def test_compare_chunk_contents():
+def test_chunk_content_equal():
     def ccc(a, b):
     def ccc(a, b):
         chunks_a = [data for data in a]
         chunks_a = [data for data in a]
         chunks_b = [data for data in b]
         chunks_b = [data for data in b]
-        compare1 = Archiver.compare_chunk_contents(iter(chunks_a), iter(chunks_b))
-        compare2 = Archiver.compare_chunk_contents(iter(chunks_b), iter(chunks_a))
+        compare1 = ItemDiff._chunk_content_equal(iter(chunks_a), iter(chunks_b))
+        compare2 = ItemDiff._chunk_content_equal(iter(chunks_b), iter(chunks_a))
         assert compare1 == compare2
         assert compare1 == compare2
         return compare1
         return compare1
     assert ccc([
     assert ccc([