Pārlūkot izejas kodu

Improve 'borg diff' output format

The main design goals of the new format:
 - One file takes exactly one line of output
 - The format is easy to read with typical, long list of changes
 - Metadata changes are visible and easy to examine
 - Unuseful information is not shown

Resolves #757.
Lauri Niskanen 9 gadi atpakaļ
vecāks
revīzija
1d3e69e4c7
3 mainītis faili ar 197 papildinājumiem un 84 dzēšanām
  1. 101 48
      borg/archiver.py
  2. 88 20
      borg/testsuite/archiver.py
  3. 8 16
      docs/usage.rst

+ 101 - 48
borg/archiver.py

@@ -433,86 +433,139 @@ class Archiver:
     @with_archive
     def do_diff(self, args, repository, manifest, key, archive):
         """Diff contents of two archives"""
-        def format_bytes(count):
-            if count is None:
-                return "<deleted>"
-            return format_file_size(count)
-
         def fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2):
             chunks1 = archive1.pipeline.fetch_many(chunk_ids1)
             chunks2 = archive2.pipeline.fetch_many(chunk_ids2)
             return self.compare_chunk_contents(chunks1, chunks2)
 
+        def sum_chunk_size(item):
+            if item.get(b'deleted'):
+                return None
+            else:
+                return sum(c[1] for c in item[b'chunks'])
+
         def get_owner(item):
             if args.numeric_owner:
                 return item[b'uid'], item[b'gid']
             else:
                 return item[b'user'], item[b'group']
 
-        def compare_items(path, item1, item2, deleted=False):
+        def get_mode(item):
+            if b'mode' in item:
+                return stat.filemode(item[b'mode'])
+            else:
+                return [None]
+
+        def has_hardlink_master(item, hardlink_masters):
+            return item.get(b'source') in hardlink_masters and get_mode(item)[0] != 'l'
+
+        def compare_link(item1, item2):
+            # These are the simple link cases. For special cases, e.g. if a
+            # regular file is replaced with a link or vice versa, it is
+            # indicated in compare_mode instead.
+            if item1.get(b'deleted'):
+                return 'added link'
+            elif item2.get(b'deleted'):
+                return 'removed link'
+            elif item1[b'source'] != item2[b'source']:
+                return 'changed link'
+
+        def contents_changed(item1, item2):
+            if can_compare_chunk_ids:
+                return item1[b'chunks'] != item2[b'chunks']
+            else:
+                if sum_chunk_size(item1) != sum_chunk_size(item2):
+                    return True
+                else:
+                    chunk_ids1 = [c[0] for c in item1[b'chunks']]
+                    chunk_ids2 = [c[0] for c in item2[b'chunks']]
+                    return not fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2)
+
+        def compare_content(path, item1, item2):
+            if contents_changed(item1, item2):
+                if item1.get(b'deleted'):
+                    return ('added {:>13}'.format(format_file_size(sum_chunk_size(item2))))
+                elif item2.get(b'deleted'):
+                    return ('removed {:>11}'.format(format_file_size(sum_chunk_size(item1))))
+                else:
+                    chunk_id_set1 = {c[0] for c in item1[b'chunks']}
+                    chunk_id_set2 = {c[0] for c in item2[b'chunks']}
+                    added = sum(c[1] for c in (chunk_id_set2 - chunk_id_set1))
+                    removed = -sum(c[1] for c in (chunk_id_set1 - chunk_id_set2))
+
+                    return ('{:>9} {:>9}'.format(format_file_size(added, precision=1, sign=True),
+                                                 format_file_size(removed, precision=1, sign=True)))
+
+        def compare_directory(item1, item2):
+            if item2.get(b'deleted') and not item1.get(b'deleted'):
+                return 'removed directory'
+            elif item1.get(b'deleted') and not item2.get(b'deleted'):
+                return 'added directory'
+
+        def compare_owner(item1, item2):
+            user1, group1 = get_owner(item1)
+            user2, group2 = get_owner(item2)
+            if user1 != user2 or group1 != group2:
+                return '[{}:{} -> {}:{}]'.format(user1, group1, user2, group2)
+
+        def compare_mode(item1, item2):
+            if item1[b'mode'] != item2[b'mode']:
+                return '[{} -> {}]'.format(get_mode(item1), get_mode(item2))
+
+        def compare_items(path, item1, item2, hardlink_masters, deleted=False):
             """
             Compare two items with identical paths.
             :param deleted: Whether one of the items has been deleted
             """
+            changes = []
+
+            if item1.get(b'hardlink_master') or item2.get(b'hardlink_master'):
+                hardlink_masters[path] = (item1, item2)
+
+            if has_hardlink_master(item1, hardlink_masters):
+                item1 = hardlink_masters[item1[b'source']][0]
+
+            if has_hardlink_master(item2, hardlink_masters):
+                item2 = hardlink_masters[item2[b'source']][1]
+
+            if get_mode(item1)[0] == 'l' or get_mode(item2)[0] == 'l':
+                changes.append(compare_link(item1, item2))
+
+            if b'chunks' in item1 and b'chunks' in item2:
+                changes.append(compare_content(path, item1, item2))
+
+            if get_mode(item1)[0] == 'd' or get_mode(item2)[0] == 'd':
+                changes.append(compare_directory(item1, item2))
+
             if not deleted:
-                if item1[b'mode'] != item2[b'mode']:
-                    print(remove_surrogates(path), 'different mode')
-                    print('\t', args.location.archive, stat.filemode(item1[b'mode']))
-                    print('\t', args.archive2, stat.filemode(item2[b'mode']))
-
-                user1, group1 = get_owner(item1)
-                user2, group2 = get_owner(item2)
-                if user1 != user2 or group1 != group2:
-                    print(remove_surrogates(path), 'different owner')
-                    print('\t', args.location.archive, 'user=%s, group=%s' % (user1, group1))
-                    print('\t', args.archive2, 'user=%s, group=%s' % (user2, group2))
-
-                if not stat.S_ISREG(item1[b'mode']):
-                    return
-            if b'chunks' not in item1 or b'chunks' not in item2:
-                # At least one of the items is a link
-                if item1.get(b'source') != item2.get(b'source'):
-                    print(remove_surrogates(path), 'different link')
-                    print('\t', args.location.archive, item1.get(b'source', '<regular file>'))
-                    print('\t', args.archive2, item2.get(b'source', '<regular file>'))
-                return
-            if deleted or not can_compare_chunk_ids or item1[b'chunks'] != item2[b'chunks']:
-                # Contents are different
-                chunk_ids1 = [c[0] for c in item1[b'chunks']]
-                chunk_ids2 = [c[0] for c in item2[b'chunks']]
-                chunk_id_set1 = set(chunk_ids1)
-                chunk_id_set2 = set(chunk_ids2)
-                total1 = None if item1.get(b'deleted') else sum(c[1] for c in item1[b'chunks'])
-                total2 = None if item2.get(b'deleted') else sum(c[1] for c in item2[b'chunks'])
-                if (not can_compare_chunk_ids and total1 == total2 and not deleted and
-                        fetch_and_compare_chunks(chunk_ids1, chunk_ids2, archive1, archive2)):
-                    return
-                added = sum(c[1] for c in (chunk_id_set2 - chunk_id_set1))
-                removed = sum(c[1] for c in (chunk_id_set1 - chunk_id_set2))
-                print(remove_surrogates(path), 'different contents')
-                print('\t +%s, -%s, %s, %s' % (format_bytes(added), format_bytes(removed),
-                                               format_bytes(total1), format_bytes(total2)))
+                changes.append(compare_owner(item1, item2))
+                changes.append(compare_mode(item1, item2))
+
+            changes = [x for x in changes if x]
+            if changes:
+                print("{:<19} {}".format(' '.join(changes), remove_surrogates(path)))
 
         def compare_archives(archive1, archive2, matcher):
             orphans_archive1 = {}
             orphans_archive2 = {}
+            hardlink_masters = {}
             for item1, item2 in zip_longest(
                     archive1.iter_items(lambda item: matcher.match(item[b'path'])),
                     archive2.iter_items(lambda item: matcher.match(item[b'path'])),
             ):
                 if item1 and item2 and item1[b'path'] == item2[b'path']:
-                    compare_items(item1[b'path'], item1, item2)
+                    compare_items(item1[b'path'], item1, item2, hardlink_masters)
                     continue
                 if item1:
                     matching_orphan = orphans_archive2.pop(item1[b'path'], None)
                     if matching_orphan:
-                        compare_items(item1[b'path'], item1, matching_orphan)
+                        compare_items(item1[b'path'], item1, matching_orphan, hardlink_masters)
                     else:
                         orphans_archive1[item1[b'path']] = item1
                 if item2:
                     matching_orphan = orphans_archive1.pop(item2[b'path'], None)
                     if matching_orphan:
-                        compare_items(item2[b'path'], matching_orphan, item2)
+                        compare_items(item2[b'path'], matching_orphan, item2, hardlink_masters)
                     else:
                         orphans_archive2[item2[b'path']] = item2
             # At this point orphans_* contain items that had no matching partner in the other archive
@@ -520,12 +573,12 @@ class Archiver:
                 compare_items(added[b'path'], {
                     b'deleted': True,
                     b'chunks': [],
-                }, added, deleted=True)
+                }, added, hardlink_masters, deleted=True)
             for deleted in orphans_archive1.values():
                 compare_items(deleted[b'path'], deleted, {
                     b'deleted': True,
                     b'chunks': [],
-                }, deleted=True)
+                }, hardlink_masters, deleted=True)
 
         archive1 = archive
         archive2 = Archive(repository, key, manifest, args.archive2)

+ 88 - 20
borg/testsuite/archiver.py

@@ -1236,33 +1236,101 @@ class DiffArchiverTestCase(ArchiverTestCaseBase):
     create_regular_file = ArchiverTestCase.create_regular_file
 
     def test_basic_functionality(self):
+        # Initialize test folder
         self.create_test_files()
         self.cmd('init', self.repository_location)
-        os.chmod('input/dir2', stat.S_IFDIR | 0o755)
-        self.create_regular_file('file3', size=1024)
+
+        # Setup files for the first snapshot
+        self.create_regular_file('file_unchanged', size=128)
+        self.create_regular_file('file_removed', size=256)
+        self.create_regular_file('file_removed2', size=512)
+        self.create_regular_file('file_replaced', size=1024)
+        os.mkdir('input/dir_replaced_with_file')
+        os.chmod('input/dir_replaced_with_file', stat.S_IFDIR | 0o755)
+        os.mkdir('input/dir_removed')
+        os.symlink('input/dir_replaced_with_file', 'input/link_changed')
+        os.symlink('input/file_unchanged', 'input/link_removed')
+        os.symlink('input/file_removed2', 'input/link_target_removed')
+        os.symlink('input/empty', 'input/symlink')
+        os.link('input/empty', 'input/hardlink_contents_changed')
+        os.link('input/file_removed', 'input/hardlink_removed')
+        os.link('input/file_removed2', 'input/hardlink_target_removed')
+        os.link('input/file_replaced', 'input/hardlink_target_replaced')
+
+        # Create the first snapshot
         self.cmd('create', self.repository_location + '::test0', 'input')
-        # replace 'hardlink' with a file
-        os.unlink('input/hardlink')
-        self.create_regular_file('hardlink', size=1024 * 80)
-        # replace directory with a file
-        os.unlink('input/dir2/file2')
-        os.rmdir('input/dir2')
-        self.create_regular_file('dir2', size=1024 * 80)
-        os.chmod('input/dir2', stat.S_IFREG | 0o755)
-        self.create_regular_file('file3', size=1024, contents=b'0')
+
+        # Setup files for the second snapshot
+        self.create_regular_file('file_added', size=2048)
+        os.unlink('input/file_removed')
+        os.unlink('input/file_removed2')
+        os.unlink('input/file_replaced')
+        self.create_regular_file('file_replaced', size=4096, contents=b'0')
+        os.rmdir('input/dir_replaced_with_file')
+        self.create_regular_file('dir_replaced_with_file', size=8192 * 80)
+        os.chmod('input/dir_replaced_with_file', stat.S_IFREG | 0o755)
+        os.mkdir('input/dir_added')
+        os.rmdir('input/dir_removed')
+        os.unlink('input/link_changed')
+        os.symlink('input/dir_added', 'input/link_changed')
+        os.symlink('input/dir_added', 'input/link_added')
+        os.unlink('input/link_removed')
+        os.unlink('input/hardlink_removed')
+        os.link('input/file_added', 'input/hardlink_added')
+
+        with open('input/empty', 'ab') as fd:
+            fd.write(b'appended_data')
+
+        # Create the second snapshot
         self.cmd('create', self.repository_location + '::test1a', 'input')
         self.cmd('create', '--chunker-params', '16,18,17,4095', self.repository_location + '::test1b', 'input')
 
         def do_asserts(output, archive):
-            assert 'input/file3 different contents' in output
-            assert 'input/hardlink different mode' in output
-            assert ('input/hardlink different link\n'
-                    '	 test0 input/file1\n'
-                    '	 test%s <regular file>' % archive) in output
-            assert ('input/dir2 different mode\n'
-                    '	 test0 drwxr-xr-x\n'
-                    '	 test%s -rwxr-xr-x\n' % archive) in output
-            assert 'input/dir2/file2 different contents' in output
+            # File contents changed (deleted and replaced with a new file)
+            assert 'B input/file_replaced' in output
+
+            # File unchanged
+            assert 'input/file_unchanged' not in output
+
+            # Directory replaced with a regular file
+            assert '[drwxr-xr-x -> -rwxr-xr-x] input/dir_replaced_with_file' in output
+
+            # Basic directory cases
+            assert 'added directory     input/dir_added' in output
+            assert 'removed directory   input/dir_removed' in output
+
+            # Basic symlink cases
+            assert 'changed link        input/link_changed' in output
+            assert 'added link          input/link_added' in output
+            assert 'removed link        input/link_removed' in output
+
+            # Symlink target removed. Should not affect the symlink at all.
+            assert 'input/link_target_removed' not in output
+
+            # The inode has two links and the file contents changed. Borg
+            # should notice the changes in both links.
+            assert '0 B input/empty' in output
+            assert '0 B input/hardlink_contents_changed' in output
+
+            # Added a new file and a hard link to it. Both links to the same
+            # inode should appear as separate files.
+            assert 'added       2.05 kB input/file_added' in output
+            assert 'added       2.05 kB input/hardlink_added' in output
+
+            # The inode has two links and both of them are deleted. They should
+            # appear as two deleted files.
+            assert 'removed       256 B input/file_removed' in output
+            assert 'removed       256 B input/hardlink_removed' in output
+
+            # Another link (marked previously as the source in borg) to the
+            # same inode was removed. This should not change this link at all.
+            assert 'input/hardlink_target_removed' not in output
+
+            # Another link (marked previously as the source in borg) to the
+            # same inode was replaced with a new regular file. This should not
+            # change this link at all.
+            assert 'input/hardlink_target_replaced' not in output
+
         do_asserts(self.cmd('diff', self.repository_location + '::test0', 'test1a'), '1a')
         # We expect exit_code=1 due to the chunker params warning
         do_asserts(self.cmd('diff', self.repository_location + '::test0', 'test1b', exit_code=1), '1b')

+ 8 - 16
docs/usage.rst

@@ -399,26 +399,18 @@ Examples
 
     $ cd ..
     $ borg diff testrepo::archive1 archive2
-    file1 different mode
-             archive1 -rw-r--r--
-             archive2 -rwxr-xr-x
-    file2 different contents
-             +28 B, -31 B, 4.19 MB, 4.19 MB
+    [-rw-r--r-- -> -rwxr-xr-x] file1
+       +135 B    -252 B file2
 
     $ borg diff testrepo::archive2 archive3
-    file3 different contents
-             +0 B, -0 B, 0 B, <deleted>
+    added           0 B file4
+    removed         0 B file3
 
     $ borg diff testrepo::archive1 archive3
-    file1 different mode
-             archive1 -rw-r--r--
-             archive3 -rwxr-xr-x
-    file2 different contents
-             +28 B, -31 B, 4.19 MB, 4.19 MB
-    file3 different contents
-             +0 B, -0 B, 0 B, <deleted>
-    file4 different contents
-             +0 B, -0 B, <deleted>, 0 B
+    [-rw-r--r-- -> -rwxr-xr-x] file1
+       +135 B    -252 B file2
+    added           0 B file4
+    removed         0 B file3
 
 .. include:: usage/delete.rst.inc