Răsfoiți Sursa

Add '--sort' option for sorting diff command output

Previously, 'borg diff' always printed the modifications first, then the
additions, and finally the removals. The output may be easier to follow if the
various kinds of changes are interleaved. This commit is a simple solution that
first collects the output lines and sorts them by file path before printing.
This new behavior is optional and disabled by default. It can be enabled with
the '--sort' command line option.

This option will be especially useful after the planned multi-threading changes
arrive. Multi-threading may shuffle the archive order of files making diff
output hard to follow without sorting.

Resolves #797.
Lauri Niskanen 9 ani în urmă
părinte
comite
9d1a30c08b
3 a modificat fișierele cu 56 adăugiri și 8 ștergeri
  1. 23 7
      borg/archiver.py
  2. 31 0
      borg/testsuite/archiver.py
  3. 2 1
      docs/usage/diff.rst.inc

+ 23 - 7
borg/archiver.py

@@ -514,7 +514,7 @@ class Archiver:
             if item1[b'mode'] != item2[b'mode']:
                 return '[{} -> {}]'.format(get_mode(item1), get_mode(item2))
 
-        def compare_items(path, item1, item2, hardlink_masters, deleted=False):
+        def compare_items(output, path, item1, item2, hardlink_masters, deleted=False):
             """
             Compare two items with identical paths.
             :param deleted: Whether one of the items has been deleted
@@ -545,43 +545,56 @@ class Archiver:
 
             changes = [x for x in changes if x]
             if changes:
-                print("{:<19} {}".format(' '.join(changes), remove_surrogates(path)))
+                output_line = (remove_surrogates(path), ' '.join(changes))
+
+                if args.sort:
+                    output.append(output_line)
+                else:
+                    print_output(output_line)
+
+        def print_output(line):
+            """Print one diff entry given as a ``(path, changes)`` tuple."""
+            path, changes = line
+            # The change summary goes first, left-aligned and padded to 19 columns.
+            print("{:<19} {}".format(changes, path))
 
         def compare_archives(archive1, archive2, matcher):
+            # Walk the items of both archives in lockstep and report the differences.
+            # Only items whose path is accepted by matcher.match() are considered.
+            # Items that do not line up positionally are parked in the orphans_*
+            # dicts (keyed by path) until an item with the same path shows up in
+            # the other archive.
             orphans_archive1 = collections.OrderedDict()
             orphans_archive2 = collections.OrderedDict()
             hardlink_masters = {}
+            # Buffer handed to compare_items(), which appends (path, changes)
+            # tuples to it when args.sort is set and prints immediately otherwise.
+            output = []
+
             for item1, item2 in zip_longest(
                     archive1.iter_items(lambda item: matcher.match(item[b'path'])),
                     archive2.iter_items(lambda item: matcher.match(item[b'path'])),
             ):
+                # Common case: both archives yield the same path at this position.
                 if item1 and item2 and item1[b'path'] == item2[b'path']:
-                    compare_items(item1[b'path'], item1, item2, hardlink_masters)
+                    compare_items(output, item1[b'path'], item1, item2, hardlink_masters)
                     continue
+                # Otherwise try to pair each item with an orphan seen earlier in
+                # the other archive; if there is none, remember it as an orphan.
                 if item1:
                     matching_orphan = orphans_archive2.pop(item1[b'path'], None)
                     if matching_orphan:
-                        compare_items(item1[b'path'], item1, matching_orphan, hardlink_masters)
+                        compare_items(output, item1[b'path'], item1, matching_orphan, hardlink_masters)
                     else:
                         orphans_archive1[item1[b'path']] = item1
                 if item2:
                     matching_orphan = orphans_archive1.pop(item2[b'path'], None)
                     if matching_orphan:
-                        compare_items(item2[b'path'], matching_orphan, item2, hardlink_masters)
+                        compare_items(output, item2[b'path'], matching_orphan, item2, hardlink_masters)
                     else:
                         orphans_archive2[item2[b'path']] = item2
             # At this point orphans_* contain items that had no matching partner in the other archive
+            # Items found only in archive2 are compared against a placeholder
+            # "deleted" item standing in for the missing archive1 side.
             for added in orphans_archive2.values():
-                compare_items(added[b'path'], {
+                compare_items(output, added[b'path'], {
                     b'deleted': True,
                     b'chunks': [],
                 }, added, hardlink_masters, deleted=True)
+            # Items found only in archive1 get the placeholder on the archive2 side.
             for deleted in orphans_archive1.values():
-                compare_items(deleted[b'path'], deleted, {
+                compare_items(output, deleted[b'path'], deleted, {
                     b'deleted': True,
                     b'chunks': [],
                 }, hardlink_masters, deleted=True)
 
+            # Entries are (path, changes) tuples, so sorting orders them by path
+            # first. Without --sort nothing was buffered and this loop prints nothing.
+            for line in sorted(output):
+                print_output(line)
+
         archive1 = archive
         archive2 = Archive(repository, key, manifest, args.archive2)
 
@@ -1308,6 +1321,9 @@ class Archiver:
         subparser.add_argument('--same-chunker-params', dest='same_chunker_params',
                                action='store_true', default=False,
                                help='Override check of chunker parameters.')
+        subparser.add_argument('--sort', dest='sort',
+                               action='store_true', default=False,
+                               help='Sort the output lines by file path.')
         subparser.add_argument('location', metavar='ARCHIVE1',
                                type=location_validator(archive=True),
                                help='archive')

+ 31 - 0
borg/testsuite/archiver.py

@@ -1351,6 +1351,37 @@ class DiffArchiverTestCase(ArchiverTestCaseBase):
         # We expect exit_code=1 due to the chunker params warning
         do_asserts(self.cmd('diff', self.repository_location + '::test0', 'test1b', exit_code=1), '1b')
 
+    def test_sort_option(self):
+        """'borg diff --sort' lists removed, added and changed files ordered by path."""
+        self.cmd('init', self.repository_location)
+
+        self.create_regular_file('a_file_removed', size=8)
+        self.create_regular_file('f_file_removed', size=16)
+        self.create_regular_file('c_file_changed', size=32)
+        self.create_regular_file('e_file_changed', size=64)
+        self.cmd('create', self.repository_location + '::test0', 'input')
+
+        os.unlink('input/a_file_removed')
+        os.unlink('input/f_file_removed')
+        os.unlink('input/c_file_changed')
+        os.unlink('input/e_file_changed')
+        self.create_regular_file('c_file_changed', size=512)
+        self.create_regular_file('e_file_changed', size=1024)
+        self.create_regular_file('b_file_added', size=128)
+        self.create_regular_file('d_file_added', size=256)
+        self.cmd('create', self.repository_location + '::test1', 'input')
+
+        output = self.cmd('diff', '--sort', self.repository_location + '::test0', 'test1')
+        expected = [
+            'a_file_removed',
+            'b_file_added',
+            'c_file_changed',
+            'd_file_added',
+            'e_file_changed',
+            'f_file_removed',
+        ]
+
+        lines = output.splitlines()
+        # zip() stops at the shorter input, so empty or truncated diff output
+        # would make the all() below pass vacuously; check the line count first.
+        assert len(lines) >= len(expected), output
+        assert all(x in line for x, line in zip(expected, lines))
+
 
 def test_get_args():
     archiver = Archiver()

+ 2 - 1
docs/usage/diff.rst.inc

@@ -8,7 +8,7 @@ borg diff
                      [--show-rc] [--no-files-cache] [--umask M]
                      [--remote-path PATH] [-e PATTERN]
                      [--exclude-from EXCLUDEFILE] [--numeric-owner]
-                     [--same-chunker-params]
+                     [--same-chunker-params] [--sort]
                      ARCHIVE1 ARCHIVE2 [PATH [PATH ...]]
     
     Diff contents of two archives
@@ -39,6 +39,7 @@ borg diff
       --numeric-owner       only consider numeric user and group identifiers
       --same-chunker-params
                             Override check of chunker parameters.
+      --sort                Sort the output lines by file path.
     
 Description
 ~~~~~~~~~~~