Browse Source

Add '--sort' option for sorting diff command output

Previously, the output of 'borg diff' always listed modifications first, then
additions, and finally removals. Output may be easier to follow if the various
kinds of changes are interleaved. This commit is a simple solution that first
collects the output lines and sorts them by file path before printing. This new
behavior is optional and disabled by default. It can be enabled with the
'--sort' command line option.

This option will be especially useful after the planned multi-threading changes
arrive. Multi-threading may shuffle the archive order of files, making diff
output hard to follow without sorting.

Resolves #797.
Lauri Niskanen 9 years ago
parent
commit
9d1a30c08b
3 changed files with 56 additions and 8 deletions
  1. 23 7
      borg/archiver.py
  2. 31 0
      borg/testsuite/archiver.py
  3. 2 1
      docs/usage/diff.rst.inc

+ 23 - 7
borg/archiver.py

@@ -514,7 +514,7 @@ class Archiver:
             if item1[b'mode'] != item2[b'mode']:
             if item1[b'mode'] != item2[b'mode']:
                 return '[{} -> {}]'.format(get_mode(item1), get_mode(item2))
                 return '[{} -> {}]'.format(get_mode(item1), get_mode(item2))
 
 
-        def compare_items(path, item1, item2, hardlink_masters, deleted=False):
+        def compare_items(output, path, item1, item2, hardlink_masters, deleted=False):
             """
             """
             Compare two items with identical paths.
             Compare two items with identical paths.
             :param deleted: Whether one of the items has been deleted
             :param deleted: Whether one of the items has been deleted
@@ -545,43 +545,56 @@ class Archiver:
 
 
             changes = [x for x in changes if x]
             changes = [x for x in changes if x]
             if changes:
             if changes:
-                print("{:<19} {}".format(' '.join(changes), remove_surrogates(path)))
+                output_line = (remove_surrogates(path), ' '.join(changes))
+
+                if args.sort:
+                    output.append(output_line)
+                else:
+                    print_output(output_line)
+
+        def print_output(line):
+            print("{:<19} {}".format(line[1], line[0]))
 
 
         def compare_archives(archive1, archive2, matcher):
         def compare_archives(archive1, archive2, matcher):
             orphans_archive1 = collections.OrderedDict()
             orphans_archive1 = collections.OrderedDict()
             orphans_archive2 = collections.OrderedDict()
             orphans_archive2 = collections.OrderedDict()
             hardlink_masters = {}
             hardlink_masters = {}
+            output = []
+
             for item1, item2 in zip_longest(
             for item1, item2 in zip_longest(
                     archive1.iter_items(lambda item: matcher.match(item[b'path'])),
                     archive1.iter_items(lambda item: matcher.match(item[b'path'])),
                     archive2.iter_items(lambda item: matcher.match(item[b'path'])),
                     archive2.iter_items(lambda item: matcher.match(item[b'path'])),
             ):
             ):
                 if item1 and item2 and item1[b'path'] == item2[b'path']:
                 if item1 and item2 and item1[b'path'] == item2[b'path']:
-                    compare_items(item1[b'path'], item1, item2, hardlink_masters)
+                    compare_items(output, item1[b'path'], item1, item2, hardlink_masters)
                     continue
                     continue
                 if item1:
                 if item1:
                     matching_orphan = orphans_archive2.pop(item1[b'path'], None)
                     matching_orphan = orphans_archive2.pop(item1[b'path'], None)
                     if matching_orphan:
                     if matching_orphan:
-                        compare_items(item1[b'path'], item1, matching_orphan, hardlink_masters)
+                        compare_items(output, item1[b'path'], item1, matching_orphan, hardlink_masters)
                     else:
                     else:
                         orphans_archive1[item1[b'path']] = item1
                         orphans_archive1[item1[b'path']] = item1
                 if item2:
                 if item2:
                     matching_orphan = orphans_archive1.pop(item2[b'path'], None)
                     matching_orphan = orphans_archive1.pop(item2[b'path'], None)
                     if matching_orphan:
                     if matching_orphan:
-                        compare_items(item2[b'path'], matching_orphan, item2, hardlink_masters)
+                        compare_items(output, item2[b'path'], matching_orphan, item2, hardlink_masters)
                     else:
                     else:
                         orphans_archive2[item2[b'path']] = item2
                         orphans_archive2[item2[b'path']] = item2
             # At this point orphans_* contain items that had no matching partner in the other archive
             # At this point orphans_* contain items that had no matching partner in the other archive
             for added in orphans_archive2.values():
             for added in orphans_archive2.values():
-                compare_items(added[b'path'], {
+                compare_items(output, added[b'path'], {
                     b'deleted': True,
                     b'deleted': True,
                     b'chunks': [],
                     b'chunks': [],
                 }, added, hardlink_masters, deleted=True)
                 }, added, hardlink_masters, deleted=True)
             for deleted in orphans_archive1.values():
             for deleted in orphans_archive1.values():
-                compare_items(deleted[b'path'], deleted, {
+                compare_items(output, deleted[b'path'], deleted, {
                     b'deleted': True,
                     b'deleted': True,
                     b'chunks': [],
                     b'chunks': [],
                 }, hardlink_masters, deleted=True)
                 }, hardlink_masters, deleted=True)
 
 
+            for line in sorted(output):
+                print_output(line)
+
         archive1 = archive
         archive1 = archive
         archive2 = Archive(repository, key, manifest, args.archive2)
         archive2 = Archive(repository, key, manifest, args.archive2)
 
 
@@ -1308,6 +1321,9 @@ class Archiver:
         subparser.add_argument('--same-chunker-params', dest='same_chunker_params',
         subparser.add_argument('--same-chunker-params', dest='same_chunker_params',
                                action='store_true', default=False,
                                action='store_true', default=False,
                                help='Override check of chunker parameters.')
                                help='Override check of chunker parameters.')
+        subparser.add_argument('--sort', dest='sort',
+                               action='store_true', default=False,
+                               help='Sort the output lines by file path.')
         subparser.add_argument('location', metavar='ARCHIVE1',
         subparser.add_argument('location', metavar='ARCHIVE1',
                                type=location_validator(archive=True),
                                type=location_validator(archive=True),
                                help='archive')
                                help='archive')

+ 31 - 0
borg/testsuite/archiver.py

@@ -1351,6 +1351,37 @@ class DiffArchiverTestCase(ArchiverTestCaseBase):
         # We expect exit_code=1 due to the chunker params warning
         # We expect exit_code=1 due to the chunker params warning
         do_asserts(self.cmd('diff', self.repository_location + '::test0', 'test1b', exit_code=1), '1b')
         do_asserts(self.cmd('diff', self.repository_location + '::test0', 'test1b', exit_code=1), '1b')
 
 
+    def test_sort_option(self):
+        self.cmd('init', self.repository_location)
+
+        self.create_regular_file('a_file_removed', size=8)
+        self.create_regular_file('f_file_removed', size=16)
+        self.create_regular_file('c_file_changed', size=32)
+        self.create_regular_file('e_file_changed', size=64)
+        self.cmd('create', self.repository_location + '::test0', 'input')
+
+        os.unlink('input/a_file_removed')
+        os.unlink('input/f_file_removed')
+        os.unlink('input/c_file_changed')
+        os.unlink('input/e_file_changed')
+        self.create_regular_file('c_file_changed', size=512)
+        self.create_regular_file('e_file_changed', size=1024)
+        self.create_regular_file('b_file_added', size=128)
+        self.create_regular_file('d_file_added', size=256)
+        self.cmd('create', self.repository_location + '::test1', 'input')
+
+        output = self.cmd('diff', '--sort', self.repository_location + '::test0', 'test1')
+        expected = [
+            'a_file_removed',
+            'b_file_added',
+            'c_file_changed',
+            'd_file_added',
+            'e_file_changed',
+            'f_file_removed',
+        ]
+
+        assert all(x in line for x, line in zip(expected, output.splitlines()))
+
 
 
 def test_get_args():
 def test_get_args():
     archiver = Archiver()
     archiver = Archiver()

+ 2 - 1
docs/usage/diff.rst.inc

@@ -8,7 +8,7 @@ borg diff
                      [--show-rc] [--no-files-cache] [--umask M]
                      [--show-rc] [--no-files-cache] [--umask M]
                      [--remote-path PATH] [-e PATTERN]
                      [--remote-path PATH] [-e PATTERN]
                      [--exclude-from EXCLUDEFILE] [--numeric-owner]
                      [--exclude-from EXCLUDEFILE] [--numeric-owner]
-                     [--same-chunker-params]
+                     [--same-chunker-params] [--sort]
                      ARCHIVE1 ARCHIVE2 [PATH [PATH ...]]
                      ARCHIVE1 ARCHIVE2 [PATH [PATH ...]]
     
     
     Diff contents of two archives
     Diff contents of two archives
@@ -39,6 +39,7 @@ borg diff
       --numeric-owner       only consider numeric user and group identifiers
       --numeric-owner       only consider numeric user and group identifiers
       --same-chunker-params
       --same-chunker-params
                             Override check of chunker parameters.
                             Override check of chunker parameters.
+      --sort                Sort the output lines by file path.
     
     
 Description
 Description
 ~~~~~~~~~~~
 ~~~~~~~~~~~