Browse Source

Add "borgmatic list --find" flag for searching for files across multiple archives (#541).

Dan Helfman 3 years ago
parent
commit
d14f22e121
5 changed files with 317 additions and 107 deletions
  1. 4 0
      NEWS
  2. 92 14
      borgmatic/borg/list.py
  3. 9 2
      borgmatic/commands/arguments.py
  4. 24 0
      docs/how-to/inspect-your-backups.md
  5. 188 91
      tests/unit/borg/test_list.py

+ 4 - 0
NEWS

@@ -1,4 +1,8 @@
 1.6.3.dev0
+ * #541: Add "borgmatic list --find" flag for searching for files across multiple archives, useful
+   for hunting down that file you accidentally deleted so you can extract it. See the documentation
+   for more information:
+   https://torsion.org/borgmatic/docs/how-to/inspect-your-backups/#searching-for-a-file
  * Deprecate "borgmatic list --successful" flag, as listing only non-checkpoint (successful)
    archives is now the default in newer versions of Borg.
 

+ 92 - 14
borgmatic/borg/list.py

@@ -1,4 +1,6 @@
+import copy
 import logging
+import re
 
 from borgmatic.borg.flags import make_flags, make_flags_from_arguments
 from borgmatic.execute import execute_command
@@ -40,15 +42,20 @@ def resolve_archive_name(repository, archive, storage_config, local_path='borg',
     return latest_archive
 
 
-def list_archives(repository, storage_config, list_arguments, local_path='borg', remote_path=None):
+MAKE_FLAGS_EXCLUDES = ('repository', 'archive', 'successful', 'paths', 'find_paths')
+
+
+def make_list_command(
+    repository, storage_config, list_arguments, local_path='borg', remote_path=None
+):
     '''
-    Given a local or remote repository path, a storage config dict, and the arguments to the list
-    action, display the output of listing Borg archives in the repository or return JSON output. Or,
-    if an archive name is given, listing the files in that archive.
+    Given a local or remote repository path, a storage config dict, the arguments to the list
+    action, and local and remote Borg paths, return a command as a tuple to list archives or paths
+    within an archive.
     '''
     lock_wait = storage_config.get('lock_wait', None)
 
-    full_command = (
+    return (
         (local_path, 'list')
         + (
             ('--info',)
@@ -62,19 +69,90 @@ def list_archives(repository, storage_config, list_arguments, local_path='borg',
         )
         + make_flags('remote-path', remote_path)
         + make_flags('lock-wait', lock_wait)
-        + make_flags_from_arguments(
-            list_arguments, excludes=('repository', 'archive', 'successful', 'paths')
-        )
+        + make_flags_from_arguments(list_arguments, excludes=MAKE_FLAGS_EXCLUDES,)
         + (
-            '::'.join((repository, list_arguments.archive))
+            ('::'.join((repository, list_arguments.archive)),)
             if list_arguments.archive
-            else repository,
+            else (repository,)
         )
         + (tuple(list_arguments.paths) if list_arguments.paths else ())
     )
 
-    return execute_command(
-        full_command,
-        output_log_level=None if list_arguments.json else logging.WARNING,
-        borg_local_path=local_path,
+
+def make_find_paths(find_paths):
+    '''
+    Given a sequence of path fragments or patterns as passed to `--find`, transform all path
+    fragments into glob patterns. Pass through existing patterns untouched.
+
+    For example, given find_paths of:
+
+      ['foo.txt', 'pp:root/somedir']
+
+    ... transform that into:
+
+      ['sh:**/*foo.txt*/**', 'pp:root/somedir']
+    '''
+
+    return tuple(
+        find_path
+        if re.compile(r'([-!+RrPp] )|(\w\w:)').match(find_path)
+        else f'sh:**/*{find_path}*/**'
+        for find_path in find_paths
     )
+
+
+def list_archives(repository, storage_config, list_arguments, local_path='borg', remote_path=None):
+    '''
+    Given a local or remote repository path, a storage config dict, the arguments to the list
+    action, and local and remote Borg paths, display the output of listing Borg archives in the
+    repository or return JSON output. Or, if an archive name is given, list the files in that
+    archive. Or, if list_arguments.find_paths are given, list the files by searching across multiple
+    archives.
+    '''
+    # If there are any paths to find (and there's not a single archive already selected), start by
+    # getting a list of archives to search.
+    if list_arguments.find_paths and not list_arguments.archive:
+        repository_arguments = copy.copy(list_arguments)
+        repository_arguments.archive = None
+        repository_arguments.json = False
+        repository_arguments.format = None
+
+        # Ask Borg to list archives. Capture its output for use below.
+        archive_lines = tuple(
+            execute_command(
+                make_list_command(
+                    repository, storage_config, repository_arguments, local_path, remote_path
+                ),
+                output_log_level=None,
+                borg_local_path=local_path,
+            )
+            .strip('\n')
+            .split('\n')
+        )
+    else:
+        archive_lines = (list_arguments.archive,)
+
+    # For each archive listed by Borg, run list on the contents of that archive.
+    for archive_line in archive_lines:
+        try:
+            archive = archive_line.split()[0]
+        except (AttributeError, IndexError):
+            archive = None
+
+        if archive:
+            logger.warning(archive_line)
+
+        archive_arguments = copy.copy(list_arguments)
+        archive_arguments.archive = archive
+        main_command = make_list_command(
+            repository, storage_config, archive_arguments, local_path, remote_path
+        ) + make_find_paths(list_arguments.find_paths)
+
+        output = execute_command(
+            main_command,
+            output_log_level=None if list_arguments.json else logging.WARNING,
+            borg_local_path=local_path,
+        )
+
+        if list_arguments.json:
+            return output

+ 9 - 2
borgmatic/commands/arguments.py

@@ -554,7 +554,14 @@ def make_parsers():
         metavar='PATH',
         nargs='+',
         dest='paths',
-        help='Paths to list from archive, defaults to the entire archive',
+        help='Paths or patterns to list from a single selected archive (via "--archive"), defaults to listing the entire archive',
+    )
+    list_group.add_argument(
+        '--find',
+        metavar='PATH',
+        nargs='+',
+        dest='find_paths',
+        help='Partial paths or patterns to search for and list across multiple archives',
     )
     list_group.add_argument(
         '--short', default=False, action='store_true', help='Output only archive or path names'
@@ -571,7 +578,7 @@ def make_parsers():
     )
     list_group.add_argument(
         '--successful',
-        default=False,
+        default=True,
         action='store_true',
         help='Deprecated in favor of listing successful (non-checkpoint) backups by default in newer versions of Borg',
     )

+ 24 - 0
docs/how-to/inspect-your-backups.md

@@ -51,6 +51,30 @@ borgmatic info
 `--info`. Or upgrade borgmatic!)
 
 
+### Searching for a file
+
+Let's say you've accidentally deleted a file and want to find the backup
+archive(s) containing it. `borgmatic list` provides a `--find` flag for
+exactly this purpose. For instance, if you're looking for a `foo.txt`:
+
+```bash
+borgmatic list --find foo.txt
+```
+
+This will list your archives and, under each one, any files whose paths match
+`*foo.txt*` anywhere in that archive. Instead of a partial path, the `--find`
+parameter can alternatively be a [Borg
+pattern](https://borgbackup.readthedocs.io/en/stable/usage/help.html#borg-patterns).
+
+To limit the archives searched, use the standard `list` parameters for
+filtering archives, such as `--last`, `--archive`, or `--glob-archives`. For
+example, to search only the last five archives:
+
+```bash
+borgmatic list --find foo.txt --last 5
+```
+
+
 ## Logging
 
 By default, borgmatic logs to a local syslog-compatible daemon if one is

+ 188 - 91
tests/unit/borg/test_list.py

@@ -1,3 +1,4 @@
+import argparse
 import logging
 
 import pytest
@@ -106,156 +107,125 @@ def test_resolve_archive_name_with_lock_wait_calls_borg_with_lock_wait_parameter
     )
 
 
-def test_list_archives_calls_borg_with_parameters():
-    flexmock(module).should_receive('execute_command').with_args(
-        ('borg', 'list', 'repo'), output_log_level=logging.WARNING, borg_local_path='borg'
-    )
+def test_make_list_command_includes_log_info():
+    insert_logging_mock(logging.INFO)
 
-    module.list_archives(
+    command = module.make_list_command(
         repository='repo',
         storage_config={},
         list_arguments=flexmock(archive=None, paths=None, json=False),
     )
 
+    assert command == ('borg', 'list', '--info', 'repo')
 
-def test_list_archives_with_log_info_calls_borg_with_info_parameter():
-    flexmock(module).should_receive('execute_command').with_args(
-        ('borg', 'list', '--info', 'repo'), output_log_level=logging.WARNING, borg_local_path='borg'
-    )
+
+def test_make_list_command_includes_json_but_not_info():
     insert_logging_mock(logging.INFO)
 
-    module.list_archives(
+    command = module.make_list_command(
         repository='repo',
         storage_config={},
-        list_arguments=flexmock(archive=None, paths=None, json=False),
+        list_arguments=flexmock(archive=None, paths=None, json=True),
     )
 
+    assert command == ('borg', 'list', '--json', 'repo')
 
-def test_list_archives_with_log_info_and_json_suppresses_most_borg_output():
-    flexmock(module).should_receive('execute_command').with_args(
-        ('borg', 'list', '--json', 'repo'), output_log_level=None, borg_local_path='borg'
-    )
-    insert_logging_mock(logging.INFO)
 
-    module.list_archives(
+def test_make_list_command_includes_log_debug():
+    insert_logging_mock(logging.DEBUG)
+
+    command = module.make_list_command(
         repository='repo',
         storage_config={},
-        list_arguments=flexmock(archive=None, paths=None, json=True),
+        list_arguments=flexmock(archive=None, paths=None, json=False),
     )
 
+    assert command == ('borg', 'list', '--debug', '--show-rc', 'repo')
 
-def test_list_archives_with_log_debug_calls_borg_with_debug_parameter():
-    flexmock(module).should_receive('execute_command').with_args(
-        ('borg', 'list', '--debug', '--show-rc', 'repo'),
-        output_log_level=logging.WARNING,
-        borg_local_path='borg',
-    )
+
+def test_make_list_command_includes_json_but_not_debug():
     insert_logging_mock(logging.DEBUG)
 
-    module.list_archives(
+    command = module.make_list_command(
         repository='repo',
         storage_config={},
-        list_arguments=flexmock(archive=None, paths=None, json=False),
+        list_arguments=flexmock(archive=None, paths=None, json=True),
     )
 
+    assert command == ('borg', 'list', '--json', 'repo')
 
-def test_list_archives_with_log_debug_and_json_suppresses_most_borg_output():
-    flexmock(module).should_receive('execute_command').with_args(
-        ('borg', 'list', '--json', 'repo'), output_log_level=None, borg_local_path='borg'
-    )
-    insert_logging_mock(logging.DEBUG)
 
-    module.list_archives(
+def test_make_list_command_includes_json():
+    command = module.make_list_command(
         repository='repo',
         storage_config={},
         list_arguments=flexmock(archive=None, paths=None, json=True),
     )
 
+    assert command == ('borg', 'list', '--json', 'repo')
 
-def test_list_archives_with_lock_wait_calls_borg_with_lock_wait_parameters():
-    storage_config = {'lock_wait': 5}
-    flexmock(module).should_receive('execute_command').with_args(
-        ('borg', 'list', '--lock-wait', '5', 'repo'),
-        output_log_level=logging.WARNING,
-        borg_local_path='borg',
-    )
 
-    module.list_archives(
+def test_make_list_command_includes_lock_wait():
+    command = module.make_list_command(
         repository='repo',
-        storage_config=storage_config,
+        storage_config={'lock_wait': 5},
         list_arguments=flexmock(archive=None, paths=None, json=False),
     )
 
+    assert command == ('borg', 'list', '--lock-wait', '5', 'repo')
 
-def test_list_archives_with_archive_calls_borg_with_archive_parameter():
-    storage_config = {}
-    flexmock(module).should_receive('execute_command').with_args(
-        ('borg', 'list', 'repo::archive'), output_log_level=logging.WARNING, borg_local_path='borg'
-    )
 
-    module.list_archives(
+def test_make_list_command_includes_archive():
+    command = module.make_list_command(
         repository='repo',
-        storage_config=storage_config,
+        storage_config={},
         list_arguments=flexmock(archive='archive', paths=None, json=False),
     )
 
+    assert command == ('borg', 'list', 'repo::archive')
 
-def test_list_archives_with_path_calls_borg_with_path_parameter():
-    storage_config = {}
-    flexmock(module).should_receive('execute_command').with_args(
-        ('borg', 'list', 'repo::archive', 'var/lib'),
-        output_log_level=logging.WARNING,
-        borg_local_path='borg',
-    )
 
-    module.list_archives(
+def test_make_list_command_includes_archive_and_path():
+    command = module.make_list_command(
         repository='repo',
-        storage_config=storage_config,
+        storage_config={},
         list_arguments=flexmock(archive='archive', paths=['var/lib'], json=False),
     )
 
+    assert command == ('borg', 'list', 'repo::archive', 'var/lib')
 
-def test_list_archives_with_local_path_calls_borg_via_local_path():
-    flexmock(module).should_receive('execute_command').with_args(
-        ('borg1', 'list', 'repo'), output_log_level=logging.WARNING, borg_local_path='borg1'
-    )
 
-    module.list_archives(
+def test_make_list_command_includes_local_path():
+    command = module.make_list_command(
         repository='repo',
         storage_config={},
         list_arguments=flexmock(archive=None, paths=None, json=False),
-        local_path='borg1',
+        local_path='borg2',
     )
 
+    assert command == ('borg2', 'list', 'repo')
 
-def test_list_archives_with_remote_path_calls_borg_with_remote_path_parameters():
-    flexmock(module).should_receive('execute_command').with_args(
-        ('borg', 'list', '--remote-path', 'borg1', 'repo'),
-        output_log_level=logging.WARNING,
-        borg_local_path='borg',
-    )
 
-    module.list_archives(
+def test_make_list_command_includes_remote_path():
+    command = module.make_list_command(
         repository='repo',
         storage_config={},
         list_arguments=flexmock(archive=None, paths=None, json=False),
-        remote_path='borg1',
+        remote_path='borg2',
     )
 
+    assert command == ('borg', 'list', '--remote-path', 'borg2', 'repo')
 
-def test_list_archives_with_short_calls_borg_with_short_parameter():
-    flexmock(module).should_receive('execute_command').with_args(
-        ('borg', 'list', '--short', 'repo'),
-        output_log_level=logging.WARNING,
-        borg_local_path='borg',
-    ).and_return('[]')
 
-    module.list_archives(
+def test_make_list_command_includes_short():
+    command = module.make_list_command(
         repository='repo',
         storage_config={},
         list_arguments=flexmock(archive=None, paths=None, json=False, short=True),
     )
 
+    assert command == ('borg', 'list', '--short', 'repo')
+
 
 @pytest.mark.parametrize(
     'argument_name',
@@ -271,29 +241,156 @@ def test_list_archives_with_short_calls_borg_with_short_parameter():
         'patterns_from',
     ),
 )
-def test_list_archives_passes_through_arguments_to_borg(argument_name):
+def test_make_list_command_includes_additional_flags(argument_name):
+    command = module.make_list_command(
+        repository='repo',
+        storage_config={},
+        list_arguments=flexmock(
+            archive=None,
+            paths=None,
+            json=False,
+            find_paths=None,
+            format=None,
+            **{argument_name: 'value'}
+        ),
+    )
+
+    assert command == ('borg', 'list', '--' + argument_name.replace('_', '-'), 'value', 'repo')
+
+
+def test_make_find_paths_passes_through_empty_paths():
+    assert module.make_find_paths(()) == ()
+
+
+def test_make_find_paths_passes_through_patterns():
+    find_paths = (
+        'fm:*',
+        'sh:**/*.txt',
+        're:^.*$',
+        'pp:root/somedir',
+        'pf:root/foo.txt',
+        'R /',
+        'r /',
+        'p /',
+        'P /',
+        '+ /',
+        '- /',
+        '! /',
+    )
+
+    assert module.make_find_paths(find_paths) == find_paths
+
+
+def test_make_find_paths_adds_globs_to_path_fragments():
+    assert module.make_find_paths(('foo.txt',)) == ('sh:**/*foo.txt*/**',)
+
+
+def test_list_archives_calls_borg_with_parameters():
+    list_arguments = argparse.Namespace(archive=None, paths=None, json=False, find_paths=None)
+
+    flexmock(module).should_receive('make_list_command').with_args(
+        repository='repo',
+        storage_config={},
+        list_arguments=list_arguments,
+        local_path='borg',
+        remote_path=None,
+    ).and_return(('borg', 'list', 'repo'))
+    flexmock(module).should_receive('make_find_paths').and_return(())
     flexmock(module).should_receive('execute_command').with_args(
-        ('borg', 'list', '--' + argument_name.replace('_', '-'), 'value', 'repo'),
-        output_log_level=logging.WARNING,
-        borg_local_path='borg',
-    ).and_return('[]')
+        ('borg', 'list', 'repo'), output_log_level=logging.WARNING, borg_local_path='borg'
+    ).once()
+
+    module.list_archives(
+        repository='repo', storage_config={}, list_arguments=list_arguments,
+    )
+
+
+def test_list_archives_with_json_suppresses_most_borg_output():
+    list_arguments = argparse.Namespace(archive=None, paths=None, json=True, find_paths=None)
+
+    flexmock(module).should_receive('make_list_command').with_args(
+        repository='repo',
+        storage_config={},
+        list_arguments=list_arguments,
+        local_path='borg',
+        remote_path=None,
+    ).and_return(('borg', 'list', 'repo'))
+    flexmock(module).should_receive('make_find_paths').and_return(())
+    flexmock(module).should_receive('execute_command').with_args(
+        ('borg', 'list', 'repo'), output_log_level=None, borg_local_path='borg'
+    ).once()
 
     module.list_archives(
+        repository='repo', storage_config={}, list_arguments=list_arguments,
+    )
+
+
+def test_list_archives_calls_borg_with_local_path():
+    list_arguments = argparse.Namespace(archive=None, paths=None, json=False, find_paths=None)
+
+    flexmock(module).should_receive('make_list_command').with_args(
         repository='repo',
         storage_config={},
-        list_arguments=flexmock(archive=None, paths=None, json=False, **{argument_name: 'value'}),
+        list_arguments=list_arguments,
+        local_path='borg2',
+        remote_path=None,
+    ).and_return(('borg2', 'list', 'repo'))
+    flexmock(module).should_receive('make_find_paths').and_return(())
+    flexmock(module).should_receive('execute_command').with_args(
+        ('borg2', 'list', 'repo'), output_log_level=logging.WARNING, borg_local_path='borg2'
+    ).once()
+
+    module.list_archives(
+        repository='repo', storage_config={}, list_arguments=list_arguments, local_path='borg2',
     )
 
 
-def test_list_archives_with_json_calls_borg_with_json_parameter():
+def test_list_archives_calls_borg_multiple_times_with_find_paths():
+    glob_paths = ('**/*foo.txt*/**',)
+    list_arguments = argparse.Namespace(
+        archive=None, paths=None, json=False, find_paths=['foo.txt'], format=None
+    )
+
+    flexmock(module).should_receive('make_list_command').and_return(
+        ('borg', 'list', 'repo')
+    ).and_return(('borg', 'list', 'repo::archive1')).and_return(('borg', 'list', 'repo::archive2'))
+    flexmock(module).should_receive('make_find_paths').and_return(glob_paths)
     flexmock(module).should_receive('execute_command').with_args(
-        ('borg', 'list', '--json', 'repo'), output_log_level=None, borg_local_path='borg'
-    ).and_return('[]')
+        ('borg', 'list', 'repo'), output_log_level=None, borg_local_path='borg'
+    ).and_return(
+        'archive1   Sun, 2022-05-29 15:27:04 [abc]\narchive2   Mon, 2022-05-30 19:47:15 [xyz]'
+    ).once()
+    flexmock(module).should_receive('execute_command').with_args(
+        ('borg', 'list', 'repo::archive1') + glob_paths,
+        output_log_level=logging.WARNING,
+        borg_local_path='borg',
+    ).once()
+    flexmock(module).should_receive('execute_command').with_args(
+        ('borg', 'list', 'repo::archive2') + glob_paths,
+        output_log_level=logging.WARNING,
+        borg_local_path='borg',
+    ).once()
+
+    module.list_archives(
+        repository='repo', storage_config={}, list_arguments=list_arguments,
+    )
+
 
-    json_output = module.list_archives(
+def test_list_archives_calls_borg_with_archive():
+    list_arguments = argparse.Namespace(archive='archive', paths=None, json=False, find_paths=None)
+
+    flexmock(module).should_receive('make_list_command').with_args(
         repository='repo',
         storage_config={},
-        list_arguments=flexmock(archive=None, paths=None, json=True),
-    )
+        list_arguments=list_arguments,
+        local_path='borg',
+        remote_path=None,
+    ).and_return(('borg', 'list', 'repo::archive'))
+    flexmock(module).should_receive('make_find_paths').and_return(())
+    flexmock(module).should_receive('execute_command').with_args(
+        ('borg', 'list', 'repo::archive'), output_log_level=logging.WARNING, borg_local_path='borg'
+    ).once()
 
-    assert json_output == '[]'
+    module.list_archives(
+        repository='repo', storage_config={}, list_arguments=list_arguments,
+    )