Browse Source

Simplified dump metadata comparison logic and got a few tests passing (#418).

Dan Helfman 6 months ago
parent
commit
3db79b4352
3 changed files with 113 additions and 117 deletions
  1. 1 1
      NEWS
  2. 46 56
      borgmatic/actions/restore.py
  3. 66 60
      tests/unit/actions/test_restore.py

+ 1 - 1
NEWS

@@ -3,7 +3,7 @@
    or hooks.
  * #947: To avoid a hang in the database hooks, error and exit when the borgmatic runtime
    directory overlaps with the configured excludes.
- * #954: Fix findmnt command error in the Btrfs hook by switching to parsing JSON output.
+ * #954: Fix a findmnt command error in the Btrfs hook by switching to parsing JSON output.
  * When the ZFS, Btrfs, or LVM hooks aren't configured, don't try to cleanup snapshots for them.
 
 1.9.4

+ 46 - 56
borgmatic/actions/restore.py

@@ -21,41 +21,29 @@ logger = logging.getLogger(__name__)
 UNSPECIFIED = object()
 
 
-class Dump(
-    collections.namedtuple(
-        'Dump',
-        ('hook_name', 'data_source_name', 'hostname', 'port'),
-        defaults=('localhost', None),
-    )
-):
-    def __eq__(self, other):
-        '''
-        Compare two namedtuples for equality while supporting a field value of UNSPECIFIED, which
-        indicates that the field should match any value.
-        '''
-        for field_name in self._fields:
-            self_value = getattr(self, field_name)
-            other_value = getattr(other, field_name)
-
-            if self_value == UNSPECIFIED or other_value == UNSPECIFIED:
-                continue
+Dump = collections.namedtuple(
+    'Dump',
+    ('hook_name', 'data_source_name', 'hostname', 'port'),
+    defaults=('localhost', None),
+)
 
-            if self_value != other_value:
-                return False
 
-        return True
-
-    def __ne__(self, other):
-        return not self == other
+def dumps_match(first, second):
+    '''
+    Compare two Dump instances for equality while supporting a field value of UNSPECIFIED, which
+    indicates that the field should match any value.
+    '''
+    for field_name in first._fields:
+        first_value = getattr(first, field_name)
+        second_value = getattr(second, field_name)
 
-    def __lt__(self, other):
-        return self.data_source_name < other.data_source_name
+        if first_value == UNSPECIFIED or second_value == UNSPECIFIED:
+            continue
 
-    def __gt__(self, other):
-        return self.data_source_name > other.data_source_name
+        if first_value != second_value:
+            return False
 
-    def __hash__(self):
-        return hash(tuple(self))
+    return True
 
 
 def render_dump_metadata(dump):
@@ -103,13 +91,15 @@ def get_configured_data_source(config, restore_dump):
         (hook_name, hook_data_source)
         for (hook_name, hook) in hooks_to_search.items()
         for hook_data_source in hook
-        if Dump(
-            hook_name,
-            hook_data_source.get('name'),
-            hook_data_source.get('hostname'),
-            hook_data_source.get('port'),
+        if dumps_match(
+            Dump(
+                hook_name,
+                hook_data_source.get('name'),
+                hook_data_source.get('hostname'),
+                hook_data_source.get('port'),
+            ),
+            restore_dump,
         )
-        == restore_dump
     )
 
     if not matching_dumps:
@@ -331,13 +321,13 @@ def collect_dumps_from_archive(
 def get_dumps_to_restore(restore_arguments, dumps_from_archive):
     '''
     Given restore arguments as an argparse.Namespace instance indicating which dumps to restore and
-    a set of Dump instances representing the dumps found in an archive, return a set of Dump
-    instances to restore. As part of this, replace any Dump having a data source name of "all" with
-    multiple named Dump instances as appropriate.
+    a set of Dump instances representing the dumps found in an archive, return a set of specific
+    Dump instances from the archive to restore. As part of this, replace any Dump having a data
+    source name of "all" with multiple named Dump instances as appropriate.
 
     Raise ValueError if any of the requested data source names cannot be found in the archive.
     '''
-    dumps_to_restore = (
+    requested_dumps = (
         {
             Dump(
                 hook_name=(
@@ -365,24 +355,24 @@ def get_dumps_to_restore(restore_arguments, dumps_from_archive):
             )
         }
     )
+    missing_dumps = set()
+    dumps_to_restore = set()
 
-    # If "all" is in dumps_to_restore, then replace it with named dumps found within the archive.
-    try:
-        all_dump = next(dump for dump in dumps_to_restore if dump.data_source_name == 'all')
-    except StopIteration:
-        pass
-    else:
-        dumps_to_restore.remove(all_dump)
+    # If there's a requested "all" dump, add every dump from the archive to the dumps to restore.
+    if any(dump for dump in requested_dumps if dump.data_source_name == 'all'):
+        dumps_to_restore.update(dumps_from_archive)
 
-        for dump in dumps_from_archive:
-            if all_dump.hook_name == UNSPECIFIED or dump.hook_name == all_dump.hook_name:
-                dumps_to_restore.add(dump)
+    # Put any archive dump matching a requested dump in the dumps to restore.
+    for requested_dump in requested_dumps:
+        if requested_dump.data_source_name == 'all':
+            continue
 
-    missing_dumps = {
-        restore_dump
-        for restore_dump in dumps_to_restore
-        if all(restore_dump != archive_dump for archive_dump in dumps_from_archive)
-    }
+        for archive_dump in dumps_from_archive:
+            if dumps_match(requested_dump, archive_dump):
+                dumps_to_restore.add(archive_dump)
+                break
+        else:
+            missing_dumps.add(requested_dump)
 
     if missing_dumps:
         rendered_dumps = ', '.join(
@@ -390,7 +380,7 @@ def get_dumps_to_restore(restore_arguments, dumps_from_archive):
         )
 
         raise ValueError(
-            f"Cannot restore data source{'s' if len(missing_dumps) > 1 else ''} {rendered_dumps} missing from archive"
+            f"Cannot restore data source dump{'s' if len(missing_dumps) > 1 else ''} {rendered_dumps} missing from archive"
         )
 
     return dumps_to_restore

+ 66 - 60
tests/unit/actions/test_restore.py

@@ -1,3 +1,5 @@
+import collections
+
 import pytest
 from flexmock import flexmock
 
@@ -417,53 +419,73 @@ def test_get_dumps_to_restore_with_all_in_requested_names_finds_all_archive_data
 
 
 def test_get_dumps_to_restore_with_all_in_requested_names_plus_additional_requested_names_omits_duplicates():
-    archive_data_source_names = {'postresql_databases': ['foo', 'bar']}
+    dumps_from_archive = {
+        module.Dump('postresql_databases', 'foo'),
+        module.Dump('postresql_databases', 'bar'),
+    }
 
-    restore_names = module.get_dumps_to_restore(
-        requested_data_source_names=['all', 'foo', 'bar'],
-        archive_data_source_names=archive_data_source_names,
+    assert (
+        module.get_dumps_to_restore(
+            restore_arguments=flexmock(
+                hook=None,
+                data_sources=['all', 'foo', 'bar'],
+                original_hostname=None,
+                original_port=None,
+            ),
+            dumps_from_archive=dumps_from_archive,
+        )
+        == dumps_from_archive
     )
 
-    assert restore_names == archive_data_source_names
-
 
-def test_get_dumps_to_restore_raises_for_all_in_requested_names_and_requested_named_missing_from_archives():
+def test_get_dumps_to_restore_raises_for_all_in_requested_names_and_requested_names_missing_from_archives():
     with pytest.raises(ValueError):
         module.get_dumps_to_restore(
-            requested_data_source_names=['all', 'foo', 'bar'],
-            archive_data_source_names={'postresql_databases': ['foo']},
+            restore_arguments=flexmock(
+                hook=None,
+                data_sources=['all', 'foo', 'bar'],
+                original_hostname=None,
+                original_port=None,
+            ),
+            dumps_from_archive={module.Dump('postresql_databases', 'foo')},
         )
 
 
-def test_ensure_requested_dumps_restored_with_all_data_sources_found_does_not_raise():
+def test_ensure_requested_dumps_restored_with_all_dumps_restored_does_not_raise():
     module.ensure_requested_dumps_restored(
-        restore_names={'postgresql_databases': ['foo']},
-        remaining_restore_names={'postgresql_databases': ['bar']},
-        found_names=['foo', 'bar'],
+        dumps_to_restore={
+            module.Dump(hook_name='postgresql_databases', data_source_name='foo'),
+            module.Dump(hook_name='postgresql_databases', data_source_name='bar'),
+        },
+        dumps_actually_restored={
+            module.Dump(hook_name='postgresql_databases', data_source_name='foo'),
+            module.Dump(hook_name='postgresql_databases', data_source_name='bar'),
+        },
     )
 
 
-def test_ensure_requested_dumps_restored_with_no_data_sources_raises():
+def test_ensure_requested_dumps_restored_with_no_dumps_raises():
     with pytest.raises(ValueError):
         module.ensure_requested_dumps_restored(
-            restore_names={'postgresql_databases': []},
-            remaining_restore_names={},
-            found_names=[],
+            dumps_to_restore={},
+            dumps_actually_restored={},
         )
 
 
-def test_ensure_requested_dumps_restored_with_missing_data_sources_raises():
+def test_ensure_requested_dumps_restored_with_missing_dumps_raises():
     with pytest.raises(ValueError):
         module.ensure_requested_dumps_restored(
-            restore_names={'postgresql_databases': ['foo']},
-            remaining_restore_names={'postgresql_databases': ['bar']},
-            found_names=['foo'],
+            dumps_to_restore={module.Dump(hook_name='postgresql_databases', data_source_name='foo')},
+            dumps_actually_restored={
+                module.Dump(hook_name='postgresql_databases', data_source_name='bar')
+            },
         )
 
 
 def test_run_restore_restores_each_data_source():
-    restore_names = {
-        'postgresql_databases': ['foo', 'bar'],
+    dumps_to_restore = {
+        module.Dump(hook_name='postgresql_databases', data_source_name='foo'),
+        module.Dump(hook_name='postgresql_databases', data_source_name='bar'),
     }
 
     flexmock(module.borgmatic.config.validate).should_receive('repositories_match').and_return(True)
@@ -479,7 +501,7 @@ def test_run_restore_restores_each_data_source():
         flexmock()
     )
     flexmock(module).should_receive('collect_dumps_from_archive').and_return(flexmock())
-    flexmock(module).should_receive('get_dumps_to_restore').and_return(restore_names)
+    flexmock(module).should_receive('get_dumps_to_restore').and_return(dumps_to_restore)
     flexmock(module).should_receive('get_configured_data_source').and_return(
         ('postgresql_databases', {'name': 'foo'})
     ).and_return(('postgresql_databases', {'name': 'bar'}))
@@ -559,8 +581,9 @@ def test_run_restore_bails_for_non_matching_repository():
 
 
 def test_run_restore_restores_data_source_configured_with_all_name():
-    restore_names = {
-        'postgresql_databases': ['foo', 'bar'],
+    dumps_to_restore = {
+        module.Dump(hook_name='postgresql_databases', data_source_name='foo'),
+        module.Dump(hook_name='postgresql_databases', data_source_name='bar'),
     }
 
     flexmock(module.borgmatic.config.validate).should_receive('repositories_match').and_return(True)
@@ -576,25 +599,18 @@ def test_run_restore_restores_data_source_configured_with_all_name():
         flexmock()
     )
     flexmock(module).should_receive('collect_dumps_from_archive').and_return(flexmock())
-    flexmock(module).should_receive('get_dumps_to_restore').and_return(restore_names)
+    flexmock(module).should_receive('get_dumps_to_restore').and_return(dumps_to_restore)
     flexmock(module).should_receive('get_configured_data_source').with_args(
         config=object,
-        archive_data_source_names=object,
-        hook_name='postgresql_databases',
-        data_source_name='foo',
+        restore_dump=module.Dump(hook_name='postgresql_databases', data_source_name='foo'),
     ).and_return(('postgresql_databases', {'name': 'foo'}))
     flexmock(module).should_receive('get_configured_data_source').with_args(
         config=object,
-        archive_data_source_names=object,
-        hook_name='postgresql_databases',
-        data_source_name='bar',
+        restore_dump=module.Dump(hook_name='postgresql_databases', data_source_name='bar'),
     ).and_return((None, None))
     flexmock(module).should_receive('get_configured_data_source').with_args(
         config=object,
-        archive_data_source_names=object,
-        hook_name='postgresql_databases',
-        data_source_name='bar',
-        configuration_data_source_name='all',
+        restore_dump=module.Dump(hook_name='postgresql_databases', data_source_name='all'),
     ).and_return(('postgresql_databases', {'name': 'bar'}))
     flexmock(module).should_receive('restore_single_dump').with_args(
         repository=object,
@@ -646,8 +662,9 @@ def test_run_restore_restores_data_source_configured_with_all_name():
 
 
 def test_run_restore_skips_missing_data_source():
-    restore_names = {
-        'postgresql_databases': ['foo', 'bar'],
+    dumps_to_restore = {
+        module.Dump(hook_name='postgresql_databases', data_source_name='foo'),
+        module.Dump(hook_name='postgresql_databases', data_source_name='bar'),
     }
 
     flexmock(module.borgmatic.config.validate).should_receive('repositories_match').and_return(True)
@@ -663,25 +680,18 @@ def test_run_restore_skips_missing_data_source():
         flexmock()
     )
     flexmock(module).should_receive('collect_dumps_from_archive').and_return(flexmock())
-    flexmock(module).should_receive('get_dumps_to_restore').and_return(restore_names)
+    flexmock(module).should_receive('get_dumps_to_restore').and_return(dumps_to_restore)
     flexmock(module).should_receive('get_configured_data_source').with_args(
         config=object,
-        archive_data_source_names=object,
-        hook_name='postgresql_databases',
-        data_source_name='foo',
+        restore_dump=module.Dump(hook_name='postgresql_databases', data_source_name='foo'),
     ).and_return(('postgresql_databases', {'name': 'foo'}))
     flexmock(module).should_receive('get_configured_data_source').with_args(
         config=object,
-        archive_data_source_names=object,
-        hook_name='postgresql_databases',
-        data_source_name='bar',
+        restore_dump=module.Dump(hook_name='postgresql_databases', data_source_name='bar'),
     ).and_return((None, None))
     flexmock(module).should_receive('get_configured_data_source').with_args(
         config=object,
-        archive_data_source_names=object,
-        hook_name='postgresql_databases',
-        data_source_name='bar',
-        configuration_data_source_name='all',
+        restore_dump=module.Dump(hook_name='postgresql_databases', data_source_name='all'),
     ).and_return((None, None))
     flexmock(module).should_receive('restore_single_dump').with_args(
         repository=object,
@@ -733,9 +743,9 @@ def test_run_restore_skips_missing_data_source():
 
 
 def test_run_restore_restores_data_sources_from_different_hooks():
-    restore_names = {
-        'postgresql_databases': ['foo'],
-        'mysql_databases': ['bar'],
+    dumps_to_restore = {
+        module.Dump(hook_name='postgresql_databases', data_source_name='foo'),
+        module.Dump(hook_name='mysql_databases', data_source_name='foo'),
     }
 
     flexmock(module.borgmatic.config.validate).should_receive('repositories_match').and_return(True)
@@ -751,18 +761,14 @@ def test_run_restore_restores_data_sources_from_different_hooks():
         flexmock()
     )
     flexmock(module).should_receive('collect_dumps_from_archive').and_return(flexmock())
-    flexmock(module).should_receive('get_dumps_to_restore').and_return(restore_names)
+    flexmock(module).should_receive('get_dumps_to_restore').and_return(dumps_to_restore)
     flexmock(module).should_receive('get_configured_data_source').with_args(
         config=object,
-        archive_data_source_names=object,
-        hook_name='postgresql_databases',
-        data_source_name='foo',
+        restore_dump=module.Dump(hook_name='postgresql_databases', data_source_name='foo'),
     ).and_return(('postgresql_databases', {'name': 'foo'}))
     flexmock(module).should_receive('get_configured_data_source').with_args(
         config=object,
-        archive_data_source_names=object,
-        hook_name='mysql_databases',
-        data_source_name='bar',
+        restore_dump=module.Dump(hook_name='mysql_databases', data_source_name='foo'),
     ).and_return(('mysql_databases', {'name': 'bar'}))
     flexmock(module).should_receive('restore_single_dump').with_args(
         repository=object,