Selaa lähdekoodia

Initial work on supporting same-named databases with different ports, hosts, or hooks (#418).

Dan Helfman 10 kuukautta sitten
vanhempi
sitoutus
b1e343f15c

+ 4 - 0
NEWS

@@ -1,3 +1,7 @@
+1.9.5.dev0
+ * #418: Back up and restore databases that have the same name but different ports, hostnames,
+   or hooks.
+
 1.9.4
  * #80 (beta): Add an LVM hook for snapshotting and backing up LVM logical volumes. See the
    documentation for more information:

+ 209 - 153
borgmatic/actions/restore.py

@@ -1,3 +1,4 @@
+import collections
 import copy
 import logging
 import os
@@ -17,30 +18,75 @@ import borgmatic.hooks.dispatch
 logger = logging.getLogger(__name__)
 
 
-UNSPECIFIED_HOOK = object()
+UNSPECIFIED = object()
 
 
-def get_configured_data_source(
-    config,
-    archive_data_source_names,
-    hook_name,
-    data_source_name,
-    configuration_data_source_name=None,
+class Dump(
+    collections.namedtuple(
+        'Dump',
+        ('hook_name', 'data_source_name', 'hostname', 'port'),
+        defaults=('localhost', None),
+    )
 ):
+    def __eq__(self, other):
+        '''
+        Compare two namedtuples for equality while supporting a field value of UNSPECIFIED, which
+        indicates that the field should match any value.
+        '''
+        for field_name in self._fields:
+            self_value = getattr(self, field_name)
+            other_value = getattr(other, field_name)
+
+            if self_value == UNSPECIFIED or other_value == UNSPECIFIED:
+                continue
+
+            if self_value != other_value:
+                return False
+
+        return True
+
+    def __ne__(self, other):
+        return not self == other
+
+    def __lt__(self, other):
+        return self.data_source_name < other.data_source_name
+
+    def __gt__(self, other):
+        return self.data_source_name > other.data_source_name
+
+    def __hash__(self):
+        return hash(tuple(self))
+
+
+def render_dump_metadata(dump):
+    '''
+    Given a Dump instance, make a display string describing it for use in log messages.
+    '''
+    name = dump.data_source_name if dump.data_source_name != UNSPECIFIED else 'unspecified'
+    hostname = dump.hostname or 'localhost'
+    port = dump.port if dump.port != UNSPECIFIED else None
+
+    if port:
+        metadata = f'{name}@:{port}' if hostname is UNSPECIFIED else f'{name}@{hostname}:{port}'
+    else:
+        metadata = f'{name}' if hostname is UNSPECIFIED else f'{name}@{hostname}'
+
+    if dump.hook_name not in (None, UNSPECIFIED):
+        return f'{metadata} ({dump.hook_name})'
+
+    return metadata
+
+
+def get_configured_data_source(config, restore_dump):
     '''
-    Find the first data source with the given hook name and data source name in the configuration
-    dict and the given archive data source names dict (from hook name to data source names contained
-    in a particular backup archive). If UNSPECIFIED_HOOK is given as the hook name, search all data
-    source hooks for the named data source. If a configuration data source name is given, use that
-    instead of the data source name to lookup the data source in the given hooks configuration.
+    Search in the given configuration dict for dumps corresponding to the given dump to restore. If
+    there are multiple matches, error. If UNSPECIFIED is given as any field in the restore dump,
+    then that can match any valid value.
 
     Return the found data source as a tuple of (found hook name, data source configuration dict) or
     (None, None) if not found.
     '''
-    if not configuration_data_source_name:
-        configuration_data_source_name = data_source_name
-
-    if hook_name == UNSPECIFIED_HOOK:
+    if restore_dump.hook_name == UNSPECIFIED:
         hooks_to_search = {
             hook_name: value
             for (hook_name, value) in config.items()
@@ -49,21 +95,33 @@ def get_configured_data_source(
         }
     else:
         try:
-            hooks_to_search = {hook_name: config[hook_name]}
+            hooks_to_search = {restore_dump.hook_name: config[restore_dump.hook_name]}
         except KeyError:
             return (None, None)
 
-    return next(
-        (
-            (name, hook_data_source)
-            for (name, hook) in hooks_to_search.items()
-            for hook_data_source in hook
-            if hook_data_source['name'] == configuration_data_source_name
-            and data_source_name in archive_data_source_names.get(name, [])
-        ),
-        (None, None),
+    matching_dumps = tuple(
+        (hook_name, hook_data_source)
+        for (hook_name, hook) in hooks_to_search.items()
+        for hook_data_source in hook
+        if Dump(
+            hook_name,
+            hook_data_source.get('name'),
+            hook_data_source.get('hostname'),
+            hook_data_source.get('port'),
+        )
+        == restore_dump
     )
 
+    if not matching_dumps:
+        return (None, None)
+
+    if len(matching_dumps) > 1:
+        raise ValueError(
+            f'Cannot restore data source {render_dump_metadata(restore_dump)} because there are multiple matching dumps in the archive. Try adding additional flags to disambiguate.'
+        )
+
+    return matching_dumps[0]
+
 
 def strip_path_prefix_from_extracted_dump_destination(
     destination_path, borgmatic_runtime_directory
@@ -98,7 +156,7 @@ def strip_path_prefix_from_extracted_dump_destination(
         break
 
 
-def restore_single_data_source(
+def restore_single_dump(
     repository,
     config,
     local_borg_version,
@@ -116,8 +174,12 @@ def restore_single_data_source(
     username/password as connection params, and a configured data source configuration dict, restore
     that data source from the archive.
     '''
+    dump_metadata = render_dump_metadata(
+        Dump(hook_name, data_source["name"], data_source.get("hostname"), data_source.get("port"))
+    )
+
     logger.info(
-        f'{repository.get("label", repository["path"])}: Restoring data source {data_source["name"]}'
+        f'{repository.get("label", repository["path"])}: Restoring data source {dump_metadata}'
     )
 
     dump_patterns = borgmatic.hooks.dispatch.call_hooks(
@@ -180,7 +242,7 @@ def restore_single_data_source(
     )
 
 
-def collect_archive_data_source_names(
+def collect_dumps_from_archive(
     repository,
     archive,
     config,
@@ -192,17 +254,17 @@ def collect_archive_data_source_names(
 ):
     '''
     Given a local or remote repository path, a resolved archive name, a configuration dict, the
-    local Borg version, global_arguments an argparse.Namespace, local and remote Borg paths, and the
-    borgmatic runtime directory, query the archive for the names of data sources it contains as
-    dumps and return them as a dict from hook name to a sequence of data source names.
+    local Borg version, global arguments as an argparse.Namespace, local and remote Borg paths, and
+    the borgmatic runtime directory, query the archive for the names of data source dumps it
+    contains and return them as a set of Dump instances.
     '''
     borgmatic_source_directory = str(
         pathlib.Path(borgmatic.config.paths.get_borgmatic_source_directory(config))
     )
 
     # Probe for the data source dumps in multiple locations, as the default location has moved to
-    # the borgmatic runtime directory (which get stored as just "/borgmatic" with Borg 1.4+). But we
-    # still want to support reading dumps from previously created archives as well.
+    # the borgmatic runtime directory (which gets stored as just "/borgmatic" with Borg 1.4+). But
+    # we still want to support reading dumps from previously created archives as well.
     dump_paths = borgmatic.borg.list.capture_archive_listing(
         repository,
         archive,
@@ -224,9 +286,8 @@ def collect_archive_data_source_names(
         remote_path=remote_path,
     )
 
-    # Determine the data source names corresponding to the dumps found in the archive and
-    # add them to restore_names.
-    archive_data_source_names = {}
+    # Parse out the details for the dumps found in the archive.
+    dumps_from_archive = set()
 
     for dump_path in dump_paths:
         if not dump_path:
@@ -238,96 +299,112 @@ def collect_archive_data_source_names(
             borgmatic_source_directory,
         ):
             try:
-                (hook_name, _, data_source_name) = dump_path.split(base_directory + os.path.sep, 1)[
-                    1
-                ].split(os.path.sep)[0:3]
+                (hook_name, host_and_port, data_source_name) = dump_path.split(
+                    base_directory + os.path.sep, 1
+                )[1].split(os.path.sep)[0:3]
             except (ValueError, IndexError):
-                pass
-            else:
-                if data_source_name not in archive_data_source_names.get(hook_name, []):
-                    archive_data_source_names.setdefault(hook_name, []).extend([data_source_name])
-                    break
+                continue
+
+            parts = host_and_port.split(':', 1)
+
+            if len(parts) == 1:
+                parts += (None,)
+
+            (hostname, port) = parts
+
+            try:
+                port = int(port)
+            except (ValueError, TypeError):
+                port = None
+
+            dumps_from_archive.add(Dump(hook_name, data_source_name, hostname, port))
+
+            break
         else:
             logger.warning(
                 f'{repository}: Ignoring invalid data source dump path "{dump_path}" in archive {archive}'
             )
 
-    return archive_data_source_names
+    return dumps_from_archive
 
 
-def find_data_sources_to_restore(requested_data_source_names, archive_data_source_names):
+def get_dumps_to_restore(restore_arguments, dumps_from_archive):
     '''
-    Given a sequence of requested data source names to restore and a dict of hook name to the names
-    of data sources found in an archive, return an expanded sequence of data source names to
-    restore, replacing "all" with actual data source names as appropriate.
+    Given restore arguments as an argparse.Namespace instance indicating which dumps to restore and
+    a set of Dump instances representing the dumps found in an archive, return a set of Dump
+    instances to restore. As part of this, replace any Dump having a data source name of "all" with
+    multiple named Dump instances as appropriate.
 
     Raise ValueError if any of the requested data source names cannot be found in the archive.
     '''
-    # A map from data source hook name to the data source names to restore for that hook.
-    restore_names = (
-        {UNSPECIFIED_HOOK: requested_data_source_names}
-        if requested_data_source_names
-        else {UNSPECIFIED_HOOK: ['all']}
+    # The set of dumps requested for restore, or a single wildcard "all" dump if none were named.
+    dumps_to_restore = (
+        {
+            Dump(
+                hook_name=(restore_arguments.hook if restore_arguments.hook.endswith('_databases') else f'{restore_arguments.hook}_databases') if restore_arguments.hook else UNSPECIFIED,
+                data_source_name=name,
+                hostname=restore_arguments.original_hostname or 'localhost',
+                port=restore_arguments.original_port
+            )
+            for name in restore_arguments.data_sources
+        }
+        if restore_arguments.data_sources
+        else {
+            Dump(
+                hook_name=UNSPECIFIED,
+                data_source_name='all',
+                hostname=UNSPECIFIED,
+                port=UNSPECIFIED,
+            )
+        }
     )
 
-    # If "all" is in restore_names, then replace it with the names of dumps found within the
-    # archive.
-    if 'all' in restore_names[UNSPECIFIED_HOOK]:
-        restore_names[UNSPECIFIED_HOOK].remove('all')
-
-        for hook_name, data_source_names in archive_data_source_names.items():
-            restore_names.setdefault(hook_name, []).extend(data_source_names)
+    # If "all" is in dumps_to_restore, then replace it with named dumps found within the archive.
+    try:
+        all_dump = next(dump for dump in dumps_to_restore if dump.data_source_name == 'all')
+    except StopIteration:
+        pass
+    else:
+        dumps_to_restore.remove(all_dump)
 
-            # If a data source is to be restored as part of "all", then remove it from restore names
-            # so it doesn't get restored twice.
-            for data_source_name in data_source_names:
-                if data_source_name in restore_names[UNSPECIFIED_HOOK]:
-                    restore_names[UNSPECIFIED_HOOK].remove(data_source_name)
+        for dump in dumps_from_archive:
+            if all_dump.hook_name == UNSPECIFIED or dump.hook_name == all_dump.hook_name:
+                dumps_to_restore.add(dump)
 
-    if not restore_names[UNSPECIFIED_HOOK]:
-        restore_names.pop(UNSPECIFIED_HOOK)
+    missing_dumps = {
+        restore_dump
+        for restore_dump in dumps_to_restore
+        if all(restore_dump != archive_dump for archive_dump in dumps_from_archive)
+    }
 
-    combined_restore_names = set(
-        name for data_source_names in restore_names.values() for name in data_source_names
-    )
-    combined_archive_data_source_names = set(
-        name
-        for data_source_names in archive_data_source_names.values()
-        for name in data_source_names
-    )
+    if missing_dumps:
+        rendered_dumps = ', '.join(f'{render_dump_metadata(dump)}' for dump in sorted(missing_dumps))
 
-    missing_names = sorted(set(combined_restore_names) - combined_archive_data_source_names)
-    if missing_names:
-        joined_names = ', '.join(f'"{name}"' for name in missing_names)
         raise ValueError(
-            f"Cannot restore data source{'s' if len(missing_names) > 1 else ''} {joined_names} missing from archive"
+            f"Cannot restore data source{'s' if len(missing_dumps) > 1 else ''} {rendered_dumps} missing from archive"
         )
 
-    return restore_names
+    return dumps_to_restore
 
 
-def ensure_data_sources_found(restore_names, remaining_restore_names, found_names):
+def ensure_requested_dumps_restored(dumps_to_restore, dumps_actually_restored):
     '''
-    Given a dict from hook name to data source names to restore, a dict from hook name to remaining
-    data source names to restore, and a sequence of found (actually restored) data source names,
-    raise ValueError if requested data source to restore were missing from the archive and/or
-    configuration.
+    Given a set of requested dumps to restore and a set of dumps actually restored, raise ValueError
+    if any requested dumps to restore weren't restored, indicating that they were missing from the
+    archive and/or configuration.
     '''
-    combined_restore_names = set(
-        name
-        for data_source_names in tuple(restore_names.values())
-        + tuple(remaining_restore_names.values())
-        for name in data_source_names
+    if not dumps_actually_restored:
+        raise ValueError('No data source dumps were found to restore')
+
+    missing_dumps = sorted(
+        dumps_to_restore - dumps_actually_restored, key=lambda dump: dump.data_source_name
     )
 
-    if not combined_restore_names and not found_names:
-        raise ValueError('No data source dumps were found to restore')
+    if missing_dumps:
+        rendered_dumps = ', '.join(f'{render_dump_metadata(dump)}' for dump in missing_dumps)
 
-    missing_names = sorted(set(combined_restore_names) - set(found_names))
-    if missing_names:
-        joined_names = ', '.join(f'"{name}"' for name in missing_names)
         raise ValueError(
-            f"Cannot restore data source{'s' if len(missing_names) > 1 else ''} {joined_names} missing from borgmatic's configuration"
+            f"Cannot restore data source{'s' if len(missing_dumps) > 1 else ''} {rendered_dumps} missing from borgmatic's configuration"
         )
 
 
@@ -375,7 +452,7 @@ def run_restore(
             local_path,
             remote_path,
         )
-        archive_data_source_names = collect_archive_data_source_names(
+        dumps_from_archive = collect_dumps_from_archive(
             repository['path'],
             archive_name,
             config,
@@ -385,11 +462,9 @@ def run_restore(
             remote_path,
             borgmatic_runtime_directory,
         )
-        restore_names = find_data_sources_to_restore(
-            restore_arguments.data_sources, archive_data_source_names
-        )
-        found_names = set()
-        remaining_restore_names = {}
+        dumps_to_restore = get_dumps_to_restore(restore_arguments, dumps_from_archive)
+
+        dumps_actually_restored = set()
         connection_params = {
             'hostname': restore_arguments.hostname,
             'port': restore_arguments.port,
@@ -398,61 +473,42 @@ def run_restore(
             'restore_path': restore_arguments.restore_path,
         }
 
-        for hook_name, data_source_names in restore_names.items():
-            for data_source_name in data_source_names:
-                found_hook_name, found_data_source = get_configured_data_source(
-                    config, archive_data_source_names, hook_name, data_source_name
-                )
-
-                if not found_data_source:
-                    remaining_restore_names.setdefault(found_hook_name or hook_name, []).append(
-                        data_source_name
-                    )
-                    continue
-
-                found_names.add(data_source_name)
-                restore_single_data_source(
-                    repository,
-                    config,
-                    local_borg_version,
-                    global_arguments,
-                    local_path,
-                    remote_path,
-                    archive_name,
-                    found_hook_name or hook_name,
-                    dict(found_data_source, **{'schemas': restore_arguments.schemas}),
-                    connection_params,
-                    borgmatic_runtime_directory,
-                )
+        # Restore each dump.
+        for restore_dump in dumps_to_restore:
+            found_hook_name, found_data_source = get_configured_data_source(
+                config,
+                restore_dump,
+            )
 
-        # For any data sources that weren't found via exact matches in the configuration, try to
-        # fallback to "all" entries.
-        for hook_name, data_source_names in remaining_restore_names.items():
-            for data_source_name in data_source_names:
+            # For any data sources that weren't found via exact matches in the configuration, try to
+            # fall back to "all" entries.
+            if not found_data_source:
                 found_hook_name, found_data_source = get_configured_data_source(
-                    config, archive_data_source_names, hook_name, data_source_name, 'all'
+                    config,
+                    Dump(restore_dump.hook_name, 'all', restore_dump.hostname, restore_dump.port),
                 )
 
                 if not found_data_source:
                     continue
 
-                found_names.add(data_source_name)
-                data_source = copy.copy(found_data_source)
-                data_source['name'] = data_source_name
-
-                restore_single_data_source(
-                    repository,
-                    config,
-                    local_borg_version,
-                    global_arguments,
-                    local_path,
-                    remote_path,
-                    archive_name,
-                    found_hook_name or hook_name,
-                    dict(data_source, **{'schemas': restore_arguments.schemas}),
-                    connection_params,
-                    borgmatic_runtime_directory,
-                )
+                found_data_source = dict(found_data_source)
+                found_data_source['name'] = restore_dump.data_source_name
+
+            dumps_actually_restored.add(restore_dump)
+
+            restore_single_dump(
+                repository,
+                config,
+                local_borg_version,
+                global_arguments,
+                local_path,
+                remote_path,
+                archive_name,
+                found_hook_name or restore_dump.hook_name,
+                dict(found_data_source, **{'schemas': restore_arguments.schemas}),
+                connection_params,
+                borgmatic_runtime_directory,
+            )
 
         borgmatic.hooks.dispatch.call_hooks_even_if_unconfigured(
             'remove_data_source_dumps',
@@ -463,4 +519,4 @@ def run_restore(
             global_arguments.dry_run,
         )
 
-    ensure_data_sources_found(restore_names, remaining_restore_names, found_names)
+    ensure_requested_dumps_restored(dumps_to_restore, dumps_actually_restored)

+ 14 - 1
borgmatic/commands/arguments.py

@@ -1153,7 +1153,7 @@ def make_parsers():
         metavar='NAME',
         dest='data_sources',
         action='append',
-        help="Name of data source (e.g. database) to restore from archive, must be defined in borgmatic's configuration, can specify flag multiple times, defaults to all data sources in the archive",
+        help="Name of data source (e.g. database) to restore from the archive, must be defined in borgmatic's configuration, can specify the flag multiple times, defaults to all data sources in the archive",
     )
     restore_group.add_argument(
         '--schema',
@@ -1182,6 +1182,19 @@ def make_parsers():
         '--restore-path',
         help='Path to restore SQLite database dumps to. Defaults to the "restore_path" option in borgmatic\'s configuration',
     )
+    restore_group.add_argument(
+        '--original-hostname',
+        help="The hostname where the dump to restore came from, only necessary if you need to disambiguate dumps",
+    )
+    restore_group.add_argument(
+        '--original-port',
+        type=int,
+        help="The port where the dump to restore came from, only necessary if you need to disambiguate dumps",
+    )
+    restore_group.add_argument(
+        '--hook',
+        help="The name of the data source hook for the dump to restore, only necessary if you need to disambiguate dumps",
+    )
     restore_group.add_argument(
         '-h', '--help', action='help', help='Show this help message and exit'
     )

+ 6 - 4
borgmatic/hooks/data_source/dump.py

@@ -16,17 +16,19 @@ def make_data_source_dump_path(borgmatic_runtime_directory, data_source_hook_nam
     return os.path.join(borgmatic_runtime_directory, data_source_hook_name)
 
 
-def make_data_source_dump_filename(dump_path, name, hostname=None):
+def make_data_source_dump_filename(dump_path, name, hostname=None, port=None):
     '''
-    Based on the given dump directory path, data source name, and hostname, return a filename to use
-    for the data source dump. The hostname defaults to localhost.
+    Based on the given dump directory path, data source name, hostname, and port, return a filename
+    to use for the data source dump. The hostname defaults to localhost.
 
     Raise ValueError if the data source name is invalid.
     '''
     if os.path.sep in name:
         raise ValueError(f'Invalid data source name {name}')
 
-    return os.path.join(dump_path, hostname or 'localhost', name)
+    return os.path.join(
+        dump_path, (hostname or 'localhost') + (f':{port}' if port is not None else ''), name
+    )
 
 
 def create_parent_directory_for_dump(dump_path):

+ 4 - 1
borgmatic/hooks/data_source/mariadb.py

@@ -73,7 +73,10 @@ def execute_dump_command(
     '''
     database_name = database['name']
     dump_filename = dump.make_data_source_dump_filename(
-        dump_path, database['name'], database.get('hostname')
+        dump_path,
+        database['name'],
+        database.get('hostname'),
+        database.get('port'),
     )
 
     if os.path.exists(dump_filename):

+ 4 - 1
borgmatic/hooks/data_source/mongodb.py

@@ -51,7 +51,10 @@ def dump_data_sources(
     for database in databases:
         name = database['name']
         dump_filename = dump.make_data_source_dump_filename(
-            make_dump_path(borgmatic_runtime_directory), name, database.get('hostname')
+            make_dump_path(borgmatic_runtime_directory),
+            name,
+            database.get('hostname'),
+            database.get('port'),
         )
         dump_format = database.get('format', 'archive')
 

+ 4 - 1
borgmatic/hooks/data_source/mysql.py

@@ -73,7 +73,10 @@ def execute_dump_command(
     '''
     database_name = database['name']
     dump_filename = dump.make_data_source_dump_filename(
-        dump_path, database['name'], database.get('hostname')
+        dump_path,
+        database['name'],
+        database.get('hostname'),
+        database.get('port'),
     )
 
     if os.path.exists(dump_filename):

+ 4 - 1
borgmatic/hooks/data_source/postgresql.py

@@ -151,7 +151,10 @@ def dump_data_sources(
                 for part in shlex.split(database.get('pg_dump_command') or default_dump_command)
             )
             dump_filename = dump.make_data_source_dump_filename(
-                dump_path, database_name, database.get('hostname')
+                dump_path,
+                database_name,
+                database.get('hostname'),
+                database.get('port'),
             )
             if os.path.exists(dump_filename):
                 logger.warning(

+ 1 - 1
pyproject.toml

@@ -1,6 +1,6 @@
 [project]
 name = "borgmatic"
-version = "1.9.4"
+version = "1.9.5.dev0"
 authors = [
   { name="Dan Helfman", email="witten@torsion.org" },
 ]