Browse Source

Fix regression in support for PostgreSQL's "directory" dump format (#314).

Dan Helfman 5 years ago
parent
commit
89cb5eb76d

+ 4 - 0
NEWS

@@ -1,3 +1,7 @@
+1.5.5.dev0
+ * #314: Fix regression in support for PostgreSQL's "directory" dump format. Unlike other dump
+   formats, the "directory" dump format does not stream directly to/from Borg.
+
 1.5.4
  * #310: Fix legitimate database dump command errors (exit code 1) not being treated as errors by
    borgmatic.

+ 29 - 2
borgmatic/commands/borgmatic.py

@@ -254,6 +254,14 @@ def run_actions(
         )
     if 'create' in arguments:
         logger.info('{}: Creating archive{}'.format(repository, dry_run_label))
+        dispatch.call_hooks(
+            'remove_database_dumps',
+            hooks,
+            repository,
+            dump.DATABASE_HOOK_NAMES,
+            location,
+            global_arguments.dry_run,
+        )
         active_dumps = dispatch.call_hooks(
             'dump_databases',
             hooks,
@@ -346,6 +354,14 @@ def run_actions(
                     repository, arguments['restore'].archive
                 )
             )
+            dispatch.call_hooks(
+                'remove_database_dumps',
+                hooks,
+                repository,
+                dump.DATABASE_HOOK_NAMES,
+                location,
+                global_arguments.dry_run,
+            )
 
             restore_names = arguments['restore'].databases or []
             if 'all' in restore_names:
@@ -386,10 +402,12 @@ def run_actions(
                         local_path=local_path,
                         remote_path=remote_path,
                         destination_path='/',
-                        extract_to_stdout=True,
+                        # A directory format dump isn't a single file, and therefore can't extract
+                        # to stdout. In this case, the extract_process return value is None.
+                        extract_to_stdout=bool(restore_database.get('format') != 'directory'),
                     )
 
-                    # Run a single database restore, consuming the extract stdout.
+                    # Run a single database restore, consuming the extract stdout (if any).
                     dispatch.call_hooks(
                         'restore_database_dump',
                         {hook_name: [restore_database]},
@@ -400,6 +418,15 @@ def run_actions(
                         extract_process,
                     )
 
+            dispatch.call_hooks(
+                'remove_database_dumps',
+                hooks,
+                repository,
+                dump.DATABASE_HOOK_NAMES,
+                location,
+                global_arguments.dry_run,
+            )
+
             if not restore_names and not found_names:
                 raise ValueError('No databases were found to restore')
 

+ 15 - 9
borgmatic/hooks/dump.py

@@ -33,14 +33,18 @@ def make_database_dump_filename(dump_path, name, hostname=None):
     return os.path.join(os.path.expanduser(dump_path), hostname or 'localhost', name)
 
 
-def create_named_pipe_for_dump(dump_path):
+def create_parent_directory_for_dump(dump_path):
     '''
-    Create a named pipe at the given dump path.
+    Create a directory to contain the given dump path.
     '''
     os.makedirs(os.path.dirname(dump_path), mode=0o700, exist_ok=True)
-    if os.path.exists(dump_path):
-        os.remove(dump_path)
 
+
+def create_named_pipe_for_dump(dump_path):
+    '''
+    Create a named pipe at the given dump path.
+    '''
+    create_parent_directory_for_dump(dump_path)
     os.mkfifo(dump_path, mode=0o600)
 
 
@@ -74,13 +78,15 @@ def remove_database_dumps(dump_path, databases, database_type_name, log_prefix,
         if dry_run:
             continue
 
-        if os.path.isdir(dump_filename):
-            shutil.rmtree(dump_filename)
-        else:
-            os.remove(dump_filename)
+        if os.path.exists(dump_filename):
+            if os.path.isdir(dump_filename):
+                shutil.rmtree(dump_filename)
+            else:
+                os.remove(dump_filename)
+
         dump_file_dir = os.path.dirname(dump_filename)
 
-        if len(os.listdir(dump_file_dir)) == 0:
+        if os.path.exists(dump_file_dir) and len(os.listdir(dump_file_dir)) == 0:
             os.rmdir(dump_file_dir)
 
 

+ 19 - 6
borgmatic/hooks/postgresql.py

@@ -36,6 +36,7 @@ def dump_databases(databases, log_prefix, location_config, dry_run):
             make_dump_path(location_config), name, database.get('hostname')
         )
         all_databases = bool(name == 'all')
+        dump_format = database.get('format', 'custom')
         command = (
             (
                 'pg_dumpall' if all_databases else 'pg_dump',
@@ -46,12 +47,14 @@ def dump_databases(databases, log_prefix, location_config, dry_run):
             + (('--host', database['hostname']) if 'hostname' in database else ())
             + (('--port', str(database['port'])) if 'port' in database else ())
             + (('--username', database['username']) if 'username' in database else ())
-            + (() if all_databases else ('--format', database.get('format', 'custom')))
+            + (() if all_databases else ('--format', dump_format))
+            + (('--file', dump_filename) if dump_format == 'directory' else ())
             + (tuple(database['options'].split(' ')) if 'options' in database else ())
             + (() if all_databases else (name,))
             # Use shell redirection rather than the --file flag to sidestep synchronization issues
-            # when pg_dump/pg_dumpall tries to write to a named pipe.
-            + ('>', dump_filename)
+            # when pg_dump/pg_dumpall tries to write to a named pipe. But for the directory dump
+            # format in a particular, a named destination is required, and redirection doesn't work.
+            + (('>', dump_filename) if dump_format != 'directory' else ())
         )
         extra_environment = {'PGPASSWORD': database['password']} if 'password' in database else None
 
@@ -63,7 +66,10 @@ def dump_databases(databases, log_prefix, location_config, dry_run):
         if dry_run:
             continue
 
-        dump.create_named_pipe_for_dump(dump_filename)
+        if dump_format == 'directory':
+            dump.create_parent_directory_for_dump(dump_filename)
+        else:
+            dump.create_named_pipe_for_dump(dump_filename)
 
         processes.append(
             execute_command(
@@ -104,6 +110,9 @@ def restore_database_dump(database_config, log_prefix, location_config, dry_run,
     Use the given log prefix in any log entries. If this is a dry run, then don't actually restore
     anything. Trigger the given active extract process (an instance of subprocess.Popen) to produce
     output to consume.
+
+    If the extract process is None, then restore the dump from the filesystem rather than from an
+    extract stream.
     '''
     dry_run_label = ' (dry run; not actually restoring anything)' if dry_run else ''
 
@@ -112,6 +121,9 @@ def restore_database_dump(database_config, log_prefix, location_config, dry_run,
 
     database = database_config[0]
     all_databases = bool(database['name'] == 'all')
+    dump_filename = dump.make_database_dump_filename(
+        make_dump_path(location_config), database['name'], database.get('hostname')
+    )
     analyze_command = (
         ('psql', '--no-password', '--quiet')
         + (('--host', database['hostname']) if 'hostname' in database else ())
@@ -130,6 +142,7 @@ def restore_database_dump(database_config, log_prefix, location_config, dry_run,
         + (('--host', database['hostname']) if 'hostname' in database else ())
         + (('--port', str(database['port'])) if 'port' in database else ())
         + (('--username', database['username']) if 'username' in database else ())
+        + (() if extract_process else (dump_filename,))
     )
     extra_environment = {'PGPASSWORD': database['password']} if 'password' in database else None
 
@@ -141,9 +154,9 @@ def restore_database_dump(database_config, log_prefix, location_config, dry_run,
 
     execute_command_with_processes(
         restore_command,
-        [extract_process],
+        [extract_process] if extract_process else [],
         output_log_level=logging.DEBUG,
-        input_file=extract_process.stdout,
+        input_file=extract_process.stdout if extract_process else None,
         extra_environment=extra_environment,
         borg_local_path=location_config.get('local_path', 'borg'),
     )

+ 2 - 1
docs/how-to/backup-your-databases.md

@@ -24,7 +24,8 @@ hooks:
 
 As part of each backup, borgmatic streams a database dump for each configured
 database directly to Borg, so it's included in the backup without consuming
-additional disk space.
+additional disk space. (The one exception is PostgreSQL's "directory" dump
+format, which can't stream and therefore does consume temporary disk space.)
 
 To support this, borgmatic creates temporary named pipes in `~/.borgmatic` by
 default. To customize this path, set the `borgmatic_source_directory` option

+ 1 - 1
setup.py

@@ -1,6 +1,6 @@
 from setuptools import find_packages, setup
 
-VERSION = '1.5.4'
+VERSION = '1.5.5.dev0'
 
 
 setup(

+ 39 - 3
tests/end-to-end/test_database.py

@@ -8,11 +8,13 @@ import tempfile
 import pytest
 
 
-def write_configuration(config_path, repository_path, borgmatic_source_directory):
+def write_configuration(
+    config_path, repository_path, borgmatic_source_directory, postgresql_dump_format='custom'
+):
     '''
     Write out borgmatic configuration into a file at the config path. Set the options so as to work
     for testing. This includes injecting the given repository path, borgmatic source directory for
-    storing database dumps, and encryption passphrase.
+    storing database dumps, dump format (for PostgreSQL), and encryption passphrase.
     '''
     config = '''
 location:
@@ -31,6 +33,7 @@ hooks:
           hostname: postgresql
           username: postgres
           password: test
+          format: {}
         - name: all
           hostname: postgresql
           username: postgres
@@ -45,7 +48,7 @@ hooks:
           username: root
           password: test
 '''.format(
-        config_path, repository_path, borgmatic_source_directory
+        config_path, repository_path, borgmatic_source_directory, postgresql_dump_format
     )
 
     config_file = open(config_path, 'w')
@@ -93,6 +96,39 @@ def test_database_dump_and_restore():
         shutil.rmtree(temporary_directory)
 
 
+def test_database_dump_and_restore_with_directory_format():
+    # Create a Borg repository.
+    temporary_directory = tempfile.mkdtemp()
+    repository_path = os.path.join(temporary_directory, 'test.borg')
+    borgmatic_source_directory = os.path.join(temporary_directory, '.borgmatic')
+
+    original_working_directory = os.getcwd()
+
+    try:
+        config_path = os.path.join(temporary_directory, 'test.yaml')
+        write_configuration(
+            config_path,
+            repository_path,
+            borgmatic_source_directory,
+            postgresql_dump_format='directory',
+        )
+
+        subprocess.check_call(
+            'borgmatic -v 2 --config {} init --encryption repokey'.format(config_path).split(' ')
+        )
+
+        # Run borgmatic to generate a backup archive including a database dump.
+        subprocess.check_call('borgmatic create --config {} -v 2'.format(config_path).split(' '))
+
+        # Restore the database from the archive.
+        subprocess.check_call(
+            'borgmatic --config {} restore --archive latest'.format(config_path).split(' ')
+        )
+    finally:
+        os.chdir(original_working_directory)
+        shutil.rmtree(temporary_directory)
+
+
 def test_database_dump_with_error_causes_borgmatic_to_exit():
     # Create a Borg repository.
     temporary_directory = tempfile.mkdtemp()

+ 0 - 1
tests/integration/test_execute.py

@@ -66,7 +66,6 @@ def test_log_outputs_includes_error_output_in_exception():
             (process,), exclude_stdouts=(), output_log_level=logging.INFO, borg_local_path='borg'
         )
 
-    assert error.value.returncode == 2
     assert error.value.output
 
 

+ 24 - 3
tests/unit/hooks/test_dump.py

@@ -34,10 +34,14 @@ def test_make_database_dump_filename_with_invalid_name_raises():
         module.make_database_dump_filename('databases', 'invalid/name')
 
 
-def test_create_named_pipe_for_dump_does_not_raise():
+def test_create_parent_directory_for_dump_does_not_raise():
     flexmock(module.os).should_receive('makedirs')
-    flexmock(module.os.path).should_receive('exists').and_return(True)
-    flexmock(module.os).should_receive('remove')
+
+    module.create_parent_directory_for_dump('/path/to/parent')
+
+
+def test_create_named_pipe_for_dump_does_not_raise():
+    flexmock(module).should_receive('create_parent_directory_for_dump')
     flexmock(module.os).should_receive('mkfifo')
 
     module.create_named_pipe_for_dump('/path/to/pipe')
@@ -52,6 +56,7 @@ def test_remove_database_dumps_removes_dump_for_each_database():
         'databases', 'bar', None
     ).and_return('databases/localhost/bar')
 
+    flexmock(module.os.path).should_receive('exists').and_return(True)
     flexmock(module.os.path).should_receive('isdir').and_return(False)
     flexmock(module.os).should_receive('remove').with_args('databases/localhost/foo').once()
     flexmock(module.os).should_receive('remove').with_args('databases/localhost/bar').once()
@@ -70,6 +75,7 @@ def test_remove_database_dumps_removes_dump_in_directory_format():
         'databases', 'foo', None
     ).and_return('databases/localhost/foo')
 
+    flexmock(module.os.path).should_receive('exists').and_return(True)
     flexmock(module.os.path).should_receive('isdir').and_return(True)
     flexmock(module.os).should_receive('remove').never()
     flexmock(module.shutil).should_receive('rmtree').with_args('databases/localhost/foo').once()
@@ -87,6 +93,21 @@ def test_remove_database_dumps_with_dry_run_skips_removal():
     module.remove_database_dumps('databases', databases, 'SuperDB', 'test.yaml', dry_run=True)
 
 
+def test_remove_database_dumps_without_dump_present_skips_removal():
+    databases = [{'name': 'foo'}]
+    flexmock(module).should_receive('make_database_dump_filename').with_args(
+        'databases', 'foo', None
+    ).and_return('databases/localhost/foo')
+
+    flexmock(module.os.path).should_receive('exists').and_return(False)
+    flexmock(module.os.path).should_receive('isdir').never()
+    flexmock(module.os).should_receive('remove').never()
+    flexmock(module.shutil).should_receive('rmtree').never()
+    flexmock(module.os).should_receive('rmdir').never()
+
+    module.remove_database_dumps('databases', databases, 'SuperDB', 'test.yaml', dry_run=False)
+
+
 def test_remove_database_dumps_without_databases_does_not_raise():
     module.remove_database_dumps('databases', [], 'SuperDB', 'test.yaml', dry_run=False)
 

+ 51 - 6
tests/unit/hooks/test_postgresql.py

@@ -112,14 +112,15 @@ def test_dump_databases_runs_pg_dump_with_username_and_password():
     assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == [process]
 
 
-def test_dump_databases_runs_pg_dump_with_format():
-    databases = [{'name': 'foo', 'format': 'tar'}]
+def test_dump_databases_runs_pg_dump_with_directory_format():
+    databases = [{'name': 'foo', 'format': 'directory'}]
     process = flexmock()
     flexmock(module).should_receive('make_dump_path').and_return('')
     flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
         'databases/localhost/foo'
     )
-    flexmock(module.dump).should_receive('create_named_pipe_for_dump')
+    flexmock(module.dump).should_receive('create_parent_directory_for_dump')
+    flexmock(module.dump).should_receive('create_named_pipe_for_dump').never()
 
     flexmock(module).should_receive('execute_command').with_args(
         (
@@ -128,10 +129,10 @@ def test_dump_databases_runs_pg_dump_with_format():
             '--clean',
             '--if-exists',
             '--format',
-            'tar',
-            'foo',
-            '>',
+            'directory',
+            '--file',
             'databases/localhost/foo',
+            'foo',
         ),
         shell=True,
         extra_environment=None,
@@ -194,6 +195,8 @@ def test_restore_database_dump_runs_pg_restore():
     database_config = [{'name': 'foo'}]
     extract_process = flexmock(stdout=flexmock())
 
+    flexmock(module).should_receive('make_dump_path')
+    flexmock(module.dump).should_receive('make_database_dump_filename')
     flexmock(module).should_receive('execute_command_with_processes').with_args(
         (
             'pg_restore',
@@ -223,6 +226,8 @@ def test_restore_database_dump_runs_pg_restore():
 def test_restore_database_dump_errors_on_multiple_database_config():
     database_config = [{'name': 'foo'}, {'name': 'bar'}]
 
+    flexmock(module).should_receive('make_dump_path')
+    flexmock(module.dump).should_receive('make_database_dump_filename')
     flexmock(module).should_receive('execute_command_with_processes').never()
     flexmock(module).should_receive('execute_command').never()
 
@@ -236,6 +241,8 @@ def test_restore_database_dump_runs_pg_restore_with_hostname_and_port():
     database_config = [{'name': 'foo', 'hostname': 'database.example.org', 'port': 5433}]
     extract_process = flexmock(stdout=flexmock())
 
+    flexmock(module).should_receive('make_dump_path')
+    flexmock(module.dump).should_receive('make_database_dump_filename')
     flexmock(module).should_receive('execute_command_with_processes').with_args(
         (
             'pg_restore',
@@ -282,6 +289,8 @@ def test_restore_database_dump_runs_pg_restore_with_username_and_password():
     database_config = [{'name': 'foo', 'username': 'postgres', 'password': 'trustsome1'}]
     extract_process = flexmock(stdout=flexmock())
 
+    flexmock(module).should_receive('make_dump_path')
+    flexmock(module.dump).should_receive('make_database_dump_filename')
     flexmock(module).should_receive('execute_command_with_processes').with_args(
         (
             'pg_restore',
@@ -324,6 +333,8 @@ def test_restore_database_dump_runs_psql_for_all_database_dump():
     database_config = [{'name': 'all'}]
     extract_process = flexmock(stdout=flexmock())
 
+    flexmock(module).should_receive('make_dump_path')
+    flexmock(module.dump).should_receive('make_database_dump_filename')
     flexmock(module).should_receive('execute_command_with_processes').with_args(
         ('psql', '--no-password'),
         processes=[extract_process],
@@ -344,8 +355,42 @@ def test_restore_database_dump_runs_psql_for_all_database_dump():
 def test_restore_database_dump_with_dry_run_skips_restore():
     database_config = [{'name': 'foo'}]
 
+    flexmock(module).should_receive('make_dump_path')
+    flexmock(module.dump).should_receive('make_database_dump_filename')
     flexmock(module).should_receive('execute_command_with_processes').never()
 
     module.restore_database_dump(
         database_config, 'test.yaml', {}, dry_run=True, extract_process=flexmock()
     )
+
+
+def test_restore_database_dump_without_extract_process_restores_from_disk():
+    database_config = [{'name': 'foo'}]
+
+    flexmock(module).should_receive('make_dump_path')
+    flexmock(module.dump).should_receive('make_database_dump_filename').and_return('/dump/path')
+    flexmock(module).should_receive('execute_command_with_processes').with_args(
+        (
+            'pg_restore',
+            '--no-password',
+            '--if-exists',
+            '--exit-on-error',
+            '--clean',
+            '--dbname',
+            'foo',
+            '/dump/path',
+        ),
+        processes=[],
+        output_log_level=logging.DEBUG,
+        input_file=None,
+        extra_environment=None,
+        borg_local_path='borg',
+    ).once()
+    flexmock(module).should_receive('execute_command').with_args(
+        ('psql', '--no-password', '--quiet', '--dbname', 'foo', '--command', 'ANALYZE'),
+        extra_environment=None,
+    ).once()
+
+    module.restore_database_dump(
+        database_config, 'test.yaml', {}, dry_run=False, extract_process=None
+    )