Browse Source

Fix for potential data loss (data not getting backed up) when dumping large "directory" format PostgreSQL/MongoDB databases (#643).

Dan Helfman 2 years ago
parent
commit
f5a448c7c2

+ 9 - 0
NEWS

@@ -1,3 +1,12 @@
+1.7.7
+ * #643: Fix for potential data loss (data not getting backed up) when dumping large "directory"
+   format PostgreSQL/MongoDB databases. Prior to the fix, these dumps would not finish writing to
+   disk before Borg consumed them. Now, the dumping process completes before Borg starts. This only
+   applies to "directory" format databases; other formats still stream to Borg without using
+   temporary disk space.
+ * Fix MongoDB "directory" format to work with mongodump/mongorestore without error. Prior to this
+   fix, only the "archive" format worked.
+
 1.7.6
 1.7.6
  * #393, #438, #560: Optionally dump "all" PostgreSQL/MySQL databases to separate files instead of
  * #393, #438, #560: Optionally dump "all" PostgreSQL/MySQL databases to separate files instead of
    one combined dump file, allowing more convenient restores of individual databases. You can enable
    one combined dump file, allowing more convenient restores of individual databases. You can enable

+ 12 - 9
borgmatic/hooks/mongodb.py

@@ -45,13 +45,14 @@ def dump_databases(databases, log_prefix, location_config, dry_run):
         if dry_run:
         if dry_run:
             continue
             continue
 
 
+        command = build_dump_command(database, dump_filename, dump_format)
+
         if dump_format == 'directory':
         if dump_format == 'directory':
             dump.create_parent_directory_for_dump(dump_filename)
             dump.create_parent_directory_for_dump(dump_filename)
+            execute_command(command, shell=True)
         else:
         else:
             dump.create_named_pipe_for_dump(dump_filename)
             dump.create_named_pipe_for_dump(dump_filename)
-
-        command = build_dump_command(database, dump_filename, dump_format)
-        processes.append(execute_command(command, shell=True, run_to_completion=False))
+            processes.append(execute_command(command, shell=True, run_to_completion=False))
 
 
     return processes
     return processes
 
 
@@ -61,9 +62,9 @@ def build_dump_command(database, dump_filename, dump_format):
     Return the mongodump command from a single database configuration.
     Return the mongodump command from a single database configuration.
     '''
     '''
     all_databases = database['name'] == 'all'
     all_databases = database['name'] == 'all'
-    command = ['mongodump', '--archive']
+    command = ['mongodump']
     if dump_format == 'directory':
     if dump_format == 'directory':
-        command.append(dump_filename)
+        command.extend(('--out', dump_filename))
     if 'hostname' in database:
     if 'hostname' in database:
         command.extend(('--host', database['hostname']))
         command.extend(('--host', database['hostname']))
     if 'port' in database:
     if 'port' in database:
@@ -79,7 +80,7 @@ def build_dump_command(database, dump_filename, dump_format):
     if 'options' in database:
     if 'options' in database:
         command.extend(database['options'].split(' '))
         command.extend(database['options'].split(' '))
     if dump_format != 'directory':
     if dump_format != 'directory':
-        command.extend(('>', dump_filename))
+        command.extend(('--archive', '>', dump_filename))
     return command
     return command
 
 
 
 
@@ -145,9 +146,11 @@ def build_restore_command(extract_process, database, dump_filename):
     '''
     '''
     Return the mongorestore command from a single database configuration.
     Return the mongorestore command from a single database configuration.
     '''
     '''
-    command = ['mongorestore', '--archive']
-    if not extract_process:
-        command.append(dump_filename)
+    command = ['mongorestore']
+    if extract_process:
+        command.append('--archive')
+    else:
+        command.extend(('--dir', dump_filename))
     if database['name'] != 'all':
     if database['name'] != 'all':
         command.extend(('--drop', '--db', database['name']))
         command.extend(('--drop', '--db', database['name']))
     if 'hostname' in database:
     if 'hostname' in database:

+ 10 - 8
borgmatic/hooks/postgresql.py

@@ -141,17 +141,19 @@ def dump_databases(databases, log_prefix, location_config, dry_run):
 
 
             if dump_format == 'directory':
             if dump_format == 'directory':
                 dump.create_parent_directory_for_dump(dump_filename)
                 dump.create_parent_directory_for_dump(dump_filename)
+                execute_command(
+                    command, shell=True, extra_environment=extra_environment,
+                )
             else:
             else:
                 dump.create_named_pipe_for_dump(dump_filename)
                 dump.create_named_pipe_for_dump(dump_filename)
-
-            processes.append(
-                execute_command(
-                    command,
-                    shell=True,
-                    extra_environment=extra_environment,
-                    run_to_completion=False,
+                processes.append(
+                    execute_command(
+                        command,
+                        shell=True,
+                        extra_environment=extra_environment,
+                        run_to_completion=False,
+                    )
                 )
                 )
-            )
 
 
     return processes
     return processes
 
 

+ 1 - 1
setup.py

@@ -1,6 +1,6 @@
 from setuptools import find_packages, setup
 from setuptools import find_packages, setup
 
 
-VERSION = '1.7.6'
+VERSION = '1.7.7'
 
 
 
 
 setup(
 setup(

+ 3 - 0
tests/end-to-end/test_database.py

@@ -14,6 +14,7 @@ def write_configuration(
     repository_path,
     repository_path,
     borgmatic_source_directory,
     borgmatic_source_directory,
     postgresql_dump_format='custom',
     postgresql_dump_format='custom',
+    mongodb_dump_format='archive',
 ):
 ):
     '''
     '''
     Write out borgmatic configuration into a file at the config path. Set the options so as to work
     Write out borgmatic configuration into a file at the config path. Set the options so as to work
@@ -67,6 +68,7 @@ hooks:
           username: root
           username: root
           password: test
           password: test
           authentication_database: admin
           authentication_database: admin
+          format: {mongodb_dump_format}
         - name: all
         - name: all
           hostname: mongodb
           hostname: mongodb
           username: root
           username: root
@@ -136,6 +138,7 @@ def test_database_dump_and_restore_with_directory_format():
             repository_path,
             repository_path,
             borgmatic_source_directory,
             borgmatic_source_directory,
             postgresql_dump_format='directory',
             postgresql_dump_format='directory',
+            mongodb_dump_format='directory',
         )
         )
 
 
         subprocess.check_call(
         subprocess.check_call(

+ 9 - 12
tests/unit/hooks/test_mongodb.py

@@ -17,7 +17,7 @@ def test_dump_databases_runs_mongodump_for_each_database():
 
 
     for name, process in zip(('foo', 'bar'), processes):
     for name, process in zip(('foo', 'bar'), processes):
         flexmock(module).should_receive('execute_command').with_args(
         flexmock(module).should_receive('execute_command').with_args(
-            ['mongodump', '--archive', '--db', name, '>', 'databases/localhost/{}'.format(name)],
+            ['mongodump', '--db', name, '--archive', '>', 'databases/localhost/{}'.format(name)],
             shell=True,
             shell=True,
             run_to_completion=False,
             run_to_completion=False,
         ).and_return(process).once()
         ).and_return(process).once()
@@ -49,13 +49,13 @@ def test_dump_databases_runs_mongodump_with_hostname_and_port():
     flexmock(module).should_receive('execute_command').with_args(
     flexmock(module).should_receive('execute_command').with_args(
         [
         [
             'mongodump',
             'mongodump',
-            '--archive',
             '--host',
             '--host',
             'database.example.org',
             'database.example.org',
             '--port',
             '--port',
             '5433',
             '5433',
             '--db',
             '--db',
             'foo',
             'foo',
+            '--archive',
             '>',
             '>',
             'databases/database.example.org/foo',
             'databases/database.example.org/foo',
         ],
         ],
@@ -85,7 +85,6 @@ def test_dump_databases_runs_mongodump_with_username_and_password():
     flexmock(module).should_receive('execute_command').with_args(
     flexmock(module).should_receive('execute_command').with_args(
         [
         [
             'mongodump',
             'mongodump',
-            '--archive',
             '--username',
             '--username',
             'mongo',
             'mongo',
             '--password',
             '--password',
@@ -94,6 +93,7 @@ def test_dump_databases_runs_mongodump_with_username_and_password():
             'admin',
             'admin',
             '--db',
             '--db',
             'foo',
             'foo',
+            '--archive',
             '>',
             '>',
             'databases/localhost/foo',
             'databases/localhost/foo',
         ],
         ],
@@ -106,7 +106,6 @@ def test_dump_databases_runs_mongodump_with_username_and_password():
 
 
 def test_dump_databases_runs_mongodump_with_directory_format():
 def test_dump_databases_runs_mongodump_with_directory_format():
     databases = [{'name': 'foo', 'format': 'directory'}]
     databases = [{'name': 'foo', 'format': 'directory'}]
-    process = flexmock()
     flexmock(module).should_receive('make_dump_path').and_return('')
     flexmock(module).should_receive('make_dump_path').and_return('')
     flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
     flexmock(module.dump).should_receive('make_database_dump_filename').and_return(
         'databases/localhost/foo'
         'databases/localhost/foo'
@@ -115,12 +114,10 @@ def test_dump_databases_runs_mongodump_with_directory_format():
     flexmock(module.dump).should_receive('create_named_pipe_for_dump').never()
     flexmock(module.dump).should_receive('create_named_pipe_for_dump').never()
 
 
     flexmock(module).should_receive('execute_command').with_args(
     flexmock(module).should_receive('execute_command').with_args(
-        ['mongodump', '--archive', 'databases/localhost/foo', '--db', 'foo'],
-        shell=True,
-        run_to_completion=False,
-    ).and_return(process).once()
+        ['mongodump', '--out', 'databases/localhost/foo', '--db', 'foo'], shell=True,
+    ).and_return(flexmock()).once()
 
 
-    assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == [process]
+    assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == []
 
 
 
 
 def test_dump_databases_runs_mongodump_with_options():
 def test_dump_databases_runs_mongodump_with_options():
@@ -133,7 +130,7 @@ def test_dump_databases_runs_mongodump_with_options():
     flexmock(module.dump).should_receive('create_named_pipe_for_dump')
     flexmock(module.dump).should_receive('create_named_pipe_for_dump')
 
 
     flexmock(module).should_receive('execute_command').with_args(
     flexmock(module).should_receive('execute_command').with_args(
-        ['mongodump', '--archive', '--db', 'foo', '--stuff=such', '>', 'databases/localhost/foo'],
+        ['mongodump', '--db', 'foo', '--stuff=such', '--archive', '>', 'databases/localhost/foo'],
         shell=True,
         shell=True,
         run_to_completion=False,
         run_to_completion=False,
     ).and_return(process).once()
     ).and_return(process).once()
@@ -305,12 +302,12 @@ def test_restore_database_dump_with_dry_run_skips_restore():
 
 
 
 
 def test_restore_database_dump_without_extract_process_restores_from_disk():
 def test_restore_database_dump_without_extract_process_restores_from_disk():
-    database_config = [{'name': 'foo'}]
+    database_config = [{'name': 'foo', 'format': 'directory'}]
 
 
     flexmock(module).should_receive('make_dump_path')
     flexmock(module).should_receive('make_dump_path')
     flexmock(module.dump).should_receive('make_database_dump_filename').and_return('/dump/path')
     flexmock(module.dump).should_receive('make_database_dump_filename').and_return('/dump/path')
     flexmock(module).should_receive('execute_command_with_processes').with_args(
     flexmock(module).should_receive('execute_command_with_processes').with_args(
-        ['mongorestore', '--archive', '/dump/path', '--drop', '--db', 'foo'],
+        ['mongorestore', '--dir', '/dump/path', '--drop', '--db', 'foo'],
         processes=[],
         processes=[],
         output_log_level=logging.DEBUG,
         output_log_level=logging.DEBUG,
         input_file=None,
         input_file=None,

+ 2 - 4
tests/unit/hooks/test_postgresql.py

@@ -270,7 +270,6 @@ def test_make_extra_environment_maps_options_to_environment():
 
 
 def test_dump_databases_runs_pg_dump_with_directory_format():
 def test_dump_databases_runs_pg_dump_with_directory_format():
     databases = [{'name': 'foo', 'format': 'directory'}]
     databases = [{'name': 'foo', 'format': 'directory'}]
-    process = flexmock()
     flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
     flexmock(module).should_receive('make_extra_environment').and_return({'PGSSLMODE': 'disable'})
     flexmock(module).should_receive('make_dump_path').and_return('')
     flexmock(module).should_receive('make_dump_path').and_return('')
     flexmock(module).should_receive('database_names_to_dump').and_return(('foo',))
     flexmock(module).should_receive('database_names_to_dump').and_return(('foo',))
@@ -295,10 +294,9 @@ def test_dump_databases_runs_pg_dump_with_directory_format():
         ),
         ),
         shell=True,
         shell=True,
         extra_environment={'PGSSLMODE': 'disable'},
         extra_environment={'PGSSLMODE': 'disable'},
-        run_to_completion=False,
-    ).and_return(process).once()
+    ).and_return(flexmock()).once()
 
 
-    assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == [process]
+    assert module.dump_databases(databases, 'test.yaml', {}, dry_run=False) == []
 
 
 
 
 def test_dump_databases_runs_pg_dump_with_options():
 def test_dump_databases_runs_pg_dump_with_options():