Răsfoiți Sursa

When creating patterns for a "borg extract" during a restore, match directory format dumps too. Also make dump restore ordering to be deterministic (#1208).

Dan Helfman 3 zile în urmă
părinte
comite
00e2284662
3 a modificat fișierele cu 57 adăugiri și 41 ștergeri
  1. 2 2
      NEWS
  2. 32 34
      borgmatic/actions/restore.py
  3. 23 5
      borgmatic/hooks/data_source/dump.py

+ 2 - 2
NEWS

@@ -1,6 +1,6 @@
 2.0.14.dev0
 2.0.14.dev0
- * #1208: Fix for the database "restore" action restoring more databases than the "--database" flag
-   specifies.
+ * #1208: Fix for the "restore" action incorrectly extracting more database dumps than the
+   "--database" flag specifies.
  * #1212: Fix an error when restoring multiple directory-format database dumps at once.
  * #1212: Fix an error when restoring multiple directory-format database dumps at once.
 
 
 2.0.13
 2.0.13

+ 32 - 34
borgmatic/actions/restore.py

@@ -273,7 +273,7 @@ def collect_dumps_from_archive(
     borgmatic runtime directory, query the archive for the names of data sources dumps it contains
     borgmatic runtime directory, query the archive for the names of data sources dumps it contains
     and return them as a set of Dump instances.
     and return them as a set of Dump instances.
     '''
     '''
-    dumps_from_archive = set()
+    dumps_from_archive = {}  # Use a dict as an ordered set.
 
 
     # There is (at most) one dump metadata file per data source hook. Load each.
     # There is (at most) one dump metadata file per data source hook. Load each.
     for dumps_metadata_path in borgmatic.borg.list.capture_archive_listing(
     for dumps_metadata_path in borgmatic.borg.list.capture_archive_listing(
@@ -302,33 +302,30 @@ def collect_dumps_from_archive(
         if not dumps_metadata_path:
         if not dumps_metadata_path:
             continue
             continue
 
 
-        dumps_from_archive.update(
-            set(
-                borgmatic.hooks.data_source.dump.parse_data_source_dumps_metadata(
-                    borgmatic.borg.extract.extract_archive(
-                        global_arguments.dry_run,
-                        repository,
-                        archive,
-                        [dumps_metadata_path],
-                        config,
-                        local_borg_version,
-                        global_arguments,
-                        local_path=local_path,
-                        remote_path=remote_path,
-                        extract_to_stdout=True,
-                    )
-                    .stdout.read()
-                    .decode(),
-                    dumps_metadata_path,
-                )
+        for dump in borgmatic.hooks.data_source.dump.parse_data_source_dumps_metadata(
+            borgmatic.borg.extract.extract_archive(
+                global_arguments.dry_run,
+                repository,
+                archive,
+                [dumps_metadata_path],
+                config,
+                local_borg_version,
+                global_arguments,
+                local_path=local_path,
+                remote_path=remote_path,
+                extract_to_stdout=True,
             )
             )
-        )
+            .stdout.read()
+            .decode(),
+            dumps_metadata_path,
+        ):
+            dumps_from_archive[dump] = None
 
 
     # If we've successfully loaded any dumps metadata, we're done.
     # If we've successfully loaded any dumps metadata, we're done.
     if dumps_from_archive:
     if dumps_from_archive:
         logger.debug('Collecting database dumps from archive data source dumps metadata files')
         logger.debug('Collecting database dumps from archive data source dumps metadata files')
 
 
-        return dumps_from_archive
+        return tuple(dumps_from_archive.keys())
 
 
     # No dumps metadata files were found, so for backwards compatibility, fall back to parsing the
     # No dumps metadata files were found, so for backwards compatibility, fall back to parsing the
     # paths of dumps found in the archive to get their respective dump metadata.
     # paths of dumps found in the archive to get their respective dump metadata.
@@ -392,11 +389,11 @@ def collect_dumps_from_archive(
             except (ValueError, TypeError):
             except (ValueError, TypeError):
                 port = None
                 port = None
 
 
-            dumps_from_archive.add(
+            dumps_from_archive[
                 Dump(
                 Dump(
                     hook_name, data_source_name, None if hostname == 'localhost' else hostname, port
                     hook_name, data_source_name, None if hostname == 'localhost' else hostname, port
                 )
                 )
-            )
+            ] = None
 
 
             # We've successfully parsed the dump path, so no need to probe any further.
             # We've successfully parsed the dump path, so no need to probe any further.
             break
             break
@@ -405,13 +402,13 @@ def collect_dumps_from_archive(
                 f'Ignoring invalid data source dump path "{dump_path}" in archive {archive}',
                 f'Ignoring invalid data source dump path "{dump_path}" in archive {archive}',
             )
             )
 
 
-    return dumps_from_archive
+    return tuple(dumps_from_archive.keys())
 
 
 
 
 def get_dumps_to_restore(restore_arguments, dumps_from_archive):
 def get_dumps_to_restore(restore_arguments, dumps_from_archive):
     '''
     '''
     Given restore arguments as an argparse.Namespace instance indicating which dumps to restore and
     Given restore arguments as an argparse.Namespace instance indicating which dumps to restore and
-    a set of Dump instances representing the dumps found in an archive, return a set of specific
+    a tuple of Dump instances representing the dumps found in an archive, return a tuple of specific
     Dump instances from the archive to restore. As part of this, replace any Dump having a data
     Dump instances from the archive to restore. As part of this, replace any Dump having a data
     source name of "all" with multiple named Dump instances as appropriate.
     source name of "all" with multiple named Dump instances as appropriate.
 
 
@@ -456,11 +453,12 @@ def get_dumps_to_restore(restore_arguments, dumps_from_archive):
         }
         }
     )
     )
     missing_dumps = set()
     missing_dumps = set()
-    dumps_to_restore = set()
+    dumps_to_restore = {}  # Use a dict as an ordered set.
 
 
     # If there's a requested "all" dump, add every dump from the archive to the dumps to restore.
     # If there's a requested "all" dump, add every dump from the archive to the dumps to restore.
     if any(dump for dump in requested_dumps if dump.data_source_name == 'all'):
     if any(dump for dump in requested_dumps if dump.data_source_name == 'all'):
-        dumps_to_restore.update(dumps_from_archive)
+        for dump in dumps_from_archive:
+            dumps_to_restore[dump] = None
 
 
     # If any archive dump matches a requested dump, add the archive dump to the dumps to restore.
     # If any archive dump matches a requested dump, add the archive dump to the dumps to restore.
     for requested_dump in requested_dumps:
     for requested_dump in requested_dumps:
@@ -476,7 +474,7 @@ def get_dumps_to_restore(restore_arguments, dumps_from_archive):
         if len(matching_dumps) == 0:
         if len(matching_dumps) == 0:
             missing_dumps.add(requested_dump)
             missing_dumps.add(requested_dump)
         elif len(matching_dumps) == 1:
         elif len(matching_dumps) == 1:
-            dumps_to_restore.add(matching_dumps[0])
+            dumps_to_restore[matching_dumps[0]] = None
         else:
         else:
             raise ValueError(
             raise ValueError(
                 f'Cannot restore data source {render_dump_metadata(requested_dump)} because there are multiple matching dumps in the archive. Try adding flags to disambiguate.',
                 f'Cannot restore data source {render_dump_metadata(requested_dump)} because there are multiple matching dumps in the archive. Try adding flags to disambiguate.',
@@ -491,20 +489,20 @@ def get_dumps_to_restore(restore_arguments, dumps_from_archive):
             f"Cannot restore data source dump{'s' if len(missing_dumps) > 1 else ''} {rendered_dumps} missing from archive",
             f"Cannot restore data source dump{'s' if len(missing_dumps) > 1 else ''} {rendered_dumps} missing from archive",
         )
         )
 
 
-    return dumps_to_restore
+    return tuple(dumps_to_restore.keys())
 
 
 
 
 def ensure_requested_dumps_restored(dumps_to_restore, dumps_actually_restored):
 def ensure_requested_dumps_restored(dumps_to_restore, dumps_actually_restored):
     '''
     '''
-    Given a set of requested dumps to restore and a set of dumps actually restored, raise ValueError
-    if any requested dumps to restore weren't restored, indicating that they were missing from the
-    configuration.
+    Given a tuple of requested dumps to restore and a set of dumps actually restored, raise
+    ValueError if any requested dumps to restore weren't restored, indicating that they were missing
+    from the configuration.
     '''
     '''
     if not dumps_actually_restored:
     if not dumps_actually_restored:
         raise ValueError('No data source dumps were found to restore')
         raise ValueError('No data source dumps were found to restore')
 
 
     missing_dumps = sorted(
     missing_dumps = sorted(
-        dumps_to_restore - dumps_actually_restored,
+        set(dumps_to_restore) - dumps_actually_restored,
         key=lambda dump: dump.data_source_name,
         key=lambda dump: dump.data_source_name,
     )
     )
 
 

+ 23 - 5
borgmatic/hooks/data_source/dump.py

@@ -2,6 +2,7 @@ import fnmatch
 import json
 import json
 import logging
 import logging
 import os
 import os
+import re
 import shutil
 import shutil
 
 
 import borgmatic.actions.restore
 import borgmatic.actions.restore
@@ -114,16 +115,33 @@ def remove_data_source_dumps(dump_path, data_source_type_name, dry_run):
         shutil.rmtree(dump_path)
         shutil.rmtree(dump_path)
 
 
 
 
+END_OF_STRING_PATTTERN = re.compile(r'\\z', flags=re.IGNORECASE)
+
+
 def convert_glob_patterns_to_borg_pattern(patterns):
 def convert_glob_patterns_to_borg_pattern(patterns):
     '''
     '''
     Convert a sequence of shell glob patterns like "/etc/*", "/tmp/*" to the corresponding Borg
     Convert a sequence of shell glob patterns like "/etc/*", "/tmp/*" to the corresponding Borg
     regular expression archive pattern as a single string like "re:etc/.*|tmp/.*".
     regular expression archive pattern as a single string like "re:etc/.*|tmp/.*".
     '''
     '''
-    # Remove the "\z" or "\Z" generated by fnmatch.translate() because we don't want the pattern to
-    # match only at the end of a path, as directory format dumps require extracting files with paths
-    # longer than the pattern. E.g., a pattern of "borgmatic/*/foo_databases/test" should also match
-    # paths like "borgmatic/*/foo_databases/test/toc.dat"
+    # This deserves some explanation. Here's what this code is doing for each shell glob pattern:
+    #
+    #  * Strip off the leading slash, since Borg doesn't store leading slashes in archives.
+    #  * Use fnmatch.translate() to convert the pattern to a Borg pattern.
+    #  * Remove the "\z" or "\Z" end-of-string special character generated by fnmatch.translate(),
+    #    because Borg doesn't like it. Replace it with a "$" end-of-string special character
+    #    instead. And yes, this is using a regular expression to modify a regular expression.
+    #  * Do the above for each of:
+    #    * The plain pattern. This supports the use case of a standard database where its name is a
+    #      filename in the path. Example: borgmatic/foo_databases/hostname/test
+    #    * The pattern with "/*" tacked onto the end of it. This supports the use case of a
+    #      directory-format database where its name is a directory name in the path. Example:
+    #      borgmatic/foo_databases/hostname/test/toc.dat
+    #
+    # Join the resulting transformed patterns together with "|" and return them as a string.
     return 're:' + '|'.join(
     return 're:' + '|'.join(
-        fnmatch.translate(pattern.lstrip('/')).replace('\\z', '').replace('\\Z', '') + '$'
+        re.sub(END_OF_STRING_PATTTERN, '$', fnmatch.translate(stripped))
+        + '|'
+        + re.sub(END_OF_STRING_PATTTERN, '$', fnmatch.translate(stripped + '/*'))
         for pattern in patterns
         for pattern in patterns
+        for stripped in (pattern.lstrip('/'),)
     )
     )