
When creating patterns for a "borg extract" during a restore, match directory-format dumps too. Also make dump restore ordering deterministic (#1208).

Dan Helfman 3 days ago
Parent
Commit
00e2284662
3 changed files with 57 additions and 41 deletions
  1. NEWS (+2 -2)
  2. borgmatic/actions/restore.py (+32 -34)
  3. borgmatic/hooks/data_source/dump.py (+23 -5)

+ 2 - 2
NEWS

@@ -1,6 +1,6 @@
 2.0.14.dev0
- * #1208: Fix for the database "restore" action restoring more databases than the "--database" flag
-   specifies.
+ * #1208: Fix for the "restore" action incorrectly extracting more database dumps than the
+   "--database" flag specifies.
  * #1212: Fix an error when restoring multiple directory-format database dumps at once.
 
 2.0.13

+ 32 - 34
borgmatic/actions/restore.py

@@ -273,7 +273,7 @@ def collect_dumps_from_archive(
     borgmatic runtime directory, query the archive for the names of data source dumps it contains
     and return them as a tuple of Dump instances.
     '''
-    dumps_from_archive = set()
+    dumps_from_archive = {}  # Use a dict as an ordered set.
 
     # There is (at most) one dump metadata file per data source hook. Load each.
     for dumps_metadata_path in borgmatic.borg.list.capture_archive_listing(
@@ -302,33 +302,30 @@ def collect_dumps_from_archive(
         if not dumps_metadata_path:
             continue
 
-        dumps_from_archive.update(
-            set(
-                borgmatic.hooks.data_source.dump.parse_data_source_dumps_metadata(
-                    borgmatic.borg.extract.extract_archive(
-                        global_arguments.dry_run,
-                        repository,
-                        archive,
-                        [dumps_metadata_path],
-                        config,
-                        local_borg_version,
-                        global_arguments,
-                        local_path=local_path,
-                        remote_path=remote_path,
-                        extract_to_stdout=True,
-                    )
-                    .stdout.read()
-                    .decode(),
-                    dumps_metadata_path,
-                )
+        for dump in borgmatic.hooks.data_source.dump.parse_data_source_dumps_metadata(
+            borgmatic.borg.extract.extract_archive(
+                global_arguments.dry_run,
+                repository,
+                archive,
+                [dumps_metadata_path],
+                config,
+                local_borg_version,
+                global_arguments,
+                local_path=local_path,
+                remote_path=remote_path,
+                extract_to_stdout=True,
             )
-        )
+            .stdout.read()
+            .decode(),
+            dumps_metadata_path,
+        ):
+            dumps_from_archive[dump] = None
 
     # If we've successfully loaded any dumps metadata, we're done.
     if dumps_from_archive:
         logger.debug('Collecting database dumps from archive data source dumps metadata files')
 
-        return dumps_from_archive
+        return tuple(dumps_from_archive.keys())
 
     # No dumps metadata files were found, so for backwards compatibility, fall back to parsing the
     # paths of dumps found in the archive to get their respective dump metadata.
@@ -392,11 +389,11 @@ def collect_dumps_from_archive(
             except (ValueError, TypeError):
                 port = None
 
-            dumps_from_archive.add(
+            dumps_from_archive[
                 Dump(
                     hook_name, data_source_name, None if hostname == 'localhost' else hostname, port
                 )
-            )
+            ] = None
 
             # We've successfully parsed the dump path, so no need to probe any further.
             break
@@ -405,13 +402,13 @@ def collect_dumps_from_archive(
                 f'Ignoring invalid data source dump path "{dump_path}" in archive {archive}',
             )
 
-    return dumps_from_archive
+    return tuple(dumps_from_archive.keys())
 
 
 def get_dumps_to_restore(restore_arguments, dumps_from_archive):
     '''
     Given restore arguments as an argparse.Namespace instance indicating which dumps to restore and
-    a set of Dump instances representing the dumps found in an archive, return a set of specific
+    a tuple of Dump instances representing the dumps found in an archive, return a tuple of specific
     Dump instances from the archive to restore. As part of this, replace any Dump having a data
     source name of "all" with multiple named Dump instances as appropriate.
 
@@ -456,11 +453,12 @@ def get_dumps_to_restore(restore_arguments, dumps_from_archive):
         }
     )
     missing_dumps = set()
-    dumps_to_restore = set()
+    dumps_to_restore = {}  # Use a dict as an ordered set.
 
     # If there's a requested "all" dump, add every dump from the archive to the dumps to restore.
     if any(dump for dump in requested_dumps if dump.data_source_name == 'all'):
-        dumps_to_restore.update(dumps_from_archive)
+        for dump in dumps_from_archive:
+            dumps_to_restore[dump] = None
 
     # If any archive dump matches a requested dump, add the archive dump to the dumps to restore.
     for requested_dump in requested_dumps:
@@ -476,7 +474,7 @@ def get_dumps_to_restore(restore_arguments, dumps_from_archive):
         if len(matching_dumps) == 0:
             missing_dumps.add(requested_dump)
         elif len(matching_dumps) == 1:
-            dumps_to_restore.add(matching_dumps[0])
+            dumps_to_restore[matching_dumps[0]] = None
         else:
             raise ValueError(
                 f'Cannot restore data source {render_dump_metadata(requested_dump)} because there are multiple matching dumps in the archive. Try adding flags to disambiguate.',
@@ -491,20 +489,20 @@ def get_dumps_to_restore(restore_arguments, dumps_from_archive):
             f"Cannot restore data source dump{'s' if len(missing_dumps) > 1 else ''} {rendered_dumps} missing from archive",
         )
 
-    return dumps_to_restore
+    return tuple(dumps_to_restore.keys())
 
 
 def ensure_requested_dumps_restored(dumps_to_restore, dumps_actually_restored):
     '''
-    Given a set of requested dumps to restore and a set of dumps actually restored, raise ValueError
-    if any requested dumps to restore weren't restored, indicating that they were missing from the
-    configuration.
+    Given a tuple of requested dumps to restore and a set of dumps actually restored, raise
+    ValueError if any requested dumps to restore weren't restored, indicating that they were missing
+    from the configuration.
     '''
     if not dumps_actually_restored:
         raise ValueError('No data source dumps were found to restore')
 
     missing_dumps = sorted(
-        dumps_to_restore - dumps_actually_restored,
+        set(dumps_to_restore) - dumps_actually_restored,
         key=lambda dump: dump.data_source_name,
     )
 

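For context on the dict-as-ordered-set change in borgmatic/actions/restore.py above: a plain set of Dump instances iterates in an arbitrary, hash-dependent order, while a dict keyed by the same instances de-duplicates like a set but preserves insertion order (guaranteed since Python 3.7), which is what makes the restore ordering deterministic. A minimal standalone sketch of the idea, using a simplified stand-in for the real Dump type and made-up hook and database names:

    import collections

    # Simplified stand-in for borgmatic.actions.restore.Dump (assumed here to be a
    # hashable namedtuple-like value; the real type has more fields).
    Dump = collections.namedtuple('Dump', ('hook_name', 'data_source_name'))

    dumps_from_archive = [
        Dump('postgresql_databases', 'users'),
        Dump('postgresql_databases', 'billing'),
        Dump('postgresql_databases', 'users'),  # Duplicate: collapses into one key below.
    ]

    # Dict used as an ordered set: keys de-duplicate, insertion order is preserved.
    dumps_to_restore = {}
    for dump in dumps_from_archive:
        dumps_to_restore[dump] = None

    assert tuple(dumps_to_restore.keys()) == (
        Dump('postgresql_databases', 'users'),
        Dump('postgresql_databases', 'billing'),
    )
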
+ 23 - 5
borgmatic/hooks/data_source/dump.py

@@ -2,6 +2,7 @@ import fnmatch
 import json
 import logging
 import os
+import re
 import shutil
 
 import borgmatic.actions.restore
@@ -114,16 +115,33 @@ def remove_data_source_dumps(dump_path, data_source_type_name, dry_run):
         shutil.rmtree(dump_path)
 
 
+END_OF_STRING_PATTERN = re.compile(r'\\z', flags=re.IGNORECASE)
+
+
 def convert_glob_patterns_to_borg_pattern(patterns):
     '''
     Convert a sequence of shell glob patterns like "/etc/*", "/tmp/*" to the corresponding Borg
     regular expression archive pattern as a single string like "re:etc/.*|tmp/.*".
     '''
-    # Remove the "\z" or "\Z" generated by fnmatch.translate() because we don't want the pattern to
-    # match only at the end of a path, as directory format dumps require extracting files with paths
-    # longer than the pattern. E.g., a pattern of "borgmatic/*/foo_databases/test" should also match
-    # paths like "borgmatic/*/foo_databases/test/toc.dat"
+    # This deserves some explanation. Here's what this code is doing for each shell glob pattern:
+    #
+    #  * Strip off the leading slash, since Borg doesn't store leading slashes in archives.
+    #  * Use fnmatch.translate() to convert the pattern to a Borg pattern.
+    #  * Remove the "\z" or "\Z" end-of-string special character generated by fnmatch.translate(),
+    #    because Borg doesn't like it. Replace it with a "$" end-of-string special character
+    #    instead. And yes, this is using a regular expression to modify a regular expression.
+    #  * Do the above for each of:
+    #    * The plain pattern. This supports the use case of a standard database where its name is a
+    #      filename in the path. Example: borgmatic/foo_databases/hostname/test
+    #    * The pattern with "/*" tacked onto the end of it. This supports the use case of a
+    #      directory-format database where its name is a directory name in the path. Example:
+    #      borgmatic/foo_databases/hostname/test/toc.dat
+    #
+    # Join the resulting transformed patterns together with "|" and return them as a string.
     return 're:' + '|'.join(
-        fnmatch.translate(pattern.lstrip('/')).replace('\\z', '').replace('\\Z', '') + '$'
+        re.sub(END_OF_STRING_PATTERN, '$', fnmatch.translate(stripped))
+        + '|'
+        + re.sub(END_OF_STRING_PATTERN, '$', fnmatch.translate(stripped + '/*'))
         for pattern in patterns
+        for stripped in (pattern.lstrip('/'),)
     )
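
To make the conversion above concrete, here is a standalone sketch of the same transformation (the constant and function mirror the additions to dump.py; the glob and archive member paths are made up, modeled on the examples in the comments). It demonstrates that the resulting regular expression matches both a plain dump file and a file inside a directory-format dump:

    import fnmatch
    import re

    END_OF_STRING_PATTERN = re.compile(r'\\z', flags=re.IGNORECASE)


    def convert_glob_patterns_to_borg_pattern(patterns):
        # For each glob: strip the leading slash, translate it to a regular expression,
        # swap the "\Z" anchor for "$", and add a second alternative with "/*" appended
        # so that members of directory-format dumps match too.
        return 're:' + '|'.join(
            re.sub(END_OF_STRING_PATTERN, '$', fnmatch.translate(stripped))
            + '|'
            + re.sub(END_OF_STRING_PATTERN, '$', fnmatch.translate(stripped + '/*'))
            for pattern in patterns
            for stripped in (pattern.lstrip('/'),)
        )


    borg_pattern = convert_glob_patterns_to_borg_pattern(['/borgmatic/*/foo_databases/test'])
    regex = re.compile(borg_pattern[len('re:'):])

    # A single-file dump matches, as it did before this change ...
    assert regex.match('borgmatic/12345/foo_databases/test')
    # ... and now so does a file inside a directory-format dump.
    assert regex.match('borgmatic/12345/foo_databases/test/toc.dat')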