Sfoglia il codice sorgente

check --repair --undelete-archives: bring archives back from the dead

borg delete and borg prune do a quick and dirty archive deletion:
they just remove the archives directory entries of the affected archives.

--undelete-archives can still find the archive metadata objects
by completely scanning the repository and re-create missing
archives directory entries.

this is only possible until borg compact removes all unused data.

if only the manifest is missing or corrupted, do not run that
scan: it is no longer required for rebuilding the manifest.
Thomas Waldmann 8 mesi fa
parent
commit
682aedba50

+ 33 - 19
src/borg/archive.py

@@ -1601,6 +1601,7 @@ class ArchiveChecker:
         *,
         verify_data=False,
         repair=False,
+        undelete_archives=False,
         match=None,
         sort_by="",
         first=0,
@@ -1613,6 +1614,7 @@ class ArchiveChecker:
         """Perform a set of checks on 'repository'
 
         :param repair: enable repair mode, write updated or corrected data into repository
+        :param undelete_archives: create archive directory entries that are missing
         :param first/last/sort_by: only check this number of first/last archives ordered by sort_by
         :param match: only check archives matching this pattern
         :param older/newer: only check archives older/newer than timedelta from now
@@ -1631,18 +1633,24 @@ class ArchiveChecker:
         self.repo_objs = RepoObj(self.key)
         if verify_data:
             self.verify_data()
+        rebuild_manifest = False
         try:
             repository.get_manifest()
         except NoManifestError:
+            logger.error("Repository manifest is missing.")
             self.error_found = True
-            self.manifest = self.rebuild_manifest()
+            rebuild_manifest = True
         else:
             try:
                 self.manifest = Manifest.load(repository, (Manifest.Operation.CHECK,), key=self.key)
             except IntegrityErrorBase as exc:
                 logger.error("Repository manifest is corrupted: %s", exc)
                 self.error_found = True
-                self.manifest = self.rebuild_manifest()
+                rebuild_manifest = True
+        if rebuild_manifest:
+            self.manifest = self.rebuild_manifest()
+        if undelete_archives:
+            self.rebuild_archives_directory()
         self.rebuild_archives(
             match=match, first=first, last=last, sort_by=sort_by, older=older, oldest=oldest, newer=newer, newest=newest
         )
@@ -1757,9 +1765,22 @@ class ArchiveChecker:
         )
 
     def rebuild_manifest(self):
-        """Rebuild the manifest object and the archives list.
+        """Rebuild the manifest object."""
+
+        logger.info("Rebuilding missing/corrupted manifest.")
+        # as we have lost the manifest, we do not know any more what valid item keys we had.
+        # collecting any key we encounter in a damaged repo seems unwise, thus we just use
+        # the hardcoded list from the source code. thus, it is not recommended to rebuild a
+        # lost manifest on a older borg version than the most recent one that was ever used
+        # within this repository (assuming that newer borg versions support more item keys).
+        return Manifest(self.key, self.repository)
+
+    def rebuild_archives_directory(self):
+        """Rebuild the archives directory, undeleting archives.
 
         Iterates through all objects in the repository looking for archive metadata blocks.
+        When finding some that do not have a corresponding archives directory entry, it will
+        create that entry (undeleting all archives).
         """
 
         def valid_archive(obj):
@@ -1767,18 +1788,12 @@ class ArchiveChecker:
                 return False
             return REQUIRED_ARCHIVE_KEYS.issubset(obj)
 
-        logger.info("Rebuilding missing manifest and missing archives directory entries, this might take some time...")
-        # as we have lost the manifest, we do not know any more what valid item keys we had.
-        # collecting any key we encounter in a damaged repo seems unwise, thus we just use
-        # the hardcoded list from the source code. thus, it is not recommended to rebuild a
-        # lost manifest on a older borg version than the most recent one that was ever used
-        # within this repository (assuming that newer borg versions support more item keys).
-        manifest = Manifest(self.key, self.repository)
+        logger.info("Rebuilding missing archives directory entries, this might take some time...")
         pi = ProgressIndicatorPercent(
             total=len(self.chunks),
-            msg="Rebuilding manifest and archives directory %6.2f%%",
+            msg="Rebuilding missing archives directory entries %6.2f%%",
             step=0.01,
-            msgid="check.rebuild_manifest",
+            msgid="check.rebuild_archives_directory",
         )
         for chunk_id, _ in self.chunks.iteritems():
             pi.show()
@@ -1801,25 +1816,24 @@ class ArchiveChecker:
                 archive = ArchiveItem(internal_dict=archive)
                 name = archive.name
                 logger.info(f"Found archive {name}, id {bin_to_hex(chunk_id)}.")
-                if manifest.archives.exists_name_and_id(name, chunk_id):
+                if self.manifest.archives.exists_name_and_id(name, chunk_id):
                     logger.info("We already have an archives directory entry for this.")
-                elif not manifest.archives.exists(name):
+                elif not self.manifest.archives.exists(name):
                     # no archives list entry yet and name is not taken yet, create an entry
                     logger.warning(f"Creating archives directory entry for {name}.")
-                    manifest.archives.create(name, chunk_id, archive.time)
+                    self.manifest.archives.create(name, chunk_id, archive.time)
                 else:
                     # we don't have an entry yet, but the name is taken by something else
                     i = 1
                     while True:
                         new_name = "%s.%d" % (name, i)
-                        if not manifest.archives.exists(new_name):
+                        if not self.manifest.archives.exists(new_name):
                             break
                         i += 1
                     logger.warning(f"Creating archives directory entry using {new_name}.")
-                    manifest.archives.create(new_name, chunk_id, archive.time)
+                    self.manifest.archives.create(new_name, chunk_id, archive.time)
         pi.finish()
-        logger.info("Manifest and archives directory rebuild complete.")
-        return manifest
+        logger.info("Rebuilding missing archives directory entries completed.")
 
     def rebuild_archives(
         self, first=0, last=0, sort_by="", match=None, older=None, newer=None, oldest=None, newest=None

+ 15 - 0
src/borg/archiver/check_cmd.py

@@ -37,6 +37,8 @@ class CheckMixIn:
             )
         if args.repair and args.max_duration:
             raise CommandError("--repair does not allow --max-duration argument.")
+        if args.undelete_archives and not args.repair:
+            raise CommandError("--undelete-archives requires --repair argument.")
         if args.max_duration and not args.repo_only:
             # when doing a partial repo check, we can only check xxh64 hashes in repository files.
             # also, there is no max_duration support in the archives check code anyway.
@@ -48,6 +50,7 @@ class CheckMixIn:
             repository,
             verify_data=args.verify_data,
             repair=args.repair,
+            undelete_archives=args.undelete_archives,
             match=args.match_archives,
             sort_by=args.sort_by or "ts",
             first=args.first,
@@ -175,6 +178,12 @@ class CheckMixIn:
         chunks of a "zero-patched" file reappear, this effectively "heals" the file.
         Consequently, if lost chunks were repaired earlier, it is advised to run
         ``--repair`` a second time after creating some new backups.
+
+        If ``--repair --undelete-archives`` is given, Borg will scan the repository
+        for archive metadata and if it finds some where no corresponding archives
+        directory entry exists, it will create the entries. This is basically undoing
+        ``borg delete archive`` or ``borg prune ...`` commands and only possible before
+        ``borg compact`` would remove the archives' data completely.
         """
         )
         subparser = subparsers.add_parser(
@@ -202,6 +211,12 @@ class CheckMixIn:
         subparser.add_argument(
             "--repair", dest="repair", action="store_true", help="attempt to repair any inconsistencies found"
         )
+        subparser.add_argument(
+            "--undelete-archives",
+            dest="undelete_archives",
+            action="store_true",
+            help="attempt to undelete archives (use with --repair)",
+        )
         subparser.add_argument(
             "--max-duration",
             metavar="SECONDS",

+ 5 - 1
src/borg/testsuite/archiver/check_cmd.py

@@ -289,7 +289,11 @@ def test_manifest_rebuild_duplicate_archive(archivers, request):
         archive_id = repo_objs.id_hash(archive)
         repository.put(archive_id, repo_objs.format(archive_id, {}, archive, ro_type=ROBJ_ARCHIVE_META))
     cmd(archiver, "check", exit_code=1)
-    cmd(archiver, "check", "--repair", exit_code=0)
+    # when undeleting archives, borg check will discover both the original archive1 as well as
+    # the fake archive1 we created above. for the fake one, a new archives directory entry
+    # named archive1.1 will be created because we request undeleting archives and there
+    # is no archives directory entry for the fake archive yet.
+    cmd(archiver, "check", "--repair", "--undelete-archives", exit_code=0)
     output = cmd(archiver, "rlist")
     assert "archive1" in output
     assert "archive1.1" in output