Browse Source

check --find-lost-archives (was: --undelete-archives)

Consider soft-deleted archives/ directory entries, but only create a new
archives/ directory entry if:
- there is no entry for that archive ID
- there is no soft-deleted entry for that archive ID either

Support running with or without --repair.

Without --repair, it can be used to detect such inconsistencies and return with rc != 0.

--repository-only contradicts --find-lost-archives.
Thomas Waldmann 7 months ago
parent
commit
a48a8d2bea

+ 18 - 10
src/borg/archive.py

@@ -1635,7 +1635,7 @@ class ArchiveChecker:
         *,
         verify_data=False,
         repair=False,
-        undelete_archives=False,
+        find_lost_archives=False,
         match=None,
         sort_by="",
         first=0,
@@ -1648,7 +1648,7 @@ class ArchiveChecker:
         """Perform a set of checks on 'repository'
 
         :param repair: enable repair mode, write updated or corrected data into repository
-        :param undelete_archives: create archive directory entries that are missing
+        :param find_lost_archives: detect archives with a missing archives directory entry and, in repair mode, create the missing entries
         :param first/last/sort_by: only check this number of first/last archives ordered by sort_by
         :param match: only check archives matching this pattern
         :param older/newer: only check archives older/newer than timedelta from now
@@ -1685,7 +1685,7 @@ class ArchiveChecker:
                 rebuild_manifest = True
         if rebuild_manifest:
             self.manifest = self.rebuild_manifest()
-        if undelete_archives:
+        if find_lost_archives:
             self.rebuild_archives_directory()
         self.rebuild_archives(
             match=match, first=first, last=last, sort_by=sort_by, older=older, oldest=oldest, newer=newer, newest=newest
@@ -1815,8 +1815,10 @@ class ArchiveChecker:
         """Rebuild the archives directory, undeleting archives.
 
         Iterates through all objects in the repository looking for archive metadata blocks.
-        When finding some that do not have a corresponding archives directory entry, it will
-        create that entry (undeleting all archives).
+        When finding some that do not have a corresponding archives directory entry (either
+        a normal entry for an "existing" archive, or a soft-deleted entry for a "deleted"
+        archive), it will create that entry (making the archives directory consistent with
+        the repository).
         """
 
         def valid_archive(obj):
@@ -1862,12 +1864,18 @@ class ArchiveChecker:
                 archive = ArchiveItem(internal_dict=archive)
                 name = archive.name
                 archive_id, archive_id_hex = chunk_id, bin_to_hex(chunk_id)
-                logger.info(f"Found archive {name} {archive_id_hex}.")
-                if self.manifest.archives.exists_name_and_id(name, archive_id):
-                    logger.info("We already have an archives directory entry for this.")
+                if self.manifest.archives.exists_id(archive_id, deleted=False):
+                    logger.debug(f"We already have an archives directory entry for {name} {archive_id_hex}.")
+                elif self.manifest.archives.exists_id(archive_id, deleted=True):
+                    logger.debug(f"We already have a deleted archives directory entry for {name} {archive_id_hex}.")
                 else:
-                    logger.warning(f"Creating archives directory entry for {name} {archive_id_hex}.")
-                    self.manifest.archives.create(name, archive_id, archive.time)
+                    self.error_found = True
+                    if self.repair:
+                        logger.warning(f"Creating archives directory entry for {name} {archive_id_hex}.")
+                        self.manifest.archives.create(name, archive_id, archive.time)
+                    else:
+                        logger.warning(f"Would create archives directory entry for {name} {archive_id_hex}.")
+
         pi.finish()
         logger.info("Rebuilding missing archives directory entries completed.")
 

+ 9 - 11
src/borg/archiver/check_cmd.py

@@ -35,10 +35,10 @@ class CheckMixIn:
             raise CommandError(
                 "--repository-only contradicts --first, --last, -a / --match-archives and --verify-data arguments."
             )
+        if args.repo_only and args.find_lost_archives:
+            raise CommandError("--repository-only contradicts the --find-lost-archives option.")
         if args.repair and args.max_duration:
             raise CommandError("--repair does not allow --max-duration argument.")
-        if args.undelete_archives and not args.repair:
-            raise CommandError("--undelete-archives requires --repair argument.")
         if args.max_duration and not args.repo_only:
             # when doing a partial repo check, we can only check xxh64 hashes in repository files.
             # archives check requires that a full repo check was done before and has built/cached a ChunkIndex.
@@ -51,7 +51,7 @@ class CheckMixIn:
             repository,
             verify_data=args.verify_data,
             repair=args.repair,
-            undelete_archives=args.undelete_archives,
+            find_lost_archives=args.find_lost_archives,
             match=args.match_archives,
             sort_by=args.sort_by or "ts",
             first=args.first,
@@ -180,11 +180,12 @@ class CheckMixIn:
         Consequently, if lost chunks were repaired earlier, it is advised to run
         ``--repair`` a second time after creating some new backups.
 
-        If ``--repair --undelete-archives`` is given, Borg will scan the repository
+        If ``--repair --find-lost-archives`` is given, Borg will scan the repository
         for archive metadata and if it finds some where no corresponding archives
-        directory entry exists, it will create the entries. This is basically undoing
-        ``borg delete archive`` or ``borg prune ...`` commands and only possible before
-        ``borg compact`` would remove the archives' data completely.
+        directory entry exists, it will create one.
+        This will make archives reappear for which the directory entry was lost.
+        This is only possible before ``borg compact`` would remove the archives'
+        data completely.
         """
         )
         subparser = subparsers.add_parser(
@@ -213,10 +214,7 @@ class CheckMixIn:
             "--repair", dest="repair", action="store_true", help="attempt to repair any inconsistencies found"
         )
         subparser.add_argument(
-            "--undelete-archives",
-            dest="undelete_archives",
-            action="store_true",
-            help="attempt to undelete archives (use with --repair)",
+            "--find-lost-archives", dest="find_lost_archives", action="store_true", help="attempt to find lost archives"
         )
         subparser.add_argument(
             "--max-duration",

+ 8 - 0
src/borg/manifest.py

@@ -222,6 +222,14 @@ class Archives:
         else:
             return name in self._archives
 
+    def exists_id(self, id, *, deleted=False):
+        # check if an archive with this id exists
+        assert isinstance(id, bytes)
+        if not self.legacy:
+            return id in self.ids(deleted=deleted)
+        else:
+            raise NotImplementedError
+
     def exists_name_and_id(self, name, id):
         # check if an archive with this name AND id exists
         assert isinstance(name, str)

+ 12 - 8
src/borg/testsuite/archiver/check_cmd_test.py

@@ -1,4 +1,5 @@
 from datetime import datetime, timezone, timedelta
+from pathlib import Path
 import shutil
 from unittest.mock import patch
 
@@ -270,18 +271,21 @@ def test_manifest_rebuild_corrupted_chunk(archivers, request):
 def test_check_undelete_archives(archivers, request):
     archiver = request.getfixturevalue(archivers)
     check_cmd_setup(archiver)  # creates archive1 and archive2
-    # borg delete does it rather quick and dirty: it only kills the archives directory entry
-    cmd(archiver, "delete", "archive1")
-    cmd(archiver, "delete", "archive2")
+    existing_archive_ids = set(cmd(archiver, "repo-list", "--short").splitlines())
+    create_src_archive(archiver, "archive3")
+    archive_ids = set(cmd(archiver, "repo-list", "--short").splitlines())
+    new_archive_id_hex = (archive_ids - existing_archive_ids).pop()
+    (Path(archiver.repository_path) / "archives" / new_archive_id_hex).unlink()  # lose the entry for archive3
     output = cmd(archiver, "repo-list")
-    assert "archive1" not in output
-    assert "archive2" not in output
-    # borg check will re-discover archive1 and archive2 and new archives directory entries
-    # will be created because we requested undeleting archives.
-    cmd(archiver, "check", "--repair", "--undelete-archives", exit_code=0)
+    assert "archive1" in output
+    assert "archive2" in output
+    assert "archive3" not in output
+    # borg check will re-discover archive3 and create a new archives directory entry.
+    cmd(archiver, "check", "--repair", "--find-lost-archives", exit_code=0)
     output = cmd(archiver, "repo-list")
     assert "archive1" in output
     assert "archive2" in output
+    assert "archive3" in output
 
 
 def test_spoofed_archive(archivers, request):