Переглянути джерело

check --find-lost-archives (was: --undelete-archives)

Also take soft-deleted archives/ directory entries into account: only create
a new archives/ directory entry if both of the following hold:
- there is no (normal) entry for that archive ID, and
- there is no soft-deleted entry for that archive ID either

Support running with or without --repair.

Without --repair, it only detects such inconsistencies (reporting which entries it would create) and returns with rc != 0 if any are found.

--repository-only contradicts --find-lost-archives.
Thomas Waldmann 7 місяців тому
батько
коміт
a48a8d2bea

+ 18 - 10
src/borg/archive.py

@@ -1635,7 +1635,7 @@ class ArchiveChecker:
         *,
         *,
         verify_data=False,
         verify_data=False,
         repair=False,
         repair=False,
-        undelete_archives=False,
+        find_lost_archives=False,
         match=None,
         match=None,
         sort_by="",
         sort_by="",
         first=0,
         first=0,
@@ -1648,7 +1648,7 @@ class ArchiveChecker:
         """Perform a set of checks on 'repository'
         """Perform a set of checks on 'repository'
 
 
         :param repair: enable repair mode, write updated or corrected data into repository
         :param repair: enable repair mode, write updated or corrected data into repository
-        :param undelete_archives: create archive directory entries that are missing
+        :param find_lost_archives: create archive directory entries that are missing
         :param first/last/sort_by: only check this number of first/last archives ordered by sort_by
         :param first/last/sort_by: only check this number of first/last archives ordered by sort_by
         :param match: only check archives matching this pattern
         :param match: only check archives matching this pattern
         :param older/newer: only check archives older/newer than timedelta from now
         :param older/newer: only check archives older/newer than timedelta from now
@@ -1685,7 +1685,7 @@ class ArchiveChecker:
                 rebuild_manifest = True
                 rebuild_manifest = True
         if rebuild_manifest:
         if rebuild_manifest:
             self.manifest = self.rebuild_manifest()
             self.manifest = self.rebuild_manifest()
-        if undelete_archives:
+        if find_lost_archives:
             self.rebuild_archives_directory()
             self.rebuild_archives_directory()
         self.rebuild_archives(
         self.rebuild_archives(
             match=match, first=first, last=last, sort_by=sort_by, older=older, oldest=oldest, newer=newer, newest=newest
             match=match, first=first, last=last, sort_by=sort_by, older=older, oldest=oldest, newer=newer, newest=newest
@@ -1815,8 +1815,10 @@ class ArchiveChecker:
         """Rebuild the archives directory, undeleting archives.
         """Rebuild the archives directory, undeleting archives.
 
 
         Iterates through all objects in the repository looking for archive metadata blocks.
         Iterates through all objects in the repository looking for archive metadata blocks.
-        When finding some that do not have a corresponding archives directory entry, it will
-        create that entry (undeleting all archives).
+        When finding some that do not have a corresponding archives directory entry (either
+        a normal entry for an "existing" archive, or a soft-deleted entry for a "deleted"
+        archive), it will create that entry (making the archives directory consistent with
+        the repository).
         """
         """
 
 
         def valid_archive(obj):
         def valid_archive(obj):
@@ -1862,12 +1864,18 @@ class ArchiveChecker:
                 archive = ArchiveItem(internal_dict=archive)
                 archive = ArchiveItem(internal_dict=archive)
                 name = archive.name
                 name = archive.name
                 archive_id, archive_id_hex = chunk_id, bin_to_hex(chunk_id)
                 archive_id, archive_id_hex = chunk_id, bin_to_hex(chunk_id)
-                logger.info(f"Found archive {name} {archive_id_hex}.")
-                if self.manifest.archives.exists_name_and_id(name, archive_id):
-                    logger.info("We already have an archives directory entry for this.")
+                if self.manifest.archives.exists_id(archive_id, deleted=False):
+                    logger.debug(f"We already have an archives directory entry for {name} {archive_id_hex}.")
+                elif self.manifest.archives.exists_id(archive_id, deleted=True):
+                    logger.debug(f"We already have a deleted archives directory entry for {name} {archive_id_hex}.")
                 else:
                 else:
-                    logger.warning(f"Creating archives directory entry for {name} {archive_id_hex}.")
-                    self.manifest.archives.create(name, archive_id, archive.time)
+                    self.error_found = True
+                    if self.repair:
+                        logger.warning(f"Creating archives directory entry for {name} {archive_id_hex}.")
+                        self.manifest.archives.create(name, archive_id, archive.time)
+                    else:
+                        logger.warning(f"Would create archives directory entry for {name} {archive_id_hex}.")
+
         pi.finish()
         pi.finish()
         logger.info("Rebuilding missing archives directory entries completed.")
         logger.info("Rebuilding missing archives directory entries completed.")
 
 

+ 9 - 11
src/borg/archiver/check_cmd.py

@@ -35,10 +35,10 @@ class CheckMixIn:
             raise CommandError(
             raise CommandError(
                 "--repository-only contradicts --first, --last, -a / --match-archives and --verify-data arguments."
                 "--repository-only contradicts --first, --last, -a / --match-archives and --verify-data arguments."
             )
             )
+        if args.repo_only and args.find_lost_archives:
+            raise CommandError("--repository-only contradicts the --find-lost-archives option.")
         if args.repair and args.max_duration:
         if args.repair and args.max_duration:
             raise CommandError("--repair does not allow --max-duration argument.")
             raise CommandError("--repair does not allow --max-duration argument.")
-        if args.undelete_archives and not args.repair:
-            raise CommandError("--undelete-archives requires --repair argument.")
         if args.max_duration and not args.repo_only:
         if args.max_duration and not args.repo_only:
             # when doing a partial repo check, we can only check xxh64 hashes in repository files.
             # when doing a partial repo check, we can only check xxh64 hashes in repository files.
             # archives check requires that a full repo check was done before and has built/cached a ChunkIndex.
             # archives check requires that a full repo check was done before and has built/cached a ChunkIndex.
@@ -51,7 +51,7 @@ class CheckMixIn:
             repository,
             repository,
             verify_data=args.verify_data,
             verify_data=args.verify_data,
             repair=args.repair,
             repair=args.repair,
-            undelete_archives=args.undelete_archives,
+            find_lost_archives=args.find_lost_archives,
             match=args.match_archives,
             match=args.match_archives,
             sort_by=args.sort_by or "ts",
             sort_by=args.sort_by or "ts",
             first=args.first,
             first=args.first,
@@ -180,11 +180,12 @@ class CheckMixIn:
         Consequently, if lost chunks were repaired earlier, it is advised to run
         Consequently, if lost chunks were repaired earlier, it is advised to run
         ``--repair`` a second time after creating some new backups.
         ``--repair`` a second time after creating some new backups.
 
 
-        If ``--repair --undelete-archives`` is given, Borg will scan the repository
+        If ``--repair --find-lost-archives`` is given, Borg will scan the repository
         for archive metadata and if it finds some where no corresponding archives
         for archive metadata and if it finds some where no corresponding archives
-        directory entry exists, it will create the entries. This is basically undoing
-        ``borg delete archive`` or ``borg prune ...`` commands and only possible before
-        ``borg compact`` would remove the archives' data completely.
+        directory entry exists, it will create one.
+        This will make archives reappear for which the directory entry was lost.
+        This is only possible before ``borg compact`` would remove the archives'
+        data completely.
         """
         """
         )
         )
         subparser = subparsers.add_parser(
         subparser = subparsers.add_parser(
@@ -213,10 +214,7 @@ class CheckMixIn:
             "--repair", dest="repair", action="store_true", help="attempt to repair any inconsistencies found"
             "--repair", dest="repair", action="store_true", help="attempt to repair any inconsistencies found"
         )
         )
         subparser.add_argument(
         subparser.add_argument(
-            "--undelete-archives",
-            dest="undelete_archives",
-            action="store_true",
-            help="attempt to undelete archives (use with --repair)",
+            "--find-lost-archives", dest="find_lost_archives", action="store_true", help="attempt to find lost archives"
         )
         )
         subparser.add_argument(
         subparser.add_argument(
             "--max-duration",
             "--max-duration",

+ 8 - 0
src/borg/manifest.py

@@ -222,6 +222,14 @@ class Archives:
         else:
         else:
             return name in self._archives
             return name in self._archives
 
 
+    def exists_id(self, id, *, deleted=False):
+        # check if an archive with this id exists
+        assert isinstance(id, bytes)
+        if not self.legacy:
+            return id in self.ids(deleted=deleted)
+        else:
+            raise NotImplementedError
+
     def exists_name_and_id(self, name, id):
     def exists_name_and_id(self, name, id):
         # check if an archive with this name AND id exists
         # check if an archive with this name AND id exists
         assert isinstance(name, str)
         assert isinstance(name, str)

+ 12 - 8
src/borg/testsuite/archiver/check_cmd_test.py

@@ -1,4 +1,5 @@
 from datetime import datetime, timezone, timedelta
 from datetime import datetime, timezone, timedelta
+from pathlib import Path
 import shutil
 import shutil
 from unittest.mock import patch
 from unittest.mock import patch
 
 
@@ -270,18 +271,21 @@ def test_manifest_rebuild_corrupted_chunk(archivers, request):
 def test_check_undelete_archives(archivers, request):
 def test_check_undelete_archives(archivers, request):
     archiver = request.getfixturevalue(archivers)
     archiver = request.getfixturevalue(archivers)
     check_cmd_setup(archiver)  # creates archive1 and archive2
     check_cmd_setup(archiver)  # creates archive1 and archive2
-    # borg delete does it rather quick and dirty: it only kills the archives directory entry
-    cmd(archiver, "delete", "archive1")
-    cmd(archiver, "delete", "archive2")
+    existing_archive_ids = set(cmd(archiver, "repo-list", "--short").splitlines())
+    create_src_archive(archiver, "archive3")
+    archive_ids = set(cmd(archiver, "repo-list", "--short").splitlines())
+    new_archive_id_hex = (archive_ids - existing_archive_ids).pop()
+    (Path(archiver.repository_path) / "archives" / new_archive_id_hex).unlink()  # lose the entry for archive3
     output = cmd(archiver, "repo-list")
     output = cmd(archiver, "repo-list")
-    assert "archive1" not in output
-    assert "archive2" not in output
-    # borg check will re-discover archive1 and archive2 and new archives directory entries
-    # will be created because we requested undeleting archives.
-    cmd(archiver, "check", "--repair", "--undelete-archives", exit_code=0)
+    assert "archive1" in output
+    assert "archive2" in output
+    assert "archive3" not in output
+    # borg check will re-discover archive3 and create a new archives directory entry.
+    cmd(archiver, "check", "--repair", "--find-lost-archives", exit_code=0)
     output = cmd(archiver, "repo-list")
     output = cmd(archiver, "repo-list")
     assert "archive1" in output
     assert "archive1" in output
     assert "archive2" in output
     assert "archive2" in output
+    assert "archive3" in output
 
 
 
 
 def test_spoofed_archive(archivers, request):
 def test_spoofed_archive(archivers, request):