Browse Source

Merge pull request #8468 from ThomasWaldmann/check-improvements

Check improvements
TW 8 months ago
parent
commit
3cae96cc99
3 changed files with 24 additions and 3 deletions
  1. 3 1
      src/borg/archive.py
  2. 3 2
      src/borg/archiver/check_cmd.py
  3. 18 0
      src/borg/repository.py

+ 3 - 1
src/borg/archive.py

@@ -1660,7 +1660,9 @@ class ArchiveChecker:
         self.check_all = not any((first, last, match, older, newer, oldest, newest))
         self.repair = repair
         self.repository = repository
-        self.chunks = build_chunkindex_from_repo(self.repository, disable_caches=True, cache_immediately=not repair)
+        # Repository.check already did a full repository-level check and has built and cached a fresh chunkindex -
+        # we can use that here, so we don't disable the caches (also no need to cache immediately, again):
+        self.chunks = build_chunkindex_from_repo(self.repository, disable_caches=False, cache_immediately=False)
         self.key = self.make_key(repository)
         self.repo_objs = RepoObj(self.key)
         if verify_data:

+ 3 - 2
src/borg/archiver/check_cmd.py

@@ -41,6 +41,7 @@ class CheckMixIn:
             raise CommandError("--undelete-archives requires --repair argument.")
         if args.max_duration and not args.repo_only:
             # when doing a partial repo check, we can only check xxh64 hashes in repository files.
+            # archives check requires that a full repo check was done before and has built/cached a ChunkIndex.
             # also, there is no max_duration support in the archives check code anyway.
             raise CommandError("--repository-only is required for --max-duration support.")
         if not args.archives_only:
@@ -77,8 +78,8 @@ class CheckMixIn:
            the repository. The read data is checked by size and hash. Bit rot and other
            types of accidental damage can be detected this way. Running the repository
            check can be split into multiple partial checks using ``--max-duration``.
-           When checking a remote repository, please note that the checks run on the
-           server and do not cause significant network traffic.
+           When checking a ssh:// remote repository, please note that the checks run on
+           the server and do not cause significant network traffic.
 
         2. Checking consistency and correctness of the archive metadata and optionally
            archive data (requires ``--verify-data``). This includes ensuring that the

+ 18 - 0
src/borg/repository.py

@@ -8,6 +8,7 @@ from borgstore.backends.errors import BackendDoesNotExist as StoreBackendDoesNot
 
 from .checksums import xxh64
 from .constants import *  # NOQA
+from .hashindex import ChunkIndex, ChunkIndexEntry
 from .helpers import Error, ErrorWithTraceback, IntegrityError
 from .helpers import Location
 from .helpers import bin_to_hex, hex_to_bin
@@ -306,6 +307,12 @@ class Repository:
         t_start = time.monotonic()
         t_last_checkpoint = t_start
         objs_checked = objs_errors = 0
+        chunks = ChunkIndex()
+        # we don't do refcounting anymore, neither we can know here whether any archive
+        # is using this object, but we assume that this is the case and set refcount to
+        # MAX_VALUE. As we don't do garbage collection here, this is not a problem.
+        # We also don't know the plaintext size, so we set it to 0.
+        init_entry = ChunkIndexEntry(refcount=ChunkIndex.MAX_VALUE, size=0)
         infos = self.store.list("data")
         try:
             for info in infos:
@@ -338,6 +345,12 @@ class Repository:
                                 self.store.delete(key)
                             else:
                                 log_error("reloading did help, inconsistent behaviour detected!")
+                if not (obj_corrupted and repair):
+                    # add all existing objects to the index.
+                    # borg check: the index may have corrupted objects (we did not delete them)
+                    # borg check --repair: the index will only have non-corrupted objects.
+                    id = hex_to_bin(info.name)
+                    chunks[id] = init_entry
                 now = time.monotonic()
                 if now > t_last_checkpoint + 300:  # checkpoint every 5 mins
                     t_last_checkpoint = now
@@ -353,6 +366,11 @@ class Repository:
                     self.store.delete("config/last-key-checked")
                 except StoreObjectNotFound:
                     pass
+                if not partial:
+                    # if we did a full pass in one go, we built a complete, uptodate ChunkIndex, cache it!
+                    from .cache import write_chunkindex_to_repo_cache
+
+                    write_chunkindex_to_repo_cache(self, chunks, compact=True, clear=True, force_write=True)
         except StoreObjectNotFound:
             # it can be that there is no "data/" at all, then it crashes when iterating infos.
             pass