
Merge pull request #8515 from ThomasWaldmann/soft-delete

soft delete / undelete
TW 7 months ago
parent commit 2dffc60507

+ 33 - 13
src/borg/archive.py

@@ -458,6 +458,7 @@ class Archive:
         end=None,
         log_json=False,
         iec=False,
+        deleted=False,
     ):
         name_is_id = isinstance(name, bytes)
         self.cwd = os.getcwd()
@@ -499,8 +500,9 @@ class Archive:
             self.tags = set()
         else:
             if name_is_id:
-                # we also go over the manifest here to avoid quick&dirty deleted archives
-                info = self.manifest.archives.get_by_id(name)
+                # we also go over the manifest here to avoid soft-deleted archives,
+                # except if we explicitly request one via deleted=True.
+                info = self.manifest.archives.get_by_id(name, deleted=deleted)
             else:
                 info = self.manifest.archives.get(name)
             if info is None:
@@ -1633,7 +1635,7 @@ class ArchiveChecker:
         *,
         verify_data=False,
         repair=False,
-        undelete_archives=False,
+        find_lost_archives=False,
         match=None,
         sort_by="",
         first=0,
@@ -1646,7 +1648,7 @@ class ArchiveChecker:
         """Perform a set of checks on 'repository'
         """Perform a set of checks on 'repository'
 
 
         :param repair: enable repair mode, write updated or corrected data into repository
         :param repair: enable repair mode, write updated or corrected data into repository
-        :param undelete_archives: create archive directory entries that are missing
+        :param find_lost_archives: create archive directory entries that are missing
         :param first/last/sort_by: only check this number of first/last archives ordered by sort_by
         :param match: only check archives matching this pattern
         :param older/newer: only check archives older/newer than timedelta from now
@@ -1683,7 +1685,7 @@ class ArchiveChecker:
                 rebuild_manifest = True
         if rebuild_manifest:
             self.manifest = self.rebuild_manifest()
-        if undelete_archives:
+        if find_lost_archives:
             self.rebuild_archives_directory()
         self.rebuild_archives(
             match=match, first=first, last=last, sort_by=sort_by, older=older, oldest=oldest, newer=newer, newest=newest
@@ -1813,8 +1815,10 @@ class ArchiveChecker:
         """Rebuild the archives directory, undeleting archives.
         """Rebuild the archives directory, undeleting archives.
 
 
         Iterates through all objects in the repository looking for archive metadata blocks.
         Iterates through all objects in the repository looking for archive metadata blocks.
-        When finding some that do not have a corresponding archives directory entry, it will
-        create that entry (undeleting all archives).
+        When finding some that do not have a corresponding archives directory entry (either
+        a normal entry for an "existing" archive, or a soft-deleted entry for a "deleted"
+        archive), it will create that entry (making the archives directory consistent with
+        the repository).
         """
         """
 
 
         def valid_archive(obj):
         def valid_archive(obj):
@@ -1831,6 +1835,16 @@ class ArchiveChecker:
         )
         for chunk_id, _ in self.chunks.iteritems():
             pi.show()
+            cdata = self.repository.get(chunk_id, read_data=False)  # only get metadata
+            try:
+                meta = self.repo_objs.parse_meta(chunk_id, cdata, ro_type=ROBJ_DONTCARE)
+            except IntegrityErrorBase as exc:
+                logger.error("Skipping corrupted chunk: %s", exc)
+                self.error_found = True
+                continue
+            if meta["type"] != ROBJ_ARCHIVE_META:
+                continue
+            # now we know it is an archive metadata chunk, load the full object from the repo:
             cdata = self.repository.get(chunk_id)
             try:
                 meta, data = self.repo_objs.parse(chunk_id, cdata, ro_type=ROBJ_DONTCARE)
@@ -1839,7 +1853,7 @@ class ArchiveChecker:
                 self.error_found = True
                 continue
             if meta["type"] != ROBJ_ARCHIVE_META:
-                continue
+                continue  # should never happen
             try:
                 archive = msgpack.unpackb(data)
             # Ignore exceptions that might be raised when feeding msgpack with invalid data
@@ -1850,12 +1864,18 @@ class ArchiveChecker:
                 archive = ArchiveItem(internal_dict=archive)
                 name = archive.name
                 archive_id, archive_id_hex = chunk_id, bin_to_hex(chunk_id)
-                logger.info(f"Found archive {name} {archive_id_hex}.")
-                if self.manifest.archives.exists_name_and_id(name, archive_id):
-                    logger.info("We already have an archives directory entry for this.")
+                if self.manifest.archives.exists_id(archive_id, deleted=False):
+                    logger.debug(f"We already have an archives directory entry for {name} {archive_id_hex}.")
+                elif self.manifest.archives.exists_id(archive_id, deleted=True):
+                    logger.debug(f"We already have a deleted archives directory entry for {name} {archive_id_hex}.")
                 else:
-                    logger.warning(f"Creating archives directory entry for {name} {archive_id_hex}.")
-                    self.manifest.archives.create(name, archive_id, archive.time)
+                    self.error_found = True
+                    if self.repair:
+                        logger.warning(f"Creating archives directory entry for {name} {archive_id_hex}.")
+                        self.manifest.archives.create(name, archive_id, archive.time)
+                    else:
+                        logger.warning(f"Would create archives directory entry for {name} {archive_id_hex}.")
+
         pi.finish()
         logger.info("Rebuilding missing archives directory entries completed.")


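The rewritten loop above uses a two-pass pattern: `repository.get(chunk_id, read_data=False)` plus `parse_meta()` cheaply classifies each chunk, and only the few chunks of type `ROBJ_ARCHIVE_META` are then fetched in full. A minimal sketch of that pattern outside of borg; `repository`, `parse_meta` and `parse` here are stand-in callables, not borg's real signatures:

```python
# Sketch only: the two-pass "check type first, fetch data later" scan.
# ROBJ_ARCHIVE_META is assumed to be a plain string here for illustration.
ROBJ_ARCHIVE_META = "archive"

def find_archive_metadata_chunks(chunk_ids, repository, parse_meta, parse):
    """Yield (chunk_id, data) only for archive metadata chunks."""
    for chunk_id in chunk_ids:
        header = repository.get(chunk_id, read_data=False)  # cheap: metadata only
        meta = parse_meta(chunk_id, header)
        if meta["type"] != ROBJ_ARCHIVE_META:
            continue  # skip the vast majority of chunks without downloading them
        cdata = repository.get(chunk_id)  # expensive: full object, but rare
        _, data = parse(chunk_id, cdata)
        yield chunk_id, data
```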
+ 3 - 0
src/borg/archiver/__init__.py

@@ -92,6 +92,7 @@ from .serve_cmd import ServeMixIn
 from .tag_cmd import TagMixIn
 from .tar_cmds import TarMixIn
 from .transfer_cmd import TransferMixIn
+from .undelete_cmd import UnDeleteMixIn
 from .version_cmd import VersionMixIn


@@ -124,6 +125,7 @@ class Archiver(
     TagMixIn,
     TarMixIn,
     TransferMixIn,
+    UnDeleteMixIn,
     VersionMixIn,
 ):
     def __init__(self, lock_wait=None, prog=None):
@@ -364,6 +366,7 @@ class Archiver(
         self.build_parser_tag(subparsers, common_parser, mid_common_parser)
         self.build_parser_tar(subparsers, common_parser, mid_common_parser)
         self.build_parser_transfer(subparsers, common_parser, mid_common_parser)
+        self.build_parser_undelete(subparsers, common_parser, mid_common_parser)
         self.build_parser_version(subparsers, common_parser, mid_common_parser)
         return parser


+ 8 - 1
src/borg/archiver/_common.py

@@ -369,7 +369,9 @@ def define_exclusion_group(subparser, **kwargs):
     return exclude_group


-def define_archive_filters_group(subparser, *, sort_by=True, first_last=True, oldest_newest=True, older_newer=True):
+def define_archive_filters_group(
+    subparser, *, sort_by=True, first_last=True, oldest_newest=True, older_newer=True, deleted=False
+):
     filters_group = subparser.add_argument_group(
         "Archive filters", "Archive filters can be applied to repository targets."
     )
@@ -456,6 +458,11 @@ def define_archive_filters_group(subparser, *, sort_by=True, first_last=True, ol
             help="consider archives newer than (now - TIMESPAN), e.g. 7d or 12m.",
             help="consider archives newer than (now - TIMESPAN), e.g. 7d or 12m.",
         )
         )
 
 
+    if deleted:
+        filters_group.add_argument(
+            "--deleted", dest="deleted", action="store_true", help="consider only deleted archives."
+        )
+
     return filters_group



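A command opts into the new filter by passing `deleted=True` when building its parser and then reading `args.deleted` (as `repo-list` does in this diff). A runnable toy sketch of the same mechanism, reduced to just the new flag:

```python
import argparse

def define_archive_filters_group(subparser, *, deleted=False):
    # reduced version of the helper above: only the new --deleted flag
    filters_group = subparser.add_argument_group(
        "Archive filters", "Archive filters can be applied to repository targets."
    )
    if deleted:
        filters_group.add_argument(
            "--deleted", dest="deleted", action="store_true", help="consider only deleted archives."
        )
    return filters_group

parser = argparse.ArgumentParser()
subparsers = parser.add_subparsers()
repo_list = subparsers.add_parser("repo-list")
define_archive_filters_group(repo_list, deleted=True)  # opt in, like repo_list_cmd.py
args = parser.parse_args(["repo-list", "--deleted"])
assert args.deleted is True
```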
+ 23 - 17
src/borg/archiver/check_cmd.py

@@ -35,10 +35,10 @@ class CheckMixIn:
             raise CommandError(
                 "--repository-only contradicts --first, --last, -a / --match-archives and --verify-data arguments."
             )
+        if args.repo_only and args.find_lost_archives:
+            raise CommandError("--repository-only contradicts the --find-lost-archives option.")
         if args.repair and args.max_duration:
             raise CommandError("--repair does not allow --max-duration argument.")
-        if args.undelete_archives and not args.repair:
-            raise CommandError("--undelete-archives requires --repair argument.")
         if args.max_duration and not args.repo_only:
             # when doing a partial repo check, we can only check xxh64 hashes in repository files.
             # archives check requires that a full repo check was done before and has built/cached a ChunkIndex.
@@ -51,7 +51,7 @@ class CheckMixIn:
             repository,
             verify_data=args.verify_data,
             repair=args.repair,
-            undelete_archives=args.undelete_archives,
+            find_lost_archives=args.find_lost_archives,
             match=args.match_archives,
             sort_by=args.sort_by or "ts",
             first=args.first,
@@ -85,12 +85,14 @@ class CheckMixIn:
            archive data (requires ``--verify-data``). This includes ensuring that the
            repository manifest exists, the archive metadata chunk is present, and that
            all chunks referencing files (items) in the archive exist. This requires
-           reading archive and file metadata, but not data. To cryptographically verify
-           the file (content) data integrity pass ``--verify-data``, but keep in mind
-           that this requires reading all data and is hence very time consuming. When
-           checking archives of a remote repository, archive checks run on the client
-           machine because they require decrypting data and therefore the encryption
-           key.
+           reading archive and file metadata, but not data. To scan for archives whose
+           entries were lost from the archive directory, pass ``--find-lost-archives``.
+           It requires reading all data and is hence very time consuming.
+           To additionally cryptographically verify the file (content) data integrity,
+           pass ``--verify-data``, which is even more time consuming.
+
+           When checking archives of a remote repository, archive checks run on the client
+           machine because they require decrypting data and therefore the encryption key.

         Both steps can also be run independently. Pass ``--repository-only`` to run the
         repository checks only, or pass ``--archives-only`` to run the archive checks
@@ -122,6 +124,15 @@ class CheckMixIn:
         encrypted repositories against attackers without access to the keys. You can
         not use ``--verify-data`` with ``--repository-only``.

+        The ``--find-lost-archives`` option also scans the whole repository, but
+        tells Borg to search for lost archive metadata. If Borg encounters archive
+        metadata that has no matching archive directory entry, that entry was lost.
+        As long as ``borg compact`` has not been run, such archives can be fully
+        restored with ``--repair``. Please note that ``--find-lost-archives`` must
+        read a lot of data from the repository and is thus very time consuming.
+        You can not use ``--find-lost-archives`` with ``--repository-only``.
+
         About repair mode
         +++++++++++++++++

@@ -180,10 +191,8 @@ class CheckMixIn:
         Consequently, if lost chunks were repaired earlier, it is advised to run
         ``--repair`` a second time after creating some new backups.

-        If ``--repair --undelete-archives`` is given, Borg will scan the repository
-        for archive metadata and if it finds some where no corresponding archives
-        directory entry exists, it will create the entries. This is basically undoing
-        ``borg delete archive`` or ``borg prune ...`` commands and only possible before
+        If ``--repair --find-lost-archives`` is given, previously lost entries will
+        be recreated in the archive directory. This is only possible before
         ``borg compact`` would remove the archives' data completely.
         ``borg compact`` would remove the archives' data completely.
         """
         """
         )
         )
@@ -213,10 +222,7 @@ class CheckMixIn:
             "--repair", dest="repair", action="store_true", help="attempt to repair any inconsistencies found"
             "--repair", dest="repair", action="store_true", help="attempt to repair any inconsistencies found"
         )
         )
         subparser.add_argument(
         subparser.add_argument(
-            "--undelete-archives",
-            dest="undelete_archives",
-            action="store_true",
-            help="attempt to undelete archives (use with --repair)",
+            "--find-lost-archives", dest="find_lost_archives", action="store_true", help="attempt to find lost archives"
         )
         )
         subparser.add_argument(
             "--max-duration",
+ 36 - 3
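A usage sketch in the style of the archiver tests further down in this diff (`cmd` and `archiver` are the testsuite helpers; the exit codes are assumptions based on the new `error_found`/`repair` branches in `rebuild_archives_directory`):

```python
def test_find_lost_archives_workflow(archiver):
    # without --repair, lost entries are only reported ("Would create ...")
    # and error_found is set, so the check should fail (assumed exit code 1):
    cmd(archiver, "check", "--find-lost-archives", exit_code=1)
    # with --repair, the missing archives directory entries are recreated:
    cmd(archiver, "check", "--repair", "--find-lost-archives", exit_code=0)
```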
src/borg/archiver/compact_cmd.py

@@ -127,6 +127,15 @@ class ArchiveGarbageCollector:
             logger.warning(f"{len(self.reappeared_chunks)} previously missing objects re-appeared!" + run_repair)
             logger.warning(f"{len(self.reappeared_chunks)} previously missing objects re-appeared!" + run_repair)
             set_ec(EXIT_WARNING)
             set_ec(EXIT_WARNING)
 
 
+        logger.info("Cleaning archives directory from deleted archives...")
+        archive_infos = self.manifest.archives.list(sort_by=["ts"], deleted=True)
+        for archive_info in archive_infos:
+            name, id, hex_id = archive_info.name, archive_info.id, bin_to_hex(archive_info.id)
+            try:
+                self.manifest.archives.nuke_by_id(id)
+            except KeyError:
+                self.print_warning(f"Archive {name} {hex_id} not found.")
+
         repo_size_before = self.repository_size
         logger.info("Determining unused objects...")
         unused = set()
@@ -166,9 +175,33 @@ class CompactMixIn:
             """
             """
             Free repository space by deleting unused chunks.
             Free repository space by deleting unused chunks.
 
 
-            borg compact analyzes all existing archives to find out which chunks are
-            actually used. There might be unused chunks resulting from borg delete or prune,
-            which can be removed to free space in the repository.
+            borg compact analyzes all existing archives to find out which repository
+            objects are actually used (referenced). It then removes all unused objects
+            to free repository space.
+
+            Unused objects may result from:
+
+            - borg delete or prune usage
+            - interrupted backups (consider retrying the backup before running compact!)
+            - source files that were skipped during backup because of an I/O error in
+              the middle of their contents
+            - corruption of the repository (e.g. the archives directory having lost entries)
+
+            You usually don't want to run ``borg compact`` after every write operation, but
+            either regularly (e.g. once a month, possibly together with ``borg check``) or
+            when disk space needs to be freed.
+
+            **Important:**
+
+            After compacting it is not possible anymore to use ``borg undelete`` to recover
+            previously deleted archives.
+
+            ``borg compact`` might also delete data from archives that were "lost" due to
+            archives directory corruption. Such archives could potentially be restored with
+            ``borg check --find-lost-archives [--repair]``, which is slow, so you usually
+            only want to do that when there are signs of lost archives (e.g. fatal errors
+            when creating backups, or archives missing from ``borg list``).

             Differently than borg 1.x, borg2's compact needs the borg key if the repo is
             encrypted.

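The "Important" note above can be demonstrated end to end. A sketch in the style of the undelete tests at the end of this diff (`cmd`, `archiver` and `RK_ENCRYPTION` are testsuite helpers; the non-zero exit code for the failing undelete is an assumption):

```python
def test_compact_makes_delete_final(archiver):
    cmd(archiver, "repo-create", RK_ENCRYPTION)
    cmd(archiver, "create", "doomed", "input")
    cmd(archiver, "delete", "doomed")   # soft delete: entry moved aside, data kept
    cmd(archiver, "compact")            # nukes soft-deleted entries, frees the space
    # after compact, the archive can no longer be brought back:
    cmd(archiver, "undelete", "doomed", exit_code=2)  # assumed CommandError exit code
```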
+ 5 - 2
src/borg/archiver/delete_cmd.py

@@ -64,8 +64,11 @@ class DeleteMixIn:
             """
             """
         This command deletes archives from the repository.
         This command deletes archives from the repository.
 
 
-        Important: When deleting archives, repository disk space is **not** freed until
-        you run ``borg compact``.
+        Important:
+
+        - Repository disk space is **not** freed until you run ``borg compact``.
+        - You can use ``borg undelete`` to undelete archives, but only until
+          you run ``borg compact``.

         When in doubt, use ``--dry-run --list`` to see what would be deleted.


+ 5 - 1
src/borg/archiver/prune_cmd.py

@@ -215,7 +215,11 @@ class PruneMixIn:
         The prune command prunes a repository by deleting all archives not matching
         any of the specified retention options.

-        Important: Repository disk space is **not** freed until you run ``borg compact``.
+        Important:
+
+        - Repository disk space is **not** freed until you run ``borg compact``.
+        - You can use ``borg undelete`` to undelete archives, but only until
+          you run ``borg compact``.

         This command is normally used by automated backup scripts wanting to keep a
         certain number of historic backups. This retention policy is commonly referred to as

+ 2 - 2
src/borg/archiver/repo_list_cmd.py

@@ -26,7 +26,7 @@ class RepoListMixIn:
                 "BORG_RLIST_FORMAT",
                 "BORG_RLIST_FORMAT",
                 "{id:.8}  {time}  {archive:<15}  {tags:<10}  {username:<10}  {hostname:<10}  {comment:.40}{NL}",
                 "{id:.8}  {time}  {archive:<15}  {tags:<10}  {username:<10}  {hostname:<10}  {comment:.40}{NL}",
             )
             )
-        formatter = ArchiveFormatter(format, repository, manifest, manifest.key, iec=args.iec)
+        formatter = ArchiveFormatter(format, repository, manifest, manifest.key, iec=args.iec, deleted=args.deleted)

         output_data = []

@@ -113,4 +113,4 @@ class RepoListMixIn:
             "but keys used in it are added to the JSON output. "
             "but keys used in it are added to the JSON output. "
             "Some keys are always present. Note: JSON can only represent text.",
             "Some keys are always present. Note: JSON can only represent text.",
         )
         )
-        define_archive_filters_group(subparser)
+        define_archive_filters_group(subparser, deleted=True)

+ 90 - 0
src/borg/archiver/undelete_cmd.py

@@ -0,0 +1,90 @@
+import argparse
+import logging
+
+from ._common import with_repository
+from ..constants import *  # NOQA
+from ..helpers import format_archive, CommandError, bin_to_hex, archivename_validator
+from ..manifest import Manifest
+
+from ..logger import create_logger
+
+logger = create_logger()
+
+
+class UnDeleteMixIn:
+    @with_repository(manifest=False)
+    def do_undelete(self, args, repository):
+        """Undelete archives"""
+        self.output_list = args.output_list
+        dry_run = args.dry_run
+        manifest = Manifest.load(repository, (Manifest.Operation.DELETE,))
+        if args.name:
+            archive_infos = [manifest.archives.get_one([args.name], deleted=True)]
+        else:
+            args.deleted = True
+            archive_infos = manifest.archives.list_considering(args)
+        count = len(archive_infos)
+        if count == 0:
+            return
+        if not args.name and not args.match_archives and args.first == 0 and args.last == 0:
+            raise CommandError("Aborting: if you really want to undelete all archives, please use -a 'sh:*'.")
+
+        undeleted = False
+        logger_list = logging.getLogger("borg.output.list")
+        for i, archive_info in enumerate(archive_infos, 1):
+            name, id, hex_id = archive_info.name, archive_info.id, bin_to_hex(archive_info.id)
+            try:
+                if not dry_run:
+                    manifest.archives.undelete_by_id(id)
+            except KeyError:
+                self.print_warning(f"Archive {name} {hex_id} not found ({i}/{count}).")
+            else:
+                undeleted = True
+                if self.output_list:
+                    msg = "Would undelete: {} ({}/{})" if dry_run else "Undeleted archive: {} ({}/{})"
+                    logger_list.info(msg.format(format_archive(archive_info), i, count))
+        if dry_run:
+            logger.info("Finished dry-run.")
+        elif undeleted:
+            manifest.write()
+            self.print_warning("Done.", wc=None)
+        else:
+            self.print_warning("Aborted.", wc=None)
+        return
+
+    def build_parser_undelete(self, subparsers, common_parser, mid_common_parser):
+        from ._common import process_epilog, define_archive_filters_group
+
+        undelete_epilog = process_epilog(
+            """
+        This command undeletes archives in the repository.
+
+        Important: Undeleting archives is only possible before compacting.
+        Once ``borg compact`` has run, all disk space occupied only by the
+        deleted archives will be freed and undelete is not possible anymore.
+
+        When in doubt, use ``--dry-run --list`` to see what would be undeleted.
+
+        You can undelete multiple archives by specifying a matching pattern,
+        using the ``--match-archives PATTERN`` option (for more info on these patterns,
+        see :ref:`borg_patterns`).
+        """
+        )
+        subparser = subparsers.add_parser(
+            "undelete",
+            parents=[common_parser],
+            add_help=False,
+            description=self.do_undelete.__doc__,
+            epilog=undelete_epilog,
+            formatter_class=argparse.RawDescriptionHelpFormatter,
+            help="undelete archive",
+        )
+        subparser.set_defaults(func=self.do_undelete)
+        subparser.add_argument("-n", "--dry-run", dest="dry_run", action="store_true", help="do not change repository")
+        subparser.add_argument(
+            "--list", dest="output_list", action="store_true", help="output verbose list of archives"
+        )
+        define_archive_filters_group(subparser)
+        subparser.add_argument(
+            "name", metavar="NAME", nargs="?", type=archivename_validator, help="specify the archive name"
+        )

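The CommandError guard in `do_undelete` deserves a usage note: with no NAME and no archive filters, a mass undelete is refused and the "match everything" intent must be spelled out. A sketch (testsuite style; the exit code is an assumption):

```python
def test_undelete_all_needs_explicit_pattern(archiver):
    cmd(archiver, "delete", "old1")
    cmd(archiver, "delete", "old2")
    # no NAME, no -a/--match-archives, no --first/--last: refused
    cmd(archiver, "undelete", exit_code=2)  # assumed CommandError exit code
    # an explicit "match everything" pattern is required instead:
    cmd(archiver, "undelete", "-a", "sh:*")
```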
+ 3 - 2
src/borg/helpers/parseformat.py

@@ -718,7 +718,7 @@ class ArchiveFormatter(BaseFormatter):
         ("size", "nfiles"),
         ("size", "nfiles"),
     )
     )
 
 
-    def __init__(self, format, repository, manifest, key, *, iec=False):
+    def __init__(self, format, repository, manifest, key, *, iec=False, deleted=False):
         static_data = {}  # here could be stuff on repo level, above archive level
         static_data.update(self.FIXED_KEYS)
         super().__init__(format, static_data)
@@ -728,6 +728,7 @@ class ArchiveFormatter(BaseFormatter):
         self.name = None
         self.id = None
         self._archive = None
+        self.deleted = deleted  # True if we want to deal with deleted archives.
         self.iec = iec
         self.format_keys = {f[1] for f in Formatter().parse(format)}
         self.call_keys = {
@@ -772,7 +773,7 @@ class ArchiveFormatter(BaseFormatter):
         if self._archive is None or self._archive.id != self.id:
             from ..archive import Archive

-            self._archive = Archive(self.manifest, self.id, iec=self.iec)
+            self._archive = Archive(self.manifest, self.id, iec=self.iec, deleted=self.deleted)
         return self._archive

     def get_meta(self, key, default=None):

+ 45 - 16
src/borg/manifest.py

@@ -101,11 +101,17 @@ class Archives:
             manifest_archives = StableDict(self._get_raw_dict())
         return manifest_archives

-    def ids(self):
+    def ids(self, *, deleted=False):
         # yield the binary IDs of all archives
         if not self.legacy:
             try:
-                infos = list(self.repository.store_list("archives"))
+                infos = list(self.repository.store_list("archives", deleted=deleted))
+                if deleted:
+                    # hack: store_list(deleted=True) yields deleted AND not deleted items,
+                    # guess this should be fixed in a future borgstore release.
+                    # for now, we remove the not-deleted archives here:
+                    not_deleted_infos = set(self.repository.store_list("archives", deleted=False))
+                    infos = [info for info in infos if info not in not_deleted_infos]
             except ObjectNotFound:
                 infos = []
             for info in infos:
@@ -156,13 +162,13 @@ class Archives:
             )
         return metadata

-    def _infos(self):
+    def _infos(self, *, deleted=False):
         # yield the infos of all archives
-        for id in self.ids():
+        for id in self.ids(deleted=deleted):
             yield self._get_archive_meta(id)

-    def _info_tuples(self):
-        for info in self._infos():
+    def _info_tuples(self, *, deleted=False):
+        for info in self._infos(deleted=deleted):
             yield ArchiveInfo(
                 name=info["name"],
                 id=info["id"],
@@ -172,8 +178,8 @@ class Archives:
                 host=info["hostname"],
                 host=info["hostname"],
             )
             )
 
 
-    def _matching_info_tuples(self, match_patterns, match_end):
-        archive_infos = list(self._info_tuples())
+    def _matching_info_tuples(self, match_patterns, match_end, *, deleted=False):
+        archive_infos = list(self._info_tuples(deleted=deleted))
         if match_patterns:
             assert isinstance(match_patterns, list), f"match_pattern is a {type(match_patterns)}"
             for match in match_patterns:
@@ -216,6 +222,14 @@ class Archives:
         else:
             return name in self._archives

+    def exists_id(self, id, *, deleted=False):
+        # check if an archive with this id exists
+        assert isinstance(id, bytes)
+        if not self.legacy:
+            return id in self.ids(deleted=deleted)
+        else:
+            raise NotImplementedError
+
     def exists_name_and_id(self, name, id):
         # check if an archive with this name AND id exists
         assert isinstance(name, str)
@@ -279,13 +293,14 @@ class Archives:
             else:
                 return dict(name=name, id=values["id"], time=values["time"])

-    def get_by_id(self, id, raw=False):
+    def get_by_id(self, id, raw=False, *, deleted=False):
         assert isinstance(id, bytes)
         if not self.legacy:
-            if id in self.ids():  # check directory
+            if id in self.ids(deleted=deleted):  # check directory
                 # looks like this archive id is in the archives directory, thus it is NOT deleted.
+                # OR we have explicitly requested a soft-deleted archive via deleted=True.
                 archive_info = self._get_archive_meta(id)
-                if archive_info["exists"]:
+                if archive_info["exists"]:  # True means we have found Archive metadata in the repo.
                     if not raw:
                         ts = parse_timestamp(archive_info["time"])
                         archive_info = ArchiveInfo(
@@ -324,10 +339,22 @@ class Archives:
             self._archives[name] = {"id": id, "time": ts}
             self._archives[name] = {"id": id, "time": ts}
 
 
     def delete_by_id(self, id):
     def delete_by_id(self, id):
-        # delete an archive
+        # soft-delete an archive
+        assert isinstance(id, bytes)
+        assert not self.legacy
+        self.repository.store_move(f"archives/{bin_to_hex(id)}", delete=True)  # soft-delete
+
+    def undelete_by_id(self, id):
+        # undelete an archive
+        assert isinstance(id, bytes)
+        assert not self.legacy
+        self.repository.store_move(f"archives/{bin_to_hex(id)}", undelete=True)
+
+    def nuke_by_id(self, id):
+        # really delete an already soft-deleted archive
         assert isinstance(id, bytes)
         assert not self.legacy
-        self.repository.store_delete(f"archives/{bin_to_hex(id)}")
+        self.repository.store_delete(f"archives/{bin_to_hex(id)}", deleted=True)

     def list(
         self,
@@ -342,6 +369,7 @@ class Archives:
         newer=None,
         oldest=None,
         newest=None,
+        deleted=False,
     ):
         """
         Return list of ArchiveInfo instances according to the parameters.
@@ -363,7 +391,7 @@ class Archives:
         if isinstance(sort_by, (str, bytes)):
             raise TypeError("sort_by must be a sequence of str")

-        archive_infos = self._matching_info_tuples(match, match_end)
+        archive_infos = self._matching_info_tuples(match, match_end, deleted=deleted)

         if any([oldest, newest, older, newer]):
             archive_infos = filter_archives_by_date(
@@ -397,12 +425,13 @@ class Archives:
             newer=getattr(args, "newer", None),
             newer=getattr(args, "newer", None),
             oldest=getattr(args, "oldest", None),
             oldest=getattr(args, "oldest", None),
             newest=getattr(args, "newest", None),
             newest=getattr(args, "newest", None),
+            deleted=getattr(args, "deleted", False),
         )

-    def get_one(self, match, *, match_end=r"\Z"):
+    def get_one(self, match, *, match_end=r"\Z", deleted=False):
         """get exactly one archive matching <match>"""
         """get exactly one archive matching <match>"""
         assert match is not None
         assert match is not None
-        archive_infos = self._matching_info_tuples(match, match_end)
+        archive_infos = self._matching_info_tuples(match, match_end, deleted=deleted)
         if len(archive_infos) != 1:
             raise CommandError(f"{match} needed to match precisely one archive, but matched {len(archive_infos)}.")
         return archive_infos[0]

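Taken together, `delete_by_id`, `undelete_by_id` and `nuke_by_id` implement a three-state lifecycle for an archives directory entry. A sketch walking one entry through all states, using only the `Archives` methods added above (non-legacy repos; `archives` and `id` are assumed to exist):

```python
def archive_entry_lifecycle(archives, id):
    """Sketch only: present -> soft-deleted -> present -> soft-deleted -> gone."""
    archives.delete_by_id(id)            # store_move(..., delete=True): soft-delete
    assert archives.exists_id(id, deleted=True)
    archives.undelete_by_id(id)          # store_move(..., undelete=True): bring it back
    assert archives.exists_id(id, deleted=False)
    archives.delete_by_id(id)            # soft-delete again ...
    archives.nuke_by_id(id)              # store_delete(..., deleted=True): gone for good
    assert not archives.exists_id(id, deleted=True)
```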
+ 13 - 4
src/borg/remote.py

@@ -180,6 +180,7 @@ class RepositoryServer:  # pragma: no cover
         "store_load",
         "store_load",
         "store_store",
         "store_store",
         "store_delete",
         "store_delete",
+        "store_move",
     )

     def __init__(self, restrict_to_paths, restrict_to_repositories, append_only, storage_quota, use_socket):
@@ -1077,8 +1078,10 @@ class RemoteRepository:
     def put_manifest(self, data):
         """actual remoting is done via self.call in the @api decorator"""

-    @api(since=parse_version("2.0.0b8"))
-    def store_list(self, name):
+    @api(
+        since=parse_version("2.0.0b8"), deleted={"since": parse_version("2.0.0b13"), "previously": False}  # TODO -> b14
+    )
+    def store_list(self, name, *, deleted=False):
         """actual remoting is done via self.call in the @api decorator"""
         """actual remoting is done via self.call in the @api decorator"""
 
 
     @api(since=parse_version("2.0.0b8"))
     @api(since=parse_version("2.0.0b8"))
@@ -1089,8 +1092,14 @@ class RemoteRepository:
     def store_store(self, name, value):
         """actual remoting is done via self.call in the @api decorator"""

-    @api(since=parse_version("2.0.0b8"))
-    def store_delete(self, name):
+    @api(
+        since=parse_version("2.0.0b8"), deleted={"since": parse_version("2.0.0b13"), "previously": False}
+    )  # TODO -> b14
+    def store_delete(self, name, *, deleted=False):
+        """actual remoting is done via self.call in the @api decorator"""
+
+    @api(since=parse_version("2.0.0b13"))  # TODO -> b14
+    def store_move(self, name, new_name=None, *, delete=False, undelete=False, deleted=False):
         """actual remoting is done via self.call in the @api decorator"""
         """actual remoting is done via self.call in the @api decorator"""
 
 
 
 

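The `deleted={"since": ..., "previously": False}` dicts drive the RPC compatibility logic: against a server older than `since`, the new keyword may only carry its old implicit behavior. A simplified sketch of that rule (not borg's actual `@api` decorator; `packaging.version.parse` stands in for borg's `parse_version`):

```python
from packaging.version import parse as parse_version  # stand-in for borg's parse_version

def check_rpc_arg(server_version, arg_name, value, *, since, previously):
    """Simplified sketch of the @api kwarg-compatibility rule.

    New kwargs may be sent only to servers that understand them; for older
    servers the call is allowed only if the value equals the old implicit
    behavior ("previously"), so the kwarg can simply be omitted.
    """
    if server_version >= since:
        return {arg_name: value}   # server understands the kwarg: send it
    if value == previously:
        return {}                  # old server, old behavior: omit the kwarg
    raise ValueError(f"server too old for {arg_name}={value!r} (needs >= {since})")

# e.g. store_list(..., deleted=True) against a pre-2.0.0b13 server would raise:
check_rpc_arg(parse_version("2.0.0b13"), "deleted", True,
              since=parse_version("2.0.0b13"), previously=False)
```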
+ 8 - 4
src/borg/repository.py

@@ -519,10 +519,10 @@ class Repository:
         self._lock_refresh()
         return self.store.store("config/manifest", data)

-    def store_list(self, name):
+    def store_list(self, name, *, deleted=False):
         self._lock_refresh()
         try:
-            return list(self.store.list(name))
+            return list(self.store.list(name, deleted=deleted))
         except StoreObjectNotFound:
             return []

@@ -534,6 +534,10 @@ class Repository:
         self._lock_refresh()
         return self.store.store(name, value)

-    def store_delete(self, name):
+    def store_delete(self, name, *, deleted=False):
         self._lock_refresh()
-        return self.store.delete(name)
+        return self.store.delete(name, deleted=deleted)
+
+    def store_move(self, name, new_name=None, *, delete=False, undelete=False, deleted=False):
+        self._lock_refresh()
+        return self.store.move(name, new_name, delete=delete, undelete=undelete, deleted=deleted)

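What the new `store_move`/`store_delete(deleted=True)` calls expect from borgstore can be modeled with a toy in-memory store (a sketch, not borgstore's real implementation; note that `Archives.ids()` above works around current borgstore's `list(deleted=True)` also yielding non-deleted items, whereas this toy returns only soft-deleted ones):

```python
class ToyStore:
    def __init__(self):
        self.live = {}     # name -> value
        self.deleted = {}  # name -> value (soft-deleted)

    def list(self, prefix, *, deleted=False):
        ns = self.deleted if deleted else self.live
        return [name for name in ns if name.startswith(prefix)]

    def move(self, name, new_name=None, *, delete=False, undelete=False, deleted=False):
        if delete:      # soft-delete: move aside, keep the value
            self.deleted[name] = self.live.pop(name)
        elif undelete:  # bring it back
            self.live[name] = self.deleted.pop(name)
        else:           # plain rename, in whichever namespace it lives
            ns = self.deleted if deleted else self.live
            ns[new_name] = ns.pop(name)

    def delete(self, name, *, deleted=False):
        ns = self.deleted if deleted else self.live
        del ns[name]    # with deleted=True this is the final "nuke"

store = ToyStore()
store.live["archives/aa11"] = b"..."
store.move("archives/aa11", delete=True)      # borg delete
store.move("archives/aa11", undelete=True)    # borg undelete
store.move("archives/aa11", delete=True)
store.delete("archives/aa11", deleted=True)   # borg compact
assert not store.list("archives") and not store.list("archives", deleted=True)
```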
+ 12 - 8
src/borg/testsuite/archiver/check_cmd_test.py

@@ -1,4 +1,5 @@
 from datetime import datetime, timezone, timedelta
+from pathlib import Path
 import shutil
 from unittest.mock import patch

@@ -270,18 +271,21 @@ def test_manifest_rebuild_corrupted_chunk(archivers, request):
 def test_check_undelete_archives(archivers, request):
     archiver = request.getfixturevalue(archivers)
     check_cmd_setup(archiver)  # creates archive1 and archive2
-    # borg delete does it rather quick and dirty: it only kills the archives directory entry
-    cmd(archiver, "delete", "archive1")
-    cmd(archiver, "delete", "archive2")
+    existing_archive_ids = set(cmd(archiver, "repo-list", "--short").splitlines())
+    create_src_archive(archiver, "archive3")
+    archive_ids = set(cmd(archiver, "repo-list", "--short").splitlines())
+    new_archive_id_hex = (archive_ids - existing_archive_ids).pop()
+    (Path(archiver.repository_path) / "archives" / new_archive_id_hex).unlink()  # lose the entry for archive3
     output = cmd(archiver, "repo-list")
     output = cmd(archiver, "repo-list")
-    assert "archive1" not in output
-    assert "archive2" not in output
-    # borg check will re-discover archive1 and archive2 and new archives directory entries
-    # will be created because we requested undeleting archives.
-    cmd(archiver, "check", "--repair", "--undelete-archives", exit_code=0)
+    assert "archive1" in output
+    assert "archive2" in output
+    assert "archive3" not in output
+    # borg check will re-discover archive3 and create a new archives directory entry.
+    cmd(archiver, "check", "--repair", "--find-lost-archives", exit_code=0)
     output = cmd(archiver, "repo-list")
     output = cmd(archiver, "repo-list")
     assert "archive1" in output
     assert "archive1" in output
     assert "archive2" in output
     assert "archive2" in output
+    assert "archive3" in output


 def test_spoofed_archive(archivers, request):

+ 20 - 0
src/borg/testsuite/archiver/repo_list_cmd_test.py

@@ -98,3 +98,23 @@ def test_repo_list_json(archivers, request):
     assert "keyfile" not in list_repo["encryption"]
     assert "keyfile" not in list_repo["encryption"]
     archive0 = list_repo["archives"][0]
     archive0 = list_repo["archives"][0]
     checkts(archive0["time"])
     checkts(archive0["time"])
+
+
+def test_repo_list_deleted(archivers, request):
+    archiver = request.getfixturevalue(archivers)
+    cmd(archiver, "repo-create", RK_ENCRYPTION)
+    cmd(archiver, "create", "normal1", src_dir)
+    cmd(archiver, "create", "deleted1", src_dir)
+    cmd(archiver, "create", "normal2", src_dir)
+    cmd(archiver, "create", "deleted2", src_dir)
+    cmd(archiver, "delete", "-a", "sh:deleted*")
+    output = cmd(archiver, "repo-list")
+    assert "normal1" in output
+    assert "normal2" in output
+    assert "deleted1" not in output
+    assert "deleted2" not in output
+    output = cmd(archiver, "repo-list", "--deleted")
+    assert "normal1" not in output
+    assert "normal2" not in output
+    assert "deleted1" in output
+    assert "deleted2" in output

+ 67 - 0
src/borg/testsuite/archiver/undelete_cmd_test.py

@@ -0,0 +1,67 @@
+from ...constants import *  # NOQA
+from . import cmd, create_regular_file, generate_archiver_tests, RK_ENCRYPTION
+
+pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary")  # NOQA
+
+
+def test_undelete_single(archivers, request):
+    archiver = request.getfixturevalue(archivers)
+    create_regular_file(archiver.input_path, "file1", size=1024 * 80)
+    cmd(archiver, "repo-create", RK_ENCRYPTION)
+    cmd(archiver, "create", "normal", "input")
+    cmd(archiver, "create", "deleted", "input")
+    cmd(archiver, "delete", "deleted")
+    output = cmd(archiver, "repo-list")
+    assert "normal" in output
+    assert "deleted" not in output
+    cmd(archiver, "undelete", "deleted")
+    output = cmd(archiver, "repo-list")
+    assert "normal" in output
+    assert "deleted" in output  # it's back!
+    cmd(archiver, "check")
+
+
+def test_undelete_multiple_dryrun(archivers, request):
+    archiver = request.getfixturevalue(archivers)
+    create_regular_file(archiver.input_path, "file1", size=1024 * 80)
+    cmd(archiver, "repo-create", RK_ENCRYPTION)
+    cmd(archiver, "create", "normal", "input")
+    cmd(archiver, "create", "deleted1", "input")
+    cmd(archiver, "create", "deleted2", "input")
+    cmd(archiver, "delete", "deleted1")
+    cmd(archiver, "delete", "deleted2")
+    output = cmd(archiver, "repo-list")
+    assert "normal" in output
+    assert "deleted1" not in output
+    assert "deleted2" not in output
+    output = cmd(archiver, "undelete", "--dry-run", "--list", "-a", "sh:*")
+    assert "normal" not in output  # not a candidate for undeletion
+    assert "deleted1" in output  # candidate for undeletion
+    assert "deleted2" in output  # candidate for undeletion
+    output = cmd(archiver, "repo-list")  # nothing change, it was a dry-run
+    assert "normal" in output
+    assert "deleted1" not in output
+    assert "deleted2" not in output
+
+
+def test_undelete_multiple_run(archivers, request):
+    archiver = request.getfixturevalue(archivers)
+    create_regular_file(archiver.input_path, "file1", size=1024 * 80)
+    cmd(archiver, "repo-create", RK_ENCRYPTION)
+    cmd(archiver, "create", "normal", "input")
+    cmd(archiver, "create", "deleted1", "input")
+    cmd(archiver, "create", "deleted2", "input")
+    cmd(archiver, "delete", "deleted1")
+    cmd(archiver, "delete", "deleted2")
+    output = cmd(archiver, "repo-list")
+    assert "normal" in output
+    assert "deleted1" not in output
+    assert "deleted2" not in output
+    output = cmd(archiver, "undelete", "--list", "-a", "sh:*")
+    assert "normal" not in output  # not undeleted
+    assert "deleted1" in output  # undeleted
+    assert "deleted2" in output  # undeleted
+    output = cmd(archiver, "repo-list")  # nothing change, it was a dry-run
+    assert "normal" in output
+    assert "deleted1" in output
+    assert "deleted2" in output