Browse Source

delete: just remove archive from manifest, let borg compact clean up later.

much faster and easier now, similar to what borg delete --force --force used to do.

considering that speed, no need for checkpointing anymore.

--stats does not work that way, thus it was removed. borg compact now shows some stats.
Thomas Waldmann 9 tháng trước cách đây
mục cha
commit
4c052cd65d

+ 5 - 78
src/borg/archive.py

@@ -1038,84 +1038,11 @@ Duration: {0.duration}
         self.set_meta("name", name)
         del self.manifest.archives[oldname]
 
-    def delete(self, stats, progress=False, forced=False):
-        class ChunksIndexError(Error):
-            """Chunk ID {} missing from chunks index, corrupted chunks index - aborting transaction."""
-
-        exception_ignored = object()
-
-        def fetch_async_response(wait=True):
-            try:
-                return self.repository.async_response(wait=wait)
-            except Repository3.ObjectNotFound:
-                nonlocal error
-                # object not in repo - strange, but we wanted to delete it anyway.
-                if forced == 0:
-                    raise
-                error = True
-                return exception_ignored  # must not return None here
-
-        def chunk_decref(id, size, stats):
-            try:
-                self.cache.chunk_decref(id, size, stats, wait=False)
-            except KeyError:
-                nonlocal error
-                if forced == 0:
-                    cid = bin_to_hex(id)
-                    raise ChunksIndexError(cid)
-                error = True
-            else:
-                fetch_async_response(wait=False)
-
-        error = False
-        try:
-            unpacker = msgpack.Unpacker(use_list=False)
-            items_ids = self.metadata.items
-            pi = ProgressIndicatorPercent(
-                total=len(items_ids), msg="Decrementing references %3.0f%%", msgid="archive.delete"
-            )
-            for i, (items_id, data) in enumerate(zip(items_ids, self.repository.get_many(items_ids))):
-                if progress:
-                    pi.show(i)
-                _, data = self.repo_objs.parse(items_id, data, ro_type=ROBJ_ARCHIVE_STREAM)
-                unpacker.feed(data)
-                chunk_decref(items_id, 1, stats)
-                try:
-                    for item in unpacker:
-                        item = Item(internal_dict=item)
-                        if "chunks" in item:
-                            for chunk_id, size in item.chunks:
-                                chunk_decref(chunk_id, size, stats)
-                except (TypeError, ValueError):
-                    # if items metadata spans multiple chunks and one chunk got dropped somehow,
-                    # it could be that unpacker yields bad types
-                    if forced == 0:
-                        raise
-                    error = True
-            if progress:
-                pi.finish()
-        except (msgpack.UnpackException, Repository3.ObjectNotFound):
-            # items metadata corrupted
-            if forced == 0:
-                raise
-            error = True
-
-        # delete the blocks that store all the references that end up being loaded into metadata.items:
-        for id in self.metadata.item_ptrs:
-            chunk_decref(id, 1, stats)
-
-        # in forced delete mode, we try hard to delete at least the manifest entry,
-        # if possible also the archive superblock, even if processing the items raises
-        # some harmless exception.
-        chunk_decref(self.id, 1, stats)
+    def delete(self):
+        # quick and dirty: we just nuke the archive from the archives list - that will
+        # potentially orphan all chunks previously referenced by the archive, except the ones also
+        # referenced by other archives. In the end, "borg compact" will clean up and free space.
         del self.manifest.archives[self.name]
-        while fetch_async_response(wait=True) is not None:
-            # we did async deletes, process outstanding results (== exceptions),
-            # so there is nothing pending when we return and our caller wants to commit.
-            pass
-        if error:
-            logger.warning("forced deletion succeeded, but the deleted archive was corrupted.")
-            logger.warning("borg check --repair is required to free all space.")
 
     @staticmethod
     def compare_archives_iter(
@@ -2501,7 +2428,7 @@ class ArchiveRecreater:
 
         target.save(comment=comment, timestamp=self.timestamp, additional_metadata=additional_metadata)
         if replace_original:
-            archive.delete(Statistics(), progress=self.progress)
+            archive.delete()
             target.rename(archive.name)
         if self.stats:
             target.start = _start

+ 24 - 90
src/borg/archiver/delete_cmd.py

@@ -1,11 +1,9 @@
 import argparse
 import logging
 
-from ._common import with_repository, Highlander
-from ..archive import Archive, Statistics
-from ..cache import Cache
+from ._common import with_repository
 from ..constants import *  # NOQA
-from ..helpers import log_multi, format_archive, sig_int, CommandError, Error
+from ..helpers import format_archive, CommandError
 from ..manifest import Manifest
 
 from ..logger import create_logger
@@ -29,67 +27,30 @@ class DeleteMixIn:
                 "or just delete the whole repository (might be much faster)."
             )
 
-        if args.forced == 2:
-            deleted = False
-            logger_list = logging.getLogger("borg.output.list")
-            for i, archive_name in enumerate(archive_names, 1):
-                try:
-                    current_archive = manifest.archives.pop(archive_name)
-                except KeyError:
-                    self.print_warning(f"Archive {archive_name} not found ({i}/{len(archive_names)}).")
-                else:
-                    deleted = True
-                    if self.output_list:
-                        msg = "Would delete: {} ({}/{})" if dry_run else "Deleted archive: {} ({}/{})"
-                        logger_list.info(msg.format(format_archive(current_archive), i, len(archive_names)))
-            if dry_run:
-                logger.info("Finished dry-run.")
-            elif deleted:
-                manifest.write()
-                # note: might crash in compact() after committing the repo
-                repository.commit(compact=False)
-                self.print_warning('Done. Run "borg check --repair" to clean up the mess.', wc=None)
+        deleted = False
+        logger_list = logging.getLogger("borg.output.list")
+        for i, archive_name in enumerate(archive_names, 1):
+            try:
+                # this does NOT use Archive.delete, so this code hopefully even works in cases a corrupt archive
+                # would make the code in class Archive crash, so the user can at least get rid of such archives.
+                current_archive = manifest.archives.pop(archive_name)
+            except KeyError:
+                self.print_warning(f"Archive {archive_name} not found ({i}/{len(archive_names)}).")
             else:
-                self.print_warning("Aborted.", wc=None)
-            return
-
-        stats = Statistics(iec=args.iec)
-        with Cache(repository, manifest, progress=args.progress, lock_wait=self.lock_wait, iec=args.iec) as cache:
-
-            def checkpoint_func():
-                manifest.write()
-                repository.commit(compact=False)
-                cache.commit()
+                deleted = True
+                if self.output_list:
+                    msg = "Would delete: {} ({}/{})" if dry_run else "Deleted archive: {} ({}/{})"
+                    logger_list.info(msg.format(format_archive(current_archive), i, len(archive_names)))
+        if dry_run:
+            logger.info("Finished dry-run.")
+        elif deleted:
+            manifest.write()
+            repository.commit(compact=False)
+            self.print_warning('Done. Run "borg compact" to free space.', wc=None)
+        else:
+            self.print_warning("Aborted.", wc=None)
+        return
 
-            msg_delete = "Would delete archive: {} ({}/{})" if dry_run else "Deleting archive: {} ({}/{})"
-            msg_not_found = "Archive {} not found ({}/{})."
-            logger_list = logging.getLogger("borg.output.list")
-            uncommitted_deletes = 0
-            for i, archive_name in enumerate(archive_names, 1):
-                if sig_int and sig_int.action_done():
-                    break
-                try:
-                    archive_info = manifest.archives[archive_name]
-                except KeyError:
-                    self.print_warning(msg_not_found.format(archive_name, i, len(archive_names)))
-                else:
-                    if self.output_list:
-                        logger_list.info(msg_delete.format(format_archive(archive_info), i, len(archive_names)))
-
-                    if not dry_run:
-                        archive = Archive(manifest, archive_name, cache=cache)
-                        archive.delete(stats, progress=args.progress, forced=args.forced)
-                        checkpointed = self.maybe_checkpoint(
-                            checkpoint_func=checkpoint_func, checkpoint_interval=args.checkpoint_interval
-                        )
-                        uncommitted_deletes = 0 if checkpointed else (uncommitted_deletes + 1)
-            if sig_int:
-                # Ctrl-C / SIGINT: do not checkpoint (commit) again, we already have a checkpoint in this case.
-                raise Error("Got Ctrl-C / SIGINT.")
-            elif uncommitted_deletes > 0:
-                checkpoint_func()
-            if args.stats:
-                log_multi(str(stats), logger=logging.getLogger("borg.output.stats"))
 
     def build_parser_delete(self, subparsers, common_parser, mid_common_parser):
         from ._common import process_epilog, define_archive_filters_group
@@ -103,16 +64,9 @@ class DeleteMixIn:
 
         When in doubt, use ``--dry-run --list`` to see what would be deleted.
 
-        When using ``--stats``, you will get some statistics about how much data was
-        deleted - the "Deleted data" deduplicated size there is most interesting as
-        that is how much your repository will shrink.
-        Please note that the "All archives" stats refer to the state after deletion.
-
         You can delete multiple archives by specifying a matching pattern,
         using the ``--match-archives PATTERN`` option (for more info on these patterns,
         see :ref:`borg_patterns`).
-
-        Always first use ``--dry-run --list`` to see what would be deleted.
         """
         )
         subparser = subparsers.add_parser(
@@ -135,24 +89,4 @@ class DeleteMixIn:
             dest="consider_checkpoints",
             help="consider checkpoint archives for deletion (default: not considered).",
         )
-        subparser.add_argument(
-            "-s", "--stats", dest="stats", action="store_true", help="print statistics for the deleted archive"
-        )
-        subparser.add_argument(
-            "--force",
-            dest="forced",
-            action="count",
-            default=0,
-            help="force deletion of corrupted archives, " "use ``--force --force`` in case ``--force`` does not work.",
-        )
-        subparser.add_argument(
-            "-c",
-            "--checkpoint-interval",
-            metavar="SECONDS",
-            dest="checkpoint_interval",
-            type=int,
-            default=1800,
-            action=Highlander,
-            help="write checkpoint every SECONDS seconds (Default: 1800)",
-        )
         define_archive_filters_group(subparser)

+ 7 - 42
src/borg/archiver/prune_cmd.py

@@ -7,10 +7,10 @@ import os
 import re
 
 from ._common import with_repository, Highlander
-from ..archive import Archive, Statistics
+from ..archive import Archive
 from ..cache import Cache
 from ..constants import *  # NOQA
-from ..helpers import ArchiveFormatter, interval, sig_int, log_multi, ProgressIndicatorPercent, CommandError, Error
+from ..helpers import ArchiveFormatter, interval, sig_int, ProgressIndicatorPercent, CommandError, Error
 from ..manifest import Manifest
 
 from ..logger import create_logger
@@ -127,14 +127,7 @@ class PruneMixIn:
                 keep += prune_split(archives, rule, num, kept_because)
 
         to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints))
-        stats = Statistics(iec=args.iec)
         with Cache(repository, manifest, lock_wait=self.lock_wait, iec=args.iec) as cache:
-
-            def checkpoint_func():
-                manifest.write()
-                repository.commit(compact=False)
-                cache.commit()
-
             list_logger = logging.getLogger("borg.output.list")
             # set up counters for the progress display
             to_delete_len = len(to_delete)
@@ -152,11 +145,8 @@ class PruneMixIn:
                         archives_deleted += 1
                         log_message = "Pruning archive (%d/%d):" % (archives_deleted, to_delete_len)
                         archive = Archive(manifest, archive.name, cache)
-                        archive.delete(stats, forced=args.forced)
-                        checkpointed = self.maybe_checkpoint(
-                            checkpoint_func=checkpoint_func, checkpoint_interval=args.checkpoint_interval
-                        )
-                        uncommitted_deletes = 0 if checkpointed else (uncommitted_deletes + 1)
+                        archive.delete()
+                        uncommitted_deletes += 1
                 else:
                     if is_checkpoint(archive.name):
                         log_message = "Keeping checkpoint archive:"
@@ -172,12 +162,11 @@ class PruneMixIn:
                     list_logger.info(f"{log_message:<44} {formatter.format_item(archive, jsonline=False)}")
             pi.finish()
             if sig_int:
-                # Ctrl-C / SIGINT: do not checkpoint (commit) again, we already have a checkpoint in this case.
                 raise Error("Got Ctrl-C / SIGINT.")
             elif uncommitted_deletes > 0:
-                checkpoint_func()
-            if args.stats:
-                log_multi(str(stats), logger=logging.getLogger("borg.output.stats"))
+                manifest.write()
+                repository.commit(compact=False)
+                cache.commit()
 
     def build_parser_prune(self, subparsers, common_parser, mid_common_parser):
         from ._common import process_epilog
@@ -235,11 +224,6 @@ class PruneMixIn:
         keep the last N archives under the assumption that you do not create more than one
         backup archive in the same second).
 
-        When using ``--stats``, you will get some statistics about how much data was
-        deleted - the "Deleted data" deduplicated size there is most interesting as
-        that is how much your repository will shrink.
-        Please note that the "All archives" stats refer to the state after pruning.
-
         You can influence how the ``--list`` output is formatted by using the ``--short``
         option (less wide output) or by giving a custom format using ``--format`` (see
         the ``borg rlist`` description for more details about the format string).
@@ -256,15 +240,6 @@ class PruneMixIn:
         )
         subparser.set_defaults(func=self.do_prune)
         subparser.add_argument("-n", "--dry-run", dest="dry_run", action="store_true", help="do not change repository")
-        subparser.add_argument(
-            "--force",
-            dest="forced",
-            action="store_true",
-            help="force pruning of corrupted archives, " "use ``--force --force`` in case ``--force`` does not work.",
-        )
-        subparser.add_argument(
-            "-s", "--stats", dest="stats", action="store_true", help="print statistics for the deleted archive"
-        )
         subparser.add_argument(
             "--list", dest="output_list", action="store_true", help="output verbose list of archives it keeps/prunes"
         )
@@ -353,13 +328,3 @@ class PruneMixIn:
             help="number of yearly archives to keep",
         )
         define_archive_filters_group(subparser, sort_by=False, first_last=False)
-        subparser.add_argument(
-            "-c",
-            "--checkpoint-interval",
-            metavar="SECONDS",
-            dest="checkpoint_interval",
-            type=int,
-            default=1800,
-            action=Highlander,
-            help="write checkpoint every SECONDS seconds (Default: 1800)",
-        )

+ 5 - 52
src/borg/testsuite/archiver/delete_cmd.py

@@ -1,13 +1,10 @@
-from ...archive import Archive
 from ...constants import *  # NOQA
-from ...manifest import Manifest
-from ...repository3 import Repository3
-from . import cmd, create_regular_file, src_file, create_src_archive, generate_archiver_tests, RK_ENCRYPTION
+from . import cmd, create_regular_file, generate_archiver_tests, RK_ENCRYPTION
 
 pytest_generate_tests = lambda metafunc: generate_archiver_tests(metafunc, kinds="local,remote,binary")  # NOQA
 
 
-def test_delete(archivers, request):
+def test_delete_options(archivers, request):
     archiver = request.getfixturevalue(archivers)
     create_regular_file(archiver.input_path, "file1", size=1024 * 80)
     create_regular_file(archiver.input_path, "dir2/file2", size=1024 * 80)
@@ -17,14 +14,11 @@ def test_delete(archivers, request):
     cmd(archiver, "create", "test.3", "input")
     cmd(archiver, "create", "another_test.1", "input")
     cmd(archiver, "create", "another_test.2", "input")
-    cmd(archiver, "extract", "test", "--dry-run")
-    cmd(archiver, "extract", "test.2", "--dry-run")
     cmd(archiver, "delete", "--match-archives", "sh:another_*")
-    cmd(archiver, "delete", "--last", "1")
+    cmd(archiver, "delete", "--last", "1")  # test.3
     cmd(archiver, "delete", "-a", "test")
-    cmd(archiver, "extract", "test.2", "--dry-run")
-    output = cmd(archiver, "delete", "-a", "test.2", "--stats")
-    assert "Original size: -" in output  # negative size == deleted data
+    cmd(archiver, "extract", "test.2", "--dry-run")  # still there?
+    cmd(archiver, "delete", "-a", "test.2")
     output = cmd(archiver, "rlist")
     assert output == ""  # no archives left!
 
@@ -35,47 +29,6 @@ def test_delete_multiple(archivers, request):
     cmd(archiver, "rcreate", RK_ENCRYPTION)
     cmd(archiver, "create", "test1", "input")
     cmd(archiver, "create", "test2", "input")
-    cmd(archiver, "create", "test3", "input")
     cmd(archiver, "delete", "-a", "test1")
     cmd(archiver, "delete", "-a", "test2")
-    cmd(archiver, "extract", "test3", "--dry-run")
-    cmd(archiver, "delete", "-a", "test3")
     assert not cmd(archiver, "rlist")
-
-
-def test_delete_force(archivers, request):
-    archiver = request.getfixturevalue(archivers)
-    cmd(archiver, "rcreate", "--encryption=none")
-    create_src_archive(archiver, "test")
-    with Repository3(archiver.repository_path, exclusive=True) as repository:
-        manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
-        archive = Archive(manifest, "test")
-        for item in archive.iter_items():
-            if item.path.endswith(src_file):
-                repository.delete(item.chunks[-1].id)
-                break
-        else:
-            assert False  # missed the file
-        repository.commit(compact=False)
-    output = cmd(archiver, "delete", "-a", "test", "--force")
-    assert "deleted archive was corrupted" in output
-
-    cmd(archiver, "check", "--repair")
-    output = cmd(archiver, "rlist")
-    assert "test" not in output
-
-
-def test_delete_double_force(archivers, request):
-    archiver = request.getfixturevalue(archivers)
-    cmd(archiver, "rcreate", "--encryption=none")
-    create_src_archive(archiver, "test")
-    with Repository3(archiver.repository_path, exclusive=True) as repository:
-        manifest = Manifest.load(repository, Manifest.NO_OPERATION_CHECK)
-        archive = Archive(manifest, "test")
-        id = archive.metadata.items[0]
-        repository.put(id, b"corrupted items metadata stream chunk")
-        repository.commit(compact=False)
-    cmd(archiver, "delete", "-a", "test", "--force", "--force")
-    cmd(archiver, "check", "--repair")
-    output = cmd(archiver, "rlist")
-    assert "test" not in output