Bläddra i källkod

Merge pull request #7837 from ThomasWaldmann/remove-recreate-recompress

Remove recreate --recompress option
TW 1 år sedan
förälder
incheckning
506718e82f

+ 4 - 26
src/borg/archive.py

@@ -23,7 +23,7 @@ from . import xattr
 from .chunker import get_chunker, Chunk
 from .cache import ChunkListEntry
 from .crypto.key import key_factory, UnsupportedPayloadError
-from .compress import Compressor, CompressionSpec
+from .compress import CompressionSpec
 from .constants import *  # NOQA
 from .crypto.low_level import IntegrityError as IntegrityErrorBase
 from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer
@@ -2349,8 +2349,6 @@ class ArchiveRecreater:
         keep_exclude_tags=False,
         chunker_params=None,
         compression=None,
-        recompress=False,
-        always_recompress=False,
         dry_run=False,
         stats=False,
         progress=False,
@@ -2374,8 +2372,6 @@ class ArchiveRecreater:
         if self.rechunkify:
             logger.debug("Rechunking archives to %s", chunker_params)
         self.chunker_params = chunker_params or CHUNKER_PARAMS
-        self.recompress = recompress
-        self.always_recompress = always_recompress
         self.compression = compression or CompressionSpec("none")
         self.seen_chunks = set()
 
@@ -2393,13 +2389,7 @@ class ArchiveRecreater:
         target = self.create_target(archive, target_name)
         if self.exclude_if_present or self.exclude_caches:
             self.matcher_add_tagged_dirs(archive)
-        if (
-            self.matcher.empty()
-            and not self.recompress
-            and not target.recreate_rechunkify
-            and comment is None
-            and target_name is None
-        ):
+        if self.matcher.empty() and not target.recreate_rechunkify and comment is None and target_name is None:
             # nothing to do
             return False
         self.process_items(archive, target)
@@ -2432,7 +2422,7 @@ class ArchiveRecreater:
         self.print_file_status(status, item.path)
 
     def process_chunks(self, archive, target, item):
-        if not self.recompress and not target.recreate_rechunkify:
+        if not target.recreate_rechunkify:
             for chunk_id, size in item.chunks:
                 self.cache.chunk_incref(chunk_id, target.stats)
             return item.chunks
@@ -2444,19 +2434,7 @@ class ArchiveRecreater:
         chunk_id, data = cached_hash(chunk, self.key.id_hash)
         if chunk_id in self.seen_chunks:
             return self.cache.chunk_incref(chunk_id, target.stats)
-        overwrite = self.recompress
-        if self.recompress and not self.always_recompress and chunk_id in self.cache.chunks:
-            # Check if this chunk is already compressed the way we want it
-            old_meta = self.repo_objs.parse_meta(chunk_id, self.repository.get(chunk_id, read_data=False))
-            compr_hdr = bytes((old_meta["ctype"], old_meta["clevel"]))
-            compressor_cls, level = Compressor.detect(compr_hdr)
-            if (
-                compressor_cls.name == self.repo_objs.compressor.decide({}, data).name
-                and level == self.repo_objs.compressor.level
-            ):
-                # Stored chunk has the same compression method and level as we wanted
-                overwrite = False
-        chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, overwrite=overwrite, wait=False)
+        chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, wait=False)
         self.cache.repository.async_response(wait=False)
         self.seen_chunks.add(chunk_entry.id)
         return chunk_entry

+ 2 - 30
src/borg/archiver/recreate_cmd.py

@@ -21,8 +21,6 @@ class RecreateMixIn:
         matcher = build_matcher(args.patterns, args.paths)
         self.output_list = args.output_list
         self.output_filter = args.output_filter
-        recompress = args.recompress != "never"
-        always_recompress = args.recompress == "always"
 
         recreater = ArchiveRecreater(
             manifest,
@@ -33,8 +31,6 @@ class RecreateMixIn:
             keep_exclude_tags=args.keep_exclude_tags,
             chunker_params=args.chunker_params,
             compression=args.compression,
-            recompress=recompress,
-            always_recompress=always_recompress,
             progress=args.progress,
             stats=args.stats,
             file_status_printer=self.print_file_status,
@@ -81,11 +77,6 @@ class RecreateMixIn:
         Note that all paths in an archive are relative, therefore absolute patterns/paths
         will *not* match (``--exclude``, ``--exclude-from``, PATHs).
 
-        ``--recompress`` allows one to change the compression of existing data in archives.
-        Due to how Borg stores compressed size information this might display
-        incorrect information for archives that were not recreated at the same time.
-        There is no risk of data loss by this.
-
         ``--chunker-params`` will re-chunk all files in the archive, this can be
         used to have upgraded Borg 0.xx archives deduplicate with Borg 1.x archives.
 
@@ -101,9 +92,9 @@ class RecreateMixIn:
 
         With ``--target`` the original archive is not replaced, instead a new archive is created.
 
-        When rechunking (or recompressing), space usage can be substantial - expect
+        When rechunking, space usage can be substantial - expect
         at least the entire deduplicated size of the archives using the previous
-        chunker (or compression) params.
+        chunker params.
 
         If you recently ran borg check --repair and it had to fix lost chunks with all-zero
         replacement chunks, please first run another backup for the same data and re-run
@@ -201,25 +192,6 @@ class RecreateMixIn:
             action=Highlander,
             help="select compression algorithm, see the output of the " '"borg help compression" command for details.',
         )
-        archive_group.add_argument(
-            "--recompress",
-            metavar="MODE",
-            dest="recompress",
-            nargs="?",
-            default="never",
-            const="if-different",
-            choices=("never", "if-different", "always"),
-            action=Highlander,
-            help="recompress data chunks according to `MODE` and ``--compression``. "
-            "Possible modes are "
-            "`if-different`: recompress if current compression is with a different "
-            "compression algorithm or different level; "
-            "`always`: recompress unconditionally; and "
-            "`never`: do not recompress (use this option explicitly to prevent "
-            "recompression). "
-            "If no MODE is given, `if-different` will be used. "
-            'Not passing --recompress is equivalent to "--recompress never".',
-        )
         archive_group.add_argument(
             "--chunker-params",
             metavar="PARAMS",

+ 3 - 6
src/borg/cache.py

@@ -939,15 +939,13 @@ class LocalCache(CacheStatsMixin):
         self.cache_config.ignored_features.update(repo_features - my_features)
         self.cache_config.mandatory_features.update(repo_features & my_features)
 
-    def add_chunk(
-        self, id, meta, data, *, stats, overwrite=False, wait=True, compress=True, size=None, ctype=None, clevel=None
-    ):
+    def add_chunk(self, id, meta, data, *, stats, wait=True, compress=True, size=None, ctype=None, clevel=None):
         if not self.txn_active:
             self.begin_txn()
         if size is None and compress:
             size = len(data)  # data is still uncompressed
         refcount = self.seen_chunk(id, size)
-        if refcount and not overwrite:
+        if refcount:
             return self.chunk_incref(id, stats)
         if size is None:
             raise ValueError("when giving compressed data for a new chunk, the uncompressed size must be given also")
@@ -1115,8 +1113,7 @@ Chunk index:    {0.total_unique_chunks:20d}             unknown"""
     def memorize_file(self, hashed_path, path_hash, st, ids):
         pass
 
-    def add_chunk(self, id, meta, data, *, stats, overwrite=False, wait=True, compress=True, size=None):
-        assert not overwrite, "AdHocCache does not permit overwrites — trying to use it for recreate?"
+    def add_chunk(self, id, meta, data, *, stats, wait=True, compress=True, size=None):
         if not self._txn_active:
             self.begin_txn()
         if size is None and compress:

+ 0 - 14
src/borg/testsuite/archiver/recreate_cmd.py

@@ -191,20 +191,6 @@ def test_recreate_no_rechunkify(archivers, request):
     assert num_chunks == num_chunks_after_recreate
 
 
-def test_recreate_recompress(archivers, request):
-    archiver = request.getfixturevalue(archivers)
-    create_regular_file(archiver.input_path, "compressible", size=10000)
-    cmd(archiver, "rcreate", RK_ENCRYPTION)
-    cmd(archiver, "create", "test", "input", "-C", "none")
-    file_list = cmd(archiver, "list", "test", "input/compressible", "--format", "{size} {sha256}")
-    size, sha256_before = file_list.split(" ")
-    cmd(archiver, "recreate", "-C", "lz4", "--recompress")
-    check_cache(archiver)
-    file_list = cmd(archiver, "list", "test", "input/compressible", "--format", "{size} {sha256}")
-    size, sha256_after = file_list.split(" ")
-    assert sha256_before == sha256_after
-
-
 def test_recreate_timestamp(archivers, request):
     archiver = request.getfixturevalue(archivers)
     create_test_files(archiver.input_path)

+ 0 - 4
src/borg/testsuite/cache.py

@@ -192,10 +192,6 @@ class TestAdHocCache:
         cache.chunk_decref(H(1), Statistics())
         assert repository.get(H(1)) == b"1234"
 
-    def test_does_not_overwrite(self, cache):
-        with pytest.raises(AssertionError):
-            cache.add_chunk(H(1), {}, b"5678", stats=Statistics(), overwrite=True)
-
     def test_seen_chunk_add_chunk_size(self, cache):
         assert cache.add_chunk(H(1), {}, b"5678", stats=Statistics()) == (H(1), 4)