Browse Source

Merge pull request #7837 from ThomasWaldmann/remove-recreate-recompress

Remove recreate --recompress option
TW 1 year ago
parent
commit
506718e82f

+ 4 - 26
src/borg/archive.py

@@ -23,7 +23,7 @@ from . import xattr
 from .chunker import get_chunker, Chunk
 from .cache import ChunkListEntry
 from .crypto.key import key_factory, UnsupportedPayloadError
-from .compress import Compressor, CompressionSpec
+from .compress import CompressionSpec
 from .constants import *  # NOQA
 from .crypto.low_level import IntegrityError as IntegrityErrorBase
 from .hashindex import ChunkIndex, ChunkIndexEntry, CacheSynchronizer
@@ -2349,8 +2349,6 @@ class ArchiveRecreater:
         keep_exclude_tags=False,
         chunker_params=None,
         compression=None,
-        recompress=False,
-        always_recompress=False,
         dry_run=False,
         stats=False,
         progress=False,
@@ -2374,8 +2372,6 @@ class ArchiveRecreater:
         if self.rechunkify:
             logger.debug("Rechunking archives to %s", chunker_params)
         self.chunker_params = chunker_params or CHUNKER_PARAMS
-        self.recompress = recompress
-        self.always_recompress = always_recompress
         self.compression = compression or CompressionSpec("none")
         self.seen_chunks = set()
 
@@ -2393,13 +2389,7 @@ class ArchiveRecreater:
         target = self.create_target(archive, target_name)
         if self.exclude_if_present or self.exclude_caches:
             self.matcher_add_tagged_dirs(archive)
-        if (
-            self.matcher.empty()
-            and not self.recompress
-            and not target.recreate_rechunkify
-            and comment is None
-            and target_name is None
-        ):
+        if self.matcher.empty() and not target.recreate_rechunkify and comment is None and target_name is None:
             # nothing to do
             return False
         self.process_items(archive, target)
@@ -2432,7 +2422,7 @@ class ArchiveRecreater:
         self.print_file_status(status, item.path)
 
     def process_chunks(self, archive, target, item):
-        if not self.recompress and not target.recreate_rechunkify:
+        if not target.recreate_rechunkify:
             for chunk_id, size in item.chunks:
                 self.cache.chunk_incref(chunk_id, target.stats)
             return item.chunks
@@ -2444,19 +2434,7 @@ class ArchiveRecreater:
         chunk_id, data = cached_hash(chunk, self.key.id_hash)
         if chunk_id in self.seen_chunks:
             return self.cache.chunk_incref(chunk_id, target.stats)
-        overwrite = self.recompress
-        if self.recompress and not self.always_recompress and chunk_id in self.cache.chunks:
-            # Check if this chunk is already compressed the way we want it
-            old_meta = self.repo_objs.parse_meta(chunk_id, self.repository.get(chunk_id, read_data=False))
-            compr_hdr = bytes((old_meta["ctype"], old_meta["clevel"]))
-            compressor_cls, level = Compressor.detect(compr_hdr)
-            if (
-                compressor_cls.name == self.repo_objs.compressor.decide({}, data).name
-                and level == self.repo_objs.compressor.level
-            ):
-                # Stored chunk has the same compression method and level as we wanted
-                overwrite = False
-        chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, overwrite=overwrite, wait=False)
+        chunk_entry = self.cache.add_chunk(chunk_id, {}, data, stats=target.stats, wait=False)
         self.cache.repository.async_response(wait=False)
         self.seen_chunks.add(chunk_entry.id)
         return chunk_entry

+ 2 - 30
src/borg/archiver/recreate_cmd.py

@@ -21,8 +21,6 @@ class RecreateMixIn:
         matcher = build_matcher(args.patterns, args.paths)
         self.output_list = args.output_list
         self.output_filter = args.output_filter
-        recompress = args.recompress != "never"
-        always_recompress = args.recompress == "always"
 
         recreater = ArchiveRecreater(
             manifest,
@@ -33,8 +31,6 @@ class RecreateMixIn:
             keep_exclude_tags=args.keep_exclude_tags,
             chunker_params=args.chunker_params,
             compression=args.compression,
-            recompress=recompress,
-            always_recompress=always_recompress,
             progress=args.progress,
             stats=args.stats,
             file_status_printer=self.print_file_status,
@@ -81,11 +77,6 @@ class RecreateMixIn:
         Note that all paths in an archive are relative, therefore absolute patterns/paths
         will *not* match (``--exclude``, ``--exclude-from``, PATHs).
 
-        ``--recompress`` allows one to change the compression of existing data in archives.
-        Due to how Borg stores compressed size information this might display
-        incorrect information for archives that were not recreated at the same time.
-        There is no risk of data loss by this.
-
         ``--chunker-params`` will re-chunk all files in the archive, this can be
         used to have upgraded Borg 0.xx archives deduplicate with Borg 1.x archives.
 
@@ -101,9 +92,9 @@ class RecreateMixIn:
 
         With ``--target`` the original archive is not replaced, instead a new archive is created.
 
-        When rechunking (or recompressing), space usage can be substantial - expect
+        When rechunking, space usage can be substantial - expect
         at least the entire deduplicated size of the archives using the previous
-        chunker (or compression) params.
+        chunker params.
 
         If you recently ran borg check --repair and it had to fix lost chunks with all-zero
         replacement chunks, please first run another backup for the same data and re-run
@@ -201,25 +192,6 @@ class RecreateMixIn:
             action=Highlander,
             help="select compression algorithm, see the output of the " '"borg help compression" command for details.',
         )
-        archive_group.add_argument(
-            "--recompress",
-            metavar="MODE",
-            dest="recompress",
-            nargs="?",
-            default="never",
-            const="if-different",
-            choices=("never", "if-different", "always"),
-            action=Highlander,
-            help="recompress data chunks according to `MODE` and ``--compression``. "
-            "Possible modes are "
-            "`if-different`: recompress if current compression is with a different "
-            "compression algorithm or different level; "
-            "`always`: recompress unconditionally; and "
-            "`never`: do not recompress (use this option explicitly to prevent "
-            "recompression). "
-            "If no MODE is given, `if-different` will be used. "
-            'Not passing --recompress is equivalent to "--recompress never".',
-        )
         archive_group.add_argument(
             "--chunker-params",
             metavar="PARAMS",

+ 3 - 6
src/borg/cache.py

@@ -939,15 +939,13 @@ class LocalCache(CacheStatsMixin):
         self.cache_config.ignored_features.update(repo_features - my_features)
         self.cache_config.mandatory_features.update(repo_features & my_features)
 
-    def add_chunk(
-        self, id, meta, data, *, stats, overwrite=False, wait=True, compress=True, size=None, ctype=None, clevel=None
-    ):
+    def add_chunk(self, id, meta, data, *, stats, wait=True, compress=True, size=None, ctype=None, clevel=None):
         if not self.txn_active:
             self.begin_txn()
         if size is None and compress:
             size = len(data)  # data is still uncompressed
         refcount = self.seen_chunk(id, size)
-        if refcount and not overwrite:
+        if refcount:
             return self.chunk_incref(id, stats)
         if size is None:
             raise ValueError("when giving compressed data for a new chunk, the uncompressed size must be given also")
@@ -1115,8 +1113,7 @@ Chunk index:    {0.total_unique_chunks:20d}             unknown"""
     def memorize_file(self, hashed_path, path_hash, st, ids):
         pass
 
-    def add_chunk(self, id, meta, data, *, stats, overwrite=False, wait=True, compress=True, size=None):
-        assert not overwrite, "AdHocCache does not permit overwrites — trying to use it for recreate?"
+    def add_chunk(self, id, meta, data, *, stats, wait=True, compress=True, size=None):
         if not self._txn_active:
             self.begin_txn()
         if size is None and compress:

+ 0 - 14
src/borg/testsuite/archiver/recreate_cmd.py

@@ -191,20 +191,6 @@ def test_recreate_no_rechunkify(archivers, request):
     assert num_chunks == num_chunks_after_recreate
 
 
-def test_recreate_recompress(archivers, request):
-    archiver = request.getfixturevalue(archivers)
-    create_regular_file(archiver.input_path, "compressible", size=10000)
-    cmd(archiver, "rcreate", RK_ENCRYPTION)
-    cmd(archiver, "create", "test", "input", "-C", "none")
-    file_list = cmd(archiver, "list", "test", "input/compressible", "--format", "{size} {sha256}")
-    size, sha256_before = file_list.split(" ")
-    cmd(archiver, "recreate", "-C", "lz4", "--recompress")
-    check_cache(archiver)
-    file_list = cmd(archiver, "list", "test", "input/compressible", "--format", "{size} {sha256}")
-    size, sha256_after = file_list.split(" ")
-    assert sha256_before == sha256_after
-
-
 def test_recreate_timestamp(archivers, request):
     archiver = request.getfixturevalue(archivers)
     create_test_files(archiver.input_path)

+ 0 - 4
src/borg/testsuite/cache.py

@@ -192,10 +192,6 @@ class TestAdHocCache:
         cache.chunk_decref(H(1), Statistics())
         assert repository.get(H(1)) == b"1234"
 
-    def test_does_not_overwrite(self, cache):
-        with pytest.raises(AssertionError):
-            cache.add_chunk(H(1), {}, b"5678", stats=Statistics(), overwrite=True)
-
     def test_seen_chunk_add_chunk_size(self, cache):
         assert cache.add_chunk(H(1), {}, b"5678", stats=Statistics()) == (H(1), 4)