
Merge pull request #1420 from enkore/f/recreate1.1rc1

recreate goals for 1.1rc1
TW 8 years ago
parent
commit 6e9debb027
7 changed files with 120 additions and 28 deletions
  1. src/borg/archive.py (+25 -13)
  2. src/borg/archiver.py (+17 -1)
  3. src/borg/compress.pyx (+8 -1)
  4. src/borg/helpers.py (+3 -1)
  5. src/borg/key.py (+19 -12)
  6. src/borg/testsuite/archiver.py (+22 -0)
  7. src/borg/testsuite/key.py (+26 -0)

+ 25 - 13
src/borg/archive.py

@@ -19,6 +19,7 @@ logger = create_logger()
 from . import xattr
 from .cache import ChunkListEntry
 from .chunker import Chunker
+from .compress import Compressor
 from .constants import *  # NOQA
 from .hashindex import ChunkIndex, ChunkIndexEntry
 from .helpers import Manifest
@@ -1298,7 +1299,7 @@ class ArchiveRecreater:
 
     def __init__(self, repository, manifest, key, cache, matcher,
                  exclude_caches=False, exclude_if_present=None, keep_tag_files=False,
-                 chunker_params=None, compression=None, compression_files=None,
+                 chunker_params=None, compression=None, compression_files=None, always_recompress=False,
                  dry_run=False, stats=False, progress=False, file_status_printer=None):
         self.repository = repository
         self.key = key
@@ -1312,10 +1313,11 @@ class ArchiveRecreater:
 
         self.chunker_params = chunker_params or CHUNKER_PARAMS
         self.recompress = bool(compression)
+        self.always_recompress = always_recompress
         self.compression = compression or CompressionSpec('none')
         self.seen_chunks = set()
         self.compression_decider1 = CompressionDecider1(compression or CompressionSpec('none'),
-                                                            compression_files or [])
+                                                        compression_files or [])
         key.compression_decider2 = CompressionDecider2(compression or CompressionSpec('none'))
 
         self.autocommit_threshold = max(self.AUTOCOMMIT_THRESHOLD, self.cache.chunks_stored_size() / 100)
@@ -1329,10 +1331,10 @@ class ArchiveRecreater:
         self.interrupt = False
         self.errors = False
 
-    def recreate(self, archive_name, comment=None):
+    def recreate(self, archive_name, comment=None, target_name=None):
         assert not self.is_temporary_archive(archive_name)
         archive = self.open_archive(archive_name)
-        target, resume_from = self.create_target_or_resume(archive)
+        target, resume_from = self.create_target_or_resume(archive, target_name)
         if self.exclude_if_present or self.exclude_caches:
             self.matcher_add_tagged_dirs(archive)
         if self.matcher.empty() and not self.recompress and not target.recreate_rechunkify and comment is None:
@@ -1342,7 +1344,8 @@ class ArchiveRecreater:
             self.process_items(archive, target, resume_from)
         except self.Interrupted as e:
             return self.save(archive, target, completed=False, metadata=e.metadata)
-        return self.save(archive, target, comment)
+        replace_original = target_name is None
+        return self.save(archive, target, comment, replace_original=replace_original)
 
     def process_items(self, archive, target, resume_from=None):
         matcher = self.matcher
@@ -1404,7 +1407,6 @@ class ArchiveRecreater:
 
     def process_chunks(self, archive, target, item):
         """Return new chunk ID list for 'item'."""
-        # TODO: support --compression-from
         if not self.recompress and not target.recreate_rechunkify:
             for chunk_id, size, csize in item.chunks:
                 self.cache.chunk_incref(chunk_id, target.stats)
@@ -1412,13 +1414,22 @@ class ArchiveRecreater:
         new_chunks = self.process_partial_chunks(target)
         chunk_iterator = self.create_chunk_iterator(archive, target, item)
         consume(chunk_iterator, len(new_chunks))
+        compress = self.compression_decider1.decide(item.path)
         for chunk in chunk_iterator:
+            chunk.meta['compress'] = compress
             chunk_id = self.key.id_hash(chunk.data)
             if chunk_id in self.seen_chunks:
                 new_chunks.append(self.cache.chunk_incref(chunk_id, target.stats))
             else:
-                # TODO: detect / skip / --always-recompress
-                chunk_id, size, csize = self.cache.add_chunk(chunk_id, chunk, target.stats, overwrite=self.recompress)
+                compression_spec, chunk = self.key.compression_decider2.decide(chunk)
+                overwrite = self.recompress
+                if self.recompress and not self.always_recompress and chunk_id in self.cache.chunks:
+                    # Check if this chunk is already compressed the way we want it
+                    old_chunk = self.key.decrypt(None, self.repository.get(chunk_id), decompress=False)
+                    if Compressor.detect(old_chunk.data).name == compression_spec['name']:
+                        # Stored chunk has the same compression we wanted
+                        overwrite = False
+                chunk_id, size, csize = self.cache.add_chunk(chunk_id, chunk, target.stats, overwrite=overwrite)
                 new_chunks.append((chunk_id, size, csize))
                 self.seen_chunks.add(chunk_id)
                 if self.recompress:
@@ -1465,7 +1476,7 @@ class ArchiveRecreater:
         logger.debug('Copied %d chunks from a partially processed item', len(partial_chunks))
         return partial_chunks
 
-    def save(self, archive, target, comment=None, completed=True, metadata=None):
+    def save(self, archive, target, comment=None, completed=True, metadata=None, replace_original=True):
         """Save target archive. If completed, replace source. If not, save temporary with additional 'metadata' dict."""
         if self.dry_run:
             return completed
@@ -1477,8 +1488,9 @@ class ArchiveRecreater:
                 'cmdline': archive.metadata[b'cmdline'],
                 'recreate_cmdline': sys.argv,
             })
-            archive.delete(Statistics(), progress=self.progress)
-            target.rename(archive.name)
+            if replace_original:
+                archive.delete(Statistics(), progress=self.progress)
+                target.rename(archive.name)
             if self.stats:
                 target.end = datetime.utcnow()
                 log_multi(DASHES,
@@ -1530,11 +1542,11 @@ class ArchiveRecreater:
         matcher.add(tag_files, True)
         matcher.add(tagged_dirs, False)
 
-    def create_target_or_resume(self, archive):
+    def create_target_or_resume(self, archive, target_name=None):
         """Create new target archive or resume from temporary archive, if it exists. Return archive, resume from path"""
         if self.dry_run:
             return self.FakeTargetArchive(), None
-        target_name = archive.name + '.recreate'
+        target_name = target_name or archive.name + '.recreate'
         resume = target_name in self.manifest.archives
         target, resume_from = None, None
         if resume:
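
The interesting part of process_chunks() above is the recompression skip: unless --always-recompress is given, a chunk that is already stored with the requested algorithm is not rewritten. A minimal sketch of that decision in isolation (should_overwrite and its parameters are hypothetical stand-ins, not borg API; Compressor is imported as in the hunk above):

    from borg.compress import Compressor

    def should_overwrite(chunk_id, compression_spec, *, recompress, always_recompress,
                         cache, key, repository):
        """Decide whether a stored chunk must be rewritten with new compression."""
        if not recompress:
            return False                        # recreate without new compression never rewrites
        if always_recompress or chunk_id not in cache.chunks:
            return True                         # forced, or chunk not in the repository yet
        # Fetch the stored chunk, skip decompression, and inspect only its header.
        old_chunk = key.decrypt(None, repository.get(chunk_id), decompress=False)
        # Rewrite only if the stored data uses a different algorithm than requested.
        return Compressor.detect(old_chunk.data).name != compression_spec['name']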

+ 17 - 1
src/borg/archiver.py

@@ -957,6 +957,7 @@ class Archiver:
                                      exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present,
                                      keep_tag_files=args.keep_tag_files, chunker_params=args.chunker_params,
                                      compression=args.compression, compression_files=args.compression_files,
+                                     always_recompress=args.always_recompress,
                                      progress=args.progress, stats=args.stats,
                                      file_status_printer=self.print_file_status,
                                      dry_run=args.dry_run)
@@ -968,8 +969,11 @@ class Archiver:
                 if recreater.is_temporary_archive(name):
                     self.print_error('Refusing to work on temporary archive of prior recreate: %s', name)
                     return self.exit_code
-                recreater.recreate(name, args.comment)
+                recreater.recreate(name, args.comment, args.target)
             else:
+                if args.target is not None:
+                    self.print_error('--target: Need to specify single archive')
+                    return self.exit_code
                 for archive in manifest.list_archive_infos(sort_by='ts'):
                     name = archive.name
                     if recreater.is_temporary_archive(name):
@@ -2007,6 +2011,9 @@ class Archiver:
         as in "borg create". If PATHs are specified the resulting archive
         will only contain files from these PATHs.
 
+        Note that all paths in an archive are relative; therefore absolute patterns/paths
+        will *not* match (--exclude, --exclude-from, --compression-from, PATHs).
+
         --compression: all chunks seen will be stored using the given method.
         Due to how Borg stores compressed size information this might display
         incorrect information for archives that were not recreated at the same time.
@@ -2035,6 +2042,8 @@ class Archiver:
         archive that is built during the operation exists at the same time at
         "<ARCHIVE>.recreate". The new archive will have a different archive ID.
 
+        With --target the original archive is not replaced; instead, a new archive is created.
+
         When rechunking space usage can be substantial, expect at least the entire
         deduplicated size of the archives using the previous chunker params.
         When recompressing approximately 1 % of the repository size or 512 MB
@@ -2080,6 +2089,10 @@ class Archiver:
                                    help='keep tag files of excluded caches/directories')
 
         archive_group = subparser.add_argument_group('Archive options')
+        archive_group.add_argument('--target', dest='target', metavar='TARGET', default=None,
+                                   type=archivename_validator(),
+                                   help='create a new archive with the name TARGET, do not replace the '
+                                        'existing archive (only applies for a single archive)')
         archive_group.add_argument('--comment', dest='comment', metavar='COMMENT', default=None,
                                    help='add a comment text to the archive')
         archive_group.add_argument('--timestamp', dest='timestamp',
@@ -2098,6 +2111,9 @@ class Archiver:
                                         'zlib,0 .. zlib,9 == zlib (with level 0..9),\n'
                                         'lzma == lzma (default level 6),\n'
                                         'lzma,0 .. lzma,9 == lzma (with level 0..9).')
+        archive_group.add_argument('--always-recompress', dest='always_recompress', action='store_true',
+                                   help='always recompress chunks, don\'t skip chunks already compressed '
+                                        'with the same algorithm')
         archive_group.add_argument('--compression-from', dest='compression_files',
                                    type=argparse.FileType('r'), action='append',
                                    metavar='COMPRESSIONCONFIG', help='read compression patterns from COMPRESSIONCONFIG, one per line')
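
The new --target flag is exercised by test_recreate_target below; an illustrative invocation (repository path and archive names are examples):

    # Recreate repo::test0 restricted to input/dir2, excluding one file,
    # writing the result to a new archive and leaving test0 in place:
    $ borg recreate --target=new-archive -e input/dir2/file3 /path/to/repo::test0 input/dir2

Afterwards both test0 and new-archive exist in the repository, which is what the test asserts.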

+ 8 - 1
src/borg/compress.pyx

@@ -6,6 +6,8 @@ except ImportError:
 
 
 from .helpers import Buffer
 
+API_VERSION = 2
+
 cdef extern from "lz4.h":
     int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
     int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
@@ -194,9 +196,14 @@ class Compressor:
         return self.compressor.compress(data)
 
     def decompress(self, data):
+        compressor_cls = self.detect(data)
+        return compressor_cls(**self.params).decompress(data)
+
+    @staticmethod
+    def detect(data):
         hdr = bytes(data[:2])  # detect() does not work with memoryview
         for cls in COMPRESSOR_LIST:
             if cls.detect(hdr):
-                return cls(**self.params).decompress(data)
+                return cls
         else:
             raise ValueError('No decompressor for this data found: %r.', data[:2])
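
Splitting detect() out of decompress() is what enables the cheap check in ArchiveRecreater.process_chunks(): the algorithm is identified from the two-byte header alone, with no decompression. A rough sketch of the resulting API (assuming the compressor classes in this file; 'zlib' is an arbitrary choice):

    from borg.compress import Compressor

    blob = Compressor('zlib').compress(b'some chunk data')
    cls = Compressor.detect(blob)    # class picked via the 2-byte header only
    assert cls.name == 'zlib'        # identified without decompressing
    data = cls().decompress(blob)    # what Compressor.decompress() now does internally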

+ 3 - 1
src/borg/helpers.py

@@ -84,11 +84,13 @@ class PlaceholderError(Error):
 
 
 
 
 def check_extension_modules():
-    from . import platform
+    from . import platform, compress
     if hashindex.API_VERSION != 3:
         raise ExtensionModuleError
     if chunker.API_VERSION != 2:
         raise ExtensionModuleError
+    if compress.API_VERSION != 2:
+        raise ExtensionModuleError
     if crypto.API_VERSION != 3:
         raise ExtensionModuleError
     if platform.API_VERSION != 3:
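
The guard follows the existing pattern: each compiled extension exports an API_VERSION, and the pure-Python side refuses to run against a stale build. A toy illustration of the same idea (names hypothetical, not borg code):

    class ExtensionModuleError(Exception):
        """Compiled extension is out of sync with the Python source."""

    def check_extension(module, expected_version):
        # Compare the version baked into the compiled module at build time
        # with the version this copy of the Python code was written against.
        if getattr(module, 'API_VERSION', None) != expected_version:
            raise ExtensionModuleError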

+ 19 - 12
src/borg/key.py

@@ -105,9 +105,15 @@ class KeyBase:
     def encrypt(self, chunk):
         pass
 
-    def decrypt(self, id, data):
+    def decrypt(self, id, data, decompress=True):
         pass
 
+    def assert_id(self, id, data):
+        if id:
+            id_computed = self.id_hash(data)
+            if not compare_digest(id_computed, id):
+                raise IntegrityError('Chunk id verification failed')
+
 
 class PlaintextKey(KeyBase):
     TYPE = 0x02
@@ -130,12 +136,14 @@ class PlaintextKey(KeyBase):
         chunk = self.compress(chunk)
         return b''.join([self.TYPE_STR, chunk.data])
 
-    def decrypt(self, id, data):
+    def decrypt(self, id, data, decompress=True):
         if data[0] != self.TYPE:
             raise IntegrityError('Invalid encryption envelope')
-        data = self.compressor.decompress(memoryview(data)[1:])
-        if id and sha256(data).digest() != id:
-            raise IntegrityError('Chunk id verification failed')
+        payload = memoryview(data)[1:]
+        if not decompress:
+            return Chunk(payload)
+        data = self.compressor.decompress(payload)
+        self.assert_id(id, data)
         return Chunk(data)
 
 
@@ -166,7 +174,7 @@ class AESKeyBase(KeyBase):
         hmac = hmac_sha256(self.enc_hmac_key, data)
         return b''.join((self.TYPE_STR, hmac, data))
 
-    def decrypt(self, id, data):
+    def decrypt(self, id, data, decompress=True):
         if not (data[0] == self.TYPE or
             data[0] == PassphraseKey.TYPE and isinstance(self, RepoKey)):
             raise IntegrityError('Invalid encryption envelope')
@@ -176,12 +184,11 @@ class AESKeyBase(KeyBase):
         if not compare_digest(hmac_computed, hmac_given):
             raise IntegrityError('Encryption envelope checksum mismatch')
         self.dec_cipher.reset(iv=PREFIX + data[33:41])
-        data = self.compressor.decompress(self.dec_cipher.decrypt(data_view[41:]))
-        if id:
-            hmac_given = id
-            hmac_computed = hmac_sha256(self.id_key, data)
-            if not compare_digest(hmac_computed, hmac_given):
-                raise IntegrityError('Chunk id verification failed')
+        payload = self.dec_cipher.decrypt(data_view[41:])
+        if not decompress:
+            return Chunk(payload)
+        data = self.compressor.decompress(payload)
+        self.assert_id(id, data)
         return Chunk(data)
 
     def extract_nonce(self, payload):
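
Together with decompress=False this lets callers obtain the raw, still-compressed payload, while chunk-id verification now lives in one place (KeyBase.assert_id). A minimal sketch of the calling pattern (key, repository and chunk_id stand for objects set up as in the tests below; Compressor as in compress.pyx):

    raw = repository.get(chunk_id)
    chunk = key.decrypt(None, raw, decompress=False)   # decrypted, still compressed
    algo = Compressor.detect(chunk.data).name          # cheap algorithm probe
    chunk = key.decrypt(chunk_id, raw)                 # decompressed, id verified
    key.assert_id(chunk_id, chunk.data)                # the same check, callable directly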

+ 22 - 0
src/borg/testsuite/archiver.py

@@ -1522,6 +1522,28 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             self.cmd('init', self.repository_location, exit_code=1)
         assert not os.path.exists(self.repository_location)
 
+    def test_recreate_target_rc(self):
+        self.cmd('init', self.repository_location)
+        output = self.cmd('recreate', self.repository_location, '--target=asdf', exit_code=2)
+        assert 'Need to specify single archive' in output
+
+    def test_recreate_target(self):
+        self.create_test_files()
+        self.cmd('init', self.repository_location)
+        archive = self.repository_location + '::test0'
+        self.cmd('create', archive, 'input')
+        original_archive = self.cmd('list', self.repository_location)
+        self.cmd('recreate', archive, 'input/dir2', '-e', 'input/dir2/file3', '--target=new-archive')
+        archives = self.cmd('list', self.repository_location)
+        assert original_archive in archives
+        assert 'new-archive' in archives
+
+        archive = self.repository_location + '::new-archive'
+        listing = self.cmd('list', '--short', archive)
+        assert 'file1' not in listing
+        assert 'dir2/file2' in listing
+        assert 'dir2/file3' not in listing
+
     def test_recreate_basic(self):
         self.create_test_files()
         self.create_regular_file('dir2/file3', size=1024 * 80)

+ 26 - 0
src/borg/testsuite/key.py

@@ -43,6 +43,14 @@ class TestKey:
         monkeypatch.setenv('BORG_KEYS_DIR', tmpdir)
         return tmpdir
 
+    @pytest.fixture(params=(
+        KeyfileKey,
+        PlaintextKey
+    ))
+    def key(self, request, monkeypatch):
+        monkeypatch.setenv('BORG_PASSPHRASE', 'test')
+        return request.param.create(self.MockRepository(), self.MockArgs())
+
     class MockRepository:
         class _Location:
             orig = '/some/place'
@@ -155,6 +163,24 @@ class TestKey:
             id[12] = 0
             key.decrypt(id, data)
 
+    def test_decrypt_decompress(self, key):
+        plaintext = Chunk(b'123456789')
+        encrypted = key.encrypt(plaintext)
+        assert key.decrypt(None, encrypted, decompress=False) != plaintext
+        assert key.decrypt(None, encrypted) == plaintext
+
+    def test_assert_id(self, key):
+        plaintext = b'123456789'
+        id = key.id_hash(plaintext)
+        key.assert_id(id, plaintext)
+        id_changed = bytearray(id)
+        id_changed[0] += 1
+        with pytest.raises(IntegrityError):
+            key.assert_id(id_changed, plaintext)
+        plaintext_changed = plaintext + b'1'
+        with pytest.raises(IntegrityError):
+            key.assert_id(id, plaintext_changed)
+
 
 
 class TestPassphrase:
 class TestPassphrase:
     def test_passphrase_new_verification(self, capsys, monkeypatch):
     def test_passphrase_new_verification(self, capsys, monkeypatch):