
Merge pull request #1420 from enkore/f/recreate1.1rc1

recreate goals for 1.1rc1
TW 8 years ago
parent
commit 6e9debb027
7 changed files with 120 additions and 28 deletions
  1. src/borg/archive.py (+25 -13)
  2. src/borg/archiver.py (+17 -1)
  3. src/borg/compress.pyx (+8 -1)
  4. src/borg/helpers.py (+3 -1)
  5. src/borg/key.py (+19 -12)
  6. src/borg/testsuite/archiver.py (+22 -0)
  7. src/borg/testsuite/key.py (+26 -0)

+ 25 - 13
src/borg/archive.py

@@ -19,6 +19,7 @@ logger = create_logger()
 from . import xattr
 from .cache import ChunkListEntry
 from .chunker import Chunker
+from .compress import Compressor
 from .constants import *  # NOQA
 from .hashindex import ChunkIndex, ChunkIndexEntry
 from .helpers import Manifest
@@ -1298,7 +1299,7 @@ class ArchiveRecreater:
 
     def __init__(self, repository, manifest, key, cache, matcher,
                  exclude_caches=False, exclude_if_present=None, keep_tag_files=False,
-                 chunker_params=None, compression=None, compression_files=None,
+                 chunker_params=None, compression=None, compression_files=None, always_recompress=False,
                  dry_run=False, stats=False, progress=False, file_status_printer=None):
         self.repository = repository
         self.key = key
@@ -1312,10 +1313,11 @@ class ArchiveRecreater:
 
         self.chunker_params = chunker_params or CHUNKER_PARAMS
         self.recompress = bool(compression)
+        self.always_recompress = always_recompress
         self.compression = compression or CompressionSpec('none')
         self.seen_chunks = set()
         self.compression_decider1 = CompressionDecider1(compression or CompressionSpec('none'),
-                                                            compression_files or [])
+                                                        compression_files or [])
         key.compression_decider2 = CompressionDecider2(compression or CompressionSpec('none'))
 
         self.autocommit_threshold = max(self.AUTOCOMMIT_THRESHOLD, self.cache.chunks_stored_size() / 100)
@@ -1329,10 +1331,10 @@ class ArchiveRecreater:
         self.interrupt = False
         self.errors = False
 
-    def recreate(self, archive_name, comment=None):
+    def recreate(self, archive_name, comment=None, target_name=None):
         assert not self.is_temporary_archive(archive_name)
         archive = self.open_archive(archive_name)
-        target, resume_from = self.create_target_or_resume(archive)
+        target, resume_from = self.create_target_or_resume(archive, target_name)
         if self.exclude_if_present or self.exclude_caches:
             self.matcher_add_tagged_dirs(archive)
         if self.matcher.empty() and not self.recompress and not target.recreate_rechunkify and comment is None:
@@ -1342,7 +1344,8 @@ class ArchiveRecreater:
             self.process_items(archive, target, resume_from)
         except self.Interrupted as e:
             return self.save(archive, target, completed=False, metadata=e.metadata)
-        return self.save(archive, target, comment)
+        replace_original = target_name is None
+        return self.save(archive, target, comment, replace_original=replace_original)
 
     def process_items(self, archive, target, resume_from=None):
         matcher = self.matcher
@@ -1404,7 +1407,6 @@ class ArchiveRecreater:
 
     def process_chunks(self, archive, target, item):
         """Return new chunk ID list for 'item'."""
-        # TODO: support --compression-from
         if not self.recompress and not target.recreate_rechunkify:
             for chunk_id, size, csize in item.chunks:
                 self.cache.chunk_incref(chunk_id, target.stats)
@@ -1412,13 +1414,22 @@ class ArchiveRecreater:
         new_chunks = self.process_partial_chunks(target)
         chunk_iterator = self.create_chunk_iterator(archive, target, item)
         consume(chunk_iterator, len(new_chunks))
+        compress = self.compression_decider1.decide(item.path)
         for chunk in chunk_iterator:
+            chunk.meta['compress'] = compress
             chunk_id = self.key.id_hash(chunk.data)
             if chunk_id in self.seen_chunks:
                 new_chunks.append(self.cache.chunk_incref(chunk_id, target.stats))
             else:
-                # TODO: detect / skip / --always-recompress
-                chunk_id, size, csize = self.cache.add_chunk(chunk_id, chunk, target.stats, overwrite=self.recompress)
+                compression_spec, chunk = self.key.compression_decider2.decide(chunk)
+                overwrite = self.recompress
+                if self.recompress and not self.always_recompress and chunk_id in self.cache.chunks:
+                    # Check if this chunk is already compressed the way we want it
+                    old_chunk = self.key.decrypt(None, self.repository.get(chunk_id), decompress=False)
+                    if Compressor.detect(old_chunk.data).name == compression_spec['name']:
+                        # Stored chunk has the same compression we wanted
+                        overwrite = False
+                chunk_id, size, csize = self.cache.add_chunk(chunk_id, chunk, target.stats, overwrite=overwrite)
                 new_chunks.append((chunk_id, size, csize))
                 self.seen_chunks.add(chunk_id)
                 if self.recompress:
@@ -1465,7 +1476,7 @@ class ArchiveRecreater:
         logger.debug('Copied %d chunks from a partially processed item', len(partial_chunks))
         return partial_chunks
 
-    def save(self, archive, target, comment=None, completed=True, metadata=None):
+    def save(self, archive, target, comment=None, completed=True, metadata=None, replace_original=True):
         """Save target archive. If completed, replace source. If not, save temporary with additional 'metadata' dict."""
         if self.dry_run:
             return completed
@@ -1477,8 +1488,9 @@ class ArchiveRecreater:
                 'cmdline': archive.metadata[b'cmdline'],
                 'recreate_cmdline': sys.argv,
             })
-            archive.delete(Statistics(), progress=self.progress)
-            target.rename(archive.name)
+            if replace_original:
+                archive.delete(Statistics(), progress=self.progress)
+                target.rename(archive.name)
             if self.stats:
                 target.end = datetime.utcnow()
                 log_multi(DASHES,
@@ -1530,11 +1542,11 @@ class ArchiveRecreater:
         matcher.add(tag_files, True)
         matcher.add(tagged_dirs, False)
 
-    def create_target_or_resume(self, archive):
+    def create_target_or_resume(self, archive, target_name=None):
         """Create new target archive or resume from temporary archive, if it exists. Return archive, resume from path"""
         if self.dry_run:
             return self.FakeTargetArchive(), None
-        target_name = archive.name + '.recreate'
+        target_name = target_name or archive.name + '.recreate'
         resume = target_name in self.manifest.archives
         target, resume_from = None, None
         if resume:
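
The interesting part of process_chunks() above is the recompression skip: unless --always-recompress is given, a chunk that is already stored with the requested algorithm is not rewritten. A minimal sketch of that decision in isolation (should_overwrite and its parameters are hypothetical stand-ins, not borg API; Compressor is imported as in the hunk above):

    from borg.compress import Compressor

    def should_overwrite(chunk_id, compression_spec, *, recompress, always_recompress,
                         cache, key, repository):
        """Decide whether a stored chunk must be rewritten with new compression."""
        if not recompress:
            return False                        # recreate without new compression never rewrites
        if always_recompress or chunk_id not in cache.chunks:
            return True                         # forced, or chunk not in the repository yet
        # Fetch the stored chunk, skip decompression, and inspect only its header.
        old_chunk = key.decrypt(None, repository.get(chunk_id), decompress=False)
        # Rewrite only if the stored data uses a different algorithm than requested.
        return Compressor.detect(old_chunk.data).name != compression_spec['name']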

+ 17 - 1
src/borg/archiver.py

@@ -957,6 +957,7 @@ class Archiver:
                                      exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present,
                                      keep_tag_files=args.keep_tag_files, chunker_params=args.chunker_params,
                                      compression=args.compression, compression_files=args.compression_files,
+                                     always_recompress=args.always_recompress,
                                      progress=args.progress, stats=args.stats,
                                      file_status_printer=self.print_file_status,
                                      dry_run=args.dry_run)
@@ -968,8 +969,11 @@ class Archiver:
                 if recreater.is_temporary_archive(name):
                     self.print_error('Refusing to work on temporary archive of prior recreate: %s', name)
                     return self.exit_code
-                recreater.recreate(name, args.comment)
+                recreater.recreate(name, args.comment, args.target)
             else:
+                if args.target is not None:
+                    self.print_error('--target: Need to specify single archive')
+                    return self.exit_code
                 for archive in manifest.list_archive_infos(sort_by='ts'):
                     name = archive.name
                     if recreater.is_temporary_archive(name):
@@ -2007,6 +2011,9 @@ class Archiver:
         as in "borg create". If PATHs are specified the resulting archive
         will only contain files from these PATHs.
 
+        Note that all paths in an archive are relative; therefore absolute patterns/paths
+        will *not* match (--exclude, --exclude-from, --compression-from, PATHs).
+
         --compression: all chunks seen will be stored using the given method.
         Due to how Borg stores compressed size information this might display
         incorrect information for archives that were not recreated at the same time.
@@ -2035,6 +2042,8 @@ class Archiver:
         archive that is built during the operation exists at the same time at
         "<ARCHIVE>.recreate". The new archive will have a different archive ID.
 
+        With --target the original archive is not replaced; instead, a new archive is created.
+
         When rechunking space usage can be substantial, expect at least the entire
         deduplicated size of the archives using the previous chunker params.
         When recompressing approximately 1 % of the repository size or 512 MB
@@ -2080,6 +2089,10 @@ class Archiver:
                                    help='keep tag files of excluded caches/directories')
 
         archive_group = subparser.add_argument_group('Archive options')
+        archive_group.add_argument('--target', dest='target', metavar='TARGET', default=None,
+                                   type=archivename_validator(),
+                                   help='create a new archive with the name TARGET, do not replace the '
+                                        'existing archive (only applies for a single archive)')
         archive_group.add_argument('--comment', dest='comment', metavar='COMMENT', default=None,
                                    help='add a comment text to the archive')
         archive_group.add_argument('--timestamp', dest='timestamp',
@@ -2098,6 +2111,9 @@ class Archiver:
                                         'zlib,0 .. zlib,9 == zlib (with level 0..9),\n'
                                         'lzma == lzma (default level 6),\n'
                                         'lzma,0 .. lzma,9 == lzma (with level 0..9).')
+        archive_group.add_argument('--always-recompress', dest='always_recompress', action='store_true',
+                                   help='always recompress chunks, don\'t skip chunks already compressed '
+                                        'with the same algorithm')
         archive_group.add_argument('--compression-from', dest='compression_files',
                                    type=argparse.FileType('r'), action='append',
                                    metavar='COMPRESSIONCONFIG', help='read compression patterns from COMPRESSIONCONFIG, one per line')
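
The new --target flag is exercised by test_recreate_target below; an illustrative invocation (repository path and archive names are examples):

    # Recreate repo::test0 restricted to input/dir2, excluding one file,
    # writing the result to a new archive and leaving test0 in place:
    $ borg recreate --target=new-archive -e input/dir2/file3 /path/to/repo::test0 input/dir2

Afterwards both test0 and new-archive exist in the repository, which is what the test asserts.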

+ 8 - 1
src/borg/compress.pyx

@@ -6,6 +6,8 @@ except ImportError:
 
 
 from .helpers import Buffer
 
+API_VERSION = 2
+
 cdef extern from "lz4.h":
     int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
     int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
@@ -194,9 +196,14 @@ class Compressor:
         return self.compressor.compress(data)
 
     def decompress(self, data):
+        compressor_cls = self.detect(data)
+        return compressor_cls(**self.params).decompress(data)
+
+    @staticmethod
+    def detect(data):
         hdr = bytes(data[:2])  # detect() does not work with memoryview
         for cls in COMPRESSOR_LIST:
             if cls.detect(hdr):
-                return cls(**self.params).decompress(data)
+                return cls
         else:
             raise ValueError('No decompressor for this data found: %r.', data[:2])
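
Splitting detect() out of decompress() is what enables the cheap check in ArchiveRecreater.process_chunks(): the algorithm is identified from the two-byte header alone, with no decompression. A rough sketch of the resulting API (assuming the compressor classes in this file; 'zlib' is an arbitrary choice):

    from borg.compress import Compressor

    blob = Compressor('zlib').compress(b'some chunk data')
    cls = Compressor.detect(blob)    # class picked via the 2-byte header only
    assert cls.name == 'zlib'        # identified without decompressing
    data = cls().decompress(blob)    # what Compressor.decompress() now does internally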

+ 3 - 1
src/borg/helpers.py

@@ -84,11 +84,13 @@ class PlaceholderError(Error):
 
 
 
 
 def check_extension_modules():
-    from . import platform
+    from . import platform, compress
     if hashindex.API_VERSION != 3:
         raise ExtensionModuleError
     if chunker.API_VERSION != 2:
         raise ExtensionModuleError
+    if compress.API_VERSION != 2:
+        raise ExtensionModuleError
     if crypto.API_VERSION != 3:
         raise ExtensionModuleError
     if platform.API_VERSION != 3:
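
The guard follows the existing pattern: each compiled extension exports an API_VERSION, and the pure-Python side refuses to run against a stale build. A toy illustration of the same idea (names hypothetical, not borg code):

    class ExtensionModuleError(Exception):
        """Compiled extension is out of sync with the Python source."""

    def check_extension(module, expected_version):
        # Compare the version baked into the compiled module at build time
        # with the version this copy of the Python code was written against.
        if getattr(module, 'API_VERSION', None) != expected_version:
            raise ExtensionModuleError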

+ 19 - 12
src/borg/key.py

@@ -105,9 +105,15 @@ class KeyBase:
     def encrypt(self, chunk):
         pass
 
-    def decrypt(self, id, data):
+    def decrypt(self, id, data, decompress=True):
         pass
 
+    def assert_id(self, id, data):
+        if id:
+            id_computed = self.id_hash(data)
+            if not compare_digest(id_computed, id):
+                raise IntegrityError('Chunk id verification failed')
+
 
 class PlaintextKey(KeyBase):
     TYPE = 0x02
@@ -130,12 +136,14 @@ class PlaintextKey(KeyBase):
         chunk = self.compress(chunk)
         return b''.join([self.TYPE_STR, chunk.data])
 
-    def decrypt(self, id, data):
+    def decrypt(self, id, data, decompress=True):
         if data[0] != self.TYPE:
             raise IntegrityError('Invalid encryption envelope')
-        data = self.compressor.decompress(memoryview(data)[1:])
-        if id and sha256(data).digest() != id:
-            raise IntegrityError('Chunk id verification failed')
+        payload = memoryview(data)[1:]
+        if not decompress:
+            return Chunk(payload)
+        data = self.compressor.decompress(payload)
+        self.assert_id(id, data)
         return Chunk(data)
 
 
@@ -166,7 +174,7 @@ class AESKeyBase(KeyBase):
         hmac = hmac_sha256(self.enc_hmac_key, data)
         return b''.join((self.TYPE_STR, hmac, data))
 
-    def decrypt(self, id, data):
+    def decrypt(self, id, data, decompress=True):
         if not (data[0] == self.TYPE or
             data[0] == PassphraseKey.TYPE and isinstance(self, RepoKey)):
             raise IntegrityError('Invalid encryption envelope')
@@ -176,12 +184,11 @@ class AESKeyBase(KeyBase):
         if not compare_digest(hmac_computed, hmac_given):
             raise IntegrityError('Encryption envelope checksum mismatch')
         self.dec_cipher.reset(iv=PREFIX + data[33:41])
-        data = self.compressor.decompress(self.dec_cipher.decrypt(data_view[41:]))
-        if id:
-            hmac_given = id
-            hmac_computed = hmac_sha256(self.id_key, data)
-            if not compare_digest(hmac_computed, hmac_given):
-                raise IntegrityError('Chunk id verification failed')
+        payload = self.dec_cipher.decrypt(data_view[41:])
+        if not decompress:
+            return Chunk(payload)
+        data = self.compressor.decompress(payload)
+        self.assert_id(id, data)
         return Chunk(data)
 
     def extract_nonce(self, payload):
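
Together with decompress=False this lets callers obtain the raw, still-compressed payload, while chunk-id verification now lives in one place (KeyBase.assert_id). A minimal sketch of the calling pattern (key, repository and chunk_id stand for objects set up as in the tests below; Compressor as in compress.pyx):

    raw = repository.get(chunk_id)
    chunk = key.decrypt(None, raw, decompress=False)   # decrypted, still compressed
    algo = Compressor.detect(chunk.data).name          # cheap algorithm probe
    chunk = key.decrypt(chunk_id, raw)                 # decompressed, id verified
    key.assert_id(chunk_id, chunk.data)                # the same check, callable directly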

+ 22 - 0
src/borg/testsuite/archiver.py

@@ -1522,6 +1522,28 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             self.cmd('init', self.repository_location, exit_code=1)
         assert not os.path.exists(self.repository_location)
 
+    def test_recreate_target_rc(self):
+        self.cmd('init', self.repository_location)
+        output = self.cmd('recreate', self.repository_location, '--target=asdf', exit_code=2)
+        assert 'Need to specify single archive' in output
+
+    def test_recreate_target(self):
+        self.create_test_files()
+        self.cmd('init', self.repository_location)
+        archive = self.repository_location + '::test0'
+        self.cmd('create', archive, 'input')
+        original_archive = self.cmd('list', self.repository_location)
+        self.cmd('recreate', archive, 'input/dir2', '-e', 'input/dir2/file3', '--target=new-archive')
+        archives = self.cmd('list', self.repository_location)
+        assert original_archive in archives
+        assert 'new-archive' in archives
+
+        archive = self.repository_location + '::new-archive'
+        listing = self.cmd('list', '--short', archive)
+        assert 'file1' not in listing
+        assert 'dir2/file2' in listing
+        assert 'dir2/file3' not in listing
+
     def test_recreate_basic(self):
         self.create_test_files()
         self.create_regular_file('dir2/file3', size=1024 * 80)

+ 26 - 0
src/borg/testsuite/key.py

@@ -43,6 +43,14 @@ class TestKey:
         monkeypatch.setenv('BORG_KEYS_DIR', tmpdir)
         return tmpdir
 
+    @pytest.fixture(params=(
+        KeyfileKey,
+        PlaintextKey
+    ))
+    def key(self, request, monkeypatch):
+        monkeypatch.setenv('BORG_PASSPHRASE', 'test')
+        return request.param.create(self.MockRepository(), self.MockArgs())
+
     class MockRepository:
         class _Location:
             orig = '/some/place'
@@ -155,6 +163,24 @@ class TestKey:
             id[12] = 0
             key.decrypt(id, data)
 
+    def test_decrypt_decompress(self, key):
+        plaintext = Chunk(b'123456789')
+        encrypted = key.encrypt(plaintext)
+        assert key.decrypt(None, encrypted, decompress=False) != plaintext
+        assert key.decrypt(None, encrypted) == plaintext
+
+    def test_assert_id(self, key):
+        plaintext = b'123456789'
+        id = key.id_hash(plaintext)
+        key.assert_id(id, plaintext)
+        id_changed = bytearray(id)
+        id_changed[0] += 1
+        with pytest.raises(IntegrityError):
+            key.assert_id(id_changed, plaintext)
+        plaintext_changed = plaintext + b'1'
+        with pytest.raises(IntegrityError):
+            key.assert_id(id, plaintext_changed)
+
 
 
 class TestPassphrase:
 class TestPassphrase:
     def test_passphrase_new_verification(self, capsys, monkeypatch):
     def test_passphrase_new_verification(self, capsys, monkeypatch):