Browse Source

recreate: --always-recompress, --compression-from

what a mess
Marian Beermann 9 years ago
parent
commit
88798ae949
4 changed files with 24 additions and 6 deletions
  1. 15 5
      src/borg/archive.py
  2. 4 0
      src/borg/archiver.py
  3. 2 0
      src/borg/compress.pyx
  4. 3 1
      src/borg/helpers.py

+ 15 - 5
src/borg/archive.py

@@ -19,6 +19,7 @@ logger = create_logger()
 from . import xattr
 from . import xattr
 from .cache import ChunkListEntry
 from .cache import ChunkListEntry
 from .chunker import Chunker
 from .chunker import Chunker
+from .compress import Compressor
 from .constants import *  # NOQA
 from .constants import *  # NOQA
 from .hashindex import ChunkIndex, ChunkIndexEntry
 from .hashindex import ChunkIndex, ChunkIndexEntry
 from .helpers import Manifest
 from .helpers import Manifest
@@ -1298,7 +1299,7 @@ class ArchiveRecreater:
 
 
     def __init__(self, repository, manifest, key, cache, matcher,
     def __init__(self, repository, manifest, key, cache, matcher,
                  exclude_caches=False, exclude_if_present=None, keep_tag_files=False,
                  exclude_caches=False, exclude_if_present=None, keep_tag_files=False,
-                 chunker_params=None, compression=None, compression_files=None,
+                 chunker_params=None, compression=None, compression_files=None, always_recompress=False,
                  dry_run=False, stats=False, progress=False, file_status_printer=None):
                  dry_run=False, stats=False, progress=False, file_status_printer=None):
         self.repository = repository
         self.repository = repository
         self.key = key
         self.key = key
@@ -1312,10 +1313,11 @@ class ArchiveRecreater:
 
 
         self.chunker_params = chunker_params or CHUNKER_PARAMS
         self.chunker_params = chunker_params or CHUNKER_PARAMS
         self.recompress = bool(compression)
         self.recompress = bool(compression)
+        self.always_recompress = always_recompress
         self.compression = compression or CompressionSpec('none')
         self.compression = compression or CompressionSpec('none')
         self.seen_chunks = set()
         self.seen_chunks = set()
         self.compression_decider1 = CompressionDecider1(compression or CompressionSpec('none'),
         self.compression_decider1 = CompressionDecider1(compression or CompressionSpec('none'),
-                                                            compression_files or [])
+                                                        compression_files or [])
         key.compression_decider2 = CompressionDecider2(compression or CompressionSpec('none'))
         key.compression_decider2 = CompressionDecider2(compression or CompressionSpec('none'))
 
 
         self.autocommit_threshold = max(self.AUTOCOMMIT_THRESHOLD, self.cache.chunks_stored_size() / 100)
         self.autocommit_threshold = max(self.AUTOCOMMIT_THRESHOLD, self.cache.chunks_stored_size() / 100)
@@ -1404,7 +1406,6 @@ class ArchiveRecreater:
 
 
     def process_chunks(self, archive, target, item):
     def process_chunks(self, archive, target, item):
         """Return new chunk ID list for 'item'."""
         """Return new chunk ID list for 'item'."""
-        # TODO: support --compression-from
         if not self.recompress and not target.recreate_rechunkify:
         if not self.recompress and not target.recreate_rechunkify:
             for chunk_id, size, csize in item.chunks:
             for chunk_id, size, csize in item.chunks:
                 self.cache.chunk_incref(chunk_id, target.stats)
                 self.cache.chunk_incref(chunk_id, target.stats)
@@ -1412,13 +1413,22 @@ class ArchiveRecreater:
         new_chunks = self.process_partial_chunks(target)
         new_chunks = self.process_partial_chunks(target)
         chunk_iterator = self.create_chunk_iterator(archive, target, item)
         chunk_iterator = self.create_chunk_iterator(archive, target, item)
         consume(chunk_iterator, len(new_chunks))
         consume(chunk_iterator, len(new_chunks))
+        compress = self.compression_decider1.decide(item.path)
         for chunk in chunk_iterator:
         for chunk in chunk_iterator:
+            chunk.meta['compress'] = compress
             chunk_id = self.key.id_hash(chunk.data)
             chunk_id = self.key.id_hash(chunk.data)
             if chunk_id in self.seen_chunks:
             if chunk_id in self.seen_chunks:
                 new_chunks.append(self.cache.chunk_incref(chunk_id, target.stats))
                 new_chunks.append(self.cache.chunk_incref(chunk_id, target.stats))
             else:
             else:
-                # TODO: detect / skip / --always-recompress
-                chunk_id, size, csize = self.cache.add_chunk(chunk_id, chunk, target.stats, overwrite=self.recompress)
+                compression_spec, chunk = self.key.compression_decider2.decide(chunk)
+                overwrite = self.recompress
+                if self.recompress and not self.always_recompress and chunk_id in self.cache.chunks:
+                    # Check if this chunk is already compressed the way we want it
+                    old_chunk = self.key.decrypt(None, self.repository.get(chunk_id), decompress=False)
+                    if Compressor.detect(old_chunk.data).name == compression_spec['name']:
+                        # Stored chunk has the same compression we wanted
+                        overwrite = False
+                chunk_id, size, csize = self.cache.add_chunk(chunk_id, chunk, target.stats, overwrite=overwrite)
                 new_chunks.append((chunk_id, size, csize))
                 new_chunks.append((chunk_id, size, csize))
                 self.seen_chunks.add(chunk_id)
                 self.seen_chunks.add(chunk_id)
                 if self.recompress:
                 if self.recompress:

+ 4 - 0
src/borg/archiver.py

@@ -957,6 +957,7 @@ class Archiver:
                                      exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present,
                                      exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present,
                                      keep_tag_files=args.keep_tag_files, chunker_params=args.chunker_params,
                                      keep_tag_files=args.keep_tag_files, chunker_params=args.chunker_params,
                                      compression=args.compression, compression_files=args.compression_files,
                                      compression=args.compression, compression_files=args.compression_files,
+                                     always_recompress=args.always_recompress,
                                      progress=args.progress, stats=args.stats,
                                      progress=args.progress, stats=args.stats,
                                      file_status_printer=self.print_file_status,
                                      file_status_printer=self.print_file_status,
                                      dry_run=args.dry_run)
                                      dry_run=args.dry_run)
@@ -2098,6 +2099,9 @@ class Archiver:
                                         'zlib,0 .. zlib,9 == zlib (with level 0..9),\n'
                                         'zlib,0 .. zlib,9 == zlib (with level 0..9),\n'
                                         'lzma == lzma (default level 6),\n'
                                         'lzma == lzma (default level 6),\n'
                                         'lzma,0 .. lzma,9 == lzma (with level 0..9).')
                                         'lzma,0 .. lzma,9 == lzma (with level 0..9).')
+        archive_group.add_argument('--always-recompress', dest='always_recompress', action='store_true',
+                                   help='always recompress chunks, don\'t skip chunks already compressed with the same '
+                                        'algorithm.')
         archive_group.add_argument('--compression-from', dest='compression_files',
         archive_group.add_argument('--compression-from', dest='compression_files',
                                    type=argparse.FileType('r'), action='append',
                                    type=argparse.FileType('r'), action='append',
                                    metavar='COMPRESSIONCONFIG', help='read compression patterns from COMPRESSIONCONFIG, one per line')
                                    metavar='COMPRESSIONCONFIG', help='read compression patterns from COMPRESSIONCONFIG, one per line')

+ 2 - 0
src/borg/compress.pyx

@@ -6,6 +6,8 @@ except ImportError:
 
 
 from .helpers import Buffer
 from .helpers import Buffer
 
 
+API_VERSION = 2
+
 cdef extern from "lz4.h":
 cdef extern from "lz4.h":
     int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
     int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
     int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
     int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil

+ 3 - 1
src/borg/helpers.py

@@ -84,11 +84,13 @@ class PlaceholderError(Error):
 
 
 
 
 def check_extension_modules():
 def check_extension_modules():
-    from . import platform
+    from . import platform, compress
     if hashindex.API_VERSION != 3:
     if hashindex.API_VERSION != 3:
         raise ExtensionModuleError
         raise ExtensionModuleError
     if chunker.API_VERSION != 2:
     if chunker.API_VERSION != 2:
         raise ExtensionModuleError
         raise ExtensionModuleError
+    if compress.API_VERSION != 2:
+        raise ExtensionModuleError
     if crypto.API_VERSION != 3:
     if crypto.API_VERSION != 3:
         raise ExtensionModuleError
         raise ExtensionModuleError
     if platform.API_VERSION != 3:
     if platform.API_VERSION != 3: