
remove --compression-from

Marian Beermann 8 years ago
commit 69fb9bd403
6 files changed, 14 insertions(+), 138 deletions(-)
  1. src/borg/archive.py (+8, -17)
  2. src/borg/archiver.py (+3, -45)
  3. src/borg/compress.pyx (+0, -12)
  4. src/borg/helpers.py (+0, -33)
  5. src/borg/key.py (+3, -7)
  6. src/borg/testsuite/helpers.py (+0, -24)

src/borg/archive.py (+8, -17)

@@ -36,7 +36,6 @@ from .helpers import bin_to_hex
 from .helpers import safe_ns
 from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi
 from .helpers import PathPrefixPattern, FnmatchPattern
-from .helpers import CompressionDecider
 from .item import Item, ArchiveItem
 from .key import key_factory
 from .platform import acl_get, acl_set, set_flags, get_flags, swidth
@@ -278,7 +277,7 @@ class Archive:

     def __init__(self, repository, key, manifest, name, cache=None, create=False,
                  checkpoint_interval=300, numeric_owner=False, noatime=False, noctime=False, progress=False,
-                 chunker_params=CHUNKER_PARAMS, start=None, start_monotonic=None, end=None, compression=None, compression_files=None,
+                 chunker_params=CHUNKER_PARAMS, start=None, start_monotonic=None, end=None,
                  consider_part_files=False, log_json=False):
         self.cwd = os.getcwd()
         self.key = key
@@ -307,11 +306,8 @@ class Archive:
         self.pipeline = DownloadPipeline(self.repository, self.key)
         self.create = create
         if self.create:
-            self.file_compression_logger = create_logger('borg.debug.file-compression')
             self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats)
             self.chunker = Chunker(self.key.chunk_seed, *chunker_params)
-            self.compression_decider = CompressionDecider(compression or CompressionSpec('none'),
-                                                          compression_files or [])
             if name in manifest.archives:
                 raise self.AlreadyExists(name)
             self.last_checkpoint = time.monotonic()
@@ -970,12 +966,10 @@ Utilization of max. archive size: {csize_max:.0%}
         if chunks is not None:
             item.chunks = chunks
         else:
-            compressor = self.compression_decider.decide(path)
-            self.file_compression_logger.debug('%s -> compression %s', path, compressor.name)
             with backup_io('open'):
                 fh = Archive._open_rb(path)
             with os.fdopen(fh, 'rb') as fd:
-                self.chunk_file(item, cache, self.stats, backup_io_iter(self.chunker.chunkify(fd, fh)), compressor=compressor)
+                self.chunk_file(item, cache, self.stats, backup_io_iter(self.chunker.chunkify(fd, fh)))
             if not is_special_file:
                 # we must not memorize special files, because the contents of e.g. a
                 # block or char device will change without its mtime/size/inode changing.
@@ -1561,7 +1555,7 @@ class ArchiveRecreater:

     def __init__(self, repository, manifest, key, cache, matcher,
                  exclude_caches=False, exclude_if_present=None, keep_exclude_tags=False,
-                 chunker_params=None, compression=None, compression_files=None, always_recompress=False,
+                 chunker_params=None, compression=None, always_recompress=False,
                  dry_run=False, stats=False, progress=False, file_status_printer=None,
                  checkpoint_interval=1800):
         self.repository = repository
@@ -1582,8 +1576,6 @@ class ArchiveRecreater:
         self.always_recompress = always_recompress
         self.compression = compression or CompressionSpec('none')
         self.seen_chunks = set()
-        self.compression_decider = CompressionDecider(compression or CompressionSpec('none'),
-                                                      compression_files or [])

         self.dry_run = dry_run
         self.stats = stats
@@ -1652,11 +1644,10 @@ class ArchiveRecreater:
                 self.cache.chunk_incref(chunk_id, target.stats)
             return item.chunks
         chunk_iterator = self.iter_chunks(archive, target, list(item.chunks))
-        compressor = self.compression_decider.decide(item.path)
-        chunk_processor = partial(self.chunk_processor, target, compressor)
+        chunk_processor = partial(self.chunk_processor, target)
         target.chunk_file(item, self.cache, target.stats, chunk_iterator, chunk_processor)

-    def chunk_processor(self, target, compressor, data):
+    def chunk_processor(self, target, data):
         chunk_id = self.key.id_hash(data)
         if chunk_id in self.seen_chunks:
             return self.cache.chunk_incref(chunk_id, target.stats)
@@ -1664,10 +1655,10 @@ class ArchiveRecreater:
         if self.recompress and not self.always_recompress and chunk_id in self.cache.chunks:
             # Check if this chunk is already compressed the way we want it
             old_chunk = self.key.decrypt(None, self.repository.get(chunk_id), decompress=False)
-            if Compressor.detect(old_chunk.data).name == compressor.decide(data).name:
+            if Compressor.detect(old_chunk.data).name == self.key.compressor.decide(data).name:
                 # Stored chunk has the same compression we wanted
                 overwrite = False
-        chunk = Chunk(data, compressor=compressor)
+        chunk = Chunk(data)
         chunk_entry = self.cache.add_chunk(chunk_id, chunk, target.stats, overwrite=overwrite, wait=False)
         self.cache.repository.async_response(wait=False)
         self.seen_chunks.add(chunk_entry.id)
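
The skip logic above reduces to: recompress a chunk only when the stored format
differs from what the key's compressor would pick for that data now. A minimal
sketch of that decision, assuming the 1.1-era API seen in this diff
(Compressor.detect() identifies a compressed blob, compressors expose .decide()
and .name; should_overwrite is a hypothetical helper name)::

    from borg.compress import Compressor

    def should_overwrite(key, stored_blob, plain_data, always_recompress=False):
        # --always-recompress forces the rewrite regardless of the stored format
        if always_recompress:
            return True
        old_name = Compressor.detect(stored_blob).name     # e.g. 'zlib'
        new_name = key.compressor.decide(plain_data).name  # e.g. 'lz4'
        return old_name != new_name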
@@ -1753,7 +1744,7 @@ class ArchiveRecreater:
     def create_target_archive(self, name):
         target = Archive(self.repository, self.key, self.manifest, name, create=True,
                           progress=self.progress, chunker_params=self.chunker_params, cache=self.cache,
-                          checkpoint_interval=self.checkpoint_interval, compression=self.compression)
+                          checkpoint_interval=self.checkpoint_interval)
         return target

     def open_archive(self, name, **kwargs):

src/borg/archiver.py (+3, -45)

@@ -481,7 +481,6 @@ class Archiver:
                                   numeric_owner=args.numeric_owner, noatime=args.noatime, noctime=args.noctime,
                                   progress=args.progress,
                                   chunker_params=args.chunker_params, start=t0, start_monotonic=t0_monotonic,
-                                  compression=args.compression, compression_files=args.compression_files,
                                   log_json=args.log_json)
                 create_inner(archive, cache)
         else:
@@ -1335,8 +1334,7 @@ class Archiver:
         recreater = ArchiveRecreater(repository, manifest, key, cache, matcher,
                                      exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present,
                                      keep_exclude_tags=args.keep_exclude_tags, chunker_params=args.chunker_params,
-                                     compression=args.compression, compression_files=args.compression_files,
-                                     always_recompress=args.always_recompress,
+                                     compression=args.compression, always_recompress=args.always_recompress,
                                      progress=args.progress, stats=args.stats,
                                      file_status_printer=self.print_file_status,
                                      checkpoint_interval=args.checkpoint_interval,
@@ -1799,43 +1797,13 @@ class Archiver:
             For compressible data, it uses the given C[,L] compression - with C[,L]
             being any valid compression specifier.

-        The decision about which compression to use is done by borg like this:
-
-        1. find a compression specifier (per file):
-           match the path/filename against all patterns in all --compression-from
-           files (if any). If a pattern matches, use the compression spec given for
-           that pattern. If no pattern matches (and also if you do not give any
-           --compression-from option), default to the compression spec given by
-           --compression. See docs/misc/compression.conf for an example config.
-
-        2. if the found compression spec is not "auto", the decision is taken:
-           use the found compression spec.
-
-        3. if the found compression spec is "auto", test compressibility of each
-           chunk using lz4.
-           If it is compressible, use the C,[L] compression spec given within the
-           "auto" specifier. If it is not compressible, use no compression.
-
         Examples::

             borg create --compression lz4 REPO::ARCHIVE data
             borg create --compression zlib REPO::ARCHIVE data
             borg create --compression zlib,1 REPO::ARCHIVE data
             borg create --compression auto,lzma,6 REPO::ARCHIVE data
-            borg create --compression-from compression.conf --compression auto,lzma ...
-
-        compression.conf has entries like::
-
-            # example config file for --compression-from option
-            #
-            # Format of non-comment / non-empty lines:
-            # <compression-spec>:<path/filename pattern>
-            # compression-spec is same format as for --compression option
-            # path/filename pattern is same format as for --exclude option
-            none:*.gz
-            none:*.zip
-            none:*.mp3
-            none:*.ogg
+            borg create --compression auto,lzma ...

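With the pattern-file mechanism gone, "auto" is the remaining way to avoid
spending CPU on incompressible data: it runs a cheap LZ4 trial per chunk and
only applies the configured compressor when the trial shrinks the data. A rough
sketch of consuming such a spec programmatically (CompressionSpec and its
.compressor attribute appear elsewhere in this commit; the sample data is made
up)::

    from borg.compress import CompressionSpec

    spec = CompressionSpec('auto,lzma,6')  # same syntax as --compression
    compressor = spec.compressor           # an Auto meta-compressor

    chosen = compressor.decide(b'abcd' * 4096)
    print(chosen.name)  # expected 'lzma': the LZ4 trial shows a clear gain
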
         General remarks:

@@ -2424,11 +2392,6 @@ class Archiver:
                                    type=CompressionSpec, default=CompressionSpec('lz4'), metavar='COMPRESSION',
                                    help='select compression algorithm, see the output of the '
                                         '"borg help compression" command for details.')
-        archive_group.add_argument('--compression-from', dest='compression_files',
-                                   type=argparse.FileType('r'), action='append',
-                                   metavar='COMPRESSIONCONFIG',
-                                   help='read compression patterns from COMPRESSIONCONFIG, see the output of the '
-                                        '"borg help compression" command for details.')

         subparser.add_argument('location', metavar='ARCHIVE',
                                type=location_validator(archive=True),
@@ -2964,7 +2927,7 @@ class Archiver:
         resulting archive will only contain files from these PATHs.

         Note that all paths in an archive are relative, therefore absolute patterns/paths
-        will *not* match (--exclude, --exclude-from, --compression-from, PATHs).
+        will *not* match (--exclude, --exclude-from, PATHs).

         --compression: all chunks seen will be stored using the given method.
         Due to how Borg stores compressed size information this might display
@@ -3059,11 +3022,6 @@ class Archiver:
         archive_group.add_argument('--always-recompress', dest='always_recompress', action='store_true',
                                    help='always recompress chunks, don\'t skip chunks already compressed with the same '
                                         'algorithm.')
-        archive_group.add_argument('--compression-from', dest='compression_files',
-                                   type=argparse.FileType('r'), action='append',
-                                   metavar='COMPRESSIONCONFIG',
-                                   help='read compression patterns from COMPRESSIONCONFIG, see the output of the '
-                                        '"borg help compression" command for details.')
         archive_group.add_argument('--chunker-params', dest='chunker_params',
                                    type=ChunkerParams, default=CHUNKER_PARAMS,
                                    metavar='PARAMS',

src/borg/compress.pyx (+0, -12)

@@ -5,18 +5,6 @@ borg.compress
 Compression is applied to chunks after ID hashing (so the ID is a direct function of the
 plain chunk, compression is irrelevant to it), and of course before encryption.

-Borg has a flexible scheme for deciding which compression to use for chunks.
-
-First, there is a global default set by the --compression command line option,
-which sets the .compressor attribute on the Key.
-
-For chunks that emanate from files CompressionDecider may set a specific
-Compressor based on patterns (this is the --compression-from option). This is stored
-as a Compressor instance in the "compressor" key in the Chunk's meta dictionary.
-
-When compressing (KeyBase.compress) either the Compressor specified in the Chunk's
-meta dictionary is used, or the default Compressor of the key.
-
 The "auto" mode (e.g. --compression auto,lzma,4) is implemented as a meta Compressor,
 meaning that Auto acts like a Compressor, but defers actual work to others (namely
 LZ4 as a heuristic whether compression is worth it, and the specified Compressor
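
As a self-contained illustration of the meta-compressor pattern the docstring
keeps describing (a toy, not borg's implementation: zlib level 1 stands in for
borg's LZ4 trial, and the class names are invented)::

    import os
    import zlib

    class NoneCompressor:
        name = 'none'
        def compress(self, data):
            return data

    class ZlibCompressor:
        name = 'zlib'
        def compress(self, data):
            return zlib.compress(data, 6)

    class Auto:
        """Meta compressor: defer to `inner` only if a cheap trial shows a gain."""
        name = 'auto'
        def __init__(self, inner):
            self.inner = inner
        def decide(self, data):
            trial = zlib.compress(data, 1)  # fast, rough compressibility probe
            return self.inner if len(trial) < len(data) else NoneCompressor()
        def compress(self, data):
            return self.decide(data).compress(data)

    auto = Auto(ZlibCompressor())
    print(auto.decide(b'ab' * 4096).name)      # 'zlib': repetitive data compresses
    print(auto.decide(os.urandom(4096)).name)  # 'none': random data does not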

src/borg/helpers.py (+0, -33)

@@ -2096,39 +2096,6 @@ def clean_lines(lines, lstrip=None, rstrip=None, remove_empty=True, remove_comme
         yield line


-class CompressionDecider:
-    def __init__(self, compression, compression_files):
-        """
-        Initialize a CompressionDecider instance (and read config files, if needed).
-
-        :param compression: default CompressionSpec (e.g. from --compression option)
-        :param compression_files: list of compression config files (e.g. from --compression-from) or
-                                  a list of other line iterators
-        """
-        from .compress import CompressionSpec
-        self.compressor = compression.compressor
-        if not compression_files:
-            self.matcher = None
-        else:
-            self.matcher = PatternMatcher(fallback=compression.compressor)
-            for file in compression_files:
-                try:
-                    for line in clean_lines(file):
-                        try:
-                            compr_spec, fn_pattern = line.split(':', 1)
-                        except:
-                            continue
-                        self.matcher.add([parse_pattern(fn_pattern)], CompressionSpec(compr_spec).compressor)
-                finally:
-                    if hasattr(file, 'close'):
-                        file.close()
-
-    def decide(self, path):
-        if self.matcher is not None:
-            return self.matcher.match(path)
-        return self.compressor
-
-
 class ErrorIgnoringTextIOWrapper(io.TextIOWrapper):
     def read(self, n):
         if not self.closed:

src/borg/key.py (+3, -7)

@@ -152,10 +152,6 @@ class KeyBase:
         """Return HMAC hash using the "id" HMAC key
         """

-    def compress(self, chunk):
-        meta, data = chunk
-        return meta.get('compressor', self.compressor).compress(data)
-
     def encrypt(self, chunk):
         pass

@@ -256,7 +252,7 @@ class PlaintextKey(KeyBase):
         return sha256(data).digest()

     def encrypt(self, chunk):
-        data = self.compress(chunk)
+        data = self.compressor.compress(chunk.data)
         return b''.join([self.TYPE_STR, data])

     def decrypt(self, id, data, decompress=True):
@@ -334,7 +330,7 @@ class AESKeyBase(KeyBase):
     MAC = hmac_sha256

     def encrypt(self, chunk):
-        data = self.compress(chunk)
+        data = self.compressor.compress(chunk.data)
         self.nonce_manager.ensure_reservation(num_aes_blocks(len(data)))
         self.enc_cipher.reset()
         data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data)))
@@ -746,7 +742,7 @@ class AuthenticatedKey(ID_BLAKE2b_256, RepoKey):
     STORAGE = KeyBlobStorage.REPO

     def encrypt(self, chunk):
-        data = self.compress(chunk)
+        data = self.compressor.compress(chunk.data)
         return b''.join([self.TYPE_STR, data])

     def decrypt(self, id, data, decompress=True):
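
All three encrypt() variants now compress identically, which is the point of
deleting KeyBase.compress(): one compressor per key, no per-chunk override in
the chunk's meta dict. Condensed before/after, paraphrasing the diff with
hypothetical free-function names::

    # before: a chunk could carry its own compressor in its meta dict
    def compress_old(key, chunk):
        meta, data = chunk
        return meta.get('compressor', key.compressor).compress(data)

    # after: the key's compressor is the single source of truth
    def compress_new(key, chunk):
        return key.compressor.compress(chunk.data)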

src/borg/testsuite/helpers.py (+0, -24)

@@ -12,7 +12,6 @@ import msgpack
 import msgpack.fallback

 from .. import platform
-from ..compress import CompressionSpec
 from ..helpers import Location
 from ..helpers import Buffer
 from ..helpers import partial_format, format_file_size, parse_file_size, format_timedelta, format_line, PlaceholderError, replace_placeholders
@@ -25,7 +24,6 @@ from ..helpers import StableDict, int_to_bigint, bigint_to_int, bin_to_hex
 from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams, Chunk
 from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
 from ..helpers import load_exclude_file, load_pattern_file
-from ..helpers import CompressionDecider
 from ..helpers import parse_pattern, PatternMatcher
 from ..helpers import PathFullPattern, PathPrefixPattern, FnmatchPattern, ShellPattern, RegexPattern
 from ..helpers import swidth_slice
@@ -1202,28 +1200,6 @@ data2
     assert list(clean_lines(conf, remove_comments=False)) == ['#comment', 'data1 #data1', 'data2', 'data3', ]


-def test_compression_decider():
-    default = CompressionSpec('zlib')
-    conf = """
-# use super-fast lz4 compression on huge VM files in this path:
-lz4:/srv/vm_disks
-
-# jpeg or zip files do not compress:
-none:*.jpeg
-none:*.zip
-""".splitlines()
-
-    cd = CompressionDecider(default, [])  # no conf, always use default
-    assert cd.decide('/srv/vm_disks/linux').name == 'zlib'
-    assert cd.decide('test.zip').name == 'zlib'
-    assert cd.decide('test').name == 'zlib'
-
-    cd = CompressionDecider(default, [conf, ])
-    assert cd.decide('/srv/vm_disks/linux').name == 'lz4'
-    assert cd.decide('test.zip').name == 'none'
-    assert cd.decide('test').name == 'zlib'  # no match in conf, use default
-
-
 def test_format_line():
     data = dict(foo='bar baz')
     assert format_line('', data) == ''