Ver Fonte

borg create --compression 0..9 for variable compression

Thomas Waldmann há 10 anos atrás
pai
commit
6964799d13
3 ficheiros alterados com 138 adições e 2 exclusões
  1. 5 0
      borg/archiver.py
  2. 3 2
      borg/key.py
  3. 130 0
      docs/misc/create_compression.txt

+ 5 - 0
borg/archiver.py

@@ -101,6 +101,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         t0 = datetime.now()
         t0 = datetime.now()
         repository = self.open_repository(args.archive, exclusive=True)
         repository = self.open_repository(args.archive, exclusive=True)
         manifest, key = Manifest.load(repository)
         manifest, key = Manifest.load(repository)
+        key.compression_level = args.compression
         cache = Cache(repository, key, manifest, do_files=args.cache_files)
         cache = Cache(repository, key, manifest, do_files=args.cache_files)
         archive = Archive(repository, key, manifest, args.archive.archive, cache=cache,
         archive = Archive(repository, key, manifest, args.archive.archive, cache=cache,
                           create=True, checkpoint_interval=args.checkpoint_interval,
                           create=True, checkpoint_interval=args.checkpoint_interval,
@@ -630,6 +631,10 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                type=ChunkerParams, default=CHUNKER_PARAMS,
                                type=ChunkerParams, default=CHUNKER_PARAMS,
                                metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE',
                                metavar='CHUNK_MIN_EXP,CHUNK_MAX_EXP,HASH_MASK_BITS,HASH_WINDOW_SIZE',
                                help='specify the chunker parameters. default: %d,%d,%d,%d' % CHUNKER_PARAMS)
                                help='specify the chunker parameters. default: %d,%d,%d,%d' % CHUNKER_PARAMS)
+        subparser.add_argument('-C', '--compression', dest='compression',
+                               type=int, default=0, metavar='N',
+                               help='select compression algorithm and level. 0..9 is supported and means zlib '
+                                    'level 0 (no compression, fast, default) .. zlib level 9 (high compression, slow).')
         subparser.add_argument('archive', metavar='ARCHIVE',
         subparser.add_argument('archive', metavar='ARCHIVE',
                                type=location_validator(archive=True),
                                type=location_validator(archive=True),
                                help='archive to create')
                                help='archive to create')

+ 3 - 2
borg/key.py

@@ -53,6 +53,7 @@ class KeyBase:
 
 
     def __init__(self):
     def __init__(self):
         self.TYPE_STR = bytes([self.TYPE])
         self.TYPE_STR = bytes([self.TYPE])
+        self.compression_level = 0
 
 
     def id_hash(self, data):
     def id_hash(self, data):
         """Return HMAC hash using the "id" HMAC key
         """Return HMAC hash using the "id" HMAC key
@@ -83,7 +84,7 @@ class PlaintextKey(KeyBase):
         return sha256(data).digest()
         return sha256(data).digest()
 
 
     def encrypt(self, data):
     def encrypt(self, data):
-        return b''.join([self.TYPE_STR, zlib.compress(data)])
+        return b''.join([self.TYPE_STR, zlib.compress(data, self.compression_level)])
 
 
     def decrypt(self, id, data):
     def decrypt(self, id, data):
         if data[0] != self.TYPE:
         if data[0] != self.TYPE:
@@ -115,7 +116,7 @@ class AESKeyBase(KeyBase):
         return HMAC(self.id_key, data, sha256).digest()
         return HMAC(self.id_key, data, sha256).digest()
 
 
     def encrypt(self, data):
     def encrypt(self, data):
-        data = zlib.compress(data)
+        data = zlib.compress(data, self.compression_level)
         self.enc_cipher.reset()
         self.enc_cipher.reset()
         data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data)))
         data = b''.join((self.enc_cipher.iv[8:], self.enc_cipher.encrypt(data)))
         hmac = HMAC(self.enc_hmac_key, data, sha256).digest()
         hmac = HMAC(self.enc_hmac_key, data, sha256).digest()

+ 130 - 0
docs/misc/create_compression.txt

@@ -0,0 +1,130 @@
+data compression
+================
+
+borg create --compression N repo::archive data
+
+Currently, borg only supports zlib compression. There are plans to add support
+for other, faster or better compression algorithms in the future.
+
+N == 0 -> zlib level 0 == very quick, no compression
+N == 1 -> zlib level 1 == quick, low compression
+...
+N == 9 -> zlib level 9 == slow, high compression
+
+Measurements made on a Haswell Ultrabook, SSD storage, Linux.
+
+
+Example 1: lots of relatively small text files (linux kernel src)
+-----------------------------------------------------------------
+
+N == 1 does a good job here: the additional time spent on compression is
+recovered because less data has to be written to storage (compare with N == 0).
+
+N == 6 is also quite ok: it is a little slower and gives a slightly smaller
+repo size. 6 was the old default of borg.
+
+High compression levels only give a little more compression, but take a lot
+of cpu time.
+
+$ borg create --stats --compression 0
+------------------------------------------------------------------------------ 
+Duration: 50.40 seconds
+Number of files: 72890
+
+                       Original size      Compressed size    Deduplicated size
+This archive:                1.17 GB              1.18 GB              1.01 GB
+
+                       Unique chunks         Total chunks
+Chunk index:                   70263                82309
+------------------------------------------------------------------------------ 
+
+$ borg create --stats --compression 1
+------------------------------------------------------------------------------ 
+Duration: 49.29 seconds
+Number of files: 72890
+
+                       Original size      Compressed size    Deduplicated size
+This archive:                1.17 GB            368.62 MB            295.22 MB
+
+                       Unique chunks         Total chunks
+Chunk index:                   70280                82326
+------------------------------------------------------------------------------
+
+$ borg create --stats --compression 5
+------------------------------------------------------------------------------ 
+Duration: 59.99 seconds
+Number of files: 72890
+
+                       Original size      Compressed size    Deduplicated size
+This archive:                1.17 GB            331.70 MB            262.20 MB
+
+                       Unique chunks         Total chunks
+Chunk index:                   70290                82336
+------------------------------------------------------------------------------
+
+$ borg create --stats --compression 6
+------------------------------------------------------------------------------ 
+Duration: 1 minutes 13.64 seconds
+Number of files: 72890
+
+                       Original size      Compressed size    Deduplicated size
+This archive:                1.17 GB            328.79 MB            259.56 MB
+
+                       Unique chunks         Total chunks
+Chunk index:                   70279                82325
+------------------------------------------------------------------------------
+
+$ borg create --stats --compression 9
+------------------------------------------------------------------------------
+Duration: 3 minutes 1.58 seconds
+Number of files: 72890
+
+                       Original size      Compressed size    Deduplicated size
+This archive:                1.17 GB            326.57 MB            257.57 MB
+
+                       Unique chunks         Total chunks
+Chunk index:                   70292                82338
+------------------------------------------------------------------------------
+
+
+Example 2: large VM disk file (sparse file)
+-------------------------------------------
+
+The file's apparent size (as shown in a directory listing) is 80GB, but a lot
+of it is sparse (the sparse regions read as zeros).
+
+$ borg create --stats --compression 0
+------------------------------------------------------------------------------
+Duration: 13 minutes 48.47 seconds
+Number of files: 1
+
+                       Original size      Compressed size    Deduplicated size
+This archive:               80.54 GB             80.55 GB             10.87 GB
+
+                       Unique chunks         Total chunks
+Chunk index:                  147307               177109
+------------------------------------------------------------------------------
+
+$ borg create --stats --compression 1
+------------------------------------------------------------------------------
+Duration: 15 minutes 31.34 seconds
+Number of files: 1
+
+                       Original size      Compressed size    Deduplicated size
+This archive:               80.54 GB              6.68 GB              5.67 GB
+
+                       Unique chunks         Total chunks
+Chunk index:                  147309               177111
+------------------------------------------------------------------------------
+
+$ borg create --stats --compression 6
+------------------------------------------------------------------------------
+Duration: 18 minutes 57.54 seconds
+Number of files: 1
+
+                       Original size      Compressed size    Deduplicated size
+This archive:               80.54 GB              6.19 GB              5.44 GB
+
+                       Unique chunks         Total chunks
+Chunk index:                  147307               177109
+------------------------------------------------------------------------------