ソースを参照

borg create --chunker-params=...

Thomas Waldmann 10 年前
コミット
3b9b976f2a
3 ファイル変更、25 行追加、10 行削除
  1. borg/archive.py (+12 -7)
  2. borg/archiver.py (+8 -3)
  3. borg/helpers.py (+5 -0)

+ 12 - 7
borg/archive.py

@@ -21,10 +21,12 @@ from .helpers import parse_timestamp, Error, uid2user, user2uid, gid2group, grou
     Manifest, Statistics, decode_dict, st_mtime_ns, make_path_safe, StableDict, int_to_bigint, bigint_to_int
     Manifest, Statistics, decode_dict, st_mtime_ns, make_path_safe, StableDict, int_to_bigint, bigint_to_int
 
 
 ITEMS_BUFFER = 1024 * 1024
 ITEMS_BUFFER = 1024 * 1024
+
 CHUNK_MIN = 1024
 CHUNK_MIN = 1024
 CHUNK_MAX = 10 * 1024 * 1024
 CHUNK_MAX = 10 * 1024 * 1024
 WINDOW_SIZE = 0xfff
 WINDOW_SIZE = 0xfff
 CHUNK_MASK = 0xffff
 CHUNK_MASK = 0xffff
+CHUNKER_PARAMS = (WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, CHUNK_MAX)
 
 
 ZEROS = b'\0' * CHUNK_MAX
 ZEROS = b'\0' * CHUNK_MAX
 
 
@@ -69,12 +71,13 @@ class DownloadPipeline:
 class ChunkBuffer:
 class ChunkBuffer:
     BUFFER_SIZE = 1 * 1024 * 1024
     BUFFER_SIZE = 1 * 1024 * 1024
 
 
-    def __init__(self, key):
+    def __init__(self, key, chunker_params=CHUNKER_PARAMS):
         self.buffer = BytesIO()
         self.buffer = BytesIO()
         self.packer = msgpack.Packer(unicode_errors='surrogateescape')
         self.packer = msgpack.Packer(unicode_errors='surrogateescape')
         self.chunks = []
         self.chunks = []
         self.key = key
         self.key = key
-        self.chunker = Chunker(WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, CHUNK_MAX,self.key.chunk_seed)
+        chunker_params += (self.key.chunk_seed, )
+        self.chunker = Chunker(*chunker_params)
 
 
     def add(self, item):
     def add(self, item):
         self.buffer.write(self.packer.pack(StableDict(item)))
         self.buffer.write(self.packer.pack(StableDict(item)))
@@ -104,8 +107,8 @@ class ChunkBuffer:
 
 
 class CacheChunkBuffer(ChunkBuffer):
 class CacheChunkBuffer(ChunkBuffer):
 
 
-    def __init__(self, cache, key, stats):
-        super(CacheChunkBuffer, self).__init__(key)
+    def __init__(self, cache, key, stats, chunker_params=CHUNKER_PARAMS):
+        super(CacheChunkBuffer, self).__init__(key, chunker_params)
         self.cache = cache
         self.cache = cache
         self.stats = stats
         self.stats = stats
 
 
@@ -127,7 +130,8 @@ class Archive:
 
 
 
 
     def __init__(self, repository, key, manifest, name, cache=None, create=False,
     def __init__(self, repository, key, manifest, name, cache=None, create=False,
-                 checkpoint_interval=300, numeric_owner=False, progress=False):
+                 checkpoint_interval=300, numeric_owner=False, progress=False,
+                 chunker_params=CHUNKER_PARAMS):
         self.cwd = os.getcwd()
         self.cwd = os.getcwd()
         self.key = key
         self.key = key
         self.repository = repository
         self.repository = repository
@@ -142,8 +146,9 @@ class Archive:
         self.numeric_owner = numeric_owner
         self.numeric_owner = numeric_owner
         self.pipeline = DownloadPipeline(self.repository, self.key)
         self.pipeline = DownloadPipeline(self.repository, self.key)
         if create:
         if create:
-            self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats)
-            self.chunker = Chunker(WINDOW_SIZE, CHUNK_MASK, CHUNK_MIN, CHUNK_MAX, self.key.chunk_seed)
+            self.items_buffer = CacheChunkBuffer(self.cache, self.key, self.stats, chunker_params)
+            chunker_params += (self.key.chunk_seed, )
+            self.chunker = Chunker(*chunker_params)
             if name in manifest.archives:
             if name in manifest.archives:
                 raise self.AlreadyExists(name)
                 raise self.AlreadyExists(name)
             self.last_checkpoint = time.time()
             self.last_checkpoint = time.time()

+ 8 - 3
borg/archiver.py

@@ -13,7 +13,7 @@ import textwrap
 import traceback
 import traceback
 
 
 from . import __version__
 from . import __version__
-from .archive import Archive, ArchiveChecker
+from .archive import Archive, ArchiveChecker, CHUNKER_PARAMS
 from .repository import Repository
 from .repository import Repository
 from .cache import Cache
 from .cache import Cache
 from .key import key_creator
 from .key import key_creator
@@ -21,7 +21,7 @@ from .helpers import Error, location_validator, format_time, format_file_size, \
     format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \
     format_file_mode, ExcludePattern, exclude_path, adjust_patterns, to_localtime, timestamp, \
     get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \
     get_cache_dir, get_keys_dir, format_timedelta, prune_within, prune_split, \
     Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
     Manifest, remove_surrogates, update_excludes, format_archive, check_extension_modules, Statistics, \
-    is_cachedir, bigint_to_int
+    is_cachedir, bigint_to_int, ChunkerParams
 from .remote import RepositoryServer, RemoteRepository
 from .remote import RepositoryServer, RemoteRepository
 
 
 
 
@@ -104,7 +104,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         cache = Cache(repository, key, manifest, do_files=args.cache_files)
         cache = Cache(repository, key, manifest, do_files=args.cache_files)
         archive = Archive(repository, key, manifest, args.archive.archive, cache=cache,
         archive = Archive(repository, key, manifest, args.archive.archive, cache=cache,
                           create=True, checkpoint_interval=args.checkpoint_interval,
                           create=True, checkpoint_interval=args.checkpoint_interval,
-                          numeric_owner=args.numeric_owner, progress=args.progress)
+                          numeric_owner=args.numeric_owner, progress=args.progress,
+                          chunker_params=args.chunker_params)
         # Add cache dir to inode_skip list
         # Add cache dir to inode_skip list
         skip_inodes = set()
         skip_inodes = set()
         try:
         try:
@@ -625,6 +626,10 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                                metavar='yyyy-mm-ddThh:mm:ss',
                                metavar='yyyy-mm-ddThh:mm:ss',
                                help='manually specify the archive creation date/time (UTC). '
                                help='manually specify the archive creation date/time (UTC). '
                                     'alternatively, give a reference file/directory.')
                                     'alternatively, give a reference file/directory.')
+        subparser.add_argument('--chunker-params', dest='chunker_params',
+                               type=ChunkerParams, default=CHUNKER_PARAMS,
+                               metavar='WINDOW_SIZE,CHUNK_MASK,CHUNK_MIN,CHUNK_MAX',
+                               help='specify the chunker parameters. default: %r' % (CHUNKER_PARAMS, ))
         subparser.add_argument('archive', metavar='ARCHIVE',
         subparser.add_argument('archive', metavar='ARCHIVE',
                                type=location_validator(archive=True),
                                type=location_validator(archive=True),
                                help='archive to create')
                                help='archive to create')

+ 5 - 0
borg/helpers.py

@@ -313,6 +313,11 @@ def timestamp(s):
         raise ValueError
         raise ValueError
 
 
 
 
+def ChunkerParams(s):
+    window_size, chunk_mask, chunk_min, chunk_max = s.split(',')
+    return int(window_size), int(chunk_mask), int(chunk_min), int(chunk_max)
+
+
 def is_cachedir(path):
 def is_cachedir(path):
     """Determines whether the specified path is a cache directory (and
     """Determines whether the specified path is a cache directory (and
     therefore should potentially be excluded from the backup) according to
     therefore should potentially be excluded from the backup) according to