@@ -17,10 +17,11 @@ import time
 from io import BytesIO
 from . import xattr
 from .compress import Compressor, COMPR_BUFFER
+from .constants import *  # NOQA
 from .helpers import Error, uid2user, user2uid, gid2group, group2gid, \
     parse_timestamp, to_localtime, format_time, format_timedelta, \
     Manifest, Statistics, decode_dict, make_path_safe, StableDict, int_to_bigint, bigint_to_int, \
-    ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, DASHES, \
+    ProgressIndicatorPercent, ChunkIteratorFileWrapper, remove_surrogates, log_multi, \
     PathPrefixPattern, FnmatchPattern, open_item, file_status, format_file_size, consume
 from .repository import Repository
 from .platform import acl_get, acl_set
@@ -29,19 +30,6 @@ from .hashindex import ChunkIndex, ChunkIndexEntry
 from .cache import ChunkListEntry
 import msgpack

-ITEMS_BUFFER = 1024 * 1024
-
-CHUNK_MIN_EXP = 19  # 2**19 == 512kiB
-CHUNK_MAX_EXP = 23  # 2**23 == 8MiB
-HASH_WINDOW_SIZE = 0xfff  # 4095B
-HASH_MASK_BITS = 21  # results in ~2MiB chunks statistically
-
-# defaults, use --chunker-params to override
-CHUNKER_PARAMS = (CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE)
-
-# chunker params for the items metadata stream, finer granularity
-ITEMS_CHUNKER_PARAMS = (12, 16, 14, HASH_WINDOW_SIZE)
-
 has_lchmod = hasattr(os, 'lchmod')
 has_lchflags = hasattr(os, 'lchflags')

@@ -59,7 +47,7 @@ class DownloadPipeline:
         unpacker = msgpack.Unpacker(use_list=False)
         for data in self.fetch_many(ids):
             unpacker.feed(data)
-            items = [decode_dict(item, (b'path', b'source', b'user', b'group')) for item in unpacker]
+            items = [decode_dict(item, ITEM_TEXT_KEYS) for item in unpacker]
             if filter:
                 items = [item for item in items if filter(item)]
             for item in items:
@@ -187,7 +175,7 @@ class Archive:
     def load(self, id):
         self.id = id
         self.metadata = self._load_meta(self.id)
-        decode_dict(self.metadata, (b'name', b'comment', b'hostname', b'username', b'time', b'time_end'))
+        decode_dict(self.metadata, ARCHIVE_TEXT_KEYS)
         self.metadata[b'cmdline'] = [arg.decode('utf-8', 'surrogateescape') for arg in self.metadata[b'cmdline']]
         self.name = self.metadata[b'name']

@@ -233,7 +221,7 @@ Number of files: {0.stats.nfiles}'''.format(

     def add_item(self, item):
         unknown_keys = set(item) - ITEM_KEYS
-        assert not unknown_keys, ('unknown item metadata keys detected, please update ITEM_KEYS: %s',
+        assert not unknown_keys, ('unknown item metadata keys detected, please update constants.ITEM_KEYS: %s',
                                   ','.join(k.decode('ascii') for k in unknown_keys))
         if self.show_progress:
             self.stats.show_progress(item=item, dt=0.2)
@@ -631,12 +619,6 @@ Number of files: {0.stats.nfiles}'''.format(
         return os.open(path, flags_normal)


-# this set must be kept complete, otherwise the RobustUnpacker might malfunction:
-ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', b'hardlink_master',
-                 b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
-                 b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])
-
-
 class RobustUnpacker:
     """A restartable/robust version of the streaming msgpack unpacker
     """
@@ -894,7 +876,7 @@ class ArchiveChecker:
             archive = StableDict(msgpack.unpackb(data))
             if archive[b'version'] != 1:
                 raise Exception('Unknown archive metadata version')
-            decode_dict(archive, (b'name', b'comment', b'hostname', b'username', b'time', b'time_end'))
+            decode_dict(archive, ARCHIVE_TEXT_KEYS)
             archive[b'cmdline'] = [arg.decode('utf-8', 'surrogateescape') for arg in archive[b'cmdline']]
             items_buffer = ChunkBuffer(self.key)
             items_buffer.write_chunk = add_callback
@@ -1154,24 +1136,23 @@ class ArchiveRecreater:
         matcher = self.matcher
         tag_files = []
         tagged_dirs = []
-        # build hardlink masters, but only for paths ending in CACHEDIR.TAG, so we can read hard-linked CACHEDIR.TAGs
+        # build hardlink masters, but only for paths ending in CACHE_TAG_NAME, so we can read hard-linked TAGs
         cachedir_masters = {}

         for item in archive.iter_items(
-                filter=lambda item: item[b'path'].endswith('CACHEDIR.TAG') or matcher.match(item[b'path'])):
-            if item[b'path'].endswith('CACHEDIR.TAG'):
+                filter=lambda item: item[b'path'].endswith(CACHE_TAG_NAME) or matcher.match(item[b'path'])):
+            if item[b'path'].endswith(CACHE_TAG_NAME):
                 cachedir_masters[item[b'path']] = item
             if stat.S_ISREG(item[b'mode']):
                 dir, tag_file = os.path.split(item[b'path'])
                 if tag_file in self.exclude_if_present:
                     exclude(dir, item)
-                if self.exclude_caches and tag_file == 'CACHEDIR.TAG':
-                    tag_contents = b'Signature: 8a477f597d28d172789f06886806bc55'
+                if self.exclude_caches and tag_file == CACHE_TAG_NAME:
                     if b'chunks' in item:
                         file = open_item(archive, item)
                     else:
                         file = open_item(archive, cachedir_masters[item[b'source']])
-                    if file.read(len(tag_contents)).startswith(tag_contents):
+                    if file.read(len(CACHE_TAG_CONTENTS)).startswith(CACHE_TAG_CONTENTS):
                         exclude(dir, item)
         matcher.add(tag_files, True)
         matcher.add(tagged_dirs, False)
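
For reference, the names this diff starts using (ITEM_KEYS, ITEM_TEXT_KEYS, ARCHIVE_TEXT_KEYS, CHUNKER_PARAMS, CACHE_TAG_NAME, CACHE_TAG_CONTENTS) come from the new constants module pulled in via "from .constants import *". Below is a minimal sketch of that module, reconstructed only from the values this diff removes or replaces; the real module may group, order, or extend these differently.

    # constants.py -- sketch reconstructed from the values removed/replaced in
    # this diff; the actual module may differ in grouping and extra names.

    # this set must be kept complete, otherwise the RobustUnpacker might malfunction:
    ITEM_KEYS = set([b'path', b'source', b'rdev', b'chunks', b'hardlink_master',
                     b'mode', b'user', b'group', b'uid', b'gid', b'mtime', b'atime', b'ctime',
                     b'xattrs', b'bsdflags', b'acl_nfs4', b'acl_access', b'acl_default', b'acl_extended', ])

    # item keys holding text values that need decoding on load
    # (grouping assumed from the decode_dict call sites above):
    ITEM_TEXT_KEYS = (b'path', b'source', b'user', b'group')

    # archive metadata keys holding text values (same assumption):
    ARCHIVE_TEXT_KEYS = (b'name', b'comment', b'hostname', b'username', b'time', b'time_end')

    ITEMS_BUFFER = 1024 * 1024

    CHUNK_MIN_EXP = 19  # 2**19 == 512kiB
    CHUNK_MAX_EXP = 23  # 2**23 == 8MiB
    HASH_WINDOW_SIZE = 0xfff  # 4095B
    HASH_MASK_BITS = 21  # results in ~2MiB chunks statistically

    # defaults, use --chunker-params to override
    CHUNKER_PARAMS = (CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE)

    # chunker params for the items metadata stream, finer granularity
    ITEMS_CHUNKER_PARAMS = (12, 16, 14, HASH_WINDOW_SIZE)

    # name and leading bytes of the standard CACHEDIR.TAG marker file,
    # taken from the string literals this diff replaces:
    CACHE_TAG_NAME = 'CACHEDIR.TAG'
    CACHE_TAG_CONTENTS = b'Signature: 8a477f597d28d172789f06886806bc55'

    # DASHES (dropped from the .helpers import above) presumably moves here
    # too, but its value is not visible in this diff, so it is omitted.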