@@ -30,6 +30,7 @@ from .helpers import format_file_size
 from .logger import create_logger, setup_logging
 from .repository import Repository, MAX_OBJECT_SIZE, LIST_SCAN_LIMIT
 from .version import parse_version, format_version
+from .algorithms.checksums import xxh64
 
 logger = create_logger(__name__)
 
@@ -1086,6 +1087,9 @@ class RepositoryCache(RepositoryNoCache):
     should return the initial data (as returned by *transform*).
     """
 
+    class InvalidateCacheEntry(Exception):
+        pass
+
     def __init__(self, repository, pack=None, unpack=None, transform=None):
         super().__init__(repository, transform)
         self.pack = pack or (lambda data: data)
@@ -1100,6 +1104,7 @@ class RepositoryCache(RepositoryNoCache):
         self.slow_misses = 0
         self.slow_lat = 0.0
         self.evictions = 0
+        self.checksum_errors = 0
         self.enospc = 0
 
     def query_size_limit(self):
@@ -1144,10 +1149,10 @@ class RepositoryCache(RepositoryNoCache):
 
     def close(self):
         logger.debug('RepositoryCache: current items %d, size %s / %s, %d hits, %d misses, %d slow misses (+%.1fs), '
-                     '%d evictions, %d ENOSPC hit',
+                     '%d evictions, %d ENOSPC hit, %d checksum errors',
                      len(self.cache), format_file_size(self.size), format_file_size(self.size_limit),
                      self.hits, self.misses, self.slow_misses, self.slow_lat,
-                     self.evictions, self.enospc)
+                     self.evictions, self.enospc, self.checksum_errors)
         self.cache.clear()
         shutil.rmtree(self.basedir)
 
@@ -1157,30 +1162,37 @@ class RepositoryCache(RepositoryNoCache):
         for key in keys:
             if key in self.cache:
                 file = self.key_filename(key)
-                with open(file, 'rb') as fd:
-                    self.hits += 1
-                    yield self.unpack(fd.read())
-            else:
-                for key_, data in repository_iterator:
-                    if key_ == key:
-                        transformed = self.add_entry(key, data, cache)
-                        self.misses += 1
-                        yield transformed
-                        break
-                else:
-                    # slow path: eviction during this get_many removed this key from the cache
-                    t0 = time.perf_counter()
-                    data = self.repository.get(key)
-                    self.slow_lat += time.perf_counter() - t0
+                try:
+                    with open(file, 'rb') as fd:
+                        self.hits += 1
+                        yield self.unpack(fd.read())
+                    continue  # go to the next key
+                except self.InvalidateCacheEntry:
+                    self.cache.remove(key)
+                    self.size -= os.stat(file).st_size
+                    self.checksum_errors += 1
+                    os.unlink(file)
+                    # fall through to fetch the object again
+            for key_, data in repository_iterator:
+                if key_ == key:
                     transformed = self.add_entry(key, data, cache)
-                    self.slow_misses += 1
+                    self.misses += 1
                     yield transformed
+                    break
+            else:
+                # slow path: eviction during this get_many removed this key from the cache
+                t0 = time.perf_counter()
+                data = self.repository.get(key)
+                self.slow_lat += time.perf_counter() - t0
+                transformed = self.add_entry(key, data, cache)
+                self.slow_misses += 1
+                yield transformed
         # Consume any pending requests
         for _ in repository_iterator:
             pass
 
 
-def cache_if_remote(repository, *, decrypted_cache=False, pack=None, unpack=None, transform=None):
+def cache_if_remote(repository, *, decrypted_cache=False, pack=None, unpack=None, transform=None, force_cache=False):
     """
     Return a Repository(No)Cache for *repository*.
 
@@ -1194,21 +1206,30 @@ def cache_if_remote(repository, *, decrypted_cache=False, pack=None, unpack=None
         raise ValueError('decrypted_cache and pack/unpack/transform are incompatible')
     elif decrypted_cache:
         key = decrypted_cache
-        cache_struct = struct.Struct('=I')
+        # 32 bit csize, 64 bit (8 byte) xxh64
+        cache_struct = struct.Struct('=I8s')
         compressor = LZ4()
 
         def pack(data):
-            return cache_struct.pack(data[0]) + compressor.compress(data[1])
+            csize, decrypted = data
+            compressed = compressor.compress(decrypted)
+            return cache_struct.pack(csize, xxh64(compressed)) + compressed
 
         def unpack(data):
-            return cache_struct.unpack(data[:cache_struct.size])[0], compressor.decompress(data[cache_struct.size:])
+            data = memoryview(data)
+            csize, checksum = cache_struct.unpack(data[:cache_struct.size])
+            compressed = data[cache_struct.size:]
+            if checksum != xxh64(compressed):
+                logger.warning('Repository metadata cache: detected corrupted data in cache!')
+                raise RepositoryCache.InvalidateCacheEntry
+            return csize, compressor.decompress(compressed)
 
         def transform(id_, data):
             csize = len(data)
             decrypted = key.decrypt(id_, data)
             return csize, decrypted
 
-    if isinstance(repository, RemoteRepository):
+    if isinstance(repository, RemoteRepository) or force_cache:
         return RepositoryCache(repository, pack, unpack, transform)
     else:
         return RepositoryNoCache(repository, transform)
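
For illustration, a minimal, self-contained sketch of the cache entry format this patch introduces. This is not borg's code: the PyPI 'xxhash' package stands in for borg.algorithms.checksums.xxh64, and stdlib zlib stands in for borg's LZ4 wrapper, but the '=I8s' header layout, the checksum-over-compressed-payload order, and the invalidate-then-refetch signal follow the patch.

import struct
import zlib

import xxhash  # PyPI 'xxhash'; stand-in for borg.algorithms.checksums.xxh64

# Entry layout from the patch: 32 bit csize, 8 byte xxh64 digest of the
# compressed payload, then the payload itself.
CACHE_STRUCT = struct.Struct('=I8s')


class InvalidateCacheEntry(Exception):
    """Entry failed its checksum; the caller must evict it and re-fetch."""


def pack(csize, decrypted):
    # zlib replaces borg's LZ4 in this sketch; the digest covers the
    # compressed bytes, so corruption is caught before decompression.
    compressed = zlib.compress(decrypted)
    return CACHE_STRUCT.pack(csize, xxhash.xxh64(compressed).digest()) + compressed


def unpack(data):
    # the patch slices a memoryview to avoid a copy; plain bytes for brevity
    csize, checksum = CACHE_STRUCT.unpack(data[:CACHE_STRUCT.size])
    compressed = data[CACHE_STRUCT.size:]
    if checksum != xxhash.xxh64(compressed).digest():
        raise InvalidateCacheEntry
    return csize, zlib.decompress(compressed)


if __name__ == '__main__':
    entry = pack(42, b'decrypted object data')
    assert unpack(entry) == (42, b'decrypted object data')
    corrupted = entry[:-1] + bytes([entry[-1] ^ 0x01])  # flip one payload bit
    try:
        unpack(corrupted)
    except InvalidateCacheEntry:
        print('corruption detected; get_many() would evict the file and re-fetch')

Raising an exception from unpack() keeps the codec a pure function: the eviction policy (unlink the file, adjust self.size, bump checksum_errors, fall through to the repository iterator) lives in get_many(), which is why the patch nests InvalidateCacheEntry inside RepositoryCache.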