|
@@ -1,6 +1,8 @@
|
|
# -*- coding: utf-8 -*-
|
|
# -*- coding: utf-8 -*-
|
|
|
|
|
|
-API_VERSION = '1.1_02'
|
|
|
|
|
|
+API_VERSION = '1.1_03'
|
|
|
|
+
|
|
|
|
+import os
|
|
|
|
|
|
from libc.stdlib cimport free
|
|
from libc.stdlib cimport free
|
|
|
|
|
|
@@ -17,6 +19,67 @@ cdef extern from "_chunker.c":
|
|
uint32_t c_buzhash_update "buzhash_update"(uint32_t sum, unsigned char remove, unsigned char add, size_t len, uint32_t *h)
|
|
uint32_t c_buzhash_update "buzhash_update"(uint32_t sum, unsigned char remove, unsigned char add, size_t len, uint32_t *h)
|
|
|
|
|
|
|
|
|
|
|
|
class ChunkerFixed:
    """
    Fixed blocksize Chunker, optionally supporting a header block of different size.

    This is a very simple chunker for input data with known block/record sizes:

    - raw disk images
    - block devices
    - database files with simple header + fixed-size records layout

    Note: the last block of the input data may be less than the block size,
    this is supported and not considered to be an error.
    """
    def __init__(self, block_size, header_size=0):
        # block_size: size of each chunk yielded (except possibly the last one)
        # header_size: size of the initial header chunk, 0 means "no header"
        self.block_size = block_size
        self.header_size = header_size

    def chunkify(self, fd, fh=-1):
        """
        Cut a file into chunks.

        Generator yielding bytes objects: first the header chunk (if
        header_size > 0 and the file is non-empty), then block_size sized
        chunks; the final chunk may be shorter. Yields nothing for empty input.

        :param fd: Python file object
        :param fh: OS-level file handle (if available),
                   defaults to -1 which means not to use OS-level fd.
        """
        offset = 0
        use_fh = fh >= 0

        if use_fh:
            def read(size):
                nonlocal offset
                # os.read() is NOT guaranteed to return `size` bytes even when
                # more data is available (e.g. pipes, block devices, EINTR).
                # A short read mid-file would silently misalign every following
                # fixed-size block, so loop until we have a full block or EOF.
                pieces = []
                wanted = size
                while wanted > 0:
                    piece = os.read(fh, wanted)
                    if not piece:
                        break  # EOF
                    pieces.append(piece)
                    wanted -= len(piece)
                data = b''.join(pieces)
                amount = len(data)
                if hasattr(os, 'posix_fadvise'):
                    # UNIX only and, in case of block sizes that are not a multiple of the
                    # system's page size, better be used with a bug fixed linux kernel > 4.6.0,
                    # see comment/workaround in _chunker.c and borgbackup issue #907.
                    os.posix_fadvise(fh, offset, amount, os.POSIX_FADV_DONTNEED)
                offset += amount
                return data
        else:
            def read(size):
                nonlocal offset
                # Buffered file objects already satisfy read(size) fully
                # (short only at EOF), so no retry loop is needed here.
                data = fd.read(size)
                offset += len(data)
                return data

        if self.header_size > 0:
            data = read(self.header_size)
            if data:
                yield data
        else:
            data = True  # get into next while loop
        while data:
            data = read(self.block_size)
            if data:
                yield data
        # empty data means we are at EOF and we terminate the generator.
|
|
cdef class Chunker:
|
|
cdef class Chunker:
|
|
"""
|
|
"""
|
|
Content-Defined Chunker, variable chunk sizes.
|
|
Content-Defined Chunker, variable chunk sizes.
|
|
@@ -65,6 +128,8 @@ def get_chunker(algo, *params, **kw):
|
|
if algo == 'buzhash':
|
|
if algo == 'buzhash':
|
|
seed = kw['seed']
|
|
seed = kw['seed']
|
|
return Chunker(seed, *params)
|
|
return Chunker(seed, *params)
|
|
|
|
+ if algo == 'fixed':
|
|
|
|
+ return ChunkerFixed(*params)
|
|
raise TypeError('unsupported chunker algo %r' % algo)
|
|
raise TypeError('unsupported chunker algo %r' % algo)
|
|
|
|
|
|
|
|
|
|
@@ -72,6 +137,8 @@ def max_chunk_size(algo, *params):
|
|
# see also parseformat.ChunkerParams return values
|
|
# see also parseformat.ChunkerParams return values
|
|
if algo == 'buzhash':
|
|
if algo == 'buzhash':
|
|
return 1 << params[1]
|
|
return 1 << params[1]
|
|
|
|
+ if algo == 'fixed':
|
|
|
|
+ return max(params[0], params[1])
|
|
raise TypeError('unsupported chunker algo %r' % algo)
|
|
raise TypeError('unsupported chunker algo %r' % algo)
|
|
|
|
|
|
|
|
|