7 年之前 · 80e0b42f7d
--- a/src/borg/chunker.pyx
+++ b/src/borg/chunker.pyx
@@ -1,6 +1,8 @@
 
															 # -*- coding: utf-8 -*-
														
 
															-API_VERSION = '1.1_02'
														
 
															+API_VERSION = '1.1_03'
														
 
															+
														
 
															+import os
														
 
															 from libc.stdlib cimport free
														
@@ -17,6 +19,67 @@ cdef extern from "_chunker.c":
 
															     uint32_t c_buzhash_update  "buzhash_update"(uint32_t sum, unsigned char remove, unsigned char add, size_t len, uint32_t *h)
														
 
															+class ChunkerFixed:
														
 
															+    """
														
 
															+    Fixed blocksize Chunker, optionally supporting a header block of different size.
														
 
															+
														
 
															+    This is a very simple chunker for input data with known block/record sizes:
														
 
															+
														
 
															+    - raw disk images
														
 
															+    - block devices
														
 
															+    - database files with simple header + fixed-size records layout
														
 
															+
														
 
															+    Note: the last block of the input data may be less than the block size,
														
 
															+          this is supported and not considered to be an error.
														
 
															+    """
														
 
															+    def __init__(self, block_size, header_size=0):
														
 
															+        self.block_size = block_size
														
 
															+        self.header_size = header_size
														
 
															+
														
 
															+    def chunkify(self, fd, fh=-1):
														
 
															+        """
														
 
															+        Cut a file into chunks.
														
 
															+
														
 
															+        :param fd: Python file object
														
 
															+        :param fh: OS-level file handle (if available),
														
 
															+                   defaults to -1 which means not to use OS-level fd.
														
 
															+        """
														
 
															+        offset = 0
														
 
															+        use_fh = fh >= 0
														
 
															+
														
 
															+        if use_fh:
														
 
															+            def read(size):
														
 
															+                nonlocal offset
														
 
															+                data = os.read(fh, size)
														
 
															+                amount = len(data)
														
 
															+                if hasattr(os, 'posix_fadvise'):
														
 
															+                    # UNIX only and, in case of block sizes that are not a multiple of the
														
 
															+                    # system's page size, better be used with a bug fixed linux kernel > 4.6.0,
														
 
															+                    # see comment/workaround in _chunker.c and borgbackup issue #907.
														
 
															+                    os.posix_fadvise(fh, offset, amount, os.POSIX_FADV_DONTNEED)
														
 
															+                offset += amount
														
 
															+                return data
														
 
															+        else:
														
 
															+            def read(size):
														
 
															+                nonlocal offset
														
 
															+                data = fd.read(size)
														
 
															+                amount = len(data)
														
 
															+                offset += amount
														
 
															+                return data
														
 
															+
														
 
															+        if self.header_size > 0:
														
 
															+            data = read(self.header_size)
														
 
															+            if data:
														
 
															+                yield data
														
 
															+        else:
														
 
															+            data = True  # get into next while loop
														
 
															+        while data:
														
 
															+            data = read(self.block_size)
														
 
															+            if data:
														
 
															+                yield data
														
 
															+        # empty data means we are at EOF and we terminate the generator.
														
 
															+
														
 
															+
														
 
															 cdef class Chunker:
														
 
															     """
														
 
															     Content-Defined Chunker, variable chunk sizes.
														
@@ -65,6 +128,8 @@ def get_chunker(algo, *params, **kw):
 
															     if algo == 'buzhash':
														
 
															         seed = kw['seed']
														
 
															         return Chunker(seed, *params)
														
 
															+    if algo == 'fixed':
														
 
															+        return ChunkerFixed(*params)
														
 
															     raise TypeError('unsupported chunker algo %r' % algo)
														
@@ -72,6 +137,8 @@ def max_chunk_size(algo, *params):
 
															     # see also parseformat.ChunkerParams return values
														
 
															     if algo == 'buzhash':
														
 
															         return 1 << params[1]
														
 
															+    if algo == 'fixed':
														
 
															+        return max(params[0], params[1])
														
 
															     raise TypeError('unsupported chunker algo %r' % algo)
														
--- a/src/borg/helpers/checks.py
+++ b/src/borg/helpers/checks.py
@@ -27,7 +27,7 @@ def check_extension_modules():
 
															     from .. import platform, compress, item, chunker, hashindex
														
 
															     if hashindex.API_VERSION != '1.1_07':
														
 
															         raise ExtensionModuleError
														
 
															-    if chunker.API_VERSION != '1.1_02':
														
 
															+    if chunker.API_VERSION != '1.1_03':
														
 
															         raise ExtensionModuleError
														
 
															     if compress.API_VERSION != '1.1_06':
														
 
															         raise ExtensionModuleError
														
--- a/src/borg/helpers/parseformat.py
+++ b/src/borg/helpers/parseformat.py
@@ -113,6 +113,10 @@ def ChunkerParams(s):
 
															     if count == 0:
														
 
															         raise ValueError('no chunker params given')
														
 
															     algo = params[0].lower()
														
 
															+    if algo == 'fixed' and 2 <= count <= 3:  # fixed, block_size[, header_size]
														
 
															+        block_size = int(params[1])
														
 
															+        header_size = int(params[2]) if count == 3 else 0
														
 
															+        return algo, block_size, header_size
														
 
															     if algo == 'default' and count == 1:  # default
														
 
															         return CHUNKER_PARAMS
														
 
															     # this must stay last as it deals with old-style compat mode (no algorithm, 4 params, buzhash):
														
--- a/src/borg/testsuite/chunker.py
+++ b/src/borg/testsuite/chunker.py
@@ -1,6 +1,6 @@
 
															 from io import BytesIO
														
 
															-from ..chunker import Chunker, get_chunker, buzhash, buzhash_update
														
 
															+from ..chunker import ChunkerFixed, Chunker, get_chunker, buzhash, buzhash_update
														
 
															 from ..constants import *  # NOQA
														
 
															 from . import BaseTestCase
														
@@ -8,6 +8,21 @@ from . import BaseTestCase
 
															 #       See borg.selftest for details. If you add/remove test methods, update SELFTEST_COUNT
														
 
															+class ChunkerFixedTestCase(BaseTestCase):
														
 
															+
														
 
															+    def test_chunkify_just_blocks(self):
														
 
															+        data = b'foobar' * 1500
														
 
															+        chunker = ChunkerFixed(4096)
														
 
															+        parts = [c for c in chunker.chunkify(BytesIO(data))]
														
 
															+        self.assert_equal(parts, [data[0:4096], data[4096:8192], data[8192:]])
														
 
															+
														
 
															+    def test_chunkify_header_and_blocks(self):
														
 
															+        data = b'foobar' * 1500
														
 
															+        chunker = ChunkerFixed(4096, 123)
														
 
															+        parts = [c for c in chunker.chunkify(BytesIO(data))]
														
 
															+        self.assert_equal(parts, [data[0:123], data[123:123+4096], data[123+4096:123+8192], data[123+8192:]])
														
 
															+
														
 
															+
														
 
															 class ChunkerTestCase(BaseTestCase):
														
 
															     def test_chunkify(self):
														
--- a/src/borg/testsuite/helpers.py
+++ b/src/borg/testsuite/helpers.py
@@ -315,6 +315,8 @@ def test_chunkerparams():
 
															     assert ChunkerParams('10,23,16,4095') == ('buzhash', 10, 23, 16, 4095)
														
 
															     with pytest.raises(ValueError):
														
 
															         ChunkerParams('19,24,21,4095')
														
 
															+    assert ChunkerParams('fixed,4096') == ('fixed', 4096, 0)
														
 
															+    assert ChunkerParams('fixed,4096,200') == ('fixed', 4096, 200)
														
 
															     with pytest.raises(ValueError):
														
 
															         ChunkerParams('crap,1,2,3,4')