4 rokov pred · 2851a84003
--- a/docs/internals/data-structures.rst
+++ b/docs/internals/data-structures.rst
@@ -596,14 +596,20 @@ The fixed chunker triggers (chunks) at even-spaced offsets, e.g. every 4MiB,
 
															 producing chunks of same block size (the last chunk is not required to be
														
 
															 full-size).
														
 
															-Optionally, it can cut the first "header" chunk with a different size (the
														
 
															-default is not to have a differently sized header chunk).
														
 
															+Optionally, it supports processing a differently sized "header" first, before
														
 
															+it starts to cut chunks of the desired block size.
														
 
															+The default is not to have a differently sized header.
														
 
															 ``borg create --chunker-params fixed,BLOCK_SIZE[,HEADER_SIZE]``
														
 
															 - BLOCK_SIZE: no default value, multiple of the system page size (usually 4096
														
 
															   bytes) recommended. E.g.: 4194304 would cut 4MiB sized chunks.
														
 
															-- HEADER_SIZE: optional, defaults to 0 (no header chunk).
														
 
															+- HEADER_SIZE: optional, defaults to 0 (no header).
														
 
															+
														
 
															+The fixed chunker also supports processing sparse files (reading only the ranges
														
 
															+with data and seeking over the empty hole ranges).
														
 
															+
														
 
															+``borg create --sparse --chunker-params fixed,BLOCK_SIZE[,HEADER_SIZE]``
														
 
															 "buzhash" chunker
														
 
															 +++++++++++++++++
														
--- a/docs/usage/create.rst
+++ b/docs/usage/create.rst
@@ -43,7 +43,10 @@ Examples
 
															     $ borg create --chunker-params buzhash,10,23,16,4095 /path/to/repo::small /smallstuff
														
 
															     # Backup a raw device (must not be active/in use/mounted at that time)
														
 
															-    $ dd if=/dev/sdx bs=4M | borg create --chunker-params fixed,4194304 /path/to/repo::my-sdx -
														
 
															+    $ borg create --read-special --chunker-params fixed,4194304 /path/to/repo::my-sdx /dev/sdX
														
 
															+
														
 
															+    # Backup a sparse disk image (must not be active/in use/mounted at that time)
														
 
															+    $ borg create --sparse --chunker-params fixed,4194304 /path/to/repo::my-disk my-disk.raw
														
 
															     # No compression (none)
														
 
															     $ borg create --compression none /path/to/repo::arch ~
														
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@@ -1172,7 +1172,7 @@ class FilesystemObjectProcessors:
 
															     def __init__(self, *, metadata_collector, cache, key,
														
 
															                  add_item, process_file_chunks,
														
 
															-                 chunker_params, show_progress):
														
 
															+                 chunker_params, show_progress, sparse):
														
 
															         self.metadata_collector = metadata_collector
														
 
															         self.cache = cache
														
 
															         self.key = key
														
@@ -1183,7 +1183,7 @@ class FilesystemObjectProcessors:
 
															         self.hard_links = {}
														
 
															         self.stats = Statistics()  # threading: done by cache (including progress)
														
 
															         self.cwd = os.getcwd()
														
 
															-        self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed)
														
 
															+        self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed, sparse=sparse)
														
 
															     @contextmanager
														
 
															     def create_helper(self, path, st, status=None, hardlinkable=True):
														
--- a/src/borg/archiver.py
+++ b/src/borg/archiver.py
@@ -653,7 +653,7 @@ class Archiver:
 
															                     checkpoint_interval=args.checkpoint_interval, rechunkify=False)
														
 
															                 fso = FilesystemObjectProcessors(metadata_collector=metadata_collector, cache=cache, key=key,
														
 
															                     process_file_chunks=cp.process_file_chunks, add_item=archive.add_item,
														
 
															-                    chunker_params=args.chunker_params, show_progress=args.progress)
														
 
															+                    chunker_params=args.chunker_params, show_progress=args.progress, sparse=args.sparse)
														
 
															                 create_inner(archive, cache, fso)
														
 
															         else:
														
 
															             create_inner(None, None, None)
														
@@ -3354,6 +3354,8 @@ class Archiver:
 
															                               help='deprecated, use ``--noflags`` instead')
														
 
															         fs_group.add_argument('--noflags', dest='noflags', action='store_true',
														
 
															                               help='do not read and store flags (e.g. NODUMP, IMMUTABLE) into archive')
														
 
															+        fs_group.add_argument('--sparse', dest='sparse', action='store_true',
														
 
															+                               help='detect sparse holes in input (supported only by fixed chunker)')
														
 
															         fs_group.add_argument('--files-cache', metavar='MODE', dest='files_cache_mode',
														
 
															                               type=FilesCacheMode, default=DEFAULT_FILES_CACHE_MODE_UI,
														
 
															                               help='operate files cache in MODE. default: %s' % DEFAULT_FILES_CACHE_MODE_UI)
														
--- a/src/borg/chunker.pyx
+++ b/src/borg/chunker.pyx
@@ -2,6 +2,7 @@
 
															 API_VERSION = '1.2_01'
														
 
															+import errno
														
 
															 import os
														
 
															 from libc.stdlib cimport free
														
@@ -19,65 +20,176 @@ cdef extern from "_chunker.c":
 
															     uint32_t c_buzhash_update  "buzhash_update"(uint32_t sum, unsigned char remove, unsigned char add, size_t len, uint32_t *h)
														
 
															-class ChunkerFixed:
														
 
															+# this will be True if Python's seek implementation supports data/holes seeking.
														
 
															+# this does not imply that it will actually work on the filesystem,
														
 
															+# because the FS also needs to support this.
														
 
															+has_seek_hole = hasattr(os, 'SEEK_DATA') and hasattr(os, 'SEEK_HOLE')
														
 
															+
														
 
															+
														
 
															+def dread(offset, size, fd=None, fh=-1):
														
 
															+    use_fh = fh >= 0
														
 
															+    if use_fh:
														
 
															+        data = os.read(fh, size)
														
 
															+        if hasattr(os, 'posix_fadvise'):
														
 
															+            # UNIX only and, in case of block sizes that are not a multiple of the
														
 
															+            # system's page size, better be used with a bug fixed linux kernel > 4.6.0,
														
 
															+            # see comment/workaround in _chunker.c and borgbackup issue #907.
														
 
															+            os.posix_fadvise(fh, offset, len(data), os.POSIX_FADV_DONTNEED)
														
 
															+        return data
														
 
															+    else:
														
 
															+        return fd.read(size)
														
 
															+
														
 
															+
														
 
															+def dseek(amount, whence, fd=None, fh=-1):
														
 
															+    use_fh = fh >= 0
														
 
															+    if use_fh:
														
 
															+        return os.lseek(fh, amount, whence)
														
 
															+    else:
														
 
															+        return fd.seek(amount, whence)
														
 
															+
														
 
															+
														
 
															+def dpos_curr_end(fd=None, fh=-1):
														
 
															+    """
														
 
															+    determine current position, file end position (== file length)
														
 
															     """
														
 
															-    Fixed blocksize Chunker, optionally supporting a header block of different size.
														
 
															+    curr = dseek(0, os.SEEK_CUR, fd, fh)
														
 
															+    end = dseek(0, os.SEEK_END, fd, fh)
														
 
															+    dseek(curr, os.SEEK_SET, fd, fh)
														
 
															+    return curr, end
														
 
															-    This is a very simple chunker for input data with known block/record sizes:
														
 
															+
														
 
															+def sparsemap(fd=None, fh=-1):
														
 
															+    """
														
 
															+    generator yielding a (start, length, is_data) tuple for each range.
														
 
															+    is_data is indicating data ranges (True) or hole ranges (False).
														
 
															+
														
 
															+    note:
														
 
															+    the map is generated starting from the current seek position (it
														
 
															+    is not required to be 0 / to be at the start of the file) and
														
 
															+    covers everything from there up to the end of the file.
														
 
															+    when the generator is finished, the file pointer position will be
														
 
															+    reset to where it was before calling this function.
														
 
															+    """
														
 
															+    curr, file_len = dpos_curr_end(fd, fh)
														
 
															+    start = curr
														
 
															+    try:
														
 
															+        whence = os.SEEK_HOLE
														
 
															+        while True:
														
 
															+            is_data = whence == os.SEEK_HOLE  # True: range with data, False: range is a hole
														
 
															+            try:
														
 
															+                end = dseek(start, whence, fd, fh)
														
 
															+            except OSError as e:
														
 
															+                if e.errno == errno.ENXIO:
														
 
															+                    if not is_data and start < file_len:
														
 
															+                        # if there is a hole at the end of a file, we can not find the file end by SEEK_DATA
														
 
															+                        # (because we run into ENXIO), thus we must manually deal with this case:
														
 
															+                        end = file_len
														
 
															+                        yield (start, end - start, is_data)
														
 
															+                    break
														
 
															+                else:
														
 
															+                    raise
														
 
															+            # we do not want to yield zero-length ranges with start == end:
														
 
															+            if end > start:
														
 
															+                yield (start, end - start, is_data)
														
 
															+            start = end
														
 
															+            whence = os.SEEK_DATA if is_data else os.SEEK_HOLE
														
 
															+    finally:
														
 
															+        # seek to same position as before calling this function
														
 
															+        dseek(curr, os.SEEK_SET, fd, fh)
														
 
															+
														
 
															+
														
 
															+class ChunkerFixed:
														
 
															+    """
														
 
															+    This is a simple chunker for input data with data usually staying at the same
														
 
															+    offset and / or with known block/record sizes:
														
 
															     - raw disk images
														
 
															     - block devices
														
 
															     - database files with simple header + fixed-size records layout
														
 
															-    Note: the last block of the input data may be less than the block size,
														
 
															+    It optionally supports:
														
 
															+
														
 
															+    - a header block of different size
														
 
															+    - using a sparsemap to only read data ranges and seek over hole ranges
														
 
															+      for sparse files.
														
 
															+    - using an externally given filemap to only read specific ranges from
														
 
															+      a file.
														
 
															+
														
 
															+    Note: the last block of a data or hole range may be less than the block size,
														
 
															           this is supported and not considered to be an error.
														
 
															     """
														
 
															-    def __init__(self, block_size, header_size=0):
														
 
															+    def __init__(self, block_size, header_size=0, sparse=False):
														
 
															         self.block_size = block_size
														
 
															         self.header_size = header_size
														
 
															+        # should borg try to do sparse input processing?
														
 
															+        # whether it actually can be done depends on the input file being seekable.
														
 
															+        self.try_sparse = sparse and has_seek_hole
														
 
															+        self.zeros = memoryview(bytes(block_size))
														
 
															-    def chunkify(self, fd, fh=-1):
														
 
															+    def chunkify(self, fd=None, fh=-1, fmap=None):
														
 
															         """
														
 
															         Cut a file into chunks.
														
 
															         :param fd: Python file object
														
 
															         :param fh: OS-level file handle (if available),
														
 
															                    defaults to -1 which means not to use OS-level fd.
														
 
															+        :param fmap: a file map, same format as generated by sparsemap
														
 
															         """
														
 
															+        if fmap is None:
														
 
															+            if self.try_sparse:
														
 
															+                try:
														
 
															+                    if self.header_size > 0:
														
 
															+                        header_map = [(0, self.header_size, True), ]
														
 
															+                        dseek(self.header_size, os.SEEK_SET, fd, fh)
														
 
															+                        body_map = list(sparsemap(fd, fh))
														
 
															+                        dseek(0, os.SEEK_SET, fd, fh)
														
 
															+                    else:
														
 
															+                        header_map = []
														
 
															+                        body_map = list(sparsemap(fd, fh))
														
 
															+                except OSError as err:
														
 
															+                    # seeking did not work
														
 
															+                    pass
														
 
															+                else:
														
 
															+                    fmap = header_map + body_map
														
 
															+
														
 
															+            if fmap is None:
														
 
															+                # either sparse processing (building the fmap) was not tried or it failed.
														
 
															+                # in these cases, we just build a "fake fmap" that considers the whole file
														
 
															+                # as range(s) of data (no holes), so we can use the same code.
														
 
															+                # we build different fmaps here for the purpose of correct block alignment
														
 
															+                # with or without a header block (of potentially different size).
														
 
															+                if self.header_size > 0:
														
 
															+                    header_map = [(0, self.header_size, True), ]
														
 
															+                    body_map = [(self.header_size, 2 ** 62, True), ]
														
 
															+                else:
														
 
															+                    header_map = []
														
 
															+                    body_map = [(0, 2 ** 62, True), ]
														
 
															+                fmap = header_map + body_map
														
 
															+
														
 
															         offset = 0
														
 
															-        use_fh = fh >= 0
														
 
															-
														
 
															-        if use_fh:
														
 
															-            def read(size):
														
 
															-                nonlocal offset
														
 
															-                data = os.read(fh, size)
														
 
															-                amount = len(data)
														
 
															-                if hasattr(os, 'posix_fadvise'):
														
 
															-                    # UNIX only and, in case of block sizes that are not a multiple of the
														
 
															-                    # system's page size, better be used with a bug fixed linux kernel > 4.6.0,
														
 
															-                    # see comment/workaround in _chunker.c and borgbackup issue #907.
														
 
															-                    os.posix_fadvise(fh, offset, amount, os.POSIX_FADV_DONTNEED)
														
 
															-                offset += amount
														
 
															-                return data
														
 
															-        else:
														
 
															-            def read(size):
														
 
															-                nonlocal offset
														
 
															-                data = fd.read(size)
														
 
															-                amount = len(data)
														
 
															-                offset += amount
														
 
															-                return data
														
 
															-
														
 
															-        if self.header_size > 0:
														
 
															-            data = read(self.header_size)
														
 
															-            if data:
														
 
															-                yield data
														
 
															-        else:
														
 
															-            data = True  # get into next while loop
														
 
															-        while data:
														
 
															-            data = read(self.block_size)
														
 
															-            if data:
														
 
															-                yield data
														
 
															-        # empty data means we are at EOF and we terminate the generator.
														
 
															+        for range_start, range_size, is_data in fmap:
														
 
															+            if range_start != offset:
														
 
															+                # this is for the case when the fmap does not cover the file completely,
														
 
															+                # e.g. it could be without the ranges of holes or of unchanged data.
														
 
															+                offset = range_start
														
 
															+                dseek(offset, os.SEEK_SET, fd, fh)
														
 
															+            while range_size:
														
 
															+                wanted = min(range_size, self.block_size)
														
 
															+                if is_data:
														
 
															+                    # read block from the range
														
 
															+                    data = dread(offset, wanted, fd, fh)
														
 
															+                else:  # hole
														
 
															+                    # seek over block from the range
														
 
															+                    pos = dseek(wanted, os.SEEK_CUR, fd, fh)
														
 
															+                    data = self.zeros[:pos - offset]  # for now, create zero-bytes here
														
 
															+                got = len(data)
														
 
															+                if got > 0:
														
 
															+                    offset += got
														
 
															+                    range_size -= got
														
 
															+                    yield data  # later, use a better api that tags data vs. hole
														
 
															+                if got < wanted:
														
 
															+                    # we did not get enough data, looks like EOF.
														
 
															+                    return
														
 
															 cdef class Chunker:
														
@@ -129,7 +241,8 @@ def get_chunker(algo, *params, **kw):
 
															         seed = kw['seed']
														
 
															         return Chunker(seed, *params)
														
 
															     if algo == 'fixed':
														
 
															-        return ChunkerFixed(*params)
														
 
															+        sparse = kw['sparse']
														
 
															+        return ChunkerFixed(*params, sparse=sparse)
														
 
															     raise TypeError('unsupported chunker algo %r' % algo)
														
--- a/src/borg/testsuite/archiver.py
+++ b/src/borg/testsuite/archiver.py
@@ -32,6 +32,7 @@ from .. import xattr, helpers, platform
 
															 from ..archive import Archive, ChunkBuffer
														
 
															 from ..archiver import Archiver, parse_storage_quota, PURE_PYTHON_MSGPACK_WARNING
														
 
															 from ..cache import Cache, LocalCache
														
 
															+from ..chunker import has_seek_hole
														
 
															 from ..constants import *  # NOQA
														
 
															 from ..crypto.low_level import bytes_to_long, num_cipher_blocks
														
 
															 from ..crypto.key import KeyfileKeyBase, RepoKey, KeyfileKey, Passphrase, TAMRequiredError
														
@@ -563,7 +564,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
 
															             sparse = True
														
 
															             if sparse and hasattr(st, 'st_blocks') and st.st_blocks * 512 >= st.st_size:
														
 
															                 sparse = False
														
 
															-            if sparse and hasattr(os, 'SEEK_HOLE') and hasattr(os, 'SEEK_DATA'):
														
 
															+            if sparse and has_seek_hole:
														
 
															                 with open(fn, 'rb') as fd:
														
 
															                     # only check if the first hole is as expected, because the 2nd hole check
														
 
															                     # is problematic on xfs due to its "dynamic speculative EOF preallocation
														
--- a/src/borg/testsuite/chunker.py
+++ b/src/borg/testsuite/chunker.py
@@ -22,6 +22,55 @@ class ChunkerFixedTestCase(BaseTestCase):
 
															         parts = [c for c in chunker.chunkify(BytesIO(data))]
														
 
															         self.assert_equal(parts, [data[0:123], data[123:123+4096], data[123+4096:123+8192], data[123+8192:]])
														
 
															+    def test_chunkify_just_blocks_fmap_complete(self):
														
 
															+        data = b'foobar' * 1500
														
 
															+        chunker = ChunkerFixed(4096)
														
 
															+        fmap = [
														
 
															+            (0, 4096, True),
														
 
															+            (4096, 8192, True),
														
 
															+            (8192, 99999999, True),
														
 
															+        ]
														
 
															+        parts = [c for c in chunker.chunkify(BytesIO(data), fmap=fmap)]
														
 
															+        self.assert_equal(parts, [data[0:4096], data[4096:8192], data[8192:]])
														
 
															+
														
 
															+    def test_chunkify_header_and_blocks_fmap_complete(self):
														
 
															+        data = b'foobar' * 1500
														
 
															+        chunker = ChunkerFixed(4096, 123)
														
 
															+        fmap = [
														
 
															+            (0, 123, True),
														
 
															+            (123, 4096, True),
														
 
															+            (123+4096, 4096, True),
														
 
															+            (123+8192, 4096, True),
														
 
															+        ]
														
 
															+        parts = [c for c in chunker.chunkify(BytesIO(data), fmap=fmap)]
														
 
															+        self.assert_equal(parts, [data[0:123], data[123:123+4096], data[123+4096:123+8192], data[123+8192:]])
														
 
															+
														
 
															+    def test_chunkify_header_and_blocks_fmap_zeros(self):
														
 
															+        data = b'H' * 123 + b'_' * 4096 + b'X' * 4096 + b'_' * 4096
														
 
															+        chunker = ChunkerFixed(4096, 123)
														
 
															+        fmap = [
														
 
															+            (0, 123, True),
														
 
															+            (123, 4096, False),
														
 
															+            (123+4096, 4096, True),
														
 
															+            (123+8192, 4096, False),
														
 
															+        ]
														
 
															+        parts = [c for c in chunker.chunkify(BytesIO(data), fmap=fmap)]
														
 
															+        # because we marked the '_' ranges as holes, we will get '\0' ranges instead!
														
 
															+        self.assert_equal(parts, [data[0:123], b'\0' * 4096, data[123+4096:123+8192], b'\0' * 4096])
														
 
															+
														
 
															+    def test_chunkify_header_and_blocks_fmap_partial(self):
														
 
															+        data = b'H' * 123 + b'_' * 4096 + b'X' * 4096 + b'_' * 4096
														
 
															+        chunker = ChunkerFixed(4096, 123)
														
 
															+        fmap = [
														
 
															+            (0, 123, True),
														
 
															+            # (123, 4096, False),
														
 
															+            (123+4096, 4096, True),
														
 
															+            # (123+8192, 4096, False),
														
 
															+        ]
														
 
															+        parts = [c for c in chunker.chunkify(BytesIO(data), fmap=fmap)]
														
 
															+        # because we left out the '_' ranges from the fmap, we will not get them at all!
														
 
															+        self.assert_equal(parts, [data[0:123], data[123+4096:123+8192]])
														
 
															+
														
 
															 class ChunkerTestCase(BaseTestCase):
														
--- a/src/borg/testsuite/chunker_pytest.py
+++ b/src/borg/testsuite/chunker_pytest.py
@@ -0,0 +1,139 @@
 
															+from io import BytesIO
														
 
															+import os
														
 
															+import tempfile
														
 
															+
														
 
															+import pytest
														
 
															+
														
 
															+from ..chunker import ChunkerFixed, sparsemap, has_seek_hole
														
 
															+from ..constants import *  # NOQA
														
 
															+
														
 
															+BS = 4096  # fs block size
														
 
															+
														
 
															+# some sparse files. X = content blocks, _ = sparse blocks.
														
 
															+# X__XXX____
														
 
															+map_sparse1 = [
														
 
															+    (0 * BS, 1 * BS, True),
														
 
															+    (1 * BS, 2 * BS, False),
														
 
															+    (3 * BS, 3 * BS, True),
														
 
															+    (6 * BS, 4 * BS, False),
														
 
															+]
														
 
															+
														
 
															+# _XX___XXXX
														
 
															+map_sparse2 = [
														
 
															+    (0 * BS, 1 * BS, False),
														
 
															+    (1 * BS, 2 * BS, True),
														
 
															+    (3 * BS, 3 * BS, False),
														
 
															+    (6 * BS, 4 * BS, True),
														
 
															+]
														
 
															+
														
 
															+# XXX
														
 
															+map_notsparse = [(0 * BS, 3 * BS, True), ]
														
 
															+
														
 
															+# ___
														
 
															+map_onlysparse = [(0 * BS, 3 * BS, False), ]
														
 
															+
														
 
															+
														
 
															+def make_sparsefile(fname, sparsemap, header_size=0):
														
 
															+    with open(fname, 'wb') as fd:
														
 
															+        total = 0
														
 
															+        if header_size:
														
 
															+            fd.write(b'H' * header_size)
														
 
															+            total += header_size
														
 
															+        for offset, size, is_data in sparsemap:
														
 
															+            if is_data:
														
 
															+                fd.write(b'X' * size)
														
 
															+            else:
														
 
															+                fd.seek(size, os.SEEK_CUR)
														
 
															+            total += size
														
 
															+        fd.truncate(total)
														
 
															+    assert os.path.getsize(fname) == total
														
 
															+
														
 
															+
														
 
															+def make_content(sparsemap, header_size=0):
														
 
															+    with BytesIO() as fd:
														
 
															+        total = 0
														
 
															+        if header_size:
														
 
															+            fd.write(b'H' * header_size)
														
 
															+            total += header_size
														
 
															+        for offset, size, is_data in sparsemap:
														
 
															+            if is_data:
														
 
															+                fd.write(b'X' * size)
														
 
															+            else:
														
 
															+                fd.write(b'\0' * size)
														
 
															+            total += size
														
 
															+        content = fd.getvalue()
														
 
															+    assert len(content) == total
														
 
															+    return content
														
 
															+
														
 
															+
														
 
															+def fs_supports_sparse():
														
 
															+    if not has_seek_hole:
														
 
															+        return False
														
 
															+    with tempfile.TemporaryDirectory() as tmpdir:
														
 
															+        fn = os.path.join(tmpdir, 'test_sparse')
														
 
															+        make_sparsefile(fn, [(0, BS, False), (BS, BS, True)])
														
 
															+        with open(fn, 'rb') as f:
														
 
															+            try:
														
 
															+                offset_hole = f.seek(0, os.SEEK_HOLE)
														
 
															+                offset_data = f.seek(0, os.SEEK_DATA)
														
 
															+            except OSError:
														
 
															+                # no sparse support if these seeks do not work
														
 
															+                return False
														
 
															+        return offset_hole == 0 and offset_data == BS
														
 
															+
														
 
															+
														
 
															+@pytest.mark.skipif(not fs_supports_sparse(), reason='fs does not support sparse files')
														
 
															+@pytest.mark.parametrize("fname, sparse_map", [
														
 
															+    ('sparse1', map_sparse1),
														
 
															+    ('sparse2', map_sparse2),
														
 
															+    ('onlysparse', map_onlysparse),
														
 
															+    ('notsparse', map_notsparse),
														
 
															+])
														
 
															+def test_sparsemap(tmpdir, fname, sparse_map):
														
 
															+
														
 
															+    def get_sparsemap_fh(fname):
														
 
															+        fh = os.open(fname, flags=os.O_RDONLY)
														
 
															+        try:
														
 
															+            return list(sparsemap(fh=fh))
														
 
															+        finally:
														
 
															+            os.close(fh)
														
 
															+
														
 
															+    def get_sparsemap_fd(fname):
														
 
															+        with open(fname, 'rb') as fd:
														
 
															+            return list(sparsemap(fd=fd))
														
 
															+
														
 
															+    fn = str(tmpdir / fname)
														
 
															+    make_sparsefile(fn, sparse_map)
														
 
															+    assert get_sparsemap_fh(fn) == sparse_map
														
 
															+    assert get_sparsemap_fd(fn) == sparse_map
														
 
															+
														
 
															+
														
 
															+@pytest.mark.skipif(not fs_supports_sparse(), reason='fs does not support sparse files')
														
 
															+@pytest.mark.parametrize("fname, sparse_map, header_size, sparse", [
														
 
															+    ('sparse1', map_sparse1, 0, False),
														
 
															+    ('sparse1', map_sparse1, 0, True),
														
 
															+    ('sparse1', map_sparse1, BS, False),
														
 
															+    ('sparse1', map_sparse1, BS, True),
														
 
															+    ('sparse2', map_sparse2, 0, False),
														
 
															+    ('sparse2', map_sparse2, 0, True),
														
 
															+    ('sparse2', map_sparse2, BS, False),
														
 
															+    ('sparse2', map_sparse2, BS, True),
														
 
															+    ('onlysparse', map_onlysparse, 0, False),
														
 
															+    ('onlysparse', map_onlysparse, 0, True),
														
 
															+    ('onlysparse', map_onlysparse, BS, False),
														
 
															+    ('onlysparse', map_onlysparse, BS, True),
														
 
															+    ('notsparse', map_notsparse, 0, False),
														
 
															+    ('notsparse', map_notsparse, 0, True),
														
 
															+    ('notsparse', map_notsparse, BS, False),
														
 
															+    ('notsparse', map_notsparse, BS, True),
														
 
															+])
														
 
															+def test_chunkify_sparse(tmpdir, fname, sparse_map, header_size, sparse):
														
 
															+
														
 
															+    def get_chunks(fname, sparse, header_size):
														
 
															+        chunker = ChunkerFixed(4096, header_size=header_size, sparse=sparse)
														
 
															+        with open(fname, 'rb') as fd:
														
 
															+            return b''.join([c for c in chunker.chunkify(fd)])
														
 
															+
														
 
															+    fn = str(tmpdir / fname)
														
 
															+    make_sparsefile(fn, sparse_map, header_size=header_size)
														
 
															+    get_chunks(fn, sparse=sparse, header_size=header_size) == make_content(sparse_map, header_size=header_size)