4 سال پیش · 2851a84003
--- a/docs/internals/data-structures.rst
+++ b/docs/internals/data-structures.rst
@@ -596,14 +596,20 @@ The fixed chunker triggers (chunks) at even-spaced offsets, e.g. every 4MiB,
 
				 producing chunks of same block size (the last chunk is not required to be
			
 
				 full-size).
			
 
				 
			
 
				-Optionally, it can cut the first "header" chunk with a different size (the
			
 
				-default is not to have a differently sized header chunk).
			
 
				+Optionally, it supports processing a differently sized "header" first, before
			
 
				+it starts to cut chunks of the desired block size.
			
 
				+The default is not to have a differently sized header.
			
 
				 
			
 
				 ``borg create --chunker-params fixed,BLOCK_SIZE[,HEADER_SIZE]``
			
 
				 
			
 
				 - BLOCK_SIZE: no default value, multiple of the system page size (usually 4096
			
 
				   bytes) recommended. E.g.: 4194304 would cut 4MiB sized chunks.
			
 
				-- HEADER_SIZE: optional, defaults to 0 (no header chunk).
			
 
				+- HEADER_SIZE: optional, defaults to 0 (no header).
			
 
				+
			
 
				+The fixed chunker also supports processing sparse files (reading only the ranges
			
 
				+with data and seeking over the empty hole ranges).
			
 
				+
			
 
				+``borg create --sparse --chunker-params fixed,BLOCK_SIZE[,HEADER_SIZE]``
			
 
				 
			
 
				 "buzhash" chunker
			
 
				 +++++++++++++++++
			
--- a/docs/usage/create.rst
+++ b/docs/usage/create.rst
@@ -43,7 +43,10 @@ Examples
 
				     $ borg create --chunker-params buzhash,10,23,16,4095 /path/to/repo::small /smallstuff
			
 
				 
			
 
				     # Backup a raw device (must not be active/in use/mounted at that time)
			
 
				-    $ dd if=/dev/sdx bs=4M | borg create --chunker-params fixed,4194304 /path/to/repo::my-sdx -
			
 
				+    $ borg create --read-special --chunker-params fixed,4194304 /path/to/repo::my-sdx /dev/sdX
			
 
				+
			
 
				+    # Backup a sparse disk image (must not be active/in use/mounted at that time)
			
 
				+    $ borg create --sparse --chunker-params fixed,4194304 /path/to/repo::my-disk my-disk.raw
			
 
				 
			
 
				     # No compression (none)
			
 
				     $ borg create --compression none /path/to/repo::arch ~
			
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@@ -1172,7 +1172,7 @@ class FilesystemObjectProcessors:
 
				 
			
 
				     def __init__(self, *, metadata_collector, cache, key,
			
 
				                  add_item, process_file_chunks,
			
 
				-                 chunker_params, show_progress):
			
 
				+                 chunker_params, show_progress, sparse):
			
 
				         self.metadata_collector = metadata_collector
			
 
				         self.cache = cache
			
 
				         self.key = key
			
@@ -1183,7 +1183,7 @@ class FilesystemObjectProcessors:
 
				         self.hard_links = {}
			
 
				         self.stats = Statistics()  # threading: done by cache (including progress)
			
 
				         self.cwd = os.getcwd()
			
 
				-        self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed)
			
 
				+        self.chunker = get_chunker(*chunker_params, seed=key.chunk_seed, sparse=sparse)
			
 
				 
			
 
				     @contextmanager
			
 
				     def create_helper(self, path, st, status=None, hardlinkable=True):
			
--- a/src/borg/archiver.py
+++ b/src/borg/archiver.py
@@ -653,7 +653,7 @@ class Archiver:
 
				                     checkpoint_interval=args.checkpoint_interval, rechunkify=False)
			
 
				                 fso = FilesystemObjectProcessors(metadata_collector=metadata_collector, cache=cache, key=key,
			
 
				                     process_file_chunks=cp.process_file_chunks, add_item=archive.add_item,
			
 
				-                    chunker_params=args.chunker_params, show_progress=args.progress)
			
 
				+                    chunker_params=args.chunker_params, show_progress=args.progress, sparse=args.sparse)
			
 
				                 create_inner(archive, cache, fso)
			
 
				         else:
			
 
				             create_inner(None, None, None)
			
@@ -3354,6 +3354,8 @@ class Archiver:
 
				                               help='deprecated, use ``--noflags`` instead')
			
 
				         fs_group.add_argument('--noflags', dest='noflags', action='store_true',
			
 
				                               help='do not read and store flags (e.g. NODUMP, IMMUTABLE) into archive')
			
 
				+        fs_group.add_argument('--sparse', dest='sparse', action='store_true',
			
 
				+                               help='detect sparse holes in input (supported only by fixed chunker)')
			
 
				         fs_group.add_argument('--files-cache', metavar='MODE', dest='files_cache_mode',
			
 
				                               type=FilesCacheMode, default=DEFAULT_FILES_CACHE_MODE_UI,
			
 
				                               help='operate files cache in MODE. default: %s' % DEFAULT_FILES_CACHE_MODE_UI)
			
--- a/src/borg/chunker.pyx
+++ b/src/borg/chunker.pyx
@@ -2,6 +2,7 @@
 
				 
			
 
				 API_VERSION = '1.2_01'
			
 
				 
			
 
				+import errno
			
 
				 import os
			
 
				 
			
 
				 from libc.stdlib cimport free
			
@@ -19,65 +20,176 @@ cdef extern from "_chunker.c":
 
				     uint32_t c_buzhash_update  "buzhash_update"(uint32_t sum, unsigned char remove, unsigned char add, size_t len, uint32_t *h)
			
 
				 
			
 
				 
			
 
				-class ChunkerFixed:
			
 
				+# this will be True if Python's seek implementation supports data/holes seeking.
			
 
				+# this does not imply that it will actually work on the filesystem,
			
 
				+# because the FS also needs to support this.
			
 
				+has_seek_hole = hasattr(os, 'SEEK_DATA') and hasattr(os, 'SEEK_HOLE')
			
 
				+
			
 
				+
			
 
				+def dread(offset, size, fd=None, fh=-1):
			
 
				+    use_fh = fh >= 0
			
 
				+    if use_fh:
			
 
				+        data = os.read(fh, size)
			
 
				+        if hasattr(os, 'posix_fadvise'):
			
 
				+            # UNIX only and, in case of block sizes that are not a multiple of the
			
 
				+            # system's page size, better be used with a bug fixed linux kernel > 4.6.0,
			
 
				+            # see comment/workaround in _chunker.c and borgbackup issue #907.
			
 
				+            os.posix_fadvise(fh, offset, len(data), os.POSIX_FADV_DONTNEED)
			
 
				+        return data
			
 
				+    else:
			
 
				+        return fd.read(size)
			
 
				+
			
 
				+
			
 
				+def dseek(amount, whence, fd=None, fh=-1):
			
 
				+    use_fh = fh >= 0
			
 
				+    if use_fh:
			
 
				+        return os.lseek(fh, amount, whence)
			
 
				+    else:
			
 
				+        return fd.seek(amount, whence)
			
 
				+
			
 
				+
			
 
				+def dpos_curr_end(fd=None, fh=-1):
			
 
				+    """
			
 
				+    determine current position, file end position (== file length)
			
 
				     """
			
 
				-    Fixed blocksize Chunker, optionally supporting a header block of different size.
			
 
				+    curr = dseek(0, os.SEEK_CUR, fd, fh)
			
 
				+    end = dseek(0, os.SEEK_END, fd, fh)
			
 
				+    dseek(curr, os.SEEK_SET, fd, fh)
			
 
				+    return curr, end
			
 
				 
			
 
				-    This is a very simple chunker for input data with known block/record sizes:
			
 
				+
			
 
				+def sparsemap(fd=None, fh=-1):
			
 
				+    """
			
 
				+    generator yielding a (start, length, is_data) tuple for each range.
			
 
				+    is_data is indicating data ranges (True) or hole ranges (False).
			
 
				+
			
 
				+    note:
			
 
				+    the map is generated starting from the current seek position (it
			
 
				+    is not required to be 0 / to be at the start of the file) and
			
 
				+    work from there up to the end of the file.
			
 
				+    when the generator is finished, the file pointer position will be
			
 
				+    reset to where it was before calling this function.
			
 
				+    """
			
 
				+    curr, file_len = dpos_curr_end(fd, fh)
			
 
				+    start = curr
			
 
				+    try:
			
 
				+        whence = os.SEEK_HOLE
			
 
				+        while True:
			
 
				+            is_data = whence == os.SEEK_HOLE  # True: range with data, False: range is a hole
			
 
				+            try:
			
 
				+                end = dseek(start, whence, fd, fh)
			
 
				+            except OSError as e:
			
 
				+                if e.errno == errno.ENXIO:
			
 
				+                    if not is_data and start < file_len:
			
 
				+                        # if there is a hole at the end of a file, we can not find the file end by SEEK_DATA
			
 
				+                        # (because we run into ENXIO), thus we must manually deal with this case:
			
 
				+                        end = file_len
			
 
				+                        yield (start, end - start, is_data)
			
 
				+                    break
			
 
				+                else:
			
 
				+                    raise
			
 
				+            # we do not want to yield zero-length ranges with start == end:
			
 
				+            if end > start:
			
 
				+                yield (start, end - start, is_data)
			
 
				+            start = end
			
 
				+            whence = os.SEEK_DATA if is_data else os.SEEK_HOLE
			
 
				+    finally:
			
 
				+        # seek to same position as before calling this function
			
 
				+        dseek(curr, os.SEEK_SET, fd, fh)
			
 
				+
			
 
				+
			
 
				+class ChunkerFixed:
			
 
				+    """
			
 
				+    This is a simple chunker for input data with data usually staying at same
			
 
				+    offset and / or with known block/record sizes:
			
 
				 
			
 
				     - raw disk images
			
 
				     - block devices
			
 
				     - database files with simple header + fixed-size records layout
			
 
				 
			
 
				-    Note: the last block of the input data may be less than the block size,
			
 
				+    It optionally supports:
			
 
				+
			
 
				+    - a header block of different size
			
 
				+    - using a sparsemap to only read data ranges and seek over hole ranges
			
 
				+      for sparse files.
			
 
				+    - using an externally given filemap to only read specific ranges from
			
 
				+      a file.
			
 
				+
			
 
				+    Note: the last block of a data or hole range may be less than the block size,
			
 
				           this is supported and not considered to be an error.
			
 
				     """
			
 
				-    def __init__(self, block_size, header_size=0):
			
 
				+    def __init__(self, block_size, header_size=0, sparse=False):
			
 
				         self.block_size = block_size
			
 
				         self.header_size = header_size
			
 
				+        # should borg try to do sparse input processing?
			
 
				+        # whether it actually can be done depends on the input file being seekable.
			
 
				+        self.try_sparse = sparse and has_seek_hole
			
 
				+        self.zeros = memoryview(bytes(block_size))
			
 
				 
			
 
				-    def chunkify(self, fd, fh=-1):
			
 
				+    def chunkify(self, fd=None, fh=-1, fmap=None):
			
 
				         """
			
 
				         Cut a file into chunks.
			
 
				 
			
 
				         :param fd: Python file object
			
 
				         :param fh: OS-level file handle (if available),
			
 
				                    defaults to -1 which means not to use OS-level fd.
			
 
				+        :param fmap: a file map, same format as generated by sparsemap
			
 
				         """
			
 
				+        if fmap is None:
			
 
				+            if self.try_sparse:
			
 
				+                try:
			
 
				+                    if self.header_size > 0:
			
 
				+                        header_map = [(0, self.header_size, True), ]
			
 
				+                        dseek(self.header_size, os.SEEK_SET, fd, fh)
			
 
				+                        body_map = list(sparsemap(fd, fh))
			
 
				+                        dseek(0, os.SEEK_SET, fd, fh)
			
 
				+                    else:
			
 
				+                        header_map = []
			
 
				+                        body_map = list(sparsemap(fd, fh))
			
 
				+                except OSError as err:
			
 
				+                    # seeking did not work
			
 
				+                    pass
			
 
				+                else:
			
 
				+                    fmap = header_map + body_map
			
 
				+
			
 
				+            if fmap is None:
			
 
				+                # either sparse processing (building the fmap) was not tried or it failed.
			
 
				+                # in these cases, we just build a "fake fmap" that considers the whole file
			
 
				+                # as range(s) of data (no holes), so we can use the same code.
			
 
				+                # we build different fmaps here for the purpose of correct block alignment
			
 
				+                # with or without a header block (of potentially different size).
			
 
				+                if self.header_size > 0:
			
 
				+                    header_map = [(0, self.header_size, True), ]
			
 
				+                    body_map = [(self.header_size, 2 ** 62, True), ]
			
 
				+                else:
			
 
				+                    header_map = []
			
 
				+                    body_map = [(0, 2 ** 62, True), ]
			
 
				+                fmap = header_map + body_map
			
 
				+
			
 
				         offset = 0
			
 
				-        use_fh = fh >= 0
			
 
				-
			
 
				-        if use_fh:
			
 
				-            def read(size):
			
 
				-                nonlocal offset
			
 
				-                data = os.read(fh, size)
			
 
				-                amount = len(data)
			
 
				-                if hasattr(os, 'posix_fadvise'):
			
 
				-                    # UNIX only and, in case of block sizes that are not a multiple of the
			
 
				-                    # system's page size, better be used with a bug fixed linux kernel > 4.6.0,
			
 
				-                    # see comment/workaround in _chunker.c and borgbackup issue #907.
			
 
				-                    os.posix_fadvise(fh, offset, amount, os.POSIX_FADV_DONTNEED)
			
 
				-                offset += amount
			
 
				-                return data
			
 
				-        else:
			
 
				-            def read(size):
			
 
				-                nonlocal offset
			
 
				-                data = fd.read(size)
			
 
				-                amount = len(data)
			
 
				-                offset += amount
			
 
				-                return data
			
 
				-
			
 
				-        if self.header_size > 0:
			
 
				-            data = read(self.header_size)
			
 
				-            if data:
			
 
				-                yield data
			
 
				-        else:
			
 
				-            data = True  # get into next while loop
			
 
				-        while data:
			
 
				-            data = read(self.block_size)
			
 
				-            if data:
			
 
				-                yield data
			
 
				-        # empty data means we are at EOF and we terminate the generator.
			
 
				+        for range_start, range_size, is_data in fmap:
			
 
				+            if range_start != offset:
			
 
				+                # this is for the case when the fmap does not cover the file completely,
			
 
				+                # e.g. it could be without the ranges of holes or of unchanged data.
			
 
				+                offset = range_start
			
 
				+                dseek(offset, os.SEEK_SET, fd, fh)
			
 
				+            while range_size:
			
 
				+                wanted = min(range_size, self.block_size)
			
 
				+                if is_data:
			
 
				+                    # read block from the range
			
 
				+                    data = dread(offset, wanted, fd, fh)
			
 
				+                else:  # hole
			
 
				+                    # seek over block from the range
			
 
				+                    pos = dseek(wanted, os.SEEK_CUR, fd, fh)
			
 
				+                    data = self.zeros[:pos - offset]  # for now, create zero-bytes here
			
 
				+                got = len(data)
			
 
				+                if got > 0:
			
 
				+                    offset += got
			
 
				+                    range_size -= got
			
 
				+                    yield data  # later, use a better api that tags data vs. hole
			
 
				+                if got < wanted:
			
 
				+                    # we did not get enough data, looks like EOF.
			
 
				+                    return
			
 
				 
			
 
				 
			
 
				 cdef class Chunker:
			
@@ -129,7 +241,8 @@ def get_chunker(algo, *params, **kw):
 
				         seed = kw['seed']
			
 
				         return Chunker(seed, *params)
			
 
				     if algo == 'fixed':
			
 
				-        return ChunkerFixed(*params)
			
 
				+        sparse = kw['sparse']
			
 
				+        return ChunkerFixed(*params, sparse=sparse)
			
 
				     raise TypeError('unsupported chunker algo %r' % algo)
			
 
				 
			
 
				 
			
--- a/src/borg/testsuite/archiver.py
+++ b/src/borg/testsuite/archiver.py
@@ -32,6 +32,7 @@ from .. import xattr, helpers, platform
 
				 from ..archive import Archive, ChunkBuffer
			
 
				 from ..archiver import Archiver, parse_storage_quota, PURE_PYTHON_MSGPACK_WARNING
			
 
				 from ..cache import Cache, LocalCache
			
 
				+from ..chunker import has_seek_hole
			
 
				 from ..constants import *  # NOQA
			
 
				 from ..crypto.low_level import bytes_to_long, num_cipher_blocks
			
 
				 from ..crypto.key import KeyfileKeyBase, RepoKey, KeyfileKey, Passphrase, TAMRequiredError
			
@@ -563,7 +564,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
 
				             sparse = True
			
 
				             if sparse and hasattr(st, 'st_blocks') and st.st_blocks * 512 >= st.st_size:
			
 
				                 sparse = False
			
 
				-            if sparse and hasattr(os, 'SEEK_HOLE') and hasattr(os, 'SEEK_DATA'):
			
 
				+            if sparse and has_seek_hole:
			
 
				                 with open(fn, 'rb') as fd:
			
 
				                     # only check if the first hole is as expected, because the 2nd hole check
			
 
				                     # is problematic on xfs due to its "dynamic speculative EOF preallocation
			
--- a/src/borg/testsuite/chunker.py
+++ b/src/borg/testsuite/chunker.py
@@ -22,6 +22,55 @@ class ChunkerFixedTestCase(BaseTestCase):
 
				         parts = [c for c in chunker.chunkify(BytesIO(data))]
			
 
				         self.assert_equal(parts, [data[0:123], data[123:123+4096], data[123+4096:123+8192], data[123+8192:]])
			
 
				 
			
 
    def test_chunkify_just_blocks_fmap_complete(self):
        # a fmap that covers the whole input with data ranges must give the same
        # chunks as chunking without a fmap.
        data = b'foobar' * 1500
        chunker = ChunkerFixed(4096)
        # fmap entries are (start, length, is_data) - the 2nd element is a length,
        # not an end offset, so consecutive ranges must not overlap.
        fmap = [
            (0, 4096, True),
            (4096, 4096, True),
            (8192, 99999999, True),  # oversized on purpose: reading just stops at EOF
        ]
        parts = [c for c in chunker.chunkify(BytesIO(data), fmap=fmap)]
        self.assert_equal(parts, [data[0:4096], data[4096:8192], data[8192:]])
			
 
				+
			
 
    def test_chunkify_header_and_blocks_fmap_complete(self):
        # header range followed by three block-sized data ranges; the last block
        # is cut short by the end of the input.
        header, block = 123, 4096
        data = b'foobar' * 1500
        fmap = [(0, header, True), ]
        fmap += [(header + n * block, block, True) for n in range(3)]
        chunker = ChunkerFixed(block, header)
        parts = list(chunker.chunkify(BytesIO(data), fmap=fmap))
        expected = [
            data[:header],
            data[header:header + block],
            data[header + block:header + 2 * block],
            data[header + 2 * block:],
        ]
        self.assert_equal(parts, expected)
			
 
				+
			
 
    def test_chunkify_header_and_blocks_fmap_zeros(self):
        header, block = 123, 4096
        data = b'H' * header + b'_' * block + b'X' * block + b'_' * block
        # the '_' ranges are declared as holes (is_data == False)
        fmap = [
            (0, header, True),
            (header, block, False),
            (header + block, block, True),
            (header + 2 * block, block, False),
        ]
        chunker = ChunkerFixed(block, header)
        parts = list(chunker.chunkify(BytesIO(data), fmap=fmap))
        # because we marked the '_' ranges as holes, we will get '\0' ranges instead!
        zero_block = b'\0' * block
        expected = [
            data[:header],
            zero_block,
            data[header + block:header + 2 * block],
            zero_block,
        ]
        self.assert_equal(parts, expected)
			
 
				+
			
 
    def test_chunkify_header_and_blocks_fmap_partial(self):
        header, block = 123, 4096
        data = b'H' * header + b'_' * block + b'X' * block + b'_' * block
        # only the header and the 'X' block are listed, the two '_' ranges
        # at (header, block) and (header + 2 * block, block) are left out.
        fmap = [
            (0, header, True),
            (header + block, block, True),
        ]
        chunker = ChunkerFixed(block, header)
        parts = list(chunker.chunkify(BytesIO(data), fmap=fmap))
        # because we left out the '_' ranges from the fmap, we will not get them at all!
        expected = [data[:header], data[header + block:header + 2 * block]]
        self.assert_equal(parts, expected)
			
 
				+
			
 
				 
			
 
				 class ChunkerTestCase(BaseTestCase):
			
 
				 
			
--- a/src/borg/testsuite/chunker_pytest.py
+++ b/src/borg/testsuite/chunker_pytest.py
@@ -0,0 +1,139 @@
 
				+from io import BytesIO
			
 
				+import os
			
 
				+import tempfile
			
 
				+
			
 
				+import pytest
			
 
				+
			
 
				+from ..chunker import ChunkerFixed, sparsemap, has_seek_hole
			
 
				+from ..constants import *  # NOQA
			
 
				+
			
 
				+BS = 4096  # fs block size
			
 
				+
			
 
				+# some sparse files. X = content blocks, _ = sparse blocks.
			
 
				+# X__XXX____
			
 
				+map_sparse1 = [
			
 
				+    (0 * BS, 1 * BS, True),
			
 
				+    (1 * BS, 2 * BS, False),
			
 
				+    (3 * BS, 3 * BS, True),
			
 
				+    (6 * BS, 4 * BS, False),
			
 
				+]
			
 
				+
			
 
				+# _XX___XXXX
			
 
				+map_sparse2 = [
			
 
				+    (0 * BS, 1 * BS, False),
			
 
				+    (1 * BS, 2 * BS, True),
			
 
				+    (3 * BS, 3 * BS, False),
			
 
				+    (6 * BS, 4 * BS, True),
			
 
				+]
			
 
				+
			
 
				+# XXX
			
 
				+map_notsparse = [(0 * BS, 3 * BS, True), ]
			
 
				+
			
 
				+# ___
			
 
				+map_onlysparse = [(0 * BS, 3 * BS, False), ]
			
 
				+
			
 
				+
			
 
				+def make_sparsefile(fname, sparsemap, header_size=0):
			
 
				+    with open(fname, 'wb') as fd:
			
 
				+        total = 0
			
 
				+        if header_size:
			
 
				+            fd.write(b'H' * header_size)
			
 
				+            total += header_size
			
 
				+        for offset, size, is_data in sparsemap:
			
 
				+            if is_data:
			
 
				+                fd.write(b'X' * size)
			
 
				+            else:
			
 
				+                fd.seek(size, os.SEEK_CUR)
			
 
				+            total += size
			
 
				+        fd.truncate(total)
			
 
				+    assert os.path.getsize(fname) == total
			
 
				+
			
 
				+
			
 
				+def make_content(sparsemap, header_size=0):
			
 
				+    with BytesIO() as fd:
			
 
				+        total = 0
			
 
				+        if header_size:
			
 
				+            fd.write(b'H' * header_size)
			
 
				+            total += header_size
			
 
				+        for offset, size, is_data in sparsemap:
			
 
				+            if is_data:
			
 
				+                fd.write(b'X' * size)
			
 
				+            else:
			
 
				+                fd.write(b'\0' * size)
			
 
				+            total += size
			
 
				+        content = fd.getvalue()
			
 
				+    assert len(content) == total
			
 
				+    return content
			
 
				+
			
 
				+
			
 
				+def fs_supports_sparse():
			
 
				+    if not has_seek_hole:
			
 
				+        return False
			
 
				+    with tempfile.TemporaryDirectory() as tmpdir:
			
 
				+        fn = os.path.join(tmpdir, 'test_sparse')
			
 
				+        make_sparsefile(fn, [(0, BS, False), (BS, BS, True)])
			
 
				+        with open(fn, 'rb') as f:
			
 
				+            try:
			
 
				+                offset_hole = f.seek(0, os.SEEK_HOLE)
			
 
				+                offset_data = f.seek(0, os.SEEK_DATA)
			
 
				+            except OSError:
			
 
				+                # no sparse support if these seeks do not work
			
 
				+                return False
			
 
				+        return offset_hole == 0 and offset_data == BS
			
 
				+
			
 
				+
			
 
				+@pytest.mark.skipif(not fs_supports_sparse(), reason='fs does not support sparse files')
			
 
				+@pytest.mark.parametrize("fname, sparse_map", [
			
 
				+    ('sparse1', map_sparse1),
			
 
				+    ('sparse2', map_sparse2),
			
 
				+    ('onlysparse', map_onlysparse),
			
 
				+    ('notsparse', map_notsparse),
			
 
				+])
			
 
				+def test_sparsemap(tmpdir, fname, sparse_map):
			
 
				+
			
 
				+    def get_sparsemap_fh(fname):
			
 
				+        fh = os.open(fname, flags=os.O_RDONLY)
			
 
				+        try:
			
 
				+            return list(sparsemap(fh=fh))
			
 
				+        finally:
			
 
				+            os.close(fh)
			
 
				+
			
 
				+    def get_sparsemap_fd(fname):
			
 
				+        with open(fname, 'rb') as fd:
			
 
				+            return list(sparsemap(fd=fd))
			
 
				+
			
 
				+    fn = str(tmpdir / fname)
			
 
				+    make_sparsefile(fn, sparse_map)
			
 
				+    assert get_sparsemap_fh(fn) == sparse_map
			
 
				+    assert get_sparsemap_fd(fn) == sparse_map
			
 
				+
			
 
				+
			
 
				+@pytest.mark.skipif(not fs_supports_sparse(), reason='fs does not support sparse files')
			
 
				+@pytest.mark.parametrize("fname, sparse_map, header_size, sparse", [
			
 
				+    ('sparse1', map_sparse1, 0, False),
			
 
				+    ('sparse1', map_sparse1, 0, True),
			
 
				+    ('sparse1', map_sparse1, BS, False),
			
 
				+    ('sparse1', map_sparse1, BS, True),
			
 
				+    ('sparse2', map_sparse2, 0, False),
			
 
				+    ('sparse2', map_sparse2, 0, True),
			
 
				+    ('sparse2', map_sparse2, BS, False),
			
 
				+    ('sparse2', map_sparse2, BS, True),
			
 
				+    ('onlysparse', map_onlysparse, 0, False),
			
 
				+    ('onlysparse', map_onlysparse, 0, True),
			
 
				+    ('onlysparse', map_onlysparse, BS, False),
			
 
				+    ('onlysparse', map_onlysparse, BS, True),
			
 
				+    ('notsparse', map_notsparse, 0, False),
			
 
				+    ('notsparse', map_notsparse, 0, True),
			
 
				+    ('notsparse', map_notsparse, BS, False),
			
 
				+    ('notsparse', map_notsparse, BS, True),
			
 
				+])
			
 
				+def test_chunkify_sparse(tmpdir, fname, sparse_map, header_size, sparse):
			
 
				+
			
 
				+    def get_chunks(fname, sparse, header_size):
			
 
				+        chunker = ChunkerFixed(4096, header_size=header_size, sparse=sparse)
			
 
				+        with open(fname, 'rb') as fd:
			
 
				+            return b''.join([c for c in chunker.chunkify(fd)])
			
 
				+
			
 
				+    fn = str(tmpdir / fname)
			
 
				+    make_sparsefile(fn, sparse_map, header_size=header_size)
			
 
				+    get_chunks(fn, sparse=sparse, header_size=header_size) == make_content(sparse_map, header_size=header_size)