|
@@ -5,7 +5,7 @@ import tempfile
|
|
import pytest
|
|
import pytest
|
|
|
|
|
|
from .chunker_test import cf
|
|
from .chunker_test import cf
|
|
-from ..chunker import Chunker, ChunkerFixed, sparsemap, has_seek_hole, ChunkerFailing, FileReader, Chunk
|
|
|
|
|
|
+from ..chunker import Chunker, ChunkerFixed, sparsemap, has_seek_hole, ChunkerFailing, FileReader, FileFMAPReader, Chunk
|
|
from ..constants import * # NOQA
|
|
from ..constants import * # NOQA
|
|
|
|
|
|
BS = 4096 # fs block size
|
|
BS = 4096 # fs block size
|
|
@@ -284,3 +284,190 @@ def test_filereader_read_with_mock(mock_chunks, read_size, expected_data, expect
|
|
assert chunk.data == expected_data
|
|
assert chunk.data == expected_data
|
|
assert chunk.meta["allocation"] == expected_allocation
|
|
assert chunk.meta["allocation"] == expected_allocation
|
|
assert chunk.meta["size"] == expected_size
|
|
assert chunk.meta["size"] == expected_size
|
|
|
|
+
|
|
|
|
+
|
|
|
|
@pytest.mark.parametrize(
    "file_content, read_size, expected_chunks",
    [
        # An empty file yields no chunks at all.
        (b"", 1024, []),
        # Content shorter than read_size comes back as a single data chunk.
        (b"data", 1024, [{"data": b"data", "allocation": CH_DATA, "size": 4}]),
        # Content longer than read_size is cut into read_size pieces plus a short tail.
        (
            b"0123456789",
            4,
            [
                {"data": b"0123", "allocation": CH_DATA, "size": 4},
                {"data": b"4567", "allocation": CH_DATA, "size": 4},
                {"data": b"89", "allocation": CH_DATA, "size": 2},
            ],
        ),
        # All-zero blocks between data are expected as CH_ALLOC chunks without a payload.
        (
            b"data" + b"\0" * 8 + b"more",
            4,
            [
                {"data": b"data", "allocation": CH_DATA, "size": 4},
                {"data": None, "allocation": CH_ALLOC, "size": 4},
                {"data": None, "allocation": CH_ALLOC, "size": 4},
                {"data": b"more", "allocation": CH_DATA, "size": 4},
            ],
        ),
    ],
)
def test_filefmapreader_basic(file_content, read_size, expected_chunks):
    """Check what FileFMAPReader.blockify() yields for in-memory files read without an fmap.

    Each parametrized case supplies the raw file bytes, the read size and the
    list of expected chunks (payload, allocation type and size per chunk).
    """
    stream = BytesIO(file_content)
    reader = FileFMAPReader(fd=stream, fh=-1, read_size=read_size, sparse=False, fmap=None)

    # Drain the generator so count and contents can be compared.
    produced = list(reader.blockify())

    # Same number of chunks first, then a pairwise comparison in order.
    assert len(produced) == len(expected_chunks)
    for got, want in zip(produced, expected_chunks):
        assert got.data == want["data"]
        assert got.meta["allocation"] == want["allocation"]
        assert got.meta["size"] == want["size"]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
@pytest.mark.parametrize(
    "file_content, fmap, read_size, expected_chunks",
    [
        # fmap mixing data and hole ranges: the bytes under the hole range are not returned.
        (
            b"dataXXXXmore",
            [(0, 4, True), (4, 4, False), (8, 4, True)],
            4,
            [
                {"data": b"data", "allocation": CH_DATA, "size": 4},
                {"data": None, "allocation": CH_HOLE, "size": 4},
                {"data": b"more", "allocation": CH_DATA, "size": 4},
            ],
        ),
        # fmap consisting of a single hole range, split into read_size pieces.
        (
            b"\0\0\0\0\0\0\0\0",
            [(0, 8, False)],
            4,
            [{"data": None, "allocation": CH_HOLE, "size": 4}, {"data": None, "allocation": CH_HOLE, "size": 4}],
        ),
        # fmap consisting of a single data range, split into read_size pieces.
        (
            b"datadata",
            [(0, 8, True)],
            4,
            [{"data": b"data", "allocation": CH_DATA, "size": 4}, {"data": b"data", "allocation": CH_DATA, "size": 4}],
        ),
        # fmap covering only the second half of the file: reading has to start at offset 12.
        (
            b"skipthispartreadthispart",
            [(12, 12, True)],
            4,
            [
                {"data": b"read", "allocation": CH_DATA, "size": 4},
                {"data": b"this", "allocation": CH_DATA, "size": 4},
                {"data": b"part", "allocation": CH_DATA, "size": 4},
            ],
        ),
    ],
)
def test_filefmapreader_with_fmap(file_content, fmap, read_size, expected_chunks):
    """Check FileFMAPReader.blockify() when the caller passes in its own file map.

    Every fmap entry is a (start, size, is_data) tuple; the expected chunks
    list the payload, allocation type and size for each yielded chunk.
    """
    stream = BytesIO(file_content)
    reader = FileFMAPReader(fd=stream, fh=-1, read_size=read_size, sparse=False, fmap=fmap)

    # Drain the generator so count and contents can be compared.
    produced = list(reader.blockify())

    # Same number of chunks first, then a pairwise comparison in order.
    assert len(produced) == len(expected_chunks)
    for got, want in zip(produced, expected_chunks):
        assert got.data == want["data"]
        assert got.meta["allocation"] == want["allocation"]
        assert got.meta["size"] == want["size"]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
@pytest.mark.parametrize(
    "zeros_length, read_size, expected_allocation",
    [
        (4, 4, CH_ALLOC),  # small block of zeros
        (8192, 4096, CH_ALLOC),  # large block of zeros (two read_size blocks)
    ],
)
def test_filefmapreader_allocation_types(zeros_length, read_size, expected_allocation):
    """Check that an all-zero file is reported with the expected allocation type.

    The file content is ``zeros_length`` zero bytes, read without sparse
    detection and without a caller-supplied fmap. Every yielded chunk must
    carry ``expected_allocation`` and no payload.
    """
    file_content = b"\0" * zeros_length

    reader = FileFMAPReader(fd=BytesIO(file_content), fh=-1, read_size=read_size, sparse=False, fmap=None)
    chunks = list(reader.blockify())

    # Without these two checks the loop below would pass vacuously if the
    # reader yielded nothing, or silently lost part of the file.
    assert chunks
    assert sum(chunk.meta["size"] for chunk in chunks) == zeros_length

    for chunk in chunks:
        assert chunk.meta["allocation"] == expected_allocation
        assert chunk.data is None  # all-zero blocks carry no payload
|
|
|
|
+
|
|
|
|
+
|
|
|
|
@pytest.mark.skipif(not fs_supports_sparse(), reason="fs does not support sparse files")
def test_filefmapreader_with_real_sparse_file(tmpdir):
    """Check FileFMAPReader against a real sparse file, with and without sparse detection.

    The file layout is one block of data, a hole of two blocks, and one more
    block of data (block size ``BS``). It is read twice with ``read_size=BS``:
    once with ``sparse=True`` and once with ``sparse=False``.
    """
    fn = str(tmpdir / "sparse_file")
    sparse_map = [(0, BS, True), (BS, 2 * BS, False), (3 * BS, BS, True)]
    # NOTE(review): make_sparsefile is defined elsewhere; this test assumes it writes
    # non-zero bytes for the True ranges and leaves real holes for the False ranges.
    make_sparsefile(fn, sparse_map)

    # sparse=True: the hole range is reported as CH_HOLE chunks without payload.
    expected_chunks_sparse = [
        {"data_type": bytes, "allocation": CH_DATA, "size": BS},
        {"data_type": type(None), "allocation": CH_HOLE, "size": BS},
        {"data_type": type(None), "allocation": CH_HOLE, "size": BS},
        {"data_type": bytes, "allocation": CH_DATA, "size": BS},
    ]

    # sparse=False: holes are not looked up, but they read back as zeros, and the
    # other FileFMAPReader tests in this module expect all-zero blocks to be
    # reported as CH_ALLOC chunks without payload (not as CH_DATA with bytes).
    expected_chunks_non_sparse = [
        {"data_type": bytes, "allocation": CH_DATA, "size": BS},
        {"data_type": type(None), "allocation": CH_ALLOC, "size": BS},
        {"data_type": type(None), "allocation": CH_ALLOC, "size": BS},
        {"data_type": bytes, "allocation": CH_DATA, "size": BS},
    ]

    # Run the identical read-and-compare sequence once per mode.
    for sparse, expected_chunks in ((True, expected_chunks_sparse), (False, expected_chunks_non_sparse)):
        with open(fn, "rb") as fd:
            reader = FileFMAPReader(fd=fd, fh=-1, read_size=BS, sparse=sparse, fmap=None)
            chunks = list(reader.blockify())

        assert len(chunks) == len(expected_chunks)
        for chunk, expected in zip(chunks, expected_chunks):
            assert isinstance(chunk.data, expected["data_type"])
            assert chunk.meta["allocation"] == expected["allocation"]
            assert chunk.meta["size"] == expected["size"]
|
|
|
|
+
|
|
|
|
+
|
|
|
|
def test_filefmapreader_build_fmap():
    """Check the file map that _build_fmap() produces for a non-sparse reader without an fmap."""
    reader = FileFMAPReader(fd=BytesIO(b"data"), fh=-1, read_size=4, sparse=False, fmap=None)

    default_map = reader._build_fmap()

    # Exactly one range is expected: a data range starting at offset 0 with size 2**62.
    assert len(default_map) == 1
    only_range = default_map[0]
    assert only_range[0] == 0  # start
    assert only_range[1] == 2**62  # size
    assert only_range[2] is True  # is_data
|