
Merge pull request #7350 from ThomasWaldmann/testing-chunker

implement "fail" chunker for testing purposes
TW, 2 years ago
parent
commit
6cfe77ebaf

+ 1 - 1
setup.cfg

@@ -170,7 +170,7 @@ per_file_ignores =
     src/borg/testsuite/archiver/return_codes.py:F401,F405,F811
     src/borg/testsuite/benchmark.py:F401,F811
     src/borg/testsuite/chunker.py:E501,F405
-    src/borg/testsuite/chunker_pytest.py:F401
+    src/borg/testsuite/chunker_pytest.py:F401,F405
     src/borg/testsuite/chunker_slow.py:F405
     src/borg/testsuite/crypto.py:E126,E501,E741
     src/borg/testsuite/file_integrity.py:F401

+ 4 - 0
src/borg/chunker.pyi

@@ -17,6 +17,10 @@ fmap_entry = Tuple[int, int, bool]
 
 def sparsemap(fd: BinaryIO = None, fh: int = -1) -> List[fmap_entry]: ...
 
+class ChunkerFailing:
+    def __init__(self, block_size: int, map: str) -> None: ...
+    def chunkify(self, fd: BinaryIO = None, fh: int = -1) -> Iterator: ...
+
 class ChunkerFixed:
     def __init__(self, block_size: int, header_size: int = 0, sparse: bool = False) -> None: ...
     def chunkify(self, fd: BinaryIO = None, fh: int = -1, fmap: List[fmap_entry] = None) -> Iterator: ...

+ 49 - 0
src/borg/chunker.pyx

@@ -123,6 +123,53 @@ def sparsemap(fd=None, fh=-1):
         dseek(curr, os.SEEK_SET, fd, fh)
 
 
+class ChunkerFailing:
+    """
+    This is a very simple chunker for testing purposes.
+
+    Reads block_size chunks; the <map> string decides, per block, whether the read
+    succeeds ("r"/"R") or raises a simulated I/O error ("e"/"E").
+    """
+    def __init__(self, block_size, map):
+        self.block_size = block_size
+        # one char per block: r/R = successful read, e/E = I/O Error, e.g.: "rrrrErrrEEr"
+        # blocks beyond the map will have the same behaviour as the last map char indicates.
+        map = map.upper()
+        if not set(map).issubset({"R", "E"}):
+            raise ValueError("unsupported map character")
+        self.map = map
+        self.count = 0
+        self.chunking_time = 0.0  # not updated, just provided so that caller does not crash
+
+    def chunkify(self, fd=None, fh=-1):
+        """
+        Cut a file into chunks.
+
+        :param fd: Python file object
+        :param fh: OS-level file handle (if available),
+                   defaults to -1 which means not to use OS-level fd.
+        """
+        use_fh = fh >= 0
+        wanted = self.block_size
+        while True:
+            data = os.read(fh, wanted) if use_fh else fd.read(wanted)
+            got = len(data)
+            if got > 0:
+                idx = self.count if self.count < len(self.map) else -1
+                behaviour = self.map[idx]
+                if behaviour == "E":
+                    self.count += 1
+                    fname = None if use_fh else getattr(fd, "name", None)
+                    raise OSError(errno.EIO, "simulated I/O error", fname)
+                elif behaviour == "R":
+                    self.count += 1
+                    yield Chunk(data, size=got, allocation=CH_DATA)
+                else:
+                    raise ValueError("unsupported map character")
+            if got < wanted:
+                # we did not get enough data, looks like EOF.
+                return
+
+
 class ChunkerFixed:
     """
     This is a simple chunker for input data with data usually staying at same
@@ -294,6 +341,8 @@ def get_chunker(algo, *params, **kw):
     if algo == 'fixed':
         sparse = kw['sparse']
         return ChunkerFixed(*params, sparse=sparse)
+    if algo == 'fail':
+        return ChunkerFailing(*params)
     raise TypeError('unsupported chunker algo %r' % algo)
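Tying the pieces together, a minimal usage sketch of the new "fail" code path. This is an illustration only, assuming a borg checkout where the compiled borg.chunker extension is importable; the block size and map string are arbitrary example values, not borg defaults.

import errno
from io import BytesIO

from borg.chunker import get_chunker

# "fail" takes two positional params: the block size and the per-block map string.
chunker = get_chunker("fail", 512, "RRE")
data = bytes(3 * 512)
with BytesIO(data) as fd:
    chunks = chunker.chunkify(fd)
    assert next(chunks).data == data[:512]      # block 0: "R" -> normal chunk
    assert next(chunks).data == data[512:1024]  # block 1: "R" -> normal chunk
    try:
        next(chunks)                            # block 2: "E" -> simulated EIO
    except OSError as exc:
        assert exc.errno == errno.EIO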
 
 

+ 1 - 0
src/borg/constants.py

@@ -78,6 +78,7 @@ FD_MAX_AGE = 4 * 60  # 4 minutes
 # chunker algorithms
 CH_BUZHASH = "buzhash"
 CH_FIXED = "fixed"
+CH_FAIL = "fail"
 
 # buzhash chunker params
 CHUNK_MIN_EXP = 19  # 2**19 == 512kiB

+ 4 - 0
src/borg/helpers/parseformat.py

@@ -139,6 +139,10 @@ def ChunkerParams(s):
     if count == 0:
         raise ValueError("no chunker params given")
     algo = params[0].lower()
+    if algo == CH_FAIL and count == 3:
+        block_size = int(params[1])
+        fail_map = str(params[2])
+        return algo, block_size, fail_map
     if algo == CH_FIXED and 2 <= count <= 3:  # fixed, block_size[, header_size]
         block_size = int(params[1])
         header_size = int(params[2]) if count == 3 else 0
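For reference, the new branch returns the algorithm name, block size, and fail map as a plain 3-tuple. A rough standalone illustration of that parsing, with the CH_FAIL constant spelled out as a string and made-up example values:

# Hypothetical stand-in for ChunkerParams("fail,4096,RRREEERRR")
spec = "fail,4096,RRREEERRR"
params = spec.split(",")
algo, block_size, fail_map = params[0].lower(), int(params[1]), str(params[2])
assert (algo, block_size, fail_map) == ("fail", 4096, "RRREEERRR")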

+ 26 - 0
src/borg/testsuite/archiver/create_cmd.py

@@ -191,6 +191,32 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         out = self.cmd(f"--repo={self.repository_location}", "extract", "test", "stdin", "--stdout", binary_output=True)
         assert out == input_data
 
+    def test_create_erroneous_file(self):
+        chunk_size = 1000  # fixed chunker with this size, also volume based checkpointing after that volume
+        self.create_regular_file(os.path.join(self.input_path, "file1"), size=chunk_size * 2)
+        self.create_regular_file(os.path.join(self.input_path, "file2"), size=chunk_size * 2)
+        self.create_regular_file(os.path.join(self.input_path, "file3"), size=chunk_size * 2)
+        self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
+        flist = "".join(f"input/file{n}\n" for n in range(1, 4))
+        out = self.cmd(
+            f"--repo={self.repository_location}",
+            "create",
+            f"--chunker-params=fail,{chunk_size},RRRERRR",
+            "--paths-from-stdin",
+            "--list",
+            "test",
+            input=flist.encode(),
+            exit_code=1,
+        )
+        assert "E input/file2" in out
+        # repo looking good overall? checks for rc == 0.
+        self.cmd(f"--repo={self.repository_location}", "check", "--debug")
+        # check files in created archive
+        out = self.cmd(f"--repo={self.repository_location}", "list", "test")
+        assert "input/file1" in out
+        assert "input/file2" not in out
+        assert "input/file3" in out
+
     def test_create_content_from_command(self):
         self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
         input_data = "some test content"

+ 27 - 1
src/borg/testsuite/chunker_pytest.py

@@ -5,7 +5,7 @@ import tempfile
 import pytest
 
 from .chunker import cf
-from ..chunker import ChunkerFixed, sparsemap, has_seek_hole
+from ..chunker import ChunkerFixed, sparsemap, has_seek_hole, ChunkerFailing
 from ..constants import *  # NOQA
 
 BS = 4096  # fs block size
@@ -125,3 +125,29 @@ def test_chunkify_sparse(tmpdir, fname, sparse_map, header_size, sparse):
     fn = str(tmpdir / fname)
     make_sparsefile(fn, sparse_map, header_size=header_size)
     assert get_chunks(fn, sparse=sparse, header_size=header_size) == make_content(sparse_map, header_size=header_size)
+
+
+def test_chunker_failing():
+    SIZE = 4096
+    data = bytes(2 * SIZE + 1000)
+    chunker = ChunkerFailing(SIZE, "rEErrr")  # cut <SIZE> chunks, start failing at block 1, fail 2 times
+    with BytesIO(data) as fd:
+        ch = chunker.chunkify(fd)
+        c1 = next(ch)  # block 0: ok
+        assert c1.meta["allocation"] == CH_DATA
+        assert c1.data == data[:SIZE]
+        with pytest.raises(OSError):  # block 1: failure 1
+            next(ch)
+    with BytesIO(data) as fd:
+        ch = chunker.chunkify(fd)
+        with pytest.raises(OSError):  # block 2: failure 2
+            next(ch)
+    with BytesIO(data) as fd:
+        ch = chunker.chunkify(fd)
+        c1 = next(ch)  # block 3: success!
+        c2 = next(ch)  # block 4: success!
+        c3 = next(ch)  # block 5: success!
+        assert c1.meta["allocation"] == c2.meta["allocation"] == c3.meta["allocation"] == CH_DATA
+        assert c1.data == data[:SIZE]
+        assert c2.data == data[SIZE : 2 * SIZE]
+        assert c3.data == data[2 * SIZE :]