
add a test for borg 1 -> 2 repo objects transformation

Thomas Waldmann, 2 years ago
Parent
Commit
b6cbf045ff
4 changed files with 49 additions and 10 deletions
  1. src/borg/archive.py  (+4, -2)
  2. src/borg/repoobj.py  (+5, -4)
  3. src/borg/testsuite/remote.py  (+2, -2)
  4. src/borg/testsuite/repoobj.py  (+38, -2)

src/borg/archive.py  (+4, -2)

@@ -2269,8 +2269,10 @@ class ArchiveRecreater:
         overwrite = self.recompress
         if self.recompress and not self.always_recompress and chunk_id in self.cache.chunks:
             # Check if this chunk is already compressed the way we want it
-            _, old_chunk = self.repo_objs.parse(chunk_id, self.repository.get(chunk_id), decompress=False)
-            compressor_cls, level = Compressor.detect(old_chunk)
+            old_meta, old_data = self.repo_objs.parse(chunk_id, self.repository.get(chunk_id), decompress=False)
+            # TODO simplify code below
+            compr_hdr = bytes((old_meta["ctype"], old_meta["clevel"]))
+            compressor_cls, level = Compressor.detect(compr_hdr)
             if (
                 compressor_cls.name == self.repo_objs.compressor.decide(data).name
                 and level == self.repo_objs.compressor.level

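A note on the hunk above: with the new repo object format, parse(..., decompress=False) no longer returns data prefixed by the compressor's type/level bytes, so the recompress check has to rebuild that 2-byte header from the metadata before Compressor.detect() can identify the old compressor. A minimal sketch of that step, using the ctype/clevel values asserted in the new test further down (the top-level import path is an assumption; inside the tree it is a relative import):

    from borg.compress import Compressor, LZ4  # assumed install-time import path

    # ctype/clevel are stored as two separate ints in the object metadata
    ctype, clevel = LZ4.ID[0], 0xFF            # lz4 default; level byte as asserted in the test
    compr_hdr = bytes((ctype, clevel))         # rebuild the 2-byte type/level header
    compressor_cls, level = Compressor.detect(compr_hdr)
    compressor = compressor_cls(level=level)   # a compressor matching the stored chunk (lz4 ignores the level)
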
src/borg/repoobj.py  (+5, -4)

@@ -44,11 +44,12 @@ class RepoObj:
             data_compressed = self.compressor.compress(data)  # TODO: compressor also adds compressor type/level bytes
             ctype = data_compressed[0]
             clevel = data_compressed[1]
+            data_compressed = data_compressed[2:]  # strip the type/level bytes
         else:
             assert isinstance(size, int)
             assert isinstance(ctype, int)
             assert isinstance(clevel, int)
-            data_compressed = data  # is already compressed
+            data_compressed = data  # is already compressed, is NOT prefixed by type/level bytes
         meta["size"] = size
         meta["csize"] = len(data_compressed)
         meta["ctype"] = ctype
@@ -94,10 +95,10 @@ class RepoObj:
             compr_hdr = bytes((ctype, clevel))
             compressor_cls, compression_level = Compressor.detect(compr_hdr)
             compressor = compressor_cls(level=compression_level)
-            data = compressor.decompress(data_compressed)  # TODO: decompressor still needs type/level bytes
+            data = compressor.decompress(compr_hdr + data_compressed)  # TODO: decompressor still needs type/level bytes
             self.key.assert_id(id, data)
         else:
-            data = data_compressed
+            data = data_compressed  # does not include the type/level bytes
         return meta, data
 
 
@@ -125,7 +126,7 @@ class RepoObj1:  # legacy
             data_compressed = self.compressor.compress(data)  # TODO: compressor also adds compressor type/level bytes
         else:
             assert isinstance(size, int)
-            data_compressed = data  # is already compressed
+            data_compressed = data  # is already compressed, must include type/level bytes
         data_encrypted = self.key.encrypt(id, data_compressed)
         return data_encrypted

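Taken together, the RepoObj changes above make borg 2 keep ctype/clevel only in the object metadata and store the compressed payload without the 2-byte prefix, while the legacy RepoObj1 payload still carries it; parse() re-prepends the header only for the decompressor. A rough round-trip sketch under that scheme, assuming a key object like the PlaintextKey fixture used in the tests, and id_hash/format/parse as exercised by the round-trip tests below (the top-level import path is an assumption):

    from borg.repoobj import RepoObj  # assumed install-time import path

    repo_objs = RepoObj(key)                # key: e.g. a PlaintextKey, as in the test fixture
    data = b"foobar" * 10
    id = repo_objs.id_hash(data)
    cdata = repo_objs.format(id, {}, data)  # compress, strip type/level bytes, record them in meta
    meta, got = repo_objs.parse(id, cdata)  # re-prepend the header, decompress, verify the id
    assert got == data
    assert {"size", "csize", "ctype", "clevel"} <= meta.keys()
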
src/borg/testsuite/remote.py  (+2, -2)

@@ -191,7 +191,7 @@ class TestRepositoryCache:
         list(decrypted_cache.get_many([H1, H2, H3]))

         iterator = decrypted_cache.get_many([H1, H2, H3])
-        assert next(iterator) == (6, b"1234")
+        assert next(iterator) == (4, b"1234")
 
         with open(decrypted_cache.key_filename(H2), "a+b") as fd:
             fd.seek(-1, io.SEEK_END)
@@ -201,4 +201,4 @@ class TestRepositoryCache:
             fd.truncate()

         with pytest.raises(IntegrityError):
-            assert next(iterator) == (26, b"5678")
+            assert next(iterator) == (4, b"5678")

src/borg/testsuite/repoobj.py  (+38, -2)

@@ -3,6 +3,7 @@ import pytest
 from ..crypto.key import PlaintextKey
 from ..repository import Repository
 from ..repoobj import RepoObj, RepoObj1
+from ..compress import LZ4
 
 
 @pytest.fixture
@@ -34,9 +35,8 @@ def test_format_parse_roundtrip(key):
     assert data == got_data

     edata = repo_objs.extract_crypted_data(cdata)
-    compressor = repo_objs.compressor
     key = repo_objs.key
-    assert edata.startswith(bytes((key.TYPE, compressor.ID[0], compressor.level)))
+    assert edata.startswith(bytes((key.TYPE,)))
 
 
 def test_format_parse_roundtrip_borg1(key):  # legacy
@@ -57,3 +57,39 @@ def test_format_parse_roundtrip_borg1(key):  # legacy
     compressor = repo_objs.compressor
     key = repo_objs.key
     assert edata.startswith(bytes((key.TYPE, compressor.ID[0], compressor.level)))
+
+
+def test_borg1_borg2_transition(key):
+    # borg transfer reads borg 1.x repo objects (without decompressing them),
+    # writes borg 2 repo objects (giving already compressed data to avoid compression).
+    meta = {}  # borg1 does not support this kind of metadata
+    data = b"foobar" * 10
+    len_data = len(data)
+    repo_objs1 = RepoObj1(key)
+    id = repo_objs1.id_hash(data)
+    borg1_cdata = repo_objs1.format(id, meta, data)
+    meta1, compr_data1 = repo_objs1.parse(id, borg1_cdata, decompress=False)  # borg transfer avoids (de)compression
+    # in borg 1, we can only get this metadata after decrypting the whole chunk (and we do not have "size" here):
+    assert meta1["ctype"] == LZ4.ID[0]  # default compression
+    assert meta1["clevel"] == 0xFF  # lz4 does not know levels (yet?)
+    assert meta1["csize"] < len_data  # lz4 should make it smaller
+
+    repo_objs2 = RepoObj(key)
+    # note: as we did not decompress, we do not have "size" and we need to get it from somewhere else.
+    # here, we just use len_data. for borg transfer, we also know the size from another metadata source.
+    borg2_cdata = repo_objs2.format(
+        id, meta1, compr_data1[2:], compress=False, size=len_data, ctype=meta1["ctype"], clevel=meta1["clevel"]
+    )
+    meta2, data2 = repo_objs2.parse(id, borg2_cdata)
+    assert data2 == data
+    assert meta2["ctype"] == LZ4.ID[0]
+    assert meta2["clevel"] == 0xFF
+    assert meta2["csize"] == meta1["csize"] - 2  # borg2 does not store the type/level bytes there
+    assert meta2["size"] == len_data
+
+    meta2 = repo_objs2.parse_meta(id, borg2_cdata)
+    # now, in borg 2, we have nice and separately decrypted metadata (no need to decrypt the whole chunk):
+    assert meta2["ctype"] == LZ4.ID[0]
+    assert meta2["clevel"] == 0xFF
+    assert meta2["csize"] == meta1["csize"] - 2  # borg2 does not store the type/level bytes there
+    assert meta2["size"] == len_data
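
As a usage note, the test above mirrors the per-chunk step that borg transfer performs: parse the borg 1.x object without decompressing, drop the 2 leading type/level bytes from the payload, and hand the still-compressed data to the borg 2 formatter together with size/ctype/clevel. A condensed, hypothetical helper showing that shape (the uncompressed size is not recoverable at this point and must come from other metadata, as noted in the test):

    def transfer_chunk(repo_objs1, repo_objs2, id, borg1_cdata, known_size):
        # Hypothetical helper: re-wrap one borg 1.x repo object as a borg 2 object.
        meta, compr_data = repo_objs1.parse(id, borg1_cdata, decompress=False)
        return repo_objs2.format(
            id,
            meta,
            compr_data[2:],      # payload without the legacy type/level prefix
            compress=False,      # keep the existing compression as-is
            size=known_size,     # supplied by the caller from other metadata
            ctype=meta["ctype"],
            clevel=meta["clevel"],
        )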