소스 검색

Merge pull request #5693 from ThomasWaldmann/repo-fixes-1.1

Repo changes backport to 1.1-maint
TW 4 년 전
부모
커밋
641418d06d
2개의 변경된 파일64개의 추가작업 그리고 11개의 파일을 삭제
  1. 22 8
      src/borg/repository.py
  2. 42 3
      src/borg/testsuite/repository.py

+ 22 - 8
src/borg/repository.py

@@ -774,6 +774,8 @@ class Repository:
                     try:
                         self.shadow_index[key].remove(segment)
                     except (KeyError, ValueError):
+                        # do not remove entry with empty shadowed_segments list here,
+                        # it is needed for shadowed_put_exists code (see below)!
                         pass
                 elif tag == TAG_DELETE and not in_index:
                     # If the shadow index doesn't contain this key, then we can't say if there's a shadowed older tag,
@@ -825,6 +827,11 @@ class Repository:
                             new_segment, size = self.io.write_delete(key)
                         self.compact[new_segment] += size
                         segments.setdefault(new_segment, 0)
+                    else:
+                        # we did not keep the delete tag for key (see if-branch)
+                        if not self.shadow_index[key]:
+                            # shadowed segments list is empty -> remove it
+                            del self.shadow_index[key]
             assert segments[segment] == 0, 'Corrupted segment reference count - corrupted index or hints'
             unused.append(segment)
             pi.show()
@@ -1135,13 +1142,11 @@ class Repository:
         except KeyError:
             pass
         else:
-            self.segments[segment] -= 1
-            size = self.io.read(segment, offset, id, read_data=False)
-            self.storage_quota_use -= size
-            self.compact[segment] += size
-            segment, size = self.io.write_delete(id)
-            self.compact[segment] += size
-            self.segments.setdefault(segment, 0)
+            # note: doing a delete first will do some bookkeeping.
+            # we do not want to update the shadow_index here, because
+            # we know already that we will PUT to this id, so it will
+            # be in the repo index (and we won't need it in the shadow_index).
+            self._delete(id, segment, offset, update_shadow_index=False)
         segment, offset = self.io.write_put(id, data)
         self.storage_quota_use += len(data) + self.io.put_header_fmt.size
         self.segments.setdefault(segment, 0)
@@ -1164,7 +1169,16 @@ class Repository:
             segment, offset = self.index.pop(id)
         except KeyError:
             raise self.ObjectNotFound(id, self.path) from None
-        self.shadow_index.setdefault(id, []).append(segment)
+        # if we get here, there is an object with this id in the repo,
+        # we write a DEL here that shadows the respective PUT.
+        # after the delete, the object is not in the repo index any more,
+        # for the compaction code, we need to update the shadow_index in this case.
+        self._delete(id, segment, offset, update_shadow_index=True)
+
+    def _delete(self, id, segment, offset, *, update_shadow_index):
+        # common code used by put and delete
+        if update_shadow_index:
+            self.shadow_index.setdefault(id, []).append(segment)
         self.segments[segment] -= 1
         size = self.io.read(segment, offset, id, read_data=False)
         self.storage_quota_use -= size

+ 42 - 3
src/borg/testsuite/repository.py

@@ -14,7 +14,7 @@ from ..helpers import IntegrityError
 from ..helpers import msgpack
 from ..locking import Lock, LockFailed
 from ..remote import RemoteRepository, InvalidRPCMethod, PathNotAllowed, ConnectionClosedWithHint, handle_remote_line
-from ..repository import Repository, LoggedIO, MAGIC, MAX_DATA_SIZE, TAG_DELETE
+from ..repository import Repository, LoggedIO, MAGIC, MAX_DATA_SIZE, TAG_DELETE, TAG_PUT, TAG_COMMIT
 from . import BaseTestCase
 from .hashindex import H
 
@@ -54,6 +54,16 @@ class RepositoryTestCaseBase(BaseTestCase):
         self.repository.put(H(2), b'boo')
         self.repository.delete(H(3))
 
+    def repo_dump(self, label=None):
+        label = label + ': ' if label is not None else ''
+        H_trans = {H(i): i for i in range(10)}
+        H_trans[None] = -1  # key == None appears in commits
+        tag_trans = {TAG_PUT: 'put', TAG_DELETE: 'del', TAG_COMMIT: 'comm'}
+        for segment, fn in self.repository.io.segment_iterator():
+            for tag, key, offset, size in self.repository.io.iter_objects(segment):
+                print("%s%s H(%d) -> %s[%d..+%d]" % (label, tag_trans[tag], H_trans[key], fn, offset, size))
+        print()
+
 
 class RepositoryTestCase(RepositoryTestCaseBase):
 
@@ -313,8 +323,10 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase):
         self.repository.put(H(1), b'1')
         self.repository.put(H(2), b'2')
         self.repository.commit()
+        self.repo_dump('p1 p2 cc')
         self.repository.delete(H(1))
         self.repository.commit()
+        self.repo_dump('d1 cc')
         last_segment = self.repository.io.get_latest_segment() - 1
         num_deletes = 0
         for tag, key, offset, size in self.repository.io.iter_objects(last_segment):
@@ -325,11 +337,16 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase):
         assert last_segment in self.repository.compact
         self.repository.put(H(3), b'3')
         self.repository.commit()
+        self.repo_dump('p3 cc')
         assert last_segment not in self.repository.compact
         assert not self.repository.io.segment_exists(last_segment)
         for segment, _ in self.repository.io.segment_iterator():
             for tag, key, offset, size in self.repository.io.iter_objects(segment):
                 assert tag != TAG_DELETE
+                assert key != H(1)
+        # after compaction, there should be no empty shadowed_segments lists left over.
+        # we have no put or del any more for H(1), so we lost knowledge about H(1).
+        assert H(1) not in self.repository.shadow_index
 
     def test_shadowed_entries_are_preserved(self):
         get_latest_segment = self.repository.io.get_latest_segment
@@ -370,16 +387,19 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase):
         self.repository.delete(H(1))
         assert self.repository.shadow_index[H(1)] == [0]
         self.repository.commit()
+        self.repo_dump('p1 d1 cc')
         # note how an empty list means that nothing is shadowed for sure
-        assert self.repository.shadow_index[H(1)] == []
+        assert self.repository.shadow_index[H(1)] == []  # because the delete is considered unstable
         self.repository.put(H(1), b'1')
         self.repository.delete(H(1))
+        self.repo_dump('p1 d1')
         # 0 put/delete; 1 commit; 2 compacted; 3 commit; 4 put/delete
         assert self.repository.shadow_index[H(1)] == [4]
         self.repository.rollback()
+        self.repo_dump('r')
         self.repository.put(H(2), b'1')
         # After the rollback segment 4 shouldn't be considered anymore
-        assert self.repository.shadow_index[H(1)] == []
+        assert self.repository.shadow_index[H(1)] == []  # because the delete is considered unstable
 
 
 class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase):
@@ -790,6 +810,25 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
             self.assert_equal(self.repository.get(H(0)), b'data2')
 
 
+class RepositoryHintsTestCase(RepositoryTestCaseBase):
+
+    def test_hints_behaviour(self):
+        self.repository.put(H(0), b'data')
+        self.assert_equal(self.repository.shadow_index, {})
+        self.assert_true(len(self.repository.compact) == 0)
+        self.repository.delete(H(0))
+        self.repository.commit()
+        # this is to make the previous delete "stable", see delete_is_not_stable:
+        self.repository.put(H(1), b'data')
+        self.repository.commit()
+        # in 1.1-maint, commit() always implicitly compacts.
+        # nothing to compact any more! no info left about stuff that does not exist any more:
+        self.assert_not_in(H(0), self.repository.shadow_index)
+        # segment 0 was compacted away, no info about it left:
+        self.assert_not_in(0, self.repository.compact)
+        self.assert_not_in(0, self.repository.segments)
+
+
 class RemoteRepositoryTestCase(RepositoryTestCase):
     repository = None  # type: RemoteRepository