Browse Source

Merge pull request #3004 from ThomasWaldmann/fd-cache-invalidation-1.0

FD cache invalidation (1.0-maint)
TW 7 years ago
parent
commit
1c6b38a0b4
2 changed files with 30 additions and 1 deletions
  1. 8 0
      borg/repository.py
  2. 22 1
      borg/testsuite/repository.py

+ 8 - 0
borg/repository.py

@@ -631,6 +631,8 @@ class LoggedIO:
         self.segment = transaction_id + 1
         self.segment = transaction_id + 1
         for segment, filename in self.segment_iterator(reverse=True):
         for segment, filename in self.segment_iterator(reverse=True):
             if segment > transaction_id:
             if segment > transaction_id:
+                if segment in self.fds:
+                    del self.fds[segment]
                 os.unlink(filename)
                 os.unlink(filename)
             else:
             else:
                 break
                 break
@@ -685,6 +687,12 @@ class LoggedIO:
             self._write_fd = open(self.segment_filename(self.segment), 'xb')
             self._write_fd = open(self.segment_filename(self.segment), 'xb')
             self._write_fd.write(MAGIC)
             self._write_fd.write(MAGIC)
             self.offset = MAGIC_LEN
             self.offset = MAGIC_LEN
+            if self.segment in self.fds:
+                # we may have a cached fd for a segment file we already deleted and
+                # we are now writing a new segment file to the same file name. get rid
+                # of the cached fd that still refers to the old file, so it will later
+                # get repopulated (on demand) with an fd that refers to the new file.
+                del self.fds[self.segment]
         return self._write_fd
         return self._write_fd
 
 
     def get_fd(self, segment):
     def get_fd(self, segment):

+ 22 - 1
borg/testsuite/repository.py

@@ -11,7 +11,7 @@ from ..hashindex import NSIndex
 from ..helpers import Location, IntegrityError
 from ..helpers import Location, IntegrityError
 from ..locking import Lock, LockFailed
 from ..locking import Lock, LockFailed
 from ..remote import RemoteRepository, InvalidRPCMethod
 from ..remote import RemoteRepository, InvalidRPCMethod
-from ..repository import Repository, LoggedIO, TAG_DELETE, MAX_DATA_SIZE
+from ..repository import Repository, LoggedIO, TAG_DELETE, MAX_DATA_SIZE, MAGIC
 from . import BaseTestCase
 from . import BaseTestCase
 from .hashindex import H
 from .hashindex import H
 
 
@@ -151,6 +151,27 @@ class RepositoryCommitTestCase(RepositoryTestCaseBase):
         self.repository.put(H(2), b'boo')
         self.repository.put(H(2), b'boo')
         self.repository.delete(H(3))
         self.repository.delete(H(3))
 
 
+    def test_uncommitted_garbage(self):
+        # uncommitted garbage should be no problem, it is cleaned up automatically.
+        # we just have to be careful with invalidation of cached FDs in LoggedIO.
+        self.repository.put(H(0), b'foo')
+        self.repository.commit()
+        # write some crap to an uncommitted segment file
+        last_segment = self.repository.io.get_latest_segment()
+        with open(self.repository.io.segment_filename(last_segment + 1), 'wb') as f:
+            f.write(MAGIC + b'crapcrapcrap')
+        self.repository.close()
+        # usually, opening the repo and starting a transaction should trigger a cleanup.
+        self.repository = self.open()
+        with self.repository:
+            # the next 2 lines get the FD of the crap segment file cached:
+            segment = self.repository.io.get_latest_segment()
+            self.repository.io.get_fd(segment)
+            # when the put triggers the start of a transaction, crap should be cleaned up:
+            self.repository.put(H(0), b'bar')  # this may trigger compact_segments()
+            self.repository.commit()
+        # the point here is that nothing blows up with an exception.
+
     def test_replay_of_missing_index(self):
     def test_replay_of_missing_index(self):
         self.add_keys()
         self.add_keys()
         for name in os.listdir(self.repository.path):
         for name in os.listdir(self.repository.path):