
create a RepositoryCache implementation that can cope with any amount of data, fixes attic #326

the old code blows up with an OverflowError when the cache file grows beyond 2 GiB: the file offsets stored in the NSIndex are 32-bit values and can no longer represent such positions.
the new code just reuses the Repository implementation as a local temporary key/value store.

still an issue: the filesystem where the temporary RepositoryCache is stored (usually /tmp) might be
too small for the cache and run full.

if you copy data from a fuse mount, the cache grows to the deduplicated size of the copied data.
so, if you have lots of data to extract (more than your /tmp can hold), it is better not to use fuse!

besides fuse mounts, this also affects attic check and cache sync (in these cases, only the
metadata size counts, but even that can go beyond 2GiB for some people).
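
To make the idea concrete, here is a minimal sketch (not part of the commit; the temporary path and test data are made up for illustration) of using a Repository as a throwaway local key/value store, the same way the new RepositoryCache does:

    import os
    import tempfile
    from borg.repository import Repository

    # create a throwaway Repository in a temporary directory
    tmppath = tempfile.mkdtemp(prefix='borg-tmp')
    repo = Repository(tmppath, create=True, exclusive=True)

    key = os.urandom(32)            # keys are 32 byte ids, as everywhere in borg
    repo.put(key, b'some chunk data')
    assert repo.get(key) == b'some chunk data'   # readable without a commit

    repo.destroy()                  # removes the temporary repository again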
Thomas Waldmann, 10 years ago
commit bd354d7bb4
2 changed files with 13 additions and 37 deletions
  1. borg/remote.py (+8 -37)
  2. borg/repository.py (+5 -0)

+ 8 - 37
borg/remote.py

@@ -3,7 +3,6 @@ import fcntl
 import msgpack
 import os
 import select
-import shutil
 from subprocess import Popen, PIPE
 import sys
 import tempfile
@@ -11,7 +10,6 @@ import traceback
 
 from . import __version__
 
-from .hashindex import NSIndex
 from .helpers import Error, IntegrityError
 from .repository import Repository
 
@@ -292,56 +290,29 @@ class RemoteRepository:
 class RepositoryCache:
     """A caching Repository wrapper
 
-    Caches Repository GET operations using a temporary file
+    Caches Repository GET operations using a local temporary Repository.
     """
     def __init__(self, repository):
-        self.tmppath = None
-        self.index = None
-        self.data_fd = None
         self.repository = repository
-        self.entries = {}
-        self.initialize()
+        tmppath = tempfile.mkdtemp(prefix='borg-tmp')
+        self.caching_repo = Repository(tmppath, create=True, exclusive=True)
 
     def __del__(self):
-        self.cleanup()
-
-    def initialize(self):
-        self.tmppath = tempfile.mkdtemp(prefix='borg-tmp')
-        self.index = NSIndex()
-        self.data_fd = open(os.path.join(self.tmppath, 'data'), 'a+b')
-
-    def cleanup(self):
-        del self.index
-        if self.data_fd:
-            self.data_fd.close()
-        if self.tmppath:
-            shutil.rmtree(self.tmppath)
-
-    def load_object(self, offset, size):
-        self.data_fd.seek(offset)
-        data = self.data_fd.read(size)
-        assert len(data) == size
-        return data
-
-    def store_object(self, key, data):
-        self.data_fd.seek(0, os.SEEK_END)
-        self.data_fd.write(data)
-        offset = self.data_fd.tell()
-        self.index[key] = offset - len(data), len(data)
+        self.caching_repo.destroy()
 
     def get(self, key):
         return next(self.get_many([key]))
 
     def get_many(self, keys):
-        unknown_keys = [key for key in keys if key not in self.index]
+        unknown_keys = [key for key in keys if key not in self.caching_repo]
         repository_iterator = zip(unknown_keys, self.repository.get_many(unknown_keys))
         for key in keys:
             try:
-                yield self.load_object(*self.index[key])
-            except KeyError:
+                yield self.caching_repo.get(key)
+            except Repository.ObjectNotFound:
                 for key_, data in repository_iterator:
                     if key_ == key:
-                        self.store_object(key, data)
+                        self.caching_repo.put(key, data)
                         yield data
                         break
         # Consume any pending requests
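
For a rough idea of how the changed RepositoryCache behaves, here is a hedged usage sketch. The FakeBackend below is a hypothetical stand-in for a remote repository, not borg's real RemoteRepository; anything that offers a get_many() generator is enough for this illustration:

    import os
    from borg.remote import RepositoryCache

    class FakeBackend:
        """Hypothetical stand-in backend; only get_many() is needed here."""
        def __init__(self, objects):
            self.objects = objects
            self.fetches = 0

        def get_many(self, keys):
            for key in keys:
                self.fetches += 1
                yield self.objects[key]

    key = os.urandom(32)
    backend = FakeBackend({key: b'chunk data'})
    cache = RepositoryCache(backend)

    assert cache.get(key) == b'chunk data'   # fetched from the backend, then cached
    assert cache.get(key) == b'chunk data'   # served from the temporary local Repository
    assert backend.fetches == 1              # the backend was only asked once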

+ 5 - 0
borg/repository.py

@@ -341,6 +341,11 @@ class Repository:
             self.index = self.open_index(self.get_transaction_id())
         return len(self.index)
 
+    def __contains__(self, id):
+        if not self.index:
+            self.index = self.open_index(self.get_transaction_id())
+        return id in self.index
+
     def list(self, limit=None, marker=None):
         if not self.index:
             self.index = self.open_index(self.get_transaction_id())
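
The small addition to borg/repository.py is what makes the `key in self.caching_repo` test in RepositoryCache.get_many possible. Roughly (a sketch reusing the throwaway-repository idea from above; path and data are made up):

    import os
    import tempfile
    from borg.repository import Repository

    repo = Repository(tempfile.mkdtemp(prefix='borg-tmp'), create=True, exclusive=True)
    key = os.urandom(32)
    assert key not in repo        # answered by the new __contains__ via the index
    repo.put(key, b'data')
    assert key in repo            # the in-memory index already knows the id
    repo.destroy()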