Browse Source

Reduce memory usage when backing up many small files

Closes #69.
Jonas Borgström 11 years ago
parent
commit
055a40910b
2 changed files with 11 additions and 6 deletions
  1. 1 0
      CHANGES
  2. 10 6
      attic/cache.py

+ 1 - 0
CHANGES

@@ -8,6 +8,7 @@ Version 0.13
 
 (feature release, released on X)
 
+- Reduced memory usage when backing up many small files (#69)
 - Experimental Linux and FreeBSD ACL support (#66)
 - Added support for backup and restore of BSDFlags (OSX, FreeBSD) (#56)
 - Fix bug where xattrs on symlinks were not correctly restored

+ 10 - 6
attic/cache.py

@@ -12,6 +12,8 @@ from .hashindex import ChunkIndex
 class Cache(object):
     """Client Side cache
     """
+    # Do not cache file metadata for files of this size (in bytes) or smaller
+    FILE_MIN_SIZE = 4096
 
     class RepositoryReplay(Error):
         """Cache is newer than repository, refusing to continue"""
@@ -81,9 +83,10 @@ class Cache(object):
                 if not data:
                     break
                 u.feed(data)
-                for hash, item in u:
+                for path_hash, item in u:
+                    if item[2] > self.FILE_MIN_SIZE:
                         item[0] += 1
-                        self.files[hash] = item
+                        self.files[path_hash] = item
 
     def begin_txn(self):
         # Initialize transaction snapshot
@@ -218,7 +221,8 @@ class Cache(object):
             return None
 
     def memorize_file(self, path_hash, st, ids):
-        # Entry: Age, inode, size, mtime, chunk ids
-        mtime_ns = st_mtime_ns(st)
-        self.files[path_hash] = 0, st.st_ino, st.st_size, mtime_ns, ids
-        self._newest_mtime = max(self._newest_mtime, mtime_ns)
+        if st.st_size > self.FILE_MIN_SIZE:
+            # Entry: Age, inode, size, mtime, chunk ids
+            mtime_ns = st_mtime_ns(st)
+            self.files[path_hash] = 0, st.st_ino, st.st_size, mtime_ns, ids
+            self._newest_mtime = max(self._newest_mtime, mtime_ns)