Selaa lähdekoodia

add global option --no-files-cache to lower memory consumption

When given, attic does not use the "files" cache. This saves about 240 bytes of RAM per file
(that sounds like very little, but consider that backups nowadays often contain millions of files).

So try this if attic needs more memory than you have RAM (which usually shows up as paging or
MemoryErrors). Of course, saving memory does not come for free: in the one experiment I ran, the
run time increased from 3.5 to 23 minutes (on a system that has enough RAM).
Thomas Waldmann 10 vuotta sitten
vanhempi
sitoutus
4633931413
2 muutettua tiedostoa jossa 14 lisäystä ja 5 poistoa
  1. 7 4
      attic/archiver.py
  2. 7 1
      attic/cache.py

+ 7 - 4
attic/archiver.py

@@ -97,7 +97,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         t0 = datetime.now()
         t0 = datetime.now()
         repository = self.open_repository(args.archive, exclusive=True)
         repository = self.open_repository(args.archive, exclusive=True)
         manifest, key = Manifest.load(repository)
         manifest, key = Manifest.load(repository)
-        cache = Cache(repository, key, manifest)
+        cache = Cache(repository, key, manifest, do_files=args.cache_files)
         archive = Archive(repository, key, manifest, args.archive.archive, cache=cache,
         archive = Archive(repository, key, manifest, args.archive.archive, cache=cache,
                           create=True, checkpoint_interval=args.checkpoint_interval,
                           create=True, checkpoint_interval=args.checkpoint_interval,
                           numeric_owner=args.numeric_owner)
                           numeric_owner=args.numeric_owner)
@@ -227,7 +227,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         """Delete an existing archive"""
         """Delete an existing archive"""
         repository = self.open_repository(args.archive, exclusive=True)
         repository = self.open_repository(args.archive, exclusive=True)
         manifest, key = Manifest.load(repository)
         manifest, key = Manifest.load(repository)
-        cache = Cache(repository, key, manifest)
+        cache = Cache(repository, key, manifest, do_files=args.cache_files)
         archive = Archive(repository, key, manifest, args.archive.archive, cache=cache)
         archive = Archive(repository, key, manifest, args.archive.archive, cache=cache)
         stats = Statistics()
         stats = Statistics()
         archive.delete(stats)
         archive.delete(stats)
@@ -302,7 +302,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         """Show archive details such as disk space used"""
         """Show archive details such as disk space used"""
         repository = self.open_repository(args.archive)
         repository = self.open_repository(args.archive)
         manifest, key = Manifest.load(repository)
         manifest, key = Manifest.load(repository)
-        cache = Cache(repository, key, manifest)
+        cache = Cache(repository, key, manifest, do_files=args.cache_files)
         archive = Archive(repository, key, manifest, args.archive.archive, cache=cache)
         archive = Archive(repository, key, manifest, args.archive.archive, cache=cache)
         stats = archive.calc_stats(cache)
         stats = archive.calc_stats(cache)
         print('Name:', archive.name)
         print('Name:', archive.name)
@@ -319,7 +319,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         """Prune repository archives according to specified rules"""
         """Prune repository archives according to specified rules"""
         repository = self.open_repository(args.repository, exclusive=True)
         repository = self.open_repository(args.repository, exclusive=True)
         manifest, key = Manifest.load(repository)
         manifest, key = Manifest.load(repository)
-        cache = Cache(repository, key, manifest)
+        cache = Cache(repository, key, manifest, do_files=args.cache_files)
         archives = list(sorted(Archive.list_archives(repository, key, manifest, cache),
         archives = list(sorted(Archive.list_archives(repository, key, manifest, cache),
                                key=attrgetter('ts'), reverse=True))
                                key=attrgetter('ts'), reverse=True))
         if args.hourly + args.daily + args.weekly + args.monthly + args.yearly == 0 and args.within is None:
         if args.hourly + args.daily + args.weekly + args.monthly + args.yearly == 0 and args.within is None:
@@ -447,6 +447,9 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         common_parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
         common_parser.add_argument('-v', '--verbose', dest='verbose', action='store_true',
                             default=False,
                             default=False,
                             help='verbose output')
                             help='verbose output')
+        common_parser.add_argument('--no-files-cache', dest='cache_files', action='store_false',
+                            default=True,
+                            help='do not use the "files" cache')
 
 
         # We can't use argparse for "serve" since we don't want it to show up in "Available commands"
         # We can't use argparse for "serve" since we don't want it to show up in "Available commands"
         if args:
         if args:

+ 7 - 1
attic/cache.py

@@ -16,13 +16,14 @@ class Cache(object):
     class RepositoryReplay(Error):
     class RepositoryReplay(Error):
         """Cache is newer than repository, refusing to continue"""
         """Cache is newer than repository, refusing to continue"""
 
 
-    def __init__(self, repository, key, manifest, path=None, sync=True):
+    def __init__(self, repository, key, manifest, path=None, sync=True, do_files=False):
         self.timestamp = None
         self.timestamp = None
         self.txn_active = False
         self.txn_active = False
         self.repository = repository
         self.repository = repository
         self.key = key
         self.key = key
         self.manifest = manifest
         self.manifest = manifest
         self.path = path or os.path.join(get_cache_dir(), hexlify(repository.id).decode('ascii'))
         self.path = path or os.path.join(get_cache_dir(), hexlify(repository.id).decode('ascii'))
+        self.do_files = do_files
         if not os.path.exists(self.path):
         if not os.path.exists(self.path):
             self.create()
             self.create()
         self.open()
         self.open()
@@ -83,6 +84,7 @@ class Cache(object):
                 u.feed(data)
                 u.feed(data)
                 for path_hash, item in u:
                 for path_hash, item in u:
                     item[0] += 1
                     item[0] += 1
+                    # in the end, this takes about 240 Bytes per file
                     self.files[path_hash] = msgpack.packb(item)
                     self.files[path_hash] = msgpack.packb(item)
 
 
     def begin_txn(self):
     def begin_txn(self):
@@ -206,6 +208,8 @@ class Cache(object):
             stats.update(-size, -csize, False)
             stats.update(-size, -csize, False)
 
 
     def file_known_and_unchanged(self, path_hash, st):
     def file_known_and_unchanged(self, path_hash, st):
+        if not self.do_files:
+            return None
         if self.files is None:
         if self.files is None:
             self._read_files()
             self._read_files()
         entry = self.files.get(path_hash)
         entry = self.files.get(path_hash)
@@ -221,6 +225,8 @@ class Cache(object):
             return None
             return None
 
 
     def memorize_file(self, path_hash, st, ids):
     def memorize_file(self, path_hash, st, ids):
+        if not self.do_files:
+            return
         # Entry: Age, inode, size, mtime, chunk ids
         # Entry: Age, inode, size, mtime, chunk ids
         mtime_ns = st_mtime_ns(st)
         mtime_ns = st_mtime_ns(st)
         self.files[path_hash] = msgpack.packb((0, st.st_ino, st.st_size, int_to_bigint(mtime_ns), ids))
         self.files[path_hash] = msgpack.packb((0, st.st_ino, st.st_size, int_to_bigint(mtime_ns), ids))