فهرست منبع

efficient archive list from manifest

a big speedup for:
"list <repo>", the archive listing shown by "delete <repo>", and "prune" - especially over slow connections to remote repositories.

the previous method used metadata from the archive itself, which is (in total) rather large.
so if you had many archives and a slow (remote) connection, it was very slow.

but there is a much easier way: just use the archive list from the repository manifest - we already
have it anyway, and it also has the name, id and timestamp of every archive - and that's all we need.

I defined an ArchiveInfo namedtuple whose field names match the corresponding attribute names
of the Archive object, so as long as name, id and ts are enough, it can be used in its place.
Thomas Waldmann 10 سال پیش
والد
کامیت
d067bc3178
3 فایلهای تغییر یافته به همراه 20 افزوده شده و 7 حذف شده
  1. 1 0
      borg/archive.py
  2. 6 7
      borg/archiver.py
  3. 13 0
      borg/helpers.py

+ 1 - 0
borg/archive.py

@@ -494,6 +494,7 @@ class Archive:
 
     @staticmethod
     def list_archives(repository, key, manifest, cache=None):
+        # expensive! see also Manifest.list_archive_infos.
         for name, info in manifest.archives.items():
             yield Archive(repository, key, manifest, name, cache=cache)
 

+ 6 - 7
borg/archiver.py

@@ -284,8 +284,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                 stats.print_('Deleted data:', cache)
         else:
             print("You requested to completely DELETE the repository *including* all archives it contains:")
-            for archive in sorted(Archive.list_archives(repository, key, manifest), key=attrgetter('ts')):
-                print(format_archive(archive))
+            for archive_info in manifest.list_archive_infos(sort_by='ts'):
+                print(format_archive(archive_info))
             print("""Type "YES" if you understand this and want to continue.\n""")
             if input('Do you want to continue? ') == 'YES':
                 repository.destroy()
@@ -354,8 +354,8 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                     item[b'group'] or item[b'gid'], size, format_time(mtime),
                     remove_surrogates(item[b'path']), extra))
         else:
-            for archive in sorted(Archive.list_archives(repository, key, manifest), key=attrgetter('ts')):
-                print(format_archive(archive))
+            for archive_info in manifest.list_archive_infos(sort_by='ts'):
+                print(format_archive(archive_info))
         return self.exit_code
 
     def do_info(self, args):
@@ -380,8 +380,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
         repository = self.open_repository(args.repository, exclusive=True)
         manifest, key = Manifest.load(repository)
         cache = Cache(repository, key, manifest, do_files=args.cache_files)
-        archives = list(sorted(Archive.list_archives(repository, key, manifest, cache),
-                               key=attrgetter('ts'), reverse=True))
+        archives = manifest.list_archive_infos(sort_by='ts', reverse=True)  # just a ArchiveInfo list
         if args.hourly + args.daily + args.weekly + args.monthly + args.yearly == 0 and args.within is None:
             self.print_error('At least one of the "within", "hourly", "daily", "weekly", "monthly" or "yearly" '
                              'settings must be specified')
@@ -412,7 +411,7 @@ Type "Yes I am sure" if you understand this and want to continue.\n""")
                 self.print_verbose('Would prune:     %s' % format_archive(archive))
             else:
                 self.print_verbose('Pruning archive: %s' % format_archive(archive))
-                archive.delete(stats)
+                Archive(repository, key, manifest, archive.name, cache).delete(stats)
         if to_delete and not args.dry_run:
             manifest.write()
             repository.commit()

+ 13 - 0
borg/helpers.py

@@ -1,5 +1,6 @@
 import argparse
 import binascii
+from collections import namedtuple
 import grp
 import msgpack
 import os
@@ -119,6 +120,18 @@ class Manifest:
         self.id = self.key.id_hash(data)
         self.repository.put(self.MANIFEST_ID, self.key.encrypt(data))
 
+    def list_archive_infos(self, sort_by=None, reverse=False):
+        # inexpensive Archive.list_archives replacement if we just need .name, .id, .ts
+        ArchiveInfo = namedtuple('ArchiveInfo', 'name id ts')
+        archives = []
+        for name, values in self.archives.items():
+            ts = parse_timestamp(values[b'time'].decode('utf-8'))
+            id = values[b'id']
+            archives.append(ArchiveInfo(name=name, id=id, ts=ts))
+        if sort_by is not None:
+            archives = sorted(archives, key=attrgetter(sort_by), reverse=reverse)
+        return archives
+
 
 def prune_within(archives, within):
     multiplier = {'H': 1, 'd': 24, 'w': 24*7, 'm': 24*31, 'y': 24*365}