Broncode bekijken

Improved purging algorithm

Jonas Borgström 14 jaar geleden
bovenliggende
commit
86a4353d32
3 gewijzigde bestanden met 77 toevoegingen en 44 verwijderingen
  1. 3 0
      darc/archive.py
  2. 49 40
      darc/archiver.py
  3. 25 4
      darc/helpers.py

+ 3 - 0
darc/archive.py

@@ -55,6 +55,9 @@ class Archive(object):
         t, f = self.metadata['time'].split('.', 1)
         return datetime.strptime(t, '%Y-%m-%dT%H:%M:%S') + timedelta(seconds=float('.' + f))
 
+    def __repr__(self):
+        return 'Archive(%r)' % self.name
+
     def iter_items(self, callback):
         unpacker = msgpack.Unpacker()
         counter = Counter(0)

+ 49 - 40
darc/archiver.py

@@ -1,5 +1,5 @@
 import argparse
-from datetime import datetime, date, timedelta
+from datetime import datetime, timedelta
 from operator import attrgetter
 import os
 import stat
@@ -9,9 +9,9 @@ from .archive import Archive
 from .store import Store
 from .cache import Cache
 from .key import Key
-from .helpers import location_validator, format_file_size, format_time,\
+from .helpers import location_validator, format_time, \
     format_file_mode, IncludePattern, ExcludePattern, exclude_path, to_localtime, \
-    get_cache_dir, day_of_year, format_timedelta
+    get_cache_dir, format_timedelta, Purger
 from .remote import StoreServer, RemoteStore
 
 class Archiver(object):
@@ -85,7 +85,7 @@ class Archiver(object):
             print 'Archive fingerprint: %s' % archive.hash.encode('hex')
             print 'Start time: %s' % t0.strftime('%c')
             print 'End time: %s' % t.strftime('%c')
-            print 'Duration: %.2f (%s)' % (diff.total_seconds(), format_timedelta(diff))
+            print 'Duration: %s' % format_timedelta(diff)
             archive.stats.print_()
             print '-' * 40
         return self.exit_code
@@ -235,48 +235,57 @@ class Archiver(object):
         cache = Cache(store, key)
         archives = list(sorted(Archive.list_archives(store, key, cache),
                                key=attrgetter('ts'), reverse=True))
-        num_daily = args.daily
-        num_weekly = args.weekly
-        num_monthly = args.monthly
-        num_yearly = args.yearly
+        daily = []
+        weekly = []
+        monthly = []
+        yearly = []
         if args.daily + args.weekly + args.monthly + args.yearly == 0:
             self.print_error('At least one of the "daily", "weekly", "monthly" or "yearly" '
                              'settings must be specified')
             return 1
-        t0 = date.today() + timedelta(days=1) # Tomorrow
-        daily = weekly = monthly = yearly = 0
+
+        if args.prefix:
+            archives = [archive for archive in archives if archive.name.startswith(args.prefix)]
+        purger = Purger()
         for archive in archives:
-            if args.prefix and not archive.name.startswith(args.prefix):
-                continue
-            t = to_localtime(archive.ts).date()
-            if daily < args.daily and t < t0:
-                daily += 1
-                self.print_verbose('Archive "%s" is daily archive number %d',
-                                   archive.name, daily)
-                t0 = t
-            elif weekly < args.weekly and t < t0 and t.weekday() == 1:
-                weekly += 1
-                self.print_verbose('Archive "%s" is weekly archive number %d',
-                                   archive.name, weekly)
-                t0 = t
-            elif monthly < args.monthly and t < t0 and t.day == 1:
-                monthly += 1
-                self.print_verbose('Archive "%s" is monthly archive number %d',
-                                   archive.name, monthly)
-                t0 = t
-            elif yearly < args.yearly and t < t0 and day_of_year(t) == 1:
-                yearly += 1
-                self.print_verbose('Archive "%s" is yearly archive number %d',
-                                   archive.name, yearly)
-                t0 = t
+            purger.insert(to_localtime(archive.ts).date(), archive)
+        archives, to_delete = purger.purge(len(purger.items))
+        if args.yearly:
+            purger = Purger()
+            for archive in archives:
+                purger.insert(to_localtime(archive.ts).strftime('%Y'), archive)
+            yearly, archives = purger.purge(args.yearly)
+        if args.monthly:
+            purger = Purger()
+            for archive in archives:
+                purger.insert(to_localtime(archive.ts).strftime('%Y-%m'), archive)
+            monthly, archives = purger.purge(args.monthly)
+        if args.weekly:
+            purger = Purger()
+            for archive in archives:
+                purger.insert(to_localtime(archive.ts).strftime('%Y-%V'), archive)
+            weekly, archives = purger.purge(args.weekly)
+        if args.daily:
+            daily = archives[-args.daily:]
+            archives = archives[:-args.daily]
+        to_delete += archives
+
+        for i, archive in enumerate(yearly):
+            self.print_verbose('Keeping "%s" as yearly archive %d' % (archive.name, i + 1))
+        for i, archive in enumerate(monthly):
+            self.print_verbose('Keeping "%s" as monthly archive %d' % (archive.name, i + 1))
+        for i, archive in enumerate(weekly):
+            self.print_verbose('Keeping "%s" as weekly archive %d' % (archive.name, i + 1))
+        for i, archive in enumerate(daily):
+            self.print_verbose('Keeping "%s" as daily archive %d' % (archive.name, i + 1))
+        for archive in to_delete:
+            if args.really:
+                self.print_verbose('Purging archive "%s"', archive.name)
+                archive.delete(cache)
             else:
-                self.print_verbose('Purging archive %s', archive.name)
-                if args.really:
-                    archive.delete(cache)
-                else:
-                    print ('Archive "%s" marked for deletion. '
-                           'Use the "--really" option to actually delete it'
-                           % archive.metadata['name'])
+                print ('Archive "%s" marked for deletion. '
+                       'Use the "--really" option to actually delete it'
+                       % archive.metadata['name'])
         return self.exit_code
 
     def run(self, args=None):

+ 25 - 4
darc/helpers.py

@@ -2,6 +2,7 @@ from __future__ import with_statement
 import argparse
 from datetime import datetime, timedelta
 from fnmatch import fnmatchcase
+from operator import attrgetter
 import grp
 import os
 import pwd
@@ -13,6 +14,30 @@ import time
 import urllib
 
 
+class Purger(object):
+    """Purging helper"""
+
+    def __init__(self):
+        self.items = {}
+
+    def insert(self, key, value):
+        self.items.setdefault(key, [])
+        self.items[key].append(value)
+
+    def purge(self, n, reverse=False):
+        keep = []
+        delete = []
+        for key, values in sorted(self.items.items(), reverse=reverse):
+            if n:
+                values.sort(key=attrgetter('ts'), reverse=reverse)
+                keep.append(values[0])
+                delete += values[1:]
+                n -= 1
+            else:
+                delete += values
+        return keep, delete
+
+
 class Statistics(object):
 
     def __init__(self):
@@ -30,10 +55,6 @@ class Statistics(object):
         print 'Compressed size: %s (%s)'% (self.csize, format_file_size(self.csize))
         print 'Unique data: %d (%s)' % (self.usize, format_file_size(self.usize))
 
-def day_of_year(d):
-    """Calculate the "day of year" from a date object"""
-    return int(d.strftime('%j'))
-
 
 # OSX filenames are UTF-8 Only so any non-utf8 filenames are url encoded
 if sys.platform == 'darwin':