Browse Source

prune: Show which rule was applied to keep archive

Prune now shows for each kept archive:
 * Which rule is responsible for keeping this archive
 * How many archives have been kept by this rule so far

Ref #2886
Niklas Meinzer 7 years ago
parent
commit
14782a831b
4 changed files with 125 additions and 80 deletions
  1. 31 28
      src/borg/archiver.py
  2. 29 7
      src/borg/helpers/misc.py
  3. 10 10
      src/borg/testsuite/archiver.py
  4. 55 35
      src/borg/testsuite/helpers.py

+ 31 - 28
src/borg/archiver.py

@@ -49,7 +49,7 @@ from .helpers import PrefixSpec, SortBySpec, FilesCacheMode
 from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter
 from .helpers import format_timedelta, format_file_size, parse_file_size, format_archive
 from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict
-from .helpers import interval, prune_within, prune_split
+from .helpers import interval, prune_within, prune_split, PRUNING_PATTERNS
 from .helpers import timestamp
 from .helpers import get_cache_dir
 from .helpers import Manifest, AI_HUMAN_SORT_KEYS
@@ -1333,45 +1333,48 @@ class Archiver:
         # that is newer than a successfully completed backup - and killing the successful backup.
         archives = [arch for arch in archives_checkpoints if arch not in checkpoints]
         keep = []
+        # collect the rule responsible for the keeping of each archive in this dict
+        # keys are archive ids, values are a tuple
+        #   (<rulename>, <how many archives were kept by this rule so far >)
+        kept_because = {}
+
+        # find archives which need to be kept because of the keep-within rule
         if args.within:
-            keep += prune_within(archives, args.within)
-        if args.secondly:
-            keep += prune_split(archives, '%Y-%m-%d %H:%M:%S', args.secondly, keep)
-        if args.minutely:
-            keep += prune_split(archives, '%Y-%m-%d %H:%M', args.minutely, keep)
-        if args.hourly:
-            keep += prune_split(archives, '%Y-%m-%d %H', args.hourly, keep)
-        if args.daily:
-            keep += prune_split(archives, '%Y-%m-%d', args.daily, keep)
-        if args.weekly:
-            keep += prune_split(archives, '%G-%V', args.weekly, keep)
-        if args.monthly:
-            keep += prune_split(archives, '%Y-%m', args.monthly, keep)
-        if args.yearly:
-            keep += prune_split(archives, '%Y', args.yearly, keep)
+            keep += prune_within(archives, args.within, kept_because)
+
+        # find archives which need to be kept because of the various time period rules
+        for rule in PRUNING_PATTERNS.keys():
+            num = getattr(args, rule, None)
+            if num is not None:
+                keep += prune_split(archives, rule, num, kept_because)
+
         to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints))
         stats = Statistics()
         with Cache(repository, key, manifest, do_files=False, lock_wait=self.lock_wait) as cache:
             list_logger = logging.getLogger('borg.output.list')
-            if args.output_list:
-                # set up counters for the progress display
-                to_delete_len = len(to_delete)
-                archives_deleted = 0
+            # set up counters for the progress display
+            to_delete_len = len(to_delete)
+            archives_deleted = 0
             for archive in archives_checkpoints:
                 if archive in to_delete:
                     if args.dry_run:
-                        if args.output_list:
-                            list_logger.info('Would prune:     %s' % format_archive(archive))
+                        log_message = 'Would prune:'
                     else:
-                        if args.output_list:
-                            archives_deleted += 1
-                            list_logger.info('Pruning archive: %s (%d/%d)' % (format_archive(archive),
-                                                                              archives_deleted, to_delete_len))
+                        archives_deleted += 1
+                        log_message = 'Pruning archive (%d/%d):' % (archives_deleted, to_delete_len)
                         Archive(repository, key, manifest, archive.name, cache,
                                 progress=args.progress).delete(stats, forced=args.forced)
                 else:
-                    if args.output_list:
-                        list_logger.info('Keeping archive: %s' % format_archive(archive))
+                    if is_checkpoint(archive.name):
+                        log_message = 'Keeping checkpoint archive:'
+                    else:
+                        log_message = 'Keeping archive (rule: {rule} #{num}):'.format(
+                            rule=kept_because[archive.id][0], num=kept_because[archive.id][1]
+                        )
+                if args.output_list:
+                    list_logger.info("{message:<40} {archive}".format(
+                        message=log_message, archive=format_archive(archive)
+                    ))
             if to_delete and not args.dry_run:
                 manifest.write()
                 repository.commit(save_space=args.save_space)

+ 29 - 7
src/borg/helpers/misc.py

@@ -4,7 +4,7 @@ import os
 import os.path
 import platform
 import sys
-from collections import deque
+from collections import deque, OrderedDict
 from datetime import datetime, timezone, timedelta
 from itertools import islice
 from operator import attrgetter
@@ -17,22 +17,44 @@ from .. import __version__ as borg_version
 from .. import chunker
 
 
-def prune_within(archives, hours):
+def prune_within(archives, hours, kept_because):
     target = datetime.now(timezone.utc) - timedelta(seconds=hours * 3600)
-    return [a for a in archives if a.ts > target]
-
-
-def prune_split(archives, pattern, n, skip=[]):
+    kept_counter = 0
+    result = []
+    for a in archives:
+        if a.ts > target:
+            kept_counter += 1
+            kept_because[a.id] = ("within", kept_counter)
+            result.append(a)
+    return result
+
+
+PRUNING_PATTERNS = OrderedDict([
+    ("secondly", '%Y-%m-%d %H:%M:%S'),
+    ("minutely", '%Y-%m-%d %H:%M'),
+    ("hourly", '%Y-%m-%d %H'),
+    ("daily", '%Y-%m-%d'),
+    ("weekly", '%G-%V'),
+    ("monthly", '%Y-%m'),
+    ("yearly", '%Y'),
+])
+
+
+def prune_split(archives, rule, n, kept_because=None):
     last = None
     keep = []
+    pattern = PRUNING_PATTERNS[rule]
+    if kept_because is None:
+        kept_because = {}
     if n == 0:
         return keep
     for a in sorted(archives, key=attrgetter('ts'), reverse=True):
         period = to_localtime(a.ts).strftime(pattern)
         if period != last:
             last = period
-            if a not in skip:
+            if a.id not in kept_because:
                 keep.append(a)
+                kept_because[a.id] = (rule, len(keep))
                 if len(keep) == n:
                     break
     return keep

+ 10 - 10
src/borg/testsuite/archiver.py

@@ -6,6 +6,7 @@ import logging
 import os
 import pstats
 import random
+import re
 import shutil
 import socket
 import stat
@@ -1731,12 +1732,11 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('create', self.repository_location + '::test3.checkpoint.1', src_dir)
         self.cmd('create', self.repository_location + '::test4.checkpoint', src_dir)
         output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2')
-        self.assert_in('Keeping archive: test2', output)
-        self.assert_in('Would prune:     test1', output)
+        assert re.search(r'Would prune:\s+test1', output)
         # must keep the latest non-checkpoint archive:
-        self.assert_in('Keeping archive: test2', output)
+        assert re.search(r'Keeping archive \(rule: daily #1\):\s+test2', output)
         # must keep the latest checkpoint archive:
-        self.assert_in('Keeping archive: test4.checkpoint', output)
+        assert re.search(r'Keeping checkpoint archive:\s+test4.checkpoint', output)
         output = self.cmd('list', self.repository_location)
         self.assert_in('test1', output)
         self.assert_in('test2', output)
@@ -1766,8 +1766,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('create', self.repository_location + '::test1', src_dir)
         self.cmd('create', self.repository_location + '::test2', src_dir)
         output = self.cmd('prune', '--list', '--stats', '--dry-run', self.repository_location, '--keep-daily=2')
-        self.assert_in('Keeping archive: test2', output)
-        self.assert_in('Would prune:     test1', output)
+        assert re.search(r'Keeping archive \(rule: daily #1\):\s+test2', output)
+        assert re.search(r'Would prune:\s+test1', output)
         self.assert_in('Deleted data:', output)
         output = self.cmd('list', self.repository_location)
         self.assert_in('test1', output)
@@ -1784,8 +1784,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('create', self.repository_location + '::bar-2015-08-12-10:00', src_dir)
         self.cmd('create', self.repository_location + '::bar-2015-08-12-20:00', src_dir)
         output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2', '--prefix=foo-')
-        self.assert_in('Keeping archive: foo-2015-08-12-20:00', output)
-        self.assert_in('Would prune:     foo-2015-08-12-10:00', output)
+        assert re.search(r'Keeping archive \(rule: daily #1\):\s+foo-2015-08-12-20:00', output)
+        assert re.search(r'Would prune:\s+foo-2015-08-12-10:00', output)
         output = self.cmd('list', self.repository_location)
         self.assert_in('foo-2015-08-12-10:00', output)
         self.assert_in('foo-2015-08-12-20:00', output)
@@ -1805,8 +1805,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('create', self.repository_location + '::2015-08-12-10:00-bar', src_dir)
         self.cmd('create', self.repository_location + '::2015-08-12-20:00-bar', src_dir)
         output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2', '--glob-archives=2015-*-foo')
-        self.assert_in('Keeping archive: 2015-08-12-20:00-foo', output)
-        self.assert_in('Would prune:     2015-08-12-10:00-foo', output)
+        assert re.search(r'Keeping archive \(rule: daily #1\):\s+2015-08-12-20:00-foo', output)
+        assert re.search(r'Would prune:\s+2015-08-12-10:00-foo', output)
         output = self.cmd('list', self.repository_location)
         self.assert_in('2015-08-12-10:00-foo', output)
         self.assert_in('2015-08-12-20:00-foo', output)

+ 55 - 35
src/borg/testsuite/helpers.py

@@ -1,11 +1,10 @@
 import hashlib
-import io
 import os
 import shutil
 import sys
 from argparse import ArgumentTypeError
 from datetime import datetime, timezone, timedelta
-from time import mktime, strptime, sleep
+from time import sleep
 
 import pytest
 
@@ -333,40 +332,56 @@ class MakePathSafeTestCase(BaseTestCase):
 
 class MockArchive:
 
-    def __init__(self, ts):
+    def __init__(self, ts, id):
         self.ts = ts
+        self.id = id
 
     def __repr__(self):
-        return repr(self.ts)
-
-
-class PruneSplitTestCase(BaseTestCase):
-
-    def test(self):
-
-        def local_to_UTC(month, day):
-            """Convert noon on the month and day in 2013 to UTC."""
-            seconds = mktime(strptime('2013-%02d-%02d 12:00' % (month, day), '%Y-%m-%d %H:%M'))
-            return datetime.fromtimestamp(seconds, tz=timezone.utc)
-
-        def subset(lst, indices):
-            return {lst[i] for i in indices}
-
-        def dotest(test_archives, n, skip, indices):
-            for ta in test_archives, reversed(test_archives):
-                self.assert_equal(set(prune_split(ta, '%Y-%m', n, skip)),
-                                  subset(test_archives, indices))
-
-        test_pairs = [(1, 1), (2, 1), (2, 28), (3, 1), (3, 2), (3, 31), (5, 1)]
-        test_dates = [local_to_UTC(month, day) for month, day in test_pairs]
-        test_archives = [MockArchive(date) for date in test_dates]
-
-        dotest(test_archives, 3, [], [6, 5, 2])
-        dotest(test_archives, -1, [], [6, 5, 2, 0])
-        dotest(test_archives, 3, [test_archives[6]], [5, 2, 0])
-        dotest(test_archives, 3, [test_archives[5]], [6, 2, 0])
-        dotest(test_archives, 3, [test_archives[4]], [6, 5, 2])
-        dotest(test_archives, 0, [], [])
+        return "{0}: {1}".format(self.id, self.ts.isoformat())
+
+
+@pytest.mark.parametrize(
+    "rule,num_to_keep,expected_ids", [
+        ("yearly", 3, (13, 2, 1)),
+        ("monthly", 3, (13, 8, 4)),
+        ("weekly", 2, (13, 8)),
+        ("daily", 3, (13, 8, 7)),
+        ("hourly", 3, (13, 10, 8)),
+        ("minutely", 3, (13, 10, 9)),
+        ("secondly", 4, (13, 12, 11, 10)),
+        ("daily", 0, []),
+    ]
+)
+def test_prune_split(rule, num_to_keep, expected_ids):
+    def subset(lst, ids):
+        return {i for i in lst if i.id in ids}
+
+    archives = [
+        # years apart
+        MockArchive(datetime(2015, 1, 1, 10, 0, 0, tzinfo=timezone.utc), 1),
+        MockArchive(datetime(2016, 1, 1, 10, 0, 0, tzinfo=timezone.utc), 2),
+        MockArchive(datetime(2017, 1, 1, 10, 0, 0, tzinfo=timezone.utc), 3),
+        # months apart
+        MockArchive(datetime(2017, 2, 1, 10, 0, 0, tzinfo=timezone.utc), 4),
+        MockArchive(datetime(2017, 3, 1, 10, 0, 0, tzinfo=timezone.utc), 5),
+        # days apart
+        MockArchive(datetime(2017, 3, 2, 10, 0, 0, tzinfo=timezone.utc), 6),
+        MockArchive(datetime(2017, 3, 3, 10, 0, 0, tzinfo=timezone.utc), 7),
+        MockArchive(datetime(2017, 3, 4, 10, 0, 0, tzinfo=timezone.utc), 8),
+        # minutes apart
+        MockArchive(datetime(2017, 10, 1, 9, 45, 0, tzinfo=timezone.utc), 9),
+        MockArchive(datetime(2017, 10, 1, 9, 55, 0, tzinfo=timezone.utc), 10),
+        # seconds apart
+        MockArchive(datetime(2017, 10, 1, 10, 0, 1, tzinfo=timezone.utc), 11),
+        MockArchive(datetime(2017, 10, 1, 10, 0, 3, tzinfo=timezone.utc), 12),
+        MockArchive(datetime(2017, 10, 1, 10, 0, 5, tzinfo=timezone.utc), 13),
+    ]
+    kept_because = {}
+    keep = prune_split(archives, rule, num_to_keep, kept_because)
+
+    assert set(keep) == subset(archives, expected_ids)
+    for item in keep:
+        assert kept_because[item.id][0] == rule
 
 
 class IntervalTestCase(BaseTestCase):
@@ -410,14 +425,19 @@ class PruneWithinTestCase(BaseTestCase):
 
         def dotest(test_archives, within, indices):
             for ta in test_archives, reversed(test_archives):
-                self.assert_equal(set(prune_within(ta, interval(within))),
+                kept_because = {}
+                keep = prune_within(ta, interval(within), kept_because)
+                self.assert_equal(set(keep),
                                   subset(test_archives, indices))
+                assert all("within" == kept_because[a.id][0] for a in keep)
 
         # 1 minute, 1.5 hours, 2.5 hours, 3.5 hours, 25 hours, 49 hours
         test_offsets = [60, 90*60, 150*60, 210*60, 25*60*60, 49*60*60]
         now = datetime.now(timezone.utc)
         test_dates = [now - timedelta(seconds=s) for s in test_offsets]
-        test_archives = [MockArchive(date) for date in test_dates]
+        test_archives = [
+            MockArchive(date, i) for i, date in enumerate(test_dates)
+        ]
 
         dotest(test_archives, '1H', [0])
         dotest(test_archives, '2H', [0, 1])