Browse Source

prune: Show which rule was applied to keep archive

Prune now shows for each kept archive:
 * Which rule is responsible for keeping this archive
 * How many archives have been kept by this rule so far

Ref #2886
Niklas Meinzer 7 years ago
parent
commit
14782a831b
4 changed files with 125 additions and 80 deletions
  1. 31 28
      src/borg/archiver.py
  2. 29 7
      src/borg/helpers/misc.py
  3. 10 10
      src/borg/testsuite/archiver.py
  4. 55 35
      src/borg/testsuite/helpers.py

+ 31 - 28
src/borg/archiver.py

@@ -49,7 +49,7 @@ from .helpers import PrefixSpec, SortBySpec, FilesCacheMode
 from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter
 from .helpers import BaseFormatter, ItemFormatter, ArchiveFormatter
 from .helpers import format_timedelta, format_file_size, parse_file_size, format_archive
 from .helpers import format_timedelta, format_file_size, parse_file_size, format_archive
 from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict
 from .helpers import safe_encode, remove_surrogates, bin_to_hex, prepare_dump_dict
-from .helpers import interval, prune_within, prune_split
+from .helpers import interval, prune_within, prune_split, PRUNING_PATTERNS
 from .helpers import timestamp
 from .helpers import timestamp
 from .helpers import get_cache_dir
 from .helpers import get_cache_dir
 from .helpers import Manifest, AI_HUMAN_SORT_KEYS
 from .helpers import Manifest, AI_HUMAN_SORT_KEYS
@@ -1333,45 +1333,48 @@ class Archiver:
         # that is newer than a successfully completed backup - and killing the successful backup.
         # that is newer than a successfully completed backup - and killing the successful backup.
         archives = [arch for arch in archives_checkpoints if arch not in checkpoints]
         archives = [arch for arch in archives_checkpoints if arch not in checkpoints]
         keep = []
         keep = []
+        # collect the rule responsible for the keeping of each archive in this dict
+        # keys are archive ids, values are a tuple
+        #   (<rulename>, <how many archives were kept by this rule so far>)
+        kept_because = {}
+
+        # find archives which need to be kept because of the keep-within rule
         if args.within:
         if args.within:
-            keep += prune_within(archives, args.within)
-        if args.secondly:
-            keep += prune_split(archives, '%Y-%m-%d %H:%M:%S', args.secondly, keep)
-        if args.minutely:
-            keep += prune_split(archives, '%Y-%m-%d %H:%M', args.minutely, keep)
-        if args.hourly:
-            keep += prune_split(archives, '%Y-%m-%d %H', args.hourly, keep)
-        if args.daily:
-            keep += prune_split(archives, '%Y-%m-%d', args.daily, keep)
-        if args.weekly:
-            keep += prune_split(archives, '%G-%V', args.weekly, keep)
-        if args.monthly:
-            keep += prune_split(archives, '%Y-%m', args.monthly, keep)
-        if args.yearly:
-            keep += prune_split(archives, '%Y', args.yearly, keep)
+            keep += prune_within(archives, args.within, kept_because)
+
+        # find archives which need to be kept because of the various time period rules
+        for rule in PRUNING_PATTERNS.keys():
+            num = getattr(args, rule, None)
+            if num is not None:
+                keep += prune_split(archives, rule, num, kept_because)
+
         to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints))
         to_delete = (set(archives) | checkpoints) - (set(keep) | set(keep_checkpoints))
         stats = Statistics()
         stats = Statistics()
         with Cache(repository, key, manifest, do_files=False, lock_wait=self.lock_wait) as cache:
         with Cache(repository, key, manifest, do_files=False, lock_wait=self.lock_wait) as cache:
             list_logger = logging.getLogger('borg.output.list')
             list_logger = logging.getLogger('borg.output.list')
-            if args.output_list:
-                # set up counters for the progress display
-                to_delete_len = len(to_delete)
-                archives_deleted = 0
+            # set up counters for the progress display
+            to_delete_len = len(to_delete)
+            archives_deleted = 0
             for archive in archives_checkpoints:
             for archive in archives_checkpoints:
                 if archive in to_delete:
                 if archive in to_delete:
                     if args.dry_run:
                     if args.dry_run:
-                        if args.output_list:
-                            list_logger.info('Would prune:     %s' % format_archive(archive))
+                        log_message = 'Would prune:'
                     else:
                     else:
-                        if args.output_list:
-                            archives_deleted += 1
-                            list_logger.info('Pruning archive: %s (%d/%d)' % (format_archive(archive),
-                                                                              archives_deleted, to_delete_len))
+                        archives_deleted += 1
+                        log_message = 'Pruning archive (%d/%d):' % (archives_deleted, to_delete_len)
                         Archive(repository, key, manifest, archive.name, cache,
                         Archive(repository, key, manifest, archive.name, cache,
                                 progress=args.progress).delete(stats, forced=args.forced)
                                 progress=args.progress).delete(stats, forced=args.forced)
                 else:
                 else:
-                    if args.output_list:
-                        list_logger.info('Keeping archive: %s' % format_archive(archive))
+                    if is_checkpoint(archive.name):
+                        log_message = 'Keeping checkpoint archive:'
+                    else:
+                        log_message = 'Keeping archive (rule: {rule} #{num}):'.format(
+                            rule=kept_because[archive.id][0], num=kept_because[archive.id][1]
+                        )
+                if args.output_list:
+                    list_logger.info("{message:<40} {archive}".format(
+                        message=log_message, archive=format_archive(archive)
+                    ))
             if to_delete and not args.dry_run:
             if to_delete and not args.dry_run:
                 manifest.write()
                 manifest.write()
                 repository.commit(save_space=args.save_space)
                 repository.commit(save_space=args.save_space)

+ 29 - 7
src/borg/helpers/misc.py

@@ -4,7 +4,7 @@ import os
 import os.path
 import os.path
 import platform
 import platform
 import sys
 import sys
-from collections import deque
+from collections import deque, OrderedDict
 from datetime import datetime, timezone, timedelta
 from datetime import datetime, timezone, timedelta
 from itertools import islice
 from itertools import islice
 from operator import attrgetter
 from operator import attrgetter
@@ -17,22 +17,44 @@ from .. import __version__ as borg_version
 from .. import chunker
 from .. import chunker
 
 
 
 
-def prune_within(archives, hours):
+def prune_within(archives, hours, kept_because):
     target = datetime.now(timezone.utc) - timedelta(seconds=hours * 3600)
     target = datetime.now(timezone.utc) - timedelta(seconds=hours * 3600)
-    return [a for a in archives if a.ts > target]
-
-
-def prune_split(archives, pattern, n, skip=[]):
+    kept_counter = 0
+    result = []
+    for a in archives:
+        if a.ts > target:
+            kept_counter += 1
+            kept_because[a.id] = ("within", kept_counter)
+            result.append(a)
+    return result
+
+
+PRUNING_PATTERNS = OrderedDict([
+    ("secondly", '%Y-%m-%d %H:%M:%S'),
+    ("minutely", '%Y-%m-%d %H:%M'),
+    ("hourly", '%Y-%m-%d %H'),
+    ("daily", '%Y-%m-%d'),
+    ("weekly", '%G-%V'),
+    ("monthly", '%Y-%m'),
+    ("yearly", '%Y'),
+])
+
+
+def prune_split(archives, rule, n, kept_because=None):
     last = None
     last = None
     keep = []
     keep = []
+    pattern = PRUNING_PATTERNS[rule]
+    if kept_because is None:
+        kept_because = {}
     if n == 0:
     if n == 0:
         return keep
         return keep
     for a in sorted(archives, key=attrgetter('ts'), reverse=True):
     for a in sorted(archives, key=attrgetter('ts'), reverse=True):
         period = to_localtime(a.ts).strftime(pattern)
         period = to_localtime(a.ts).strftime(pattern)
         if period != last:
         if period != last:
             last = period
             last = period
-            if a not in skip:
+            if a.id not in kept_because:
                 keep.append(a)
                 keep.append(a)
+                kept_because[a.id] = (rule, len(keep))
                 if len(keep) == n:
                 if len(keep) == n:
                     break
                     break
     return keep
     return keep

+ 10 - 10
src/borg/testsuite/archiver.py

@@ -6,6 +6,7 @@ import logging
 import os
 import os
 import pstats
 import pstats
 import random
 import random
+import re
 import shutil
 import shutil
 import socket
 import socket
 import stat
 import stat
@@ -1731,12 +1732,11 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('create', self.repository_location + '::test3.checkpoint.1', src_dir)
         self.cmd('create', self.repository_location + '::test3.checkpoint.1', src_dir)
         self.cmd('create', self.repository_location + '::test4.checkpoint', src_dir)
         self.cmd('create', self.repository_location + '::test4.checkpoint', src_dir)
         output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2')
         output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2')
-        self.assert_in('Keeping archive: test2', output)
-        self.assert_in('Would prune:     test1', output)
+        assert re.search(r'Would prune:\s+test1', output)
         # must keep the latest non-checkpoint archive:
         # must keep the latest non-checkpoint archive:
-        self.assert_in('Keeping archive: test2', output)
+        assert re.search(r'Keeping archive \(rule: daily #1\):\s+test2', output)
         # must keep the latest checkpoint archive:
         # must keep the latest checkpoint archive:
-        self.assert_in('Keeping archive: test4.checkpoint', output)
+        assert re.search(r'Keeping checkpoint archive:\s+test4.checkpoint', output)
         output = self.cmd('list', self.repository_location)
         output = self.cmd('list', self.repository_location)
         self.assert_in('test1', output)
         self.assert_in('test1', output)
         self.assert_in('test2', output)
         self.assert_in('test2', output)
@@ -1766,8 +1766,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('create', self.repository_location + '::test1', src_dir)
         self.cmd('create', self.repository_location + '::test1', src_dir)
         self.cmd('create', self.repository_location + '::test2', src_dir)
         self.cmd('create', self.repository_location + '::test2', src_dir)
         output = self.cmd('prune', '--list', '--stats', '--dry-run', self.repository_location, '--keep-daily=2')
         output = self.cmd('prune', '--list', '--stats', '--dry-run', self.repository_location, '--keep-daily=2')
-        self.assert_in('Keeping archive: test2', output)
-        self.assert_in('Would prune:     test1', output)
+        assert re.search(r'Keeping archive \(rule: daily #1\):\s+test2', output)
+        assert re.search(r'Would prune:\s+test1', output)
         self.assert_in('Deleted data:', output)
         self.assert_in('Deleted data:', output)
         output = self.cmd('list', self.repository_location)
         output = self.cmd('list', self.repository_location)
         self.assert_in('test1', output)
         self.assert_in('test1', output)
@@ -1784,8 +1784,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('create', self.repository_location + '::bar-2015-08-12-10:00', src_dir)
         self.cmd('create', self.repository_location + '::bar-2015-08-12-10:00', src_dir)
         self.cmd('create', self.repository_location + '::bar-2015-08-12-20:00', src_dir)
         self.cmd('create', self.repository_location + '::bar-2015-08-12-20:00', src_dir)
         output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2', '--prefix=foo-')
         output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2', '--prefix=foo-')
-        self.assert_in('Keeping archive: foo-2015-08-12-20:00', output)
-        self.assert_in('Would prune:     foo-2015-08-12-10:00', output)
+        assert re.search(r'Keeping archive \(rule: daily #1\):\s+foo-2015-08-12-20:00', output)
+        assert re.search(r'Would prune:\s+foo-2015-08-12-10:00', output)
         output = self.cmd('list', self.repository_location)
         output = self.cmd('list', self.repository_location)
         self.assert_in('foo-2015-08-12-10:00', output)
         self.assert_in('foo-2015-08-12-10:00', output)
         self.assert_in('foo-2015-08-12-20:00', output)
         self.assert_in('foo-2015-08-12-20:00', output)
@@ -1805,8 +1805,8 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('create', self.repository_location + '::2015-08-12-10:00-bar', src_dir)
         self.cmd('create', self.repository_location + '::2015-08-12-10:00-bar', src_dir)
         self.cmd('create', self.repository_location + '::2015-08-12-20:00-bar', src_dir)
         self.cmd('create', self.repository_location + '::2015-08-12-20:00-bar', src_dir)
         output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2', '--glob-archives=2015-*-foo')
         output = self.cmd('prune', '--list', '--dry-run', self.repository_location, '--keep-daily=2', '--glob-archives=2015-*-foo')
-        self.assert_in('Keeping archive: 2015-08-12-20:00-foo', output)
-        self.assert_in('Would prune:     2015-08-12-10:00-foo', output)
+        assert re.search(r'Keeping archive \(rule: daily #1\):\s+2015-08-12-20:00-foo', output)
+        assert re.search(r'Would prune:\s+2015-08-12-10:00-foo', output)
         output = self.cmd('list', self.repository_location)
         output = self.cmd('list', self.repository_location)
         self.assert_in('2015-08-12-10:00-foo', output)
         self.assert_in('2015-08-12-10:00-foo', output)
         self.assert_in('2015-08-12-20:00-foo', output)
         self.assert_in('2015-08-12-20:00-foo', output)

+ 55 - 35
src/borg/testsuite/helpers.py

@@ -1,11 +1,10 @@
 import hashlib
 import hashlib
-import io
 import os
 import os
 import shutil
 import shutil
 import sys
 import sys
 from argparse import ArgumentTypeError
 from argparse import ArgumentTypeError
 from datetime import datetime, timezone, timedelta
 from datetime import datetime, timezone, timedelta
-from time import mktime, strptime, sleep
+from time import sleep
 
 
 import pytest
 import pytest
 
 
@@ -333,40 +332,56 @@ class MakePathSafeTestCase(BaseTestCase):
 
 
 class MockArchive:
 class MockArchive:
 
 
-    def __init__(self, ts):
+    def __init__(self, ts, id):
         self.ts = ts
         self.ts = ts
+        self.id = id
 
 
     def __repr__(self):
     def __repr__(self):
-        return repr(self.ts)
-
-
-class PruneSplitTestCase(BaseTestCase):
-
-    def test(self):
-
-        def local_to_UTC(month, day):
-            """Convert noon on the month and day in 2013 to UTC."""
-            seconds = mktime(strptime('2013-%02d-%02d 12:00' % (month, day), '%Y-%m-%d %H:%M'))
-            return datetime.fromtimestamp(seconds, tz=timezone.utc)
-
-        def subset(lst, indices):
-            return {lst[i] for i in indices}
-
-        def dotest(test_archives, n, skip, indices):
-            for ta in test_archives, reversed(test_archives):
-                self.assert_equal(set(prune_split(ta, '%Y-%m', n, skip)),
-                                  subset(test_archives, indices))
-
-        test_pairs = [(1, 1), (2, 1), (2, 28), (3, 1), (3, 2), (3, 31), (5, 1)]
-        test_dates = [local_to_UTC(month, day) for month, day in test_pairs]
-        test_archives = [MockArchive(date) for date in test_dates]
-
-        dotest(test_archives, 3, [], [6, 5, 2])
-        dotest(test_archives, -1, [], [6, 5, 2, 0])
-        dotest(test_archives, 3, [test_archives[6]], [5, 2, 0])
-        dotest(test_archives, 3, [test_archives[5]], [6, 2, 0])
-        dotest(test_archives, 3, [test_archives[4]], [6, 5, 2])
-        dotest(test_archives, 0, [], [])
+        return "{0}: {1}".format(self.id, self.ts.isoformat())
+
+
+@pytest.mark.parametrize(
+    "rule,num_to_keep,expected_ids", [
+        ("yearly", 3, (13, 2, 1)),
+        ("monthly", 3, (13, 8, 4)),
+        ("weekly", 2, (13, 8)),
+        ("daily", 3, (13, 8, 7)),
+        ("hourly", 3, (13, 10, 8)),
+        ("minutely", 3, (13, 10, 9)),
+        ("secondly", 4, (13, 12, 11, 10)),
+        ("daily", 0, []),
+    ]
+)
+def test_prune_split(rule, num_to_keep, expected_ids):
+    def subset(lst, ids):
+        return {i for i in lst if i.id in ids}
+
+    archives = [
+        # years apart
+        MockArchive(datetime(2015, 1, 1, 10, 0, 0, tzinfo=timezone.utc), 1),
+        MockArchive(datetime(2016, 1, 1, 10, 0, 0, tzinfo=timezone.utc), 2),
+        MockArchive(datetime(2017, 1, 1, 10, 0, 0, tzinfo=timezone.utc), 3),
+        # months apart
+        MockArchive(datetime(2017, 2, 1, 10, 0, 0, tzinfo=timezone.utc), 4),
+        MockArchive(datetime(2017, 3, 1, 10, 0, 0, tzinfo=timezone.utc), 5),
+        # days apart
+        MockArchive(datetime(2017, 3, 2, 10, 0, 0, tzinfo=timezone.utc), 6),
+        MockArchive(datetime(2017, 3, 3, 10, 0, 0, tzinfo=timezone.utc), 7),
+        MockArchive(datetime(2017, 3, 4, 10, 0, 0, tzinfo=timezone.utc), 8),
+        # minutes apart
+        MockArchive(datetime(2017, 10, 1, 9, 45, 0, tzinfo=timezone.utc), 9),
+        MockArchive(datetime(2017, 10, 1, 9, 55, 0, tzinfo=timezone.utc), 10),
+        # seconds apart
+        MockArchive(datetime(2017, 10, 1, 10, 0, 1, tzinfo=timezone.utc), 11),
+        MockArchive(datetime(2017, 10, 1, 10, 0, 3, tzinfo=timezone.utc), 12),
+        MockArchive(datetime(2017, 10, 1, 10, 0, 5, tzinfo=timezone.utc), 13),
+    ]
+    kept_because = {}
+    keep = prune_split(archives, rule, num_to_keep, kept_because)
+
+    assert set(keep) == subset(archives, expected_ids)
+    for item in keep:
+        assert kept_because[item.id][0] == rule
 
 
 
 
 class IntervalTestCase(BaseTestCase):
 class IntervalTestCase(BaseTestCase):
@@ -410,14 +425,19 @@ class PruneWithinTestCase(BaseTestCase):
 
 
         def dotest(test_archives, within, indices):
         def dotest(test_archives, within, indices):
             for ta in test_archives, reversed(test_archives):
             for ta in test_archives, reversed(test_archives):
-                self.assert_equal(set(prune_within(ta, interval(within))),
+                kept_because = {}
+                keep = prune_within(ta, interval(within), kept_because)
+                self.assert_equal(set(keep),
                                   subset(test_archives, indices))
                                   subset(test_archives, indices))
+                assert all("within" == kept_because[a.id][0] for a in keep)
 
 
         # 1 minute, 1.5 hours, 2.5 hours, 3.5 hours, 25 hours, 49 hours
         # 1 minute, 1.5 hours, 2.5 hours, 3.5 hours, 25 hours, 49 hours
         test_offsets = [60, 90*60, 150*60, 210*60, 25*60*60, 49*60*60]
         test_offsets = [60, 90*60, 150*60, 210*60, 25*60*60, 49*60*60]
         now = datetime.now(timezone.utc)
         now = datetime.now(timezone.utc)
         test_dates = [now - timedelta(seconds=s) for s in test_offsets]
         test_dates = [now - timedelta(seconds=s) for s in test_offsets]
-        test_archives = [MockArchive(date) for date in test_dates]
+        test_archives = [
+            MockArchive(date, i) for i, date in enumerate(test_dates)
+        ]
 
 
         dotest(test_archives, '1H', [0])
         dotest(test_archives, '1H', [0])
         dotest(test_archives, '2H', [0, 1])
         dotest(test_archives, '2H', [0, 1])