Browse Source

Add quarterly pruning strategy. (#8337)

Add 13weekly and 3monthly quarterly pruning strategies.
William D. Jones 9 months ago
parent
commit
81cb1cdc56
4 changed files with 165 additions and 14 deletions
  1. 24 0
      docs/misc/prune-example.txt
  2. 16 5
      src/borg/archiver.py
  3. 55 9
      src/borg/helpers/misc.py
  4. 70 0
      src/borg/testsuite/archiver.py

+ 24 - 0
docs/misc/prune-example.txt

@@ -100,3 +100,27 @@ example simple. They all work in basically the same way.
 
 
 The weekly rule is easy to understand roughly, but hard to understand in all
 The weekly rule is easy to understand roughly, but hard to understand in all
 details. If interested, read "ISO 8601:2000 standard week-based year".
 details. If interested, read "ISO 8601:2000 standard week-based year".
+
+The 13weekly and 3monthly rules are two different strategies for keeping one
+backup every quarter of a year. There are `multiple ways`_ to define a
+quarter-year; borg prune recognizes two:
+
+* --keep-13weekly keeps one backup every 13 weeks using ISO 8601:2000's
+  definition of the week-based year. January 4th is always included in the
+  first week of a year, and January 1st to 3rd may be in week 52 or 53 of the
+  previous year. Week 53 is also in the fourth quarter of the year.
+* --keep-3monthly keeps one backup every 3 months. January 1st to
+  March 31st, April 1st to June 30th, July 1st to September 30th, and October
+  1st to December 31st form the quarters.
+
+If the subtleties of the definition of a quarter year don't matter to you, a
+short summary of behavior is:
+
+* --keep-13weekly favors keeping backups at the beginning of Jan, Apr, July,
+  and Oct.
+* --keep-3monthly favors keeping backups at the end of Dec, Mar, Jun, and Sept.
+* Both strategies will have some overlap in which backups are kept.
+* The differences are negligible unless backups considered for deletion were
+  created weekly or more frequently.
+
+.. _multiple ways: https://en.wikipedia.org/wiki/Calendar_year#Quarter_year

+ 16 - 5
src/borg/archiver.py

@@ -1529,10 +1529,12 @@ class Archiver:
     def do_prune(self, args, repository, manifest, key):
     def do_prune(self, args, repository, manifest, key):
         """Prune repository archives according to specified rules"""
         """Prune repository archives according to specified rules"""
         if not any((args.secondly, args.minutely, args.hourly, args.daily,
         if not any((args.secondly, args.minutely, args.hourly, args.daily,
-                    args.weekly, args.monthly, args.yearly, args.within)):
+                    args.weekly, args.monthly, args.quarterly_13weekly,
+                    args.quarterly_3monthly, args.yearly, args.within)):
             raise CommandError('At least one of the "keep-within", "keep-last", '
             raise CommandError('At least one of the "keep-within", "keep-last", '
                                '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
                                '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
-                               '"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.')
+                               '"keep-weekly", "keep-monthly", "keep-13weekly", "keep-3monthly", '
+                               'or "keep-yearly" settings must be specified.')
         if args.prefix is not None:
         if args.prefix is not None:
             args.glob_archives = args.prefix + '*'
             args.glob_archives = args.prefix + '*'
         checkpoint_re = r'\.checkpoint(\.\d+)?'
         checkpoint_re = r'\.checkpoint(\.\d+)?'
@@ -4684,9 +4686,13 @@ class Archiver:
         the local timezone, and weeks go from Monday to Sunday. Specifying a
         the local timezone, and weeks go from Monday to Sunday. Specifying a
         negative number of archives to keep means that there is no limit. As of borg
         negative number of archives to keep means that there is no limit. As of borg
         1.2.0, borg will retain the oldest archive if any of the secondly, minutely,
         1.2.0, borg will retain the oldest archive if any of the secondly, minutely,
-        hourly, daily, weekly, monthly, or yearly rules was not otherwise able to meet
-        its retention target. This enables the first chronological archive to continue
-        aging until it is replaced by a newer archive that meets the retention criteria.
+        hourly, daily, weekly, monthly, quarterly, or yearly rules was not otherwise
+        able to meet its retention target. This enables the first chronological archive
+        to continue aging until it is replaced by a newer archive that meets the
+        retention criteria.
+
+        The ``--keep-13weekly`` and ``--keep-3monthly`` rules are two different
+        strategies for keeping archives every quarter year.
 
 
         The ``--keep-last N`` option is doing the same as ``--keep-secondly N`` (and it will
         The ``--keep-last N`` option is doing the same as ``--keep-secondly N`` (and it will
         keep the last N archives under the assumption that you do not create more than one
         keep the last N archives under the assumption that you do not create more than one
@@ -4726,6 +4732,11 @@ class Archiver:
                                help='number of weekly archives to keep')
                                help='number of weekly archives to keep')
         subparser.add_argument('-m', '--keep-monthly', dest='monthly', type=int, default=0,
         subparser.add_argument('-m', '--keep-monthly', dest='monthly', type=int, default=0,
                                help='number of monthly archives to keep')
                                help='number of monthly archives to keep')
+        quarterly_group = subparser.add_mutually_exclusive_group()
+        quarterly_group.add_argument('--keep-13weekly', dest='quarterly_13weekly', type=int, default=0,
+                                     help='number of quarterly archives to keep (13 week strategy)')
+        quarterly_group.add_argument('--keep-3monthly', dest='quarterly_3monthly', type=int, default=0,
+                                     help='number of quarterly archives to keep (3 month strategy)')
         subparser.add_argument('-y', '--keep-yearly', dest='yearly', type=int, default=0,
         subparser.add_argument('-y', '--keep-yearly', dest='yearly', type=int, default=0,
                                help='number of yearly archives to keep')
                                help='number of yearly archives to keep')
         define_archive_filters_group(subparser, sort_by=False, first_last=False)
         define_archive_filters_group(subparser, sort_by=False, first_last=False)

+ 55 - 9
src/borg/helpers/misc.py

@@ -31,21 +31,67 @@ def prune_within(archives, hours, kept_because):
     return result
     return result
 
 
 
 
+def default_period_func(pattern):
+    def inner(a):
+        return to_localtime(a.ts).strftime(pattern)
+
+    return inner
+
+
+def quarterly_13weekly_period_func(a):
+    (year, week, _) = to_localtime(a.ts).isocalendar()
+    if week <= 13:
+        # Weeks containing Jan 4th to Mar 28th (leap year) or 29th- 91 (13*7)
+        # days later.
+        return (year, 1)
+    elif 14 <= week <= 26:
+        # Weeks containing Apr 4th (leap year) or 5th to Jun 27th or 28th- 91
+        # days later.
+        return (year, 2)
+    elif 27 <= week <= 39:
+        # Weeks containing Jul 4th (leap year) or 5th to Sep 26th or 27th-
+        # at least 91 days later.
+        return (year, 3)
+    else:
+        # Everything else, Oct 3rd (leap year) or 4th onward, will always
+        # include week of Dec 26th (leap year) or Dec 27th, may also include
+        # up to possibly Jan 3rd of next year.
+        return (year, 4)
+
+
+def quarterly_3monthly_period_func(a):
+    lt = to_localtime(a.ts)
+    if lt.month <= 3:
+        # 1-1 to 3-31
+        return (lt.year, 1)
+    elif 4 <= lt.month <= 6:
+        # 4-1 to 6-30
+        return (lt.year, 2)
+    elif 7 <= lt.month <= 9:
+        # 7-1 to 9-30
+        return (lt.year, 3)
+    else:
+        # 10-1 to 12-31
+        return (lt.year, 4)
+
+
 PRUNING_PATTERNS = OrderedDict([
 PRUNING_PATTERNS = OrderedDict([
-    ("secondly", '%Y-%m-%d %H:%M:%S'),
-    ("minutely", '%Y-%m-%d %H:%M'),
-    ("hourly", '%Y-%m-%d %H'),
-    ("daily", '%Y-%m-%d'),
-    ("weekly", '%G-%V'),
-    ("monthly", '%Y-%m'),
-    ("yearly", '%Y'),
+    ("secondly", default_period_func('%Y-%m-%d %H:%M:%S')),
+    ("minutely", default_period_func('%Y-%m-%d %H:%M')),
+    ("hourly", default_period_func('%Y-%m-%d %H')),
+    ("daily", default_period_func('%Y-%m-%d')),
+    ("weekly", default_period_func('%G-%V')),
+    ("monthly", default_period_func('%Y-%m')),
+    ("quarterly_13weekly", quarterly_13weekly_period_func),
+    ("quarterly_3monthly", quarterly_3monthly_period_func),
+    ("yearly", default_period_func('%Y')),
 ])
 ])
 
 
 
 
 def prune_split(archives, rule, n, kept_because=None):
 def prune_split(archives, rule, n, kept_because=None):
     last = None
     last = None
     keep = []
     keep = []
-    pattern = PRUNING_PATTERNS[rule]
+    period_func = PRUNING_PATTERNS[rule]
     if kept_because is None:
     if kept_because is None:
         kept_because = {}
         kept_because = {}
     if n == 0:
     if n == 0:
@@ -53,7 +99,7 @@ def prune_split(archives, rule, n, kept_because=None):
 
 
     a = None
     a = None
     for a in sorted(archives, key=attrgetter('ts'), reverse=True):
     for a in sorted(archives, key=attrgetter('ts'), reverse=True):
-        period = to_localtime(a.ts).strftime(pattern)
+        period = period_func(a)
         if period != last:
         if period != last:
             last = period
             last = period
             if a.id not in kept_because:
             if a.id not in kept_because:

+ 70 - 0
src/borg/testsuite/archiver.py

@@ -2315,6 +2315,76 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         for i in range(22, 25):
         for i in range(22, 25):
             self.assert_not_in('test%02d' % i, output)
             self.assert_not_in('test%02d' % i, output)
 
 
+    def test_prune_quarterly(self):
+        # Example worked through by hand when developing quarterly
+        # strategy, based upon existing backups where quarterly strategy
+        # is desired. Weekly/monthly backups that don't affect results were
+        # trimmed to speed up the test.
+        #
+        # Week number is shown in comment for every row in the below list.
+        # Year is also shown when it doesn't match the year given in the
+        # date tuple.
+        test_dates = [
+                (2020, 12, 6), (2021, 1, 3),   # 49, 2020-53
+                (2021, 3, 28), (2021, 4, 25),  # 12, 16
+                (2021, 6, 27), (2021, 7, 4),   # 25, 26
+                (2021, 9, 26), (2021, 10, 3),  # 38, 39
+                (2021, 12, 26), (2022, 1, 2)   # 51, 2021-52
+        ]
+
+        def mk_name(tup):
+            (y, m, d) = tup
+            suff = datetime(y, m, d).strftime("%Y-%m-%d")
+            return f"test-{suff}"
+
+        # The kept repos are based on working on an example by hand,
+        # archives made on the following dates should be kept:
+        EXPECTED_KEPT = {
+            "13weekly": [
+                (2020, 12, 6), (2021, 1, 3), (2021, 3, 28), (2021, 7, 4),
+                (2021, 10, 3), (2022, 1, 2)
+            ],
+            "3monthly": [
+                (2020, 12, 6), (2021, 3, 28), (2021, 6, 27), (2021, 9, 26),
+                (2021, 12, 26), (2022, 1, 2)
+            ]
+        }
+
+        for (strat, to_keep) in EXPECTED_KEPT.items():
+            # Initialize our repo.
+            self.cmd('init', '--encryption=repokey', self.repository_location)
+            for a, (y, m, d) in zip(map(mk_name, test_dates), test_dates):
+                self._create_archive_ts(a, y, m, d)
+
+            to_prune = list(set(test_dates) - set(to_keep))
+
+            # Use 99 instead of -1 to test that oldest backup is kept.
+            output = self.cmd('prune', '--list', '--dry-run', self.repository_location, f"--keep-{strat}=99")
+            for a in map(mk_name, to_prune):
+                assert re.search(fr"Would prune:\s+{a}", output)
+
+            oldest = r"\[oldest\]" if strat in ("13weekly") else ""
+            assert re.search(fr"Keeping archive \(rule: quarterly_{strat}{oldest} #\d+\):\s+test-2020-12-06", output)
+            for a in map(mk_name, to_keep[1:]):
+                assert re.search(fr"Keeping archive \(rule: quarterly_{strat} #\d+\):\s+{a}", output)
+
+            output = self.cmd('list', self.repository_location)
+            # Nothing pruned after dry run
+            for a in map(mk_name, test_dates):
+                self.assert_in(a, output)
+
+            self.cmd('prune', self.repository_location, f"--keep-{strat}=99")
+            output = self.cmd('list', self.repository_location)
+            # All matching backups plus oldest kept
+            for a in map(mk_name, to_keep):
+                self.assert_in(a, output)
+            # Other backups have been pruned
+            for a in map(mk_name, to_prune):
+                self.assert_not_in(a, output)
+
+            # Delete repo and begin anew
+            self.cmd('delete', self.repository_location)
+
     # With an initial and daily backup, prune daily until oldest is replaced by a monthly backup
     # With an initial and daily backup, prune daily until oldest is replaced by a monthly backup
     def test_prune_retain_and_expire_oldest(self):
     def test_prune_retain_and_expire_oldest(self):
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.cmd('init', '--encryption=repokey', self.repository_location)