Bläddra i källkod

Add quarterly pruning strategy. (#8337)

Add 13weekly and 3monthly quarterly pruning strategies.
William D. Jones 9 månader sedan
förälder
incheckning
81cb1cdc56
4 ändrade filer med 165 tillägg och 14 borttagningar
  1. 24 0
      docs/misc/prune-example.txt
  2. 16 5
      src/borg/archiver.py
  3. 55 9
      src/borg/helpers/misc.py
  4. 70 0
      src/borg/testsuite/archiver.py

+ 24 - 0
docs/misc/prune-example.txt

@@ -100,3 +100,27 @@ example simple. They all work in basically the same way.
 
 The weekly rule is easy to understand roughly, but hard to understand in all
 details. If interested, read "ISO 8601:2000 standard week-based year".
+
+The 13weekly and 3monthly rules are two different strategies for keeping one
+backup every quarter of a year. There are `multiple ways`_ to define a quarter-year;
+borg prune recognizes two:
+
+* --keep-13weekly keeps one backup every 13 weeks using ISO 8601:2000's
+  definition of the week-based year. January 4th is always included in the
+  first week of a year, and January 1st to 3rd may be in week 52 or 53 of the
+  previous year. Week 53 is also in the fourth quarter of the year.
+* --keep-3monthly keeps one backup every 3 months. January 1st to
+  March 31st, April 1st to June 30th, July 1st to September 30th, and October 1st
+  to December 31st form the quarters.
+
+If the subtleties of the definition of a quarter year don't matter to you, a
+short summary of behavior is:
+
+* --keep-13weekly favors keeping backups at the beginning of Jan, Apr, Jul,
+  and Oct.
+* --keep-3monthly favors keeping backups at the end of Dec, Mar, Jun, and Sep.
+* Both strategies will have some overlap in which backups are kept.
+* The differences are negligible unless backups considered for deletion were
+  created weekly or more frequently.
+
+.. _multiple ways: https://en.wikipedia.org/wiki/Calendar_year#Quarter_year

+ 16 - 5
src/borg/archiver.py

@@ -1529,10 +1529,12 @@ class Archiver:
     def do_prune(self, args, repository, manifest, key):
         """Prune repository archives according to specified rules"""
         if not any((args.secondly, args.minutely, args.hourly, args.daily,
-                    args.weekly, args.monthly, args.yearly, args.within)):
+                    args.weekly, args.monthly, args.quarterly_13weekly,
+                    args.quarterly_3monthly, args.yearly, args.within)):
             raise CommandError('At least one of the "keep-within", "keep-last", '
                                '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
-                               '"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.')
+                               '"keep-weekly", "keep-monthly", "keep-13weekly", "keep-3monthly", '
+                               'or "keep-yearly" settings must be specified.')
         if args.prefix is not None:
             args.glob_archives = args.prefix + '*'
         checkpoint_re = r'\.checkpoint(\.\d+)?'
@@ -4684,9 +4686,13 @@ class Archiver:
         the local timezone, and weeks go from Monday to Sunday. Specifying a
         negative number of archives to keep means that there is no limit. As of borg
         1.2.0, borg will retain the oldest archive if any of the secondly, minutely,
-        hourly, daily, weekly, monthly, or yearly rules was not otherwise able to meet
-        its retention target. This enables the first chronological archive to continue
-        aging until it is replaced by a newer archive that meets the retention criteria.
+        hourly, daily, weekly, monthly, quarterly, or yearly rules was not otherwise
+        able to meet its retention target. This enables the first chronological archive
+        to continue aging until it is replaced by a newer archive that meets the
+        retention criteria.
+
+        The ``--keep-13weekly`` and ``--keep-3monthly`` rules are two different
+        strategies for keeping archives every quarter year.
 
         The ``--keep-last N`` option is doing the same as ``--keep-secondly N`` (and it will
         keep the last N archives under the assumption that you do not create more than one
@@ -4726,6 +4732,11 @@ class Archiver:
                                help='number of weekly archives to keep')
         subparser.add_argument('-m', '--keep-monthly', dest='monthly', type=int, default=0,
                                help='number of monthly archives to keep')
+        quarterly_group = subparser.add_mutually_exclusive_group()
+        quarterly_group.add_argument('--keep-13weekly', dest='quarterly_13weekly', type=int, default=0,
+                                     help='number of quarterly archives to keep (13 week strategy)')
+        quarterly_group.add_argument('--keep-3monthly', dest='quarterly_3monthly', type=int, default=0,
+                                     help='number of quarterly archives to keep (3 month strategy)')
         subparser.add_argument('-y', '--keep-yearly', dest='yearly', type=int, default=0,
                                help='number of yearly archives to keep')
         define_archive_filters_group(subparser, sort_by=False, first_last=False)

+ 55 - 9
src/borg/helpers/misc.py

@@ -31,21 +31,67 @@ def prune_within(archives, hours, kept_because):
     return result
 
 
+def default_period_func(pattern):
+    """Build a period function that formats an archive's local time.
+
+    The returned callable takes an archive and returns
+    ``to_localtime(archive.ts).strftime(pattern)``, i.e. a string naming
+    the period the archive falls into.
+    """
+    def period_of(archive):
+        local_ts = to_localtime(archive.ts)
+        return local_ts.strftime(pattern)
+
+    return period_of
+
+
+def quarterly_13weekly_period_func(a):
+    """Return the ``(ISO year, quarter)`` period of archive *a*.
+
+    Quarters are runs of 13 ISO 8601 weeks taken from the archive's local
+    time: weeks 1-13, 14-26, 27-39, and 40 onward. Because the year is the
+    ISO week-based year, the first days of January can count towards the
+    fourth quarter of the previous year.
+    """
+    iso_year, iso_week, _weekday = to_localtime(a.ts).isocalendar()
+    # The zero-based week index split into 13-week runs gives quarters 1-4
+    # for weeks 1-52; the cap folds week 53 (long years) into quarter 4.
+    quarter = min((iso_week - 1) // 13 + 1, 4)
+    return (iso_year, quarter)
+
+
+def quarterly_3monthly_period_func(a):
+    """Return the ``(year, quarter)`` period of archive *a*.
+
+    Quarters are calendar quarters of the archive's local time: January to
+    March is 1, April to June is 2, July to September is 3, and October to
+    December is 4.
+    """
+    local_ts = to_localtime(a.ts)
+    # Months 1-3 -> 1, 4-6 -> 2, 7-9 -> 3, 10-12 -> 4.
+    quarter = (local_ts.month - 1) // 3 + 1
+    return (local_ts.year, quarter)
+
+
 PRUNING_PATTERNS = OrderedDict([
-    ("secondly", '%Y-%m-%d %H:%M:%S'),
-    ("minutely", '%Y-%m-%d %H:%M'),
-    ("hourly", '%Y-%m-%d %H'),
-    ("daily", '%Y-%m-%d'),
-    ("weekly", '%G-%V'),
-    ("monthly", '%Y-%m'),
-    ("yearly", '%Y'),
+    ("secondly", default_period_func('%Y-%m-%d %H:%M:%S')),
+    ("minutely", default_period_func('%Y-%m-%d %H:%M')),
+    ("hourly", default_period_func('%Y-%m-%d %H')),
+    ("daily", default_period_func('%Y-%m-%d')),
+    ("weekly", default_period_func('%G-%V')),
+    ("monthly", default_period_func('%Y-%m')),
+    ("quarterly_13weekly", quarterly_13weekly_period_func),
+    ("quarterly_3monthly", quarterly_3monthly_period_func),
+    ("yearly", default_period_func('%Y')),
 ])
 
 
 def prune_split(archives, rule, n, kept_because=None):
     last = None
     keep = []
-    pattern = PRUNING_PATTERNS[rule]
+    period_func = PRUNING_PATTERNS[rule]
     if kept_because is None:
         kept_because = {}
     if n == 0:
@@ -53,7 +99,7 @@ def prune_split(archives, rule, n, kept_because=None):
 
     a = None
     for a in sorted(archives, key=attrgetter('ts'), reverse=True):
-        period = to_localtime(a.ts).strftime(pattern)
+        period = period_func(a)
         if period != last:
             last = period
             if a.id not in kept_because:

+ 70 - 0
src/borg/testsuite/archiver.py

@@ -2315,6 +2315,76 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         for i in range(22, 25):
             self.assert_not_in('test%02d' % i, output)
 
+    def test_prune_quarterly(self):
+        """Check --keep-13weekly and --keep-3monthly against a hand-worked example.
+
+        For each strategy a fresh repository is filled with the same set of
+        dated archives, pruned (first as a dry run, then for real), and the
+        kept/pruned archives are compared with the expected lists below.
+        """
+        # Example worked through by hand when developing quarterly
+        # strategy, based upon existing backups where quarterly strategy
+        # is desired. Weekly/monthly backups that don't affect results were
+        # trimmed to speed up the test.
+        #
+        # ISO week number is shown in the comment for every row in the list
+        # below. The ISO year is also shown when it doesn't match the year
+        # given in the date tuple.
+        test_dates = [
+                (2020, 12, 6), (2021, 1, 3),   # 49, 2020-53
+                (2021, 3, 28), (2021, 4, 25),  # 12, 16
+                (2021, 6, 27), (2021, 7, 4),   # 25, 26
+                (2021, 9, 26), (2021, 10, 3),  # 38, 39
+                (2021, 12, 26), (2022, 1, 2)   # 51, 2021-52
+        ]
+
+        def mk_name(tup):
+            # Archive name derived from the date, e.g. "test-2021-03-28".
+            (y, m, d) = tup
+            suff = datetime(y, m, d).strftime("%Y-%m-%d")
+            return f"test-{suff}"
+
+        # The kept archives are based on working through the example by hand;
+        # archives made on the following dates should be kept. The first
+        # entry of each list must be the oldest archive (2020-12-06), which is
+        # checked separately below.
+        EXPECTED_KEPT = {
+            "13weekly": [
+                (2020, 12, 6), (2021, 1, 3), (2021, 3, 28), (2021, 7, 4),
+                (2021, 10, 3), (2022, 1, 2)
+            ],
+            "3monthly": [
+                (2020, 12, 6), (2021, 3, 28), (2021, 6, 27), (2021, 9, 26),
+                (2021, 12, 26), (2022, 1, 2)
+            ]
+        }
+
+        for (strat, to_keep) in EXPECTED_KEPT.items():
+            # Initialize our repo.
+            self.cmd('init', '--encryption=repokey', self.repository_location)
+            for a, (y, m, d) in zip(map(mk_name, test_dates), test_dates):
+                self._create_archive_ts(a, y, m, d)
+
+            to_prune = list(set(test_dates) - set(to_keep))
+
+            # Use 99 instead of -1 to test that oldest backup is kept.
+            output = self.cmd('prune', '--list', '--dry-run', self.repository_location, f"--keep-{strat}=99")
+            for a in map(mk_name, to_prune):
+                assert re.search(fr"Would prune:\s+{a}", output)
+
+            # With 13weekly, 2020-12-06 shares its quarter (ISO 2020, Q4) with
+            # the newer 2021-01-03 archive, so it survives only as the oldest
+            # archive. With 3monthly it is the last archive of 2020 Q4 and is
+            # kept by the rule itself, hence no "[oldest]" marker.
+            # Plain equality: `strat in ("13weekly")` would be a substring
+            # test against a string, not a tuple membership test.
+            oldest = r"\[oldest\]" if strat == "13weekly" else ""
+            assert re.search(fr"Keeping archive \(rule: quarterly_{strat}{oldest} #\d+\):\s+test-2020-12-06", output)
+            for a in map(mk_name, to_keep[1:]):
+                assert re.search(fr"Keeping archive \(rule: quarterly_{strat} #\d+\):\s+{a}", output)
+
+            output = self.cmd('list', self.repository_location)
+            # Nothing pruned after dry run
+            for a in map(mk_name, test_dates):
+                self.assert_in(a, output)
+
+            self.cmd('prune', self.repository_location, f"--keep-{strat}=99")
+            output = self.cmd('list', self.repository_location)
+            # All matching backups plus oldest kept
+            for a in map(mk_name, to_keep):
+                self.assert_in(a, output)
+            # Other backups have been pruned
+            for a in map(mk_name, to_prune):
+                self.assert_not_in(a, output)
+
+            # Delete repo and begin anew
+            self.cmd('delete', self.repository_location)
+
+
     # With an initial and daily backup, prune daily until oldest is replaced by a monthly backup
     def test_prune_retain_and_expire_oldest(self):
         self.cmd('init', '--encryption=repokey', self.repository_location)