瀏覽代碼

Merge pull request #8459 from ThomasWaldmann/prune-quarterly-master

Add quarterly pruning strategy.
TW 8 月之前
父節點
當前提交
ee386d0bef
共有 3 個文件被更改,包括 189 次插入16 次删除
  1. 24 0
      docs/misc/prune-example.txt
  2. 94 16
      src/borg/archiver/prune_cmd.py
  3. 71 0
      src/borg/testsuite/archiver/prune_cmd_test.py

+ 24 - 0
docs/misc/prune-example.txt

@@ -100,3 +100,27 @@ example simple. They all work in basically the same way.
 
 The weekly rule is easy to understand roughly, but hard to understand in all
 details. If interested, read "ISO 8601:2000 standard week-based year".
+
+The 13weekly and 3monthly rules are two different strategies for keeping one
+archive every quarter of a year. There are `multiple ways`_ to define a
+quarter-year; borg prune recognizes two:
+
+* --keep-13weekly keeps one backup every 13 weeks using ISO 8601:2000's
+  definition of the week-based year. January 4th is always included in the
+  first week of a year, and January 1st to 3rd may be in week 52 or 53 of the
+  previous year. Week 53, when a year has one, also counts as fourth quarter.
+* --keep-3monthly keeps one backup every 3 months. January 1st to
+  March 31st, April 1st to June 30th, July 1st to September 30th, and October
+  1st to December 31st form the quarters.
+
+If the subtleties of the definition of a quarter year don't matter to you, a
+short summary of behavior is:
+
+* --keep-13weekly favors keeping backups at the beginning of Jan, Apr, July,
+  and Oct.
+* --keep-3monthly favors keeping backups at the end of Dec, Mar, Jun, and Sept.
+* Both strategies will have some overlap in which backups are kept.
+* The differences are negligible unless backups considered for deletion were
+  created weekly or more frequently.
+
+.. _multiple ways: https://en.wikipedia.org/wiki/Calendar_year#Quarter_year

+ 94 - 16
src/borg/archiver/prune_cmd.py

@@ -30,15 +30,62 @@ def prune_within(archives, hours, kept_because):
     return result
 
 
+def default_period_func(pattern):  # build a period function from a strftime pattern
+    def inner(a):
+        # Period key: a.ts converted to the local timezone, formatted with pattern.
+        return a.ts.astimezone().strftime(pattern)
+
+    return inner
+
+
+def quarterly_13weekly_period_func(a):  # returns (ISO year, quarter 1-4) as the period key
+    (year, week, _) = a.ts.astimezone().isocalendar()  # ISO year and week, in local time
+    if week <= 13:
+        # Q1 = ISO weeks 1-13 (13 * 7 = 91 days), starting with the week that
+        # contains Jan 4th.
+        return (year, 1)
+    elif 14 <= week <= 26:
+        # Q2 = ISO weeks 14-26, the next 91 days; week 14 contains Apr 4th
+        # (leap year) or Apr 5th.
+        return (year, 2)
+    elif 27 <= week <= 39:
+        # Q3 = ISO weeks 27-39, the next 91 days; week 27 contains Jul 4th
+        # (leap year) or Jul 5th.
+        return (year, 3)
+    else:
+        # Q4 = ISO weeks 40 and up: 13 weeks, or 14 when the ISO year has a
+        # week 53. Because `year` is the ISO year, this quarter can extend
+        # to Jan 3rd of the following calendar year.
+        return (year, 4)
+
+
+def quarterly_3monthly_period_func(a):  # returns (calendar year, quarter 1-4) as the period key
+    lt = a.ts.astimezone()  # archive timestamp in local time
+    if lt.month <= 3:
+        # Q1: Jan 1st to Mar 31st
+        return (lt.year, 1)
+    elif 4 <= lt.month <= 6:
+        # Q2: Apr 1st to Jun 30th
+        return (lt.year, 2)
+    elif 7 <= lt.month <= 9:
+        # Q3: Jul 1st to Sep 30th
+        return (lt.year, 3)
+    else:
+        # Q4: Oct 1st to Dec 31st
+        return (lt.year, 4)
+
+
 PRUNING_PATTERNS = OrderedDict(
     [
-        ("secondly", "%Y-%m-%d %H:%M:%S"),
-        ("minutely", "%Y-%m-%d %H:%M"),
-        ("hourly", "%Y-%m-%d %H"),
-        ("daily", "%Y-%m-%d"),
-        ("weekly", "%G-%V"),
-        ("monthly", "%Y-%m"),
-        ("yearly", "%Y"),
+        ("secondly", default_period_func("%Y-%m-%d %H:%M:%S")),
+        ("minutely", default_period_func("%Y-%m-%d %H:%M")),
+        ("hourly", default_period_func("%Y-%m-%d %H")),
+        ("daily", default_period_func("%Y-%m-%d")),
+        ("weekly", default_period_func("%G-%V")),
+        ("monthly", default_period_func("%Y-%m")),
+        ("quarterly_13weekly", quarterly_13weekly_period_func),
+        ("quarterly_3monthly", quarterly_3monthly_period_func),
+        ("yearly", default_period_func("%Y")),
     ]
 )
 
@@ -46,7 +93,7 @@ PRUNING_PATTERNS = OrderedDict(
 def prune_split(archives, rule, n, kept_because=None):
     last = None
     keep = []
-    pattern = PRUNING_PATTERNS[rule]
+    period_func = PRUNING_PATTERNS[rule]
     if kept_because is None:
         kept_because = {}
     if n == 0:
@@ -54,8 +101,7 @@ def prune_split(archives, rule, n, kept_because=None):
 
     a = None
     for a in sorted(archives, key=attrgetter("ts"), reverse=True):
-        # we compute the pruning in local time zone
-        period = a.ts.astimezone().strftime(pattern)
+        period = period_func(a)
         if period != last:
             last = period
             if a.id not in kept_because:
@@ -75,12 +121,24 @@ class PruneMixIn:
     def do_prune(self, args, repository, manifest):
         """Prune repository archives according to specified rules"""
         if not any(
-            (args.secondly, args.minutely, args.hourly, args.daily, args.weekly, args.monthly, args.yearly, args.within)
+            (
+                args.secondly,
+                args.minutely,
+                args.hourly,
+                args.daily,
+                args.weekly,
+                args.monthly,
+                args.quarterly_13weekly,
+                args.quarterly_3monthly,
+                args.yearly,
+                args.within,
+            )
         ):
             raise CommandError(
                 'At least one of the "keep-within", "keep-last", '
                 '"keep-secondly", "keep-minutely", "keep-hourly", "keep-daily", '
-                '"keep-weekly", "keep-monthly" or "keep-yearly" settings must be specified.'
+                '"keep-weekly", "keep-monthly", "keep-13weekly", "keep-3monthly", '
+                'or "keep-yearly" settings must be specified.'
             )
 
         if args.format is not None:
@@ -190,10 +248,15 @@ class PruneMixIn:
         starts is used for pruning purposes. Dates and times are interpreted in the local
         timezone of the system where borg prune runs, and weeks go from Monday to Sunday.
         Specifying a negative number of archives to keep means that there is no limit.
-        As of borg 1.2.0, borg will retain the oldest archive if any of the secondly,
-        minutely, hourly, daily, weekly, monthly, or yearly rules was not otherwise able to
-        meet its retention target. This enables the first chronological archive to continue
-        aging until it is replaced by a newer archive that meets the retention criteria.
+
+        Borg will retain the oldest archive if any of the secondly, minutely, hourly,
+        daily, weekly, monthly, quarterly, or yearly rules was not otherwise able to
+        meet its retention target. This enables the first chronological archive to
+        continue aging until it is replaced by a newer archive that meets the retention
+        criteria.
+
+        The ``--keep-13weekly`` and ``--keep-3monthly`` rules are two different
+        strategies for keeping archives every quarter year.
 
         The ``--keep-last N`` option is doing the same as ``--keep-secondly N`` (and it will
         keep the last N archives under the assumption that you do not create more than one
@@ -293,6 +356,21 @@ class PruneMixIn:
             action=Highlander,
             help="number of monthly archives to keep",
         )
+        quarterly_group = subparser.add_mutually_exclusive_group()
+        quarterly_group.add_argument(
+            "--keep-13weekly",
+            dest="quarterly_13weekly",
+            type=int,
+            default=0,
+            help="number of quarterly archives to keep (13 week strategy)",
+        )
+        quarterly_group.add_argument(
+            "--keep-3monthly",
+            dest="quarterly_3monthly",
+            type=int,
+            default=0,
+            help="number of quarterly archives to keep (3 month strategy)",
+        )
         subparser.add_argument(
             "-y",
             "--keep-yearly",

+ 71 - 0
src/borg/testsuite/archiver/prune_cmd_test.py

@@ -100,6 +100,77 @@ def test_prune_repository_example(archivers, request):
         assert "test%02d" % i not in output
 
 
+def test_prune_quarterly(archivers, request):
+    # Example worked through by hand when developing the quarterly
+    # strategies, based upon existing backups where a quarterly strategy
+    # is desired. Weekly/monthly backups that don't affect results were
+    # trimmed to speed up the test.
+    #
+    # The ISO week number is shown in a comment on every row of the list
+    # below. The ISO year is also shown when it doesn't match the year
+    # given in the date tuple.
+    archiver = request.getfixturevalue(archivers)
+    test_dates = [
+        (2020, 12, 6),  # 49
+        (2021, 1, 3),  # 2020-53
+        (2021, 3, 28),  # 12
+        (2021, 4, 25),  # 16
+        (2021, 6, 27),  # 25
+        (2021, 7, 4),  # 26
+        (2021, 9, 26),  # 38
+        (2021, 10, 3),  # 39
+        (2021, 12, 26),  # 51
+        (2022, 1, 2),  # 2021-52
+    ]
+
+    def mk_name(tup):
+        (y, m, d) = tup
+        suff = datetime(y, m, d).strftime("%Y-%m-%d")
+        return f"test-{suff}"
+
+    # The kept archives are based on working through an example by hand,
+    # archives made on the following dates should be kept:
+    EXPECTED_KEPT = {
+        "13weekly": [(2020, 12, 6), (2021, 1, 3), (2021, 3, 28), (2021, 7, 4), (2021, 10, 3), (2022, 1, 2)],
+        "3monthly": [(2020, 12, 6), (2021, 3, 28), (2021, 6, 27), (2021, 9, 26), (2021, 12, 26), (2022, 1, 2)],
+    }
+
+    for strat, to_keep in EXPECTED_KEPT.items():
+        # Initialize our repo.
+        cmd(archiver, "repo-create", RK_ENCRYPTION)
+        for a, (y, m, d) in zip(map(mk_name, test_dates), test_dates):
+            _create_archive_ts(archiver, a, y, m, d)
+
+        to_prune = list(set(test_dates) - set(to_keep))
+
+        # Use 99 instead of -1 to test that oldest backup is kept.
+        output = cmd(archiver, "prune", "--list", "--dry-run", f"--keep-{strat}=99")
+        for a in map(mk_name, to_prune):
+            assert re.search(rf"Would prune:\s+{a}", output)
+
+        oldest = r"\[oldest\]" if strat == "13weekly" else ""  # 13weekly keeps 2020-12-06 only as "oldest"
+        assert re.search(rf"Keeping archive \(rule: quarterly_{strat}{oldest} #\d+\):\s+test-2020-12-06", output)
+        for a in map(mk_name, to_keep[1:]):
+            assert re.search(rf"Keeping archive \(rule: quarterly_{strat} #\d+\):\s+{a}", output)
+
+        output = cmd(archiver, "repo-list")
+        # Nothing pruned after dry run
+        for a in map(mk_name, test_dates):
+            assert a in output
+
+        cmd(archiver, "prune", f"--keep-{strat}=99")
+        output = cmd(archiver, "repo-list")
+        # All matching backups plus oldest kept
+        for a in map(mk_name, to_keep):
+            assert a in output
+        # Other backups have been pruned
+        for a in map(mk_name, to_prune):
+            assert a not in output
+
+        # Delete repo and begin anew
+        cmd(archiver, "repo-delete")
+
+
 # With an initial and daily backup, prune daily until oldest is replaced by a monthly backup
 def test_prune_retain_and_expire_oldest(archivers, request):
     archiver = request.getfixturevalue(archivers)