Browse code

Support for date-based matching during archive listing (#7272)

check --archives: add --newer/--older/--newest/--oldest, fixes #7062

Options accept a timespan, like Nd for N days or Nm for N months.

Use these to do date-based matching on archives and only check some of them,
like: borg check --archives --newer=1m --newest=7d

Author: Michael Deyaso <mdeyaso@fusioniq.io>
Michael Deyaso, 2 years ago
parent
current commit
b2654bc17d

+ 34 - 6
src/borg/archive.py

@@ -1700,16 +1700,31 @@ class ArchiveChecker:
         self.error_found = False
         self.possibly_superseded = set()
 
-    def check(self, repository, repair=False, first=0, last=0, sort_by="", match=None, verify_data=False):
+    def check(
+        self,
+        repository,
+        repair=False,
+        first=0,
+        last=0,
+        sort_by="",
+        match=None,
+        older=None,
+        newer=None,
+        oldest=None,
+        newest=None,
+        verify_data=False,
+    ):
         """Perform a set of checks on 'repository'
 
         :param repair: enable repair mode, write updated or corrected data into repository
         :param first/last/sort_by: only check this number of first/last archives ordered by sort_by
         :param match: only check archives matching this pattern
+        :param older/newer: only check archives older/newer than timedelta from now
+        :param oldest/newest: only check archives older/newer than timedelta from oldest/newest archive timestamp
         :param verify_data: integrity verification of data referenced by archives
         """
         logger.info("Starting archive consistency check...")
-        self.check_all = not any((first, last, match))
+        self.check_all = not any((first, last, match, older, newer, oldest, newest))
         self.repair = repair
         self.repository = repository
         self.init_chunks()
@@ -1732,7 +1747,9 @@ class ArchiveChecker:
                 self.error_found = True
                 del self.chunks[Manifest.MANIFEST_ID]
                 self.manifest = self.rebuild_manifest()
-        self.rebuild_refcounts(match=match, first=first, last=last, sort_by=sort_by)
+        self.rebuild_refcounts(
+            match=match, first=first, last=last, sort_by=sort_by, older=older, oldest=oldest, newer=newer, newest=newest
+        )
         self.orphan_chunks_check()
         self.finish()
         if self.error_found:
@@ -1927,7 +1944,9 @@ class ArchiveChecker:
         logger.info("Manifest rebuild complete.")
         return manifest
 
-    def rebuild_refcounts(self, first=0, last=0, sort_by="", match=None):
+    def rebuild_refcounts(
+        self, first=0, last=0, sort_by="", match=None, older=None, newer=None, oldest=None, newest=None
+    ):
         """Rebuild object reference counts by walking the metadata
 
         Missing and/or incorrect data is repaired when detected
@@ -2121,8 +2140,17 @@ class ArchiveChecker:
                     i += 1
 
         sort_by = sort_by.split(",")
-        if any((first, last, match)):
-            archive_infos = self.manifest.archives.list(sort_by=sort_by, match=match, first=first, last=last)
+        if any((first, last, match, older, newer, newest, oldest)):
+            archive_infos = self.manifest.archives.list(
+                sort_by=sort_by,
+                match=match,
+                first=first,
+                last=last,
+                oldest=oldest,
+                newest=newest,
+                older=older,
+                newer=newer,
+            )
             if match and not archive_infos:
                 logger.warning("--match-archives %s does not match any archives", match)
             if first and len(archive_infos) < first:

+ 36 - 2
src/borg/archiver/_common.py

@@ -8,7 +8,7 @@ from ..archive import Archive
 from ..constants import *  # NOQA
 from ..cache import Cache, assert_secure
 from ..helpers import Error
-from ..helpers import SortBySpec, positive_int_validator, location_validator, Location
+from ..helpers import SortBySpec, positive_int_validator, location_validator, Location, relative_time_marker_validator
 from ..helpers.nanorst import rst_to_terminal
 from ..manifest import Manifest, AI_HUMAN_SORT_KEYS
 from ..patterns import PatternMatcher
@@ -353,7 +353,7 @@ def define_exclusion_group(subparser, **kwargs):
     return exclude_group
 
 
-def define_archive_filters_group(subparser, *, sort_by=True, first_last=True):
+def define_archive_filters_group(subparser, *, sort_by=True, first_last=True, oldest_newest=True, older_newer=True):
     filters_group = subparser.add_argument_group(
         "Archive filters", "Archive filters can be applied to repository targets."
     )
@@ -399,6 +399,40 @@ def define_archive_filters_group(subparser, *, sort_by=True, first_last=True):
             help="consider last N archives after other filters were applied",
         )
 
+    if oldest_newest:
+        group = filters_group.add_mutually_exclusive_group()
+        group.add_argument(
+            "--oldest",
+            metavar="TIMESTAMP",
+            type=relative_time_marker_validator,
+            dest="oldest",
+            help="consider archives between the oldest archive's timestamp and the TIMESTAMP offset. e.g. 3d 7m",
+        )
+        group.add_argument(
+            "--newest",
+            metavar="TIMESTAMP",
+            type=relative_time_marker_validator,
+            dest="newest",
+            help="consider archives between the newest archive's timestamp and the TIMESTAMP offset. e.g. 3d 7m",
+        )
+
+    if older_newer:
+        group = filters_group.add_mutually_exclusive_group()
+        group.add_argument(
+            "--older",
+            metavar="TIMESTAMP",
+            type=relative_time_marker_validator,
+            dest="older",
+            help="consider archives older than (now - TIMESTAMP). e.g. 3d 7m",
+        )
+        group.add_argument(
+            "--newer",
+            metavar="TIMESTAMP",
+            type=relative_time_marker_validator,
+            dest="newer",
+            help="consider archives after (now - TIMESTAMP). e.g. 3d 7m",
+        )
+
     return filters_group
 
 

+ 4 - 0
src/borg/archiver/check_cmd.py

@@ -57,6 +57,10 @@ class CheckMixIn:
             sort_by=args.sort_by or "ts",
             match=args.match_archives,
             verify_data=args.verify_data,
+            oldest=args.oldest,
+            newest=args.newest,
+            older=args.older,
+            newer=args.newer,
         ):
             return EXIT_WARNING
         return EXIT_SUCCESS

+ 3 - 3
src/borg/helpers/__init__.py

@@ -23,10 +23,10 @@ from .parseformat import text_to_json, binary_to_json, remove_surrogates, join_c
 from .parseformat import eval_escapes, decode_dict, positive_int_validator, interval
 from .parseformat import SortBySpec, ChunkerParams, FilesCacheMode, partial_format, DatetimeWrapper
 from .parseformat import format_file_size, parse_file_size, FileSize, parse_storage_quota
-from .parseformat import sizeof_fmt, sizeof_fmt_iec, sizeof_fmt_decimal
-from .parseformat import format_line, replace_placeholders, PlaceholderError
+from .parseformat import sizeof_fmt, sizeof_fmt_iec, sizeof_fmt_decimal, Location, text_validator
+from .parseformat import format_line, replace_placeholders, PlaceholderError, relative_time_marker_validator
 from .parseformat import format_archive, parse_stringified_list, clean_lines
-from .parseformat import Location, location_validator, archivename_validator, comment_validator, text_validator
+from .parseformat import location_validator, archivename_validator, comment_validator
 from .parseformat import BaseFormatter, ArchiveFormatter, ItemFormatter, file_status
 from .parseformat import swidth_slice, ellipsis_truncate
 from .parseformat import BorgJsonEncoder, basic_json_data, json_print, json_dump, prepare_dump_dict

+ 9 - 0
src/borg/helpers/parseformat.py

@@ -576,6 +576,15 @@ def location_validator(proto=None, other=False):
     return validator
 
 
def relative_time_marker_validator(text: str):
    """argparse ``type=`` validator for relative time markers like "7d" (days) or "3m" (months).

    Returns *text* unchanged when it matches ``<digits><d|m>``; otherwise raises
    ``argparse.ArgumentTypeError`` so argparse reports a usage error.
    """
    if re.search(r"^\d+[md]$", text):
        return text
    raise argparse.ArgumentTypeError(f"Invalid relative time marker used: {text}")
+
+
 def text_validator(*, name, max_length, min_length=0, invalid_ctrl_chars="\0", invalid_chars="", no_blanks=False):
     def validator(text):
         assert isinstance(text, str)

+ 46 - 1
src/borg/helpers/time.py

@@ -1,5 +1,6 @@
 import os
-from datetime import datetime, timezone
+import re
+from datetime import datetime, timezone, timedelta
 
 
 def parse_timestamp(timestamp, tzinfo=timezone.utc):
@@ -109,6 +110,50 @@ def format_timedelta(td):
     return txt
 
 
def calculate_relative_offset(format_string, from_ts, earlier=False):
    """
    Apply a relative time marker to a timestamp: "7d" = 7 days, "8m" = 8 months.

    :param format_string: marker string, digits followed by a unit ("d" days, "m" months)
    :param from_ts: base timestamp; when None, the current archive timestamp is used
    :param earlier: when True, offset towards the past instead of the future
    :raises ValueError: when *format_string* is not a valid marker
    """
    if from_ts is None:
        from_ts = archive_ts_now()

    match = None
    if format_string is not None:
        match = re.search(r"(?P<offset>\d+)(?P<unit>[md])", format_string)
    if match is None:
        raise ValueError(f"Invalid relative ts offset format: {format_string}")

    amount = int(match.group("offset"))
    if earlier:
        amount = -amount
    if match.group("unit") == "d":
        return from_ts + timedelta(days=amount)
    # only remaining unit the pattern can produce is "m" (months)
    return offset_n_months(from_ts, amount)
+
+
def offset_n_months(from_ts, n_months):
    """Return *from_ts* shifted by *n_months* (negative = into the past).

    The day-of-month is clamped to the length of the target month
    (e.g. Jan 31 + 1 month -> Feb 28) and the tzinfo of *from_ts* is preserved.
    """
    # work with a zero-based "total months since year 0" count
    months_total = from_ts.year * 12 + (from_ts.month - 1) + n_months
    target_year, target_month0 = divmod(months_total, 12)

    # length of the target month = (first day of the following month) - 1 day
    next_year, next_month0 = divmod(months_total + 1, 12)
    last_day = (datetime(next_year, next_month0 + 1, 1) - timedelta(days=1)).day

    shifted = datetime(target_year, target_month0 + 1, min(from_ts.day, last_day))
    return shifted.replace(tzinfo=from_ts.tzinfo)
+
+
 class OutputTimestamp:
     def __init__(self, ts: datetime):
         self.ts = ts

+ 41 - 4
src/borg/manifest.py

@@ -14,7 +14,7 @@ logger = create_logger()
 from .constants import *  # NOQA
 from .helpers.datastruct import StableDict
 from .helpers.parseformat import bin_to_hex
-from .helpers.time import parse_timestamp
+from .helpers.time import parse_timestamp, calculate_relative_offset, archive_ts_now
 from .helpers.errors import Error
 from .patterns import get_regex_from_pattern
 from .repoobj import RepoObj
@@ -34,6 +34,29 @@ AI_HUMAN_SORT_KEYS = ["timestamp"] + list(ArchiveInfo._fields)
 AI_HUMAN_SORT_KEYS.remove("ts")
 
 
def filter_archives_by_date(archives, older=None, newer=None, oldest=None, newest=None):
    """Filter a list of ArchiveInfo objects by timestamp.

    :param older/newer: relative time markers (e.g. "3d", "7m"), offsets from "now"
                        towards the past; archives outside [now-newer, now-older] are dropped
    :param oldest/newest: relative time markers, offsets from the oldest/newest
                          remaining archive's timestamp
    :return: the archives whose ``ts`` fall inside all requested ranges (possibly empty)
    """

    def get_first_and_last_archive_ts(archives_list):
        timestamps = [x.ts for x in archives_list]
        return min(timestamps), max(timestamps)

    # min()/max() raise ValueError on an empty sequence — bail out early instead
    if not archives:
        return []

    if older is not None or newer is not None:
        now = archive_ts_now()
        earliest_ts, latest_ts = get_first_and_last_archive_ts(archives)
        until_ts = calculate_relative_offset(older, now, earlier=True) if older is not None else latest_ts
        from_ts = calculate_relative_offset(newer, now, earlier=True) if newer is not None else earliest_ts
        archives = [x for x in archives if from_ts <= x.ts <= until_ts]
        if not archives:
            # the older/newer window excluded everything; min()/max() below would fail on []
            return []

    if oldest or newest:
        earliest_ts, latest_ts = get_first_and_last_archive_ts(archives)
        if oldest:
            until_ts = calculate_relative_offset(oldest, earliest_ts, earlier=False)
            archives = [x for x in archives if x.ts <= until_ts]
        if newest:
            from_ts = calculate_relative_offset(newest, latest_ts, earlier=True)
            archives = [x for x in archives if x.ts >= from_ts]

    return archives
+
+
 class Archives(abc.MutableMapping):
     """
     Nice wrapper around the archives dict, making sure only valid types/values get in
@@ -82,15 +105,24 @@ class Archives(abc.MutableMapping):
         consider_checkpoints=True,
         first=None,
         last=None,
-        reverse=False
+        reverse=False,
+        older=None,
+        newer=None,
+        oldest=None,
+        newest=None
     ):
         """
         Return list of ArchiveInfo instances according to the parameters.
 
-        First match *match* (considering *match_end*), then *sort_by*.
+        First match *match* (considering *match_end*), then filter by timestamp considering *older* and *newer*.
+        Second, follow with a filter considering *oldest* and *newest*, then sort by the given *sort_by* argument.
+
         Apply *first* and *last* filters, and then possibly *reverse* the list.
 
         *sort_by* is a list of sort keys applied in reverse order.
+        *newer* and *older* are relative time markers that indicate offset from now.
+        *newest* and *oldest* are relative time markers that indicate offset from newest/oldest archive's timestamp.
+
 
         Note: for better robustness, all filtering / limiting parameters must default to
               "not limit / not filter", so a FULL archive list is produced by a simple .list().
@@ -98,9 +130,14 @@ class Archives(abc.MutableMapping):
         """
         if isinstance(sort_by, (str, bytes)):
             raise TypeError("sort_by must be a sequence of str")
+
+        archives = self.values()
         regex = get_regex_from_pattern(match or "re:.*")
         regex = re.compile(regex + match_end)
-        archives = [x for x in self.values() if regex.match(x.name) is not None]
+        archives = [x for x in archives if regex.match(x.name) is not None]
+
+        if any([oldest, newest, older, newer]) and len(archives) > 0:
+            archives = filter_archives_by_date(archives, oldest=oldest, newest=newest, newer=newer, older=older)
         if not consider_checkpoints:
             archives = [x for x in archives if ".checkpoint" not in x.name]
         for sortkey in reversed(sort_by):

+ 7 - 2
src/borg/testsuite/archiver/__init__.py

@@ -172,8 +172,13 @@ class ArchiverTestCaseBase(BaseTestCase):
         output = empty.join(line for line in output.splitlines(keepends=True) if pp_msg not in line)
         return output
 
-    def create_src_archive(self, name):
-        self.cmd(f"--repo={self.repository_location}", "create", "--compression=lz4", name, src_dir)
+    def create_src_archive(self, name, ts=None):
+        if ts:
+            self.cmd(
+                f"--repo={self.repository_location}", "create", "--compression=lz4", f"--timestamp={ts}", name, src_dir
+            )
+        else:
+            self.cmd(f"--repo={self.repository_location}", "create", "--compression=lz4", name, src_dir)
 
     def open_archive(self, name):
         repository = Repository(self.repository_path, exclusive=True)

+ 37 - 0
src/borg/testsuite/archiver/check_cmd.py

@@ -54,6 +54,43 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
         self.assert_not_in("archive1", output)
         self.assert_in("archive2", output)
 
    def test_date_matching(self):
        """Check that ``check --archives-only`` date filters select the expected archives (#7062)."""
        # start from a fresh, empty repository so only the archives created below exist
        shutil.rmtree(self.repository_path)
        self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
        earliest_ts = "2022-11-20T23:59:59"
        ts_in_between = "2022-12-18T23:59:59"
        self.create_src_archive("archive1", ts=earliest_ts)
        self.create_src_archive("archive2", ts=ts_in_between)
        # archive3 gets the current time as its timestamp
        self.create_src_archive("archive3")
        # invalid unit "e" must be rejected by the argparse validator (usage error, rc 2)
        output = self.cmd(
            f"--repo={self.repository_location}", "check", "-v", "--archives-only", "--oldest=23e", exit_code=2
        )
        # --oldest=1m: only archives within 1 month after the oldest archive's timestamp
        output = self.cmd(
            f"--repo={self.repository_location}", "check", "-v", "--archives-only", "--oldest=1m", exit_code=0
        )
        self.assert_in("archive1", output)
        self.assert_in("archive2", output)
        self.assert_not_in("archive3", output)

        # --newest=1m: only archives within 1 month before the newest archive's timestamp
        output = self.cmd(
            f"--repo={self.repository_location}", "check", "-v", "--archives-only", "--newest=1m", exit_code=0
        )
        self.assert_in("archive3", output)
        self.assert_not_in("archive2", output)
        self.assert_not_in("archive1", output)
        # --newer=1d: only archives created within the last day (relative to "now")
        output = self.cmd(
            f"--repo={self.repository_location}", "check", "-v", "--archives-only", "--newer=1d", exit_code=0
        )
        self.assert_in("archive3", output)
        self.assert_not_in("archive1", output)
        self.assert_not_in("archive2", output)
        # --older=1d: only archives created more than a day ago
        output = self.cmd(
            f"--repo={self.repository_location}", "check", "-v", "--archives-only", "--older=1d", exit_code=0
        )
        self.assert_in("archive1", output)
        self.assert_in("archive2", output)
        self.assert_not_in("archive3", output)
+
     def test_missing_file_chunk(self):
         archive, repository = self.open_archive("archive1")
         with repository: