check.py 10 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319
  1. import argparse
  2. import datetime
  3. import json
  4. import logging
  5. import os
  6. import pathlib
  7. from borgmatic.borg import extract, info, state
  8. from borgmatic.execute import DO_NOT_CAPTURE, execute_command
  9. DEFAULT_CHECKS = (
  10. {'name': 'repository', 'frequency': '1 month'},
  11. {'name': 'archives', 'frequency': '1 month'},
  12. )
  13. DEFAULT_PREFIX = '{hostname}-'
  14. logger = logging.getLogger(__name__)
  15. def parse_checks(consistency_config, only_checks=None):
  16. '''
  17. Given a consistency config with a "checks" sequence of dicts and an optional list of override
  18. checks, return a tuple of named checks to run.
  19. For example, given a retention config of:
  20. {'checks': ({'name': 'repository'}, {'name': 'archives'})}
  21. This will be returned as:
  22. ('repository', 'archives')
  23. If no "checks" option is present in the config, return the DEFAULT_CHECKS. If a checks value
  24. has a name of "disabled", return an empty tuple, meaning that no checks should be run.
  25. If the "data" check is present, then make sure the "archives" check is included as well.
  26. '''
  27. checks = only_checks or tuple(
  28. check_config['name']
  29. for check_config in (consistency_config.get('checks', None) or DEFAULT_CHECKS)
  30. )
  31. checks = tuple(check.lower() for check in checks)
  32. if 'disabled' in checks:
  33. if len(checks) > 1:
  34. logger.warning(
  35. 'Multiple checks are configured, but one of them is "disabled"; not running any checks'
  36. )
  37. return ()
  38. if 'data' in checks and 'archives' not in checks:
  39. return checks + ('archives',)
  40. return checks
  41. def parse_frequency(frequency):
  42. '''
  43. Given a frequency string with a number and a unit of time, return a corresponding
  44. datetime.timedelta instance or None if the frequency is None or "always".
  45. For instance, given "3 weeks", return datetime.timedelta(weeks=3)
  46. Raise ValueError if the given frequency cannot be parsed.
  47. '''
  48. if not frequency:
  49. return None
  50. frequency = frequency.strip().lower()
  51. if frequency == 'always':
  52. return None
  53. try:
  54. number, time_unit = frequency.split(' ')
  55. number = int(number)
  56. except ValueError:
  57. raise ValueError(f"Could not parse consistency check frequency '{frequency}'")
  58. if not time_unit.endswith('s'):
  59. time_unit += 's'
  60. if time_unit == 'months':
  61. number *= 30
  62. time_unit = 'days'
  63. elif time_unit == 'years':
  64. number *= 365
  65. time_unit = 'days'
  66. try:
  67. return datetime.timedelta(**{time_unit: number})
  68. except TypeError:
  69. raise ValueError(f"Could not parse consistency check frequency '{frequency}'")
  70. def filter_checks_on_frequency(
  71. location_config, consistency_config, borg_repository_id, checks, force
  72. ):
  73. '''
  74. Given a location config, a consistency config with a "checks" sequence of dicts, a Borg
  75. repository ID, a sequence of checks, and whether to force checks to run, filter down those
  76. checks based on the configured "frequency" for each check as compared to its check time file.
  77. In other words, a check whose check time file's timestamp is too new (based on the configured
  78. frequency) will get cut from the returned sequence of checks. Example:
  79. consistency_config = {
  80. 'checks': [
  81. {
  82. 'name': 'archives',
  83. 'frequency': '2 weeks',
  84. },
  85. ]
  86. }
  87. When this function is called with that consistency_config and "archives" in checks, "archives"
  88. will get filtered out of the returned result if its check time file is newer than 2 weeks old,
  89. indicating that it's not yet time to run that check again.
  90. Raise ValueError if a frequency cannot be parsed.
  91. '''
  92. filtered_checks = list(checks)
  93. if force:
  94. return tuple(filtered_checks)
  95. for check_config in consistency_config.get('checks', DEFAULT_CHECKS):
  96. check = check_config['name']
  97. if checks and check not in checks:
  98. continue
  99. frequency_delta = parse_frequency(check_config.get('frequency'))
  100. if not frequency_delta:
  101. continue
  102. check_time = read_check_time(
  103. make_check_time_path(location_config, borg_repository_id, check)
  104. )
  105. if not check_time:
  106. continue
  107. # If we've not yet reached the time when the frequency dictates we're ready for another
  108. # check, skip this check.
  109. if datetime.datetime.now() < check_time + frequency_delta:
  110. remaining = check_time + frequency_delta - datetime.datetime.now()
  111. logger.info(
  112. f"Skipping {check} check due to configured frequency; {remaining} until next check"
  113. )
  114. filtered_checks.remove(check)
  115. return tuple(filtered_checks)
  116. def make_check_flags(checks, check_last=None, prefix=None):
  117. '''
  118. Given a parsed sequence of checks, transform it into tuple of command-line flags.
  119. For example, given parsed checks of:
  120. ('repository',)
  121. This will be returned as:
  122. ('--repository-only',)
  123. However, if both "repository" and "archives" are in checks, then omit them from the returned
  124. flags because Borg does both checks by default.
  125. Additionally, if a check_last value is given and "archives" is in checks, then include a
  126. "--last" flag. And if a prefix value is given and "archives" is in checks, then include a
  127. "--prefix" flag.
  128. '''
  129. if 'archives' in checks:
  130. last_flags = ('--last', str(check_last)) if check_last else ()
  131. prefix_flags = ('--prefix', prefix) if prefix else ()
  132. else:
  133. last_flags = ()
  134. prefix_flags = ()
  135. if check_last:
  136. logger.info('Ignoring check_last option, as "archives" is not in consistency checks')
  137. if prefix:
  138. logger.info(
  139. 'Ignoring consistency prefix option, as "archives" is not in consistency checks'
  140. )
  141. common_flags = last_flags + prefix_flags + (('--verify-data',) if 'data' in checks else ())
  142. if {'repository', 'archives'}.issubset(set(checks)):
  143. return common_flags
  144. return (
  145. tuple('--{}-only'.format(check) for check in checks if check in ('repository', 'archives'))
  146. + common_flags
  147. )
  148. def make_check_time_path(location_config, borg_repository_id, check_type):
  149. '''
  150. Given a location configuration dict, a Borg repository ID, and the name of a check type
  151. ("repository", "archives", etc.), return a path for recording that check's time (the time of
  152. that check last occurring).
  153. '''
  154. return os.path.join(
  155. os.path.expanduser(
  156. location_config.get(
  157. 'borgmatic_source_directory', state.DEFAULT_BORGMATIC_SOURCE_DIRECTORY
  158. )
  159. ),
  160. 'checks',
  161. borg_repository_id,
  162. check_type,
  163. )
  164. def write_check_time(path): # pragma: no cover
  165. '''
  166. Record a check time of now as the modification time of the given path.
  167. '''
  168. logger.debug(f'Writing check time at {path}')
  169. os.makedirs(os.path.dirname(path), mode=0o700, exist_ok=True)
  170. pathlib.Path(path, mode=0o600).touch()
  171. def read_check_time(path):
  172. '''
  173. Return the check time based on the modification time of the given path. Return None if the path
  174. doesn't exist.
  175. '''
  176. logger.debug(f'Reading check time from {path}')
  177. try:
  178. return datetime.datetime.fromtimestamp(os.stat(path).st_mtime)
  179. except FileNotFoundError:
  180. return None
  181. def check_archives(
  182. repository,
  183. location_config,
  184. storage_config,
  185. consistency_config,
  186. local_path='borg',
  187. remote_path=None,
  188. progress=None,
  189. repair=None,
  190. only_checks=None,
  191. force=None,
  192. ):
  193. '''
  194. Given a local or remote repository path, a storage config dict, a consistency config dict,
  195. local/remote commands to run, whether to include progress information, whether to attempt a
  196. repair, and an optional list of checks to use instead of configured checks, check the contained
  197. Borg archives for consistency.
  198. If there are no consistency checks to run, skip running them.
  199. Raises ValueError if the Borg repository ID cannot be determined.
  200. '''
  201. try:
  202. borg_repository_id = json.loads(
  203. info.display_archives_info(
  204. repository,
  205. storage_config,
  206. argparse.Namespace(json=True, archive=None),
  207. local_path,
  208. remote_path,
  209. )
  210. )['repository']['id']
  211. except (json.JSONDecodeError, KeyError):
  212. raise ValueError(f'Cannot determine Borg repository ID for {repository}')
  213. checks = filter_checks_on_frequency(
  214. location_config,
  215. consistency_config,
  216. borg_repository_id,
  217. parse_checks(consistency_config, only_checks),
  218. force,
  219. )
  220. check_last = consistency_config.get('check_last', None)
  221. lock_wait = None
  222. extra_borg_options = storage_config.get('extra_borg_options', {}).get('check', '')
  223. if set(checks).intersection({'repository', 'archives', 'data'}):
  224. lock_wait = storage_config.get('lock_wait', None)
  225. verbosity_flags = ()
  226. if logger.isEnabledFor(logging.INFO):
  227. verbosity_flags = ('--info',)
  228. if logger.isEnabledFor(logging.DEBUG):
  229. verbosity_flags = ('--debug', '--show-rc')
  230. prefix = consistency_config.get('prefix', DEFAULT_PREFIX)
  231. full_command = (
  232. (local_path, 'check')
  233. + (('--repair',) if repair else ())
  234. + make_check_flags(checks, check_last, prefix)
  235. + (('--remote-path', remote_path) if remote_path else ())
  236. + (('--lock-wait', str(lock_wait)) if lock_wait else ())
  237. + verbosity_flags
  238. + (('--progress',) if progress else ())
  239. + (tuple(extra_borg_options.split(' ')) if extra_borg_options else ())
  240. + (repository,)
  241. )
  242. # The Borg repair option triggers an interactive prompt, which won't work when output is
  243. # captured. And progress messes with the terminal directly.
  244. if repair or progress:
  245. execute_command(full_command, output_file=DO_NOT_CAPTURE)
  246. else:
  247. execute_command(full_command)
  248. for check in checks:
  249. write_check_time(make_check_time_path(location_config, borg_repository_id, check))
  250. if 'extract' in checks:
  251. extract.extract_last_archive_dry_run(repository, lock_wait, local_path, remote_path)
  252. write_check_time(make_check_time_path(location_config, borg_repository_id, 'extract'))