# check.py
import calendar
import contextlib
import datetime
import hashlib
import itertools
import logging
import os
import pathlib
import random
import shlex
import shutil
import textwrap

import borgmatic.actions.config.bootstrap
import borgmatic.actions.pattern
import borgmatic.borg.check
import borgmatic.borg.create
import borgmatic.borg.environment
import borgmatic.borg.extract
import borgmatic.borg.list
import borgmatic.borg.pattern
import borgmatic.borg.repo_list
import borgmatic.borg.state
import borgmatic.config.paths
import borgmatic.execute
import borgmatic.hooks.command
import borgmatic.hooks.dispatch
# Checks (and their frequencies) that run when the configuration has no "checks" option.
DEFAULT_CHECKS = (
    {'name': 'repository', 'frequency': '1 month'},
    {'name': 'archives', 'frequency': '1 month'},
)

logger = logging.getLogger(__name__)
  31. def parse_checks(config, only_checks=None):
  32. '''
  33. Given a configuration dict with a "checks" sequence of dicts and an optional list of override
  34. checks, return a tuple of named checks to run.
  35. For example, given a config of:
  36. {'checks': ({'name': 'repository'}, {'name': 'archives'})}
  37. This will be returned as:
  38. ('repository', 'archives')
  39. If no "checks" option is present in the config, return the DEFAULT_CHECKS. If a checks value
  40. has a name of "disabled", return an empty tuple, meaning that no checks should be run.
  41. '''
  42. checks = only_checks or tuple(
  43. check_config['name'] for check_config in (config.get('checks', None) or DEFAULT_CHECKS)
  44. )
  45. checks = tuple(check.lower() for check in checks)
  46. if 'disabled' in checks:
  47. logger.warning(
  48. 'The "disabled" value for the "checks" option is deprecated and will be removed from a future release; use "skip_actions" instead',
  49. )
  50. if len(checks) > 1:
  51. logger.warning(
  52. 'Multiple checks are configured, but one of them is "disabled"; not running any checks',
  53. )
  54. return ()
  55. return checks
  56. def parse_frequency(frequency):
  57. '''
  58. Given a frequency string with a number and a unit of time, return a corresponding
  59. datetime.timedelta instance or None if the frequency is None or "always".
  60. For instance, given "3 weeks", return datetime.timedelta(weeks=3)
  61. Raise ValueError if the given frequency cannot be parsed.
  62. '''
  63. if not frequency:
  64. return None
  65. frequency = frequency.strip().lower()
  66. if frequency == 'always':
  67. return None
  68. try:
  69. number, time_unit = frequency.split(' ')
  70. number = int(number)
  71. except ValueError:
  72. raise ValueError(f"Could not parse consistency check frequency '{frequency}'")
  73. if not time_unit.endswith('s'):
  74. time_unit += 's'
  75. if time_unit == 'months':
  76. number *= 30
  77. time_unit = 'days'
  78. elif time_unit == 'years':
  79. number *= 365
  80. time_unit = 'days'
  81. try:
  82. return datetime.timedelta(**{time_unit: number})
  83. except TypeError:
  84. raise ValueError(f"Could not parse consistency check frequency '{frequency}'")
# Day-name groups used to expand the "weekday" and "weekend" shorthands in a check's
# "only_run_on" option: Monday-Friday and Saturday/Sunday respectively.
WEEKDAY_DAYS = calendar.day_name[0:5]
WEEKEND_DAYS = calendar.day_name[5:7]
  87. def filter_checks_on_frequency(
  88. config,
  89. borg_repository_id,
  90. checks,
  91. force,
  92. archives_check_id=None,
  93. datetime_now=datetime.datetime.now,
  94. ):
  95. '''
  96. Given a configuration dict with a "checks" sequence of dicts, a Borg repository ID, a sequence
  97. of checks, whether to force checks to run, and an ID for the archives check potentially being
  98. run (if any), filter down those checks based on the configured "frequency" for each check as
  99. compared to its check time file.
  100. In other words, a check whose check time file's timestamp is too new (based on the configured
  101. frequency) will get cut from the returned sequence of checks. Example:
  102. config = {
  103. 'checks': [
  104. {
  105. 'name': 'archives',
  106. 'frequency': '2 weeks',
  107. },
  108. ]
  109. }
  110. When this function is called with that config and "archives" in checks, "archives" will get
  111. filtered out of the returned result if its check time file is newer than 2 weeks old, indicating
  112. that it's not yet time to run that check again.
  113. Raise ValueError if a frequency cannot be parsed.
  114. '''
  115. if not checks:
  116. return checks
  117. filtered_checks = list(checks)
  118. if force:
  119. return tuple(filtered_checks)
  120. for check_config in config.get('checks', DEFAULT_CHECKS):
  121. check = check_config['name']
  122. if checks and check not in checks:
  123. continue
  124. only_run_on = check_config.get('only_run_on')
  125. if only_run_on:
  126. # Use a dict instead of a set to preserve ordering.
  127. days = dict.fromkeys(only_run_on)
  128. if 'weekday' in days:
  129. days = {
  130. **dict.fromkeys(day for day in days if day != 'weekday'),
  131. **dict.fromkeys(WEEKDAY_DAYS),
  132. }
  133. if 'weekend' in days:
  134. days = {
  135. **dict.fromkeys(day for day in days if day != 'weekend'),
  136. **dict.fromkeys(WEEKEND_DAYS),
  137. }
  138. if calendar.day_name[datetime_now().weekday()] not in days:
  139. logger.info(
  140. f"Skipping {check} check due to day of the week; check only runs on {'/'.join(day.title() for day in days)} (use --force to check anyway)",
  141. )
  142. filtered_checks.remove(check)
  143. continue
  144. frequency_delta = parse_frequency(check_config.get('frequency'))
  145. if not frequency_delta:
  146. continue
  147. check_time = probe_for_check_time(config, borg_repository_id, check, archives_check_id)
  148. if not check_time:
  149. continue
  150. # If we've not yet reached the time when the frequency dictates we're ready for another
  151. # check, skip this check.
  152. if datetime_now() < check_time + frequency_delta:
  153. remaining = check_time + frequency_delta - datetime_now()
  154. logger.info(
  155. f'Skipping {check} check due to configured frequency; {remaining} until next check (use --force to check anyway)',
  156. )
  157. filtered_checks.remove(check)
  158. return tuple(filtered_checks)
  159. def make_archives_check_id(archive_filter_flags):
  160. '''
  161. Given a sequence of flags to filter archives, return a unique hash corresponding to those
  162. particular flags. If there are no flags, return None.
  163. '''
  164. if not archive_filter_flags:
  165. return None
  166. return hashlib.sha256(' '.join(archive_filter_flags).encode()).hexdigest()
  167. def make_check_time_path(config, borg_repository_id, check_type, archives_check_id=None):
  168. '''
  169. Given a configuration dict, a Borg repository ID, the name of a check type ("repository",
  170. "archives", etc.), and a unique hash of the archives filter flags, return a path for recording
  171. that check's time (the time of that check last occurring).
  172. '''
  173. borgmatic_state_directory = borgmatic.config.paths.get_borgmatic_state_directory(config)
  174. if check_type in {'archives', 'data'}:
  175. return os.path.join(
  176. borgmatic_state_directory,
  177. 'checks',
  178. borg_repository_id,
  179. check_type,
  180. archives_check_id if archives_check_id else 'all',
  181. )
  182. return os.path.join(
  183. borgmatic_state_directory,
  184. 'checks',
  185. borg_repository_id,
  186. check_type,
  187. )
  188. def write_check_time(path): # pragma: no cover
  189. '''
  190. Record a check time of now as the modification time of the given path.
  191. '''
  192. logger.debug(f'Writing check time at {path}')
  193. os.makedirs(os.path.dirname(path), mode=0o700, exist_ok=True)
  194. pathlib.Path(path).touch(mode=0o600)
  195. def read_check_time(path):
  196. '''
  197. Return the check time based on the modification time of the given path. Return None if the path
  198. doesn't exist.
  199. '''
  200. logger.debug(f'Reading check time from {path}')
  201. try:
  202. return datetime.datetime.fromtimestamp(os.stat(path).st_mtime) # noqa: DTZ006
  203. except FileNotFoundError:
  204. return None
  205. def probe_for_check_time(config, borg_repository_id, check, archives_check_id):
  206. '''
  207. Given a configuration dict, a Borg repository ID, the name of a check type ("repository",
  208. "archives", etc.), and a unique hash of the archives filter flags, return the corresponding
  209. check time or None if such a check time does not exist.
  210. When the check type is "archives" or "data", this function probes two different paths to find
  211. the check time, e.g.:
  212. ~/.borgmatic/checks/1234567890/archives/9876543210
  213. ~/.borgmatic/checks/1234567890/archives/all
  214. ... and returns the maximum modification time of the files found (if any). The first path
  215. represents a more specific archives check time (a check on a subset of archives), and the second
  216. is a fallback to the last "all" archives check.
  217. For other check types, this function reads from a single check time path, e.g.:
  218. ~/.borgmatic/checks/1234567890/repository
  219. '''
  220. check_times = (
  221. read_check_time(group[0])
  222. for group in itertools.groupby(
  223. (
  224. make_check_time_path(config, borg_repository_id, check, archives_check_id),
  225. make_check_time_path(config, borg_repository_id, check),
  226. ),
  227. )
  228. )
  229. try:
  230. return max(check_time for check_time in check_times if check_time)
  231. except ValueError:
  232. return None
  233. def upgrade_check_times(config, borg_repository_id):
  234. '''
  235. Given a configuration dict and a Borg repository ID, upgrade any corresponding check times on
  236. disk from old-style paths to new-style paths.
  237. One upgrade performed is moving the checks directory from:
  238. {borgmatic_source_directory}/checks (e.g., ~/.borgmatic/checks)
  239. to:
  240. {borgmatic_state_directory}/checks (e.g. ~/.local/state/borgmatic)
  241. Another upgrade is renaming an archive or data check path that looks like:
  242. {borgmatic_state_directory}/checks/1234567890/archives
  243. to:
  244. {borgmatic_state_directory}/checks/1234567890/archives/all
  245. '''
  246. borgmatic_source_checks_path = os.path.join(
  247. borgmatic.config.paths.get_borgmatic_source_directory(config),
  248. 'checks',
  249. )
  250. borgmatic_state_path = borgmatic.config.paths.get_borgmatic_state_directory(config)
  251. borgmatic_state_checks_path = os.path.join(borgmatic_state_path, 'checks')
  252. if os.path.exists(borgmatic_source_checks_path) and not os.path.exists(
  253. borgmatic_state_checks_path,
  254. ):
  255. logger.debug(
  256. f'Upgrading archives check times directory from {borgmatic_source_checks_path} to {borgmatic_state_checks_path}',
  257. )
  258. os.makedirs(borgmatic_state_path, mode=0o700, exist_ok=True)
  259. shutil.move(borgmatic_source_checks_path, borgmatic_state_checks_path)
  260. for check_type in ('archives', 'data'):
  261. new_path = make_check_time_path(config, borg_repository_id, check_type, 'all')
  262. old_path = os.path.dirname(new_path)
  263. temporary_path = f'{old_path}.temp'
  264. if not os.path.isfile(old_path) and not os.path.isfile(temporary_path):
  265. continue
  266. logger.debug(f'Upgrading archives check time file from {old_path} to {new_path}')
  267. with contextlib.suppress(FileNotFoundError):
  268. shutil.move(old_path, temporary_path)
  269. os.mkdir(old_path)
  270. shutil.move(temporary_path, new_path)
  271. def collect_spot_check_source_paths(
  272. repository,
  273. config,
  274. local_borg_version,
  275. global_arguments,
  276. local_path,
  277. remote_path,
  278. borgmatic_runtime_directory,
  279. bootstrap_config_paths,
  280. ):
  281. '''
  282. Given a repository configuration dict, a configuration dict, the local Borg version, global
  283. arguments as an argparse.Namespace instance, the local Borg path, the remote Borg path, and the
  284. bootstrap configuration paths as read from an archive's manifest, collect the source paths that
  285. Borg would use in an actual create (but only include files). As part of this, include the
  286. bootstrap configuration paths, so that any configuration files included in the archive to
  287. support bootstrapping are also spot checked.
  288. '''
  289. stream_processes = any(
  290. borgmatic.hooks.dispatch.call_hooks(
  291. 'use_streaming',
  292. config,
  293. borgmatic.hooks.dispatch.Hook_type.DATA_SOURCE,
  294. ).values(),
  295. )
  296. working_directory = borgmatic.config.paths.get_working_directory(config)
  297. (create_flags, create_positional_arguments, _) = borgmatic.borg.create.make_base_create_command(
  298. dry_run=True,
  299. repository_path=repository['path'],
  300. # Omit "progress" because it interferes with "list_details".
  301. config=dict(
  302. {option: value for option, value in config.items() if option != 'progress'},
  303. list_details=True,
  304. ),
  305. patterns=borgmatic.actions.pattern.process_patterns(
  306. borgmatic.actions.pattern.collect_patterns(config)
  307. + tuple(
  308. borgmatic.borg.pattern.Pattern(
  309. config_path,
  310. source=borgmatic.borg.pattern.Pattern_source.INTERNAL,
  311. )
  312. for config_path in bootstrap_config_paths
  313. ),
  314. config,
  315. working_directory,
  316. ),
  317. local_borg_version=local_borg_version,
  318. global_arguments=global_arguments,
  319. borgmatic_runtime_directory=borgmatic_runtime_directory,
  320. local_path=local_path,
  321. remote_path=remote_path,
  322. stream_processes=stream_processes,
  323. )
  324. working_directory = borgmatic.config.paths.get_working_directory(config)
  325. paths_output = borgmatic.execute.execute_command_and_capture_output(
  326. create_flags + create_positional_arguments,
  327. capture_stderr=True,
  328. environment=borgmatic.borg.environment.make_environment(config),
  329. working_directory=working_directory,
  330. borg_local_path=local_path,
  331. borg_exit_codes=config.get('borg_exit_codes'),
  332. )
  333. paths = tuple(
  334. path_line.split(' ', 1)[1]
  335. for path_line in paths_output.splitlines()
  336. if path_line and path_line.startswith(('- ', '+ '))
  337. )
  338. return tuple(
  339. path for path in paths if os.path.isfile(os.path.join(working_directory or '', path))
  340. )
# File type codes that appear in Borg's "{type}" archive listing placeholder; directories and
# named pipes are excluded from spot check comparisons.
BORG_DIRECTORY_FILE_TYPE = 'd'
BORG_PIPE_FILE_TYPE = 'p'
  343. def collect_spot_check_archive_paths(
  344. repository,
  345. archive,
  346. config,
  347. local_borg_version,
  348. global_arguments,
  349. local_path,
  350. remote_path,
  351. borgmatic_runtime_directory,
  352. ):
  353. '''
  354. Given a repository configuration dict, the name of the latest archive, a configuration dict, the
  355. local Borg version, global arguments as an argparse.Namespace instance, the local Borg path, the
  356. remote Borg path, and the borgmatic runtime directory, collect the paths from the given archive
  357. (but only include files and symlinks and exclude borgmatic runtime directories).
  358. These paths do not have a leading slash, as that's how Borg stores them. As a result, we don't
  359. know whether they came from absolute or relative source directories.
  360. '''
  361. borgmatic_source_directory = borgmatic.config.paths.get_borgmatic_source_directory(config)
  362. return tuple(
  363. path
  364. for line in borgmatic.borg.list.capture_archive_listing(
  365. repository['path'],
  366. archive,
  367. config,
  368. local_borg_version,
  369. global_arguments,
  370. path_format='{type} {path}{NUL}',
  371. local_path=local_path,
  372. remote_path=remote_path,
  373. )
  374. for (file_type, path) in (line.split(' ', 1),)
  375. if file_type not in {BORG_DIRECTORY_FILE_TYPE, BORG_PIPE_FILE_TYPE}
  376. if pathlib.Path('borgmatic') not in pathlib.Path(path).parents
  377. if pathlib.Path(borgmatic_source_directory.lstrip(os.path.sep))
  378. not in pathlib.Path(path).parents
  379. if pathlib.Path(borgmatic_runtime_directory.lstrip(os.path.sep))
  380. not in pathlib.Path(path).parents
  381. )
# Maximum number of paths to hash per hashing-command invocation, so the constructed command line
# stays under the operating system's argument length limit.
SAMPLE_PATHS_SUBSET_COUNT = 5000
def compare_spot_check_hashes(
    repository,
    archive,
    config,
    local_borg_version,
    global_arguments,
    local_path,
    remote_path,
    source_paths,
):
    '''
    Given a repository configuration dict, the name of the latest archive, a configuration dict, the
    local Borg version, global arguments as an argparse.Namespace instance, the local Borg path, the
    remote Borg path, and spot check source paths, compare the hashes for a sampling of the source
    paths with hashes from corresponding paths in the given archive. Return a sequence of the paths
    that fail that hash comparison.
    '''
    # Based on the configured sample percentage, come up with a list of random sample files from the
    # source directories.
    spot_check_config = next(check for check in config['checks'] if check['name'] == 'spot')
    sample_count = max(
        int(len(source_paths) * (min(spot_check_config['data_sample_percentage'], 100) / 100)),
        1,
    )
    source_sample_paths = tuple(random.SystemRandom().sample(source_paths, sample_count))
    working_directory = borgmatic.config.paths.get_working_directory(config)
    # Sampled paths that can actually be hashed on disk: they exist and aren't symlinks.
    hashable_source_sample_path = {
        source_path
        for source_path in source_sample_paths
        for full_source_path in (os.path.join(working_directory or '', source_path),)
        if os.path.exists(full_source_path)
        if not os.path.islink(full_source_path)
    }
    logger.debug(
        f'Sampling {sample_count} source paths (~{spot_check_config["data_sample_percentage"]}%) for spot check',
    )
    source_sample_paths_iterator = iter(source_sample_paths)
    # Both dicts map a path to its xxh64 hash: on disk and in the archive, respectively.
    source_hashes = {}
    archive_hashes = {}

    # Only hash a few thousand files at a time (a subset of the total paths) to avoid an "Argument
    # list too long" shell error.
    while True:
        # Hash each file in the sample paths (if it exists).
        source_sample_paths_subset = tuple(
            itertools.islice(source_sample_paths_iterator, SAMPLE_PATHS_SUBSET_COUNT),
        )
        if not source_sample_paths_subset:
            break

        # Run the configured hash command (xxh64sum by default) on the hashable paths in this
        # subset; its output lines look like "<hash> <path>".
        hash_output = borgmatic.execute.execute_command_and_capture_output(
            tuple(
                shlex.quote(part)
                for part in shlex.split(spot_check_config.get('xxh64sum_command', 'xxh64sum'))
            )
            + tuple(
                path for path in source_sample_paths_subset if path in hashable_source_sample_path
            ),
            working_directory=working_directory,
        )

        # Parse the "<hash> <path>" output lines into path -> hash entries.
        source_hashes.update(
            **dict(
                (reversed(line.split(' ', 1)) for line in hash_output.splitlines()),
                # Represent non-existent files as having empty hashes so the comparison below still
                # works. Same thing for filesystem links, since Borg produces empty archive hashes
                # for them.
                **{
                    path: ''
                    for path in source_sample_paths_subset
                    if path not in hashable_source_sample_path
                },
            ),
        )

        # Get the hash for each file in the archive.
        archive_hashes.update(
            **dict(
                reversed(line.split(' ', 1))
                for line in borgmatic.borg.list.capture_archive_listing(
                    repository['path'],
                    archive,
                    config,
                    local_borg_version,
                    global_arguments,
                    list_paths=source_sample_paths_subset,
                    path_format='{xxh64} {path}{NUL}',
                    local_path=local_path,
                    remote_path=remote_path,
                )
                if line
            ),
        )

    # Compare the source hashes with the archive hashes to see how many match.
    failing_paths = []

    for path, source_hash in source_hashes.items():
        # Borg stores archive paths without a leading path separator, so strip it before lookup.
        archive_hash = archive_hashes.get(path.lstrip(os.path.sep))

        if archive_hash is not None and archive_hash == source_hash:
            continue

        failing_paths.append(path)

    return tuple(failing_paths)
# Maximum character length for path lists included in spot check debug logging, beyond which
# they're truncated.
MAX_SPOT_CHECK_PATHS_LENGTH = 1000
def spot_check(
    repository,
    config,
    local_borg_version,
    global_arguments,
    local_path,
    remote_path,
    borgmatic_runtime_directory,
):
    '''
    Given a repository dict, a loaded configuration dict, the local Borg version, global arguments
    as an argparse.Namespace instance, the local Borg path, the remote Borg path, and the borgmatic
    runtime directory, perform a spot check for the latest archive in the given repository.

    A spot check compares file counts and also the hashes for a random sampling of source files on
    disk to those stored in the latest archive. If any differences are beyond configured tolerances,
    then the check fails.

    Raise ValueError if the spot check is unconfigured, misconfigured, or fails.
    '''
    logger.debug('Running spot check')

    try:
        spot_check_config = next(
            check for check in config.get('checks', ()) if check.get('name') == 'spot'
        )
    except StopIteration:
        raise ValueError('Cannot run spot check because it is unconfigured')

    # A data tolerance above the sample percentage would be meaningless: more files could be
    # tolerated as failing than are even sampled.
    if spot_check_config['data_tolerance_percentage'] > spot_check_config['data_sample_percentage']:
        raise ValueError(
            'The data_tolerance_percentage must be less than or equal to the data_sample_percentage',
        )

    archive = borgmatic.borg.repo_list.resolve_archive_name(
        repository['path'],
        'latest',
        config,
        local_borg_version,
        global_arguments,
        local_path,
        remote_path,
    )
    logger.debug(f'Using archive {archive} for spot check')

    source_paths = collect_spot_check_source_paths(
        repository,
        config,
        local_borg_version,
        global_arguments,
        local_path,
        remote_path,
        borgmatic_runtime_directory,
        bootstrap_config_paths=borgmatic.actions.config.bootstrap.load_config_paths_from_archive(
            repository['path'],
            archive,
            config,
            local_borg_version,
            global_arguments,
            borgmatic_runtime_directory,
        ),
    )
    logger.debug(f'{len(source_paths)} total source paths for spot check')

    archive_paths = collect_spot_check_archive_paths(
        repository,
        archive,
        config,
        local_borg_version,
        global_arguments,
        local_path,
        remote_path,
        borgmatic_runtime_directory,
    )
    logger.debug(f'{len(archive_paths)} total archive paths for spot check')

    # With no source paths, the count delta below would divide by zero, so fail early.
    if len(source_paths) == 0:
        truncated_archive_paths = textwrap.shorten(
            ', '.join(set(archive_paths)) or 'none',
            width=MAX_SPOT_CHECK_PATHS_LENGTH,
            placeholder=' ...',
        )
        logger.debug(f'Paths in latest archive but not source paths: {truncated_archive_paths}')
        raise ValueError(
            'Spot check failed: There are no source paths to compare against the archive',
        )

    # Calculate the percentage delta between the source paths count and the archive paths count, and
    # compare that delta to the configured count tolerance percentage.
    count_delta_percentage = abs(len(source_paths) - len(archive_paths)) / len(source_paths) * 100

    if count_delta_percentage > spot_check_config['count_tolerance_percentage']:
        # Log the paths exclusive to each side to aid debugging. Archive paths lack a leading
        # separator, so strip it from source paths before set comparisons.
        rootless_source_paths = {path.lstrip(os.path.sep) for path in source_paths}
        truncated_exclusive_source_paths = textwrap.shorten(
            ', '.join(rootless_source_paths - set(archive_paths)) or 'none',
            width=MAX_SPOT_CHECK_PATHS_LENGTH,
            placeholder=' ...',
        )
        logger.debug(
            f'Paths in source paths but not latest archive: {truncated_exclusive_source_paths}',
        )
        truncated_exclusive_archive_paths = textwrap.shorten(
            ', '.join(set(archive_paths) - rootless_source_paths) or 'none',
            width=MAX_SPOT_CHECK_PATHS_LENGTH,
            placeholder=' ...',
        )
        logger.debug(
            f'Paths in latest archive but not source paths: {truncated_exclusive_archive_paths}',
        )
        raise ValueError(
            f'Spot check failed: {count_delta_percentage:.2f}% file count delta between source paths and latest archive (tolerance is {spot_check_config["count_tolerance_percentage"]}%)',
        )

    failing_paths = compare_spot_check_hashes(
        repository,
        archive,
        config,
        local_borg_version,
        global_arguments,
        local_path,
        remote_path,
        source_paths,
    )

    # Error if the percentage of failing hashes exceeds the configured tolerance percentage.
    logger.debug(f'{len(failing_paths)} non-matching spot check hashes')
    data_tolerance_percentage = spot_check_config['data_tolerance_percentage']
    failing_percentage = (len(failing_paths) / len(source_paths)) * 100

    if failing_percentage > data_tolerance_percentage:
        truncated_failing_paths = textwrap.shorten(
            ', '.join(failing_paths),
            width=MAX_SPOT_CHECK_PATHS_LENGTH,
            placeholder=' ...',
        )
        logger.debug(
            f'Source paths with data not matching the latest archive: {truncated_failing_paths}',
        )
        raise ValueError(
            f'Spot check failed: {failing_percentage:.2f}% of source paths with data not matching the latest archive (tolerance is {data_tolerance_percentage}%)',
        )

    logger.info(
        f'Spot check passed with a {count_delta_percentage:.2f}% file count delta and a {failing_percentage:.2f}% file data delta',
    )
def run_check(
    config_filename,
    repository,
    config,
    local_borg_version,
    check_arguments,
    global_arguments,
    local_path,
    remote_path,
):
    '''
    Run the "check" action for the given repository.

    Raise ValueError if the Borg repository ID cannot be determined.
    '''
    logger.info('Running consistency checks')

    repository_id = borgmatic.borg.check.get_repository_id(
        repository['path'],
        config,
        local_borg_version,
        global_arguments,
        local_path=local_path,
        remote_path=remote_path,
    )
    # Migrate any check time files from legacy locations before they're consulted below.
    upgrade_check_times(config, repository_id)
    configured_checks = parse_checks(config, check_arguments.only_checks)
    archive_filter_flags = borgmatic.borg.check.make_archive_filter_flags(
        local_borg_version,
        config,
        configured_checks,
        check_arguments,
    )
    archives_check_id = make_archives_check_id(archive_filter_flags)
    # Drop any checks that aren't due yet per their configured frequencies (unless forced).
    checks = filter_checks_on_frequency(
        config,
        repository_id,
        configured_checks,
        check_arguments.force,
        archives_check_id,
    )
    # "repository", "archives", and "data" checks are delegated to Borg itself; "extract" and
    # "spot" are implemented by borgmatic below.
    borg_specific_checks = set(checks).intersection({'repository', 'archives', 'data'})

    if borg_specific_checks:
        borgmatic.borg.check.check_archives(
            repository['path'],
            config,
            local_borg_version,
            check_arguments,
            global_arguments,
            borg_specific_checks,
            archive_filter_flags,
            local_path=local_path,
            remote_path=remote_path,
        )
        # Record each successful check so frequency-based filtering can skip it next time.
        for check in borg_specific_checks:
            write_check_time(make_check_time_path(config, repository_id, check, archives_check_id))

    if 'extract' in checks:
        borgmatic.borg.extract.extract_last_archive_dry_run(
            config,
            local_borg_version,
            global_arguments,
            repository['path'],
            config.get('lock_wait'),
            local_path,
            remote_path,
        )
        write_check_time(make_check_time_path(config, repository_id, 'extract'))

    if 'spot' in checks:
        # The spot check needs a runtime directory, e.g. for loading bootstrap config paths.
        with borgmatic.config.paths.Runtime_directory(config) as borgmatic_runtime_directory:
            spot_check(
                repository,
                config,
                local_borg_version,
                global_arguments,
                local_path,
                remote_path,
                borgmatic_runtime_directory,
            )

        write_check_time(make_check_time_path(config, repository_id, 'spot'))