restore.py 22 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364365366367368369370371372373374375376377378379380381382383384385386387388389390391392393394395396397398399400401402403404405406407408409410411412413414415416417418419420421422423424425426427428429430431432433434435436437438439440441442443444445446447448449450451452453454455456457458459460461462463464465466467468469470471472473474475476477478479480481482483484485486487488489490491492493494495496497498499500501502503504505506507508509510511512513514515516517518519520521522523524525526527528529530531532533534535536537538539540541542543544545546547548549550551552553554555556557558559560561562563564565566567568569570571572573574575576577578579580581582583584585586587588589590591592593594595596597598599600601602603604605606607608609610611612613614615616617618619620621622623624625626627628629630631632633634635636637638639640
  1. import collections
  2. import logging
  3. import os
  4. import pathlib
  5. import shutil
  6. import tempfile
  7. import borgmatic.actions.pattern
  8. import borgmatic.borg.extract
  9. import borgmatic.borg.list
  10. import borgmatic.borg.mount
  11. import borgmatic.borg.repo_list
  12. import borgmatic.config.paths
  13. import borgmatic.config.validate
  14. import borgmatic.hooks.data_source.dump
  15. import borgmatic.hooks.dispatch
  16. logger = logging.getLogger(__name__)
  17. UNSPECIFIED = object()
  18. Dump = collections.namedtuple(
  19. 'Dump',
  20. ('hook_name', 'data_source_name', 'hostname', 'port', 'label', 'container'),
  21. defaults=(None, None, None, None),
  22. )
  23. def dumps_match(first, second, default_port=None):
  24. '''
  25. Compare two Dump instances for equality while supporting a field value of UNSPECIFIED, which
  26. indicates that the field should match any value. If a default port is given, then consider any
  27. dump having that port to match with a dump having a None port.
  28. '''
  29. # label kinda counts as an unique id, if they match ignore host/container/port
  30. if first.label not in {None, UNSPECIFIED} and first.label == second.label:
  31. field_list = ('hook_name', 'data_source_name')
  32. else:
  33. field_list = Dump._fields
  34. for field_name in field_list:
  35. first_value = getattr(first, field_name)
  36. second_value = getattr(second, field_name)
  37. if default_port is not None and field_name == 'port':
  38. if first_value == default_port and second_value is None:
  39. continue
  40. if second_value == default_port and first_value is None:
  41. continue
  42. if first_value == UNSPECIFIED or second_value == UNSPECIFIED: # noqa: PLR1714
  43. continue
  44. if first_value != second_value:
  45. return False
  46. return True
  47. def render_dump_metadata(dump):
  48. '''
  49. Given a Dump instance, make a display string describing it for use in log messages.
  50. '''
  51. label = dump.label or UNSPECIFIED
  52. name = 'unspecified' if dump.data_source_name is UNSPECIFIED else dump.data_source_name
  53. host = dump.container or dump.hostname or UNSPECIFIED
  54. port = None if dump.port is UNSPECIFIED else dump.port
  55. if label is not UNSPECIFIED:
  56. metadata = f'{name}@{label}'
  57. elif port:
  58. metadata = f'{name}@:{port}' if host is UNSPECIFIED else f'{name}@{host}:{port}'
  59. else:
  60. metadata = f'{name}' if host is UNSPECIFIED else f'{name}@{host}'
  61. if dump.hook_name not in {None, UNSPECIFIED}:
  62. return f'{metadata} ({dump.hook_name})'
  63. return metadata
  64. def get_configured_data_source(config, restore_dump):
  65. '''
  66. Search in the given configuration dict for dumps corresponding to the given dump to restore. If
  67. there are multiple matches, error.
  68. Return the found data source as a data source configuration dict or None if not found.
  69. '''
  70. try:
  71. hooks_to_search = {restore_dump.hook_name: config[restore_dump.hook_name]}
  72. except KeyError:
  73. return None
  74. matching_dumps = tuple(
  75. hook_data_source
  76. for (hook_name, hook_config) in hooks_to_search.items()
  77. for hook_data_source in hook_config
  78. for default_port in (
  79. borgmatic.hooks.dispatch.call_hook(
  80. function_name='get_default_port',
  81. config=config,
  82. hook_name=hook_name,
  83. ),
  84. )
  85. if dumps_match(
  86. Dump(
  87. hook_name,
  88. hook_data_source.get('name'),
  89. hook_data_source.get('hostname'),
  90. hook_data_source.get('port'),
  91. hook_data_source.get('label') or UNSPECIFIED,
  92. hook_data_source.get('container'),
  93. ),
  94. restore_dump,
  95. default_port,
  96. )
  97. )
  98. if not matching_dumps:
  99. return None
  100. if len(matching_dumps) > 1:
  101. raise ValueError(
  102. f'Cannot restore data source {render_dump_metadata(restore_dump)} because there are multiple matching data sources configured',
  103. )
  104. return matching_dumps[0]
  105. def strip_path_prefix_from_extracted_dump_destination(
  106. destination_path,
  107. borgmatic_runtime_directory,
  108. ):
  109. '''
  110. Directory-format dump files get extracted into a temporary directory containing a path prefix
  111. that depends how the files were stored in the archive. So, given the destination path where the
  112. dump was extracted and the borgmatic runtime directory, move the dump files such that the
  113. restore doesn't have to deal with that varying path prefix.
  114. For instance, if the dump was extracted to:
  115. /run/user/0/borgmatic/tmp1234/borgmatic/postgresql_databases/test/...
  116. or:
  117. /run/user/0/borgmatic/tmp1234/root/.borgmatic/postgresql_databases/test/...
  118. then this function moves it to:
  119. /run/user/0/borgmatic/postgresql_databases/test/...
  120. '''
  121. for subdirectory_path, _, _ in os.walk(destination_path):
  122. databases_directory = os.path.basename(subdirectory_path)
  123. if not databases_directory.endswith('_databases'):
  124. continue
  125. shutil.move(
  126. subdirectory_path,
  127. os.path.join(borgmatic_runtime_directory, databases_directory),
  128. )
  129. break
  130. def restore_single_dump(
  131. repository,
  132. config,
  133. local_borg_version,
  134. global_arguments,
  135. local_path,
  136. remote_path,
  137. archive_name,
  138. hook_name,
  139. data_source,
  140. connection_params,
  141. borgmatic_runtime_directory,
  142. ):
  143. '''
  144. Given (among other things) an archive name, a data source hook name, the hostname, port,
  145. username/password as connection params, and a configured data source configuration dict, restore
  146. that data source from the archive.
  147. '''
  148. dump_metadata = render_dump_metadata(
  149. Dump(
  150. hook_name,
  151. data_source['name'],
  152. data_source.get('hostname'),
  153. data_source.get('port'),
  154. data_source.get('label') or UNSPECIFIED,
  155. data_source.get('container'),
  156. ),
  157. )
  158. logger.info(f'Restoring data source {dump_metadata}')
  159. dump_patterns = borgmatic.hooks.dispatch.call_hooks(
  160. 'make_data_source_dump_patterns',
  161. config,
  162. borgmatic.hooks.dispatch.Hook_type.DATA_SOURCE,
  163. borgmatic_runtime_directory,
  164. data_source['name'],
  165. )[hook_name.split('_databases', 1)[0]]
  166. destination_path = (
  167. tempfile.mkdtemp(dir=borgmatic_runtime_directory)
  168. if data_source.get('format') == 'directory'
  169. else None
  170. )
  171. try:
  172. # Kick off a single data source extract. If using a directory format, extract to a temporary
  173. # directory. Otherwise extract the single dump file to stdout.
  174. extract_process = borgmatic.borg.extract.extract_archive(
  175. dry_run=global_arguments.dry_run,
  176. repository=repository['path'],
  177. archive=archive_name,
  178. paths=[
  179. borgmatic.hooks.data_source.dump.convert_glob_patterns_to_borg_pattern(
  180. dump_patterns,
  181. ),
  182. ],
  183. config=config,
  184. local_borg_version=local_borg_version,
  185. global_arguments=global_arguments,
  186. local_path=local_path,
  187. remote_path=remote_path,
  188. destination_path=destination_path,
  189. # A directory format dump isn't a single file, and therefore can't extract
  190. # to stdout. In this case, the extract_process return value is None.
  191. extract_to_stdout=bool(data_source.get('format') != 'directory'),
  192. )
  193. if destination_path and not global_arguments.dry_run:
  194. strip_path_prefix_from_extracted_dump_destination(
  195. destination_path,
  196. borgmatic_runtime_directory,
  197. )
  198. finally:
  199. if destination_path and not global_arguments.dry_run:
  200. shutil.rmtree(destination_path, ignore_errors=True)
  201. # Run a single data source restore, consuming the extract stdout (if any).
  202. borgmatic.hooks.dispatch.call_hook(
  203. function_name='restore_data_source_dump',
  204. config=config,
  205. hook_name=hook_name,
  206. data_source=data_source,
  207. dry_run=global_arguments.dry_run,
  208. extract_process=extract_process,
  209. connection_params=connection_params,
  210. borgmatic_runtime_directory=borgmatic_runtime_directory,
  211. )
def collect_dumps_from_archive(
    repository,
    archive,
    config,
    local_borg_version,
    global_arguments,
    local_path,
    remote_path,
    borgmatic_runtime_directory,
):
    '''
    Given a local or remote repository path, a resolved archive name, a configuration dict, the
    local Borg version, global arguments as an argparse.Namespace, local and remote Borg paths,
    and the borgmatic runtime directory, query the archive for the names of data sources dumps it
    contains and return them as a set of Dump instances.

    Two strategies are tried in order:

      1. List any "*_databases/dumps.json" metadata files in the archive, extract each one to
         stdout, and parse it into Dump instances. If this yields any dumps, return them.
      2. Otherwise, for backwards compatibility, list the dump paths matching "*_databases/*/*"
         and parse each path as ".../<hook name>/<hostname>[:<port>]/<data source name>". A
         hostname of "localhost" becomes None, and a missing or non-integer port becomes None.
         Paths that can't be parsed are logged as a warning and skipped.
    '''
    dumps_from_archive = set()

    # There is (at most) one dump metadata file per data source hook. Load each.
    for dumps_metadata_path in borgmatic.borg.list.capture_archive_listing(
        repository,
        archive,
        config,
        local_borg_version,
        global_arguments,
        list_paths=[
            'sh:'
            + borgmatic.hooks.data_source.dump.make_data_source_dump_path(
                base_directory,
                '*_databases/dumps.json',
            )
            # Probe for dump metadata files in multiple locations, as the default location is
            # "/borgmatic/*_databases/dumps.json" with Borg 1.4+, but instead begins with the
            # borgmatic runtime directory for older versions of Borg.
            for base_directory in (
                'borgmatic',
                borgmatic.config.paths.make_runtime_directory_glob(borgmatic_runtime_directory),
            )
        ],
        local_path=local_path,
        remote_path=remote_path,
    ):
        # Skip empty entries in the listing.
        if not dumps_metadata_path:
            continue

        # Extract the metadata file to stdout, decode it, and parse it into Dump instances.
        dumps_from_archive.update(
            set(
                borgmatic.hooks.data_source.dump.parse_data_source_dumps_metadata(
                    borgmatic.borg.extract.extract_archive(
                        global_arguments.dry_run,
                        repository,
                        archive,
                        [dumps_metadata_path],
                        config,
                        local_borg_version,
                        global_arguments,
                        local_path=local_path,
                        remote_path=remote_path,
                        extract_to_stdout=True,
                    )
                    .stdout.read()
                    .decode(),
                    dumps_metadata_path,
                )
            )
        )

    # If we've successfully loaded any dumps metadata, we're done.
    if dumps_from_archive:
        logger.debug('Collecting database dumps from archive data source dumps metadata files')

        return dumps_from_archive

    # No dumps metadata files were found, so for backwards compatibility, fall back to parsing the
    # paths of dumps found in the archive to get their respective dump metadata.
    logger.debug('Collecting database dumps from archive data source dump paths (fallback)')

    borgmatic_source_directory = str(
        pathlib.Path(borgmatic.config.paths.get_borgmatic_source_directory(config)),
    )

    # Probe for the data source dumps in multiple locations, as the default location has moved to
    # the borgmatic runtime directory (which gets stored as just "/borgmatic" with Borg 1.4+). But
    # we still want to support reading dumps from previously created archives as well.
    dump_paths = borgmatic.borg.list.capture_archive_listing(
        repository,
        archive,
        config,
        local_borg_version,
        global_arguments,
        list_paths=[
            'sh:'
            + borgmatic.hooks.data_source.dump.make_data_source_dump_path(
                base_directory,
                '*_databases/*/*',
            )
            for base_directory in (
                'borgmatic',
                borgmatic.config.paths.make_runtime_directory_glob(borgmatic_runtime_directory),
                borgmatic_source_directory.lstrip('/'),
            )
        ],
        local_path=local_path,
        remote_path=remote_path,
    )

    for dump_path in dump_paths:
        # Skip empty entries in the listing.
        if not dump_path:
            continue

        # Probe to find the base directory that's at the start of the dump path.
        for base_directory in (
            'borgmatic',
            borgmatic_runtime_directory,
            borgmatic_source_directory,
        ):
            # Take the three path components following the base directory. If the base directory
            # isn't in the path (IndexError) or fewer than three components follow it
            # (ValueError), try the next base directory.
            try:
                (hook_name, host_and_port, data_source_name) = dump_path.split(
                    base_directory + os.path.sep,
                    1,
                )[1].split(os.path.sep)[0:3]
            except (ValueError, IndexError):
                continue

            # Split "hostname:port" into its two parts, padding with None when there's no port.
            parts = host_and_port.split(':', 1)

            if len(parts) == 1:
                parts += (None,)

            (hostname, port) = parts

            # A missing (None) or non-numeric port becomes None.
            try:
                port = int(port)
            except (ValueError, TypeError):
                port = None

            dumps_from_archive.add(
                Dump(
                    hook_name, data_source_name, None if hostname == 'localhost' else hostname, port
                )
            )

            # We've successfully parsed the dump path, so no need to probe any further.
            break
        else:
            # None of the base directories produced a parseable path.
            logger.warning(
                f'Ignoring invalid data source dump path "{dump_path}" in archive {archive}',
            )

    return dumps_from_archive
  346. def get_dumps_to_restore(restore_arguments, dumps_from_archive):
  347. '''
  348. Given restore arguments as an argparse.Namespace instance indicating which dumps to restore and
  349. a set of Dump instances representing the dumps found in an archive, return a set of specific
  350. Dump instances from the archive to restore. As part of this, replace any Dump having a data
  351. source name of "all" with multiple named Dump instances as appropriate.
  352. Raise ValueError if any of the requested data source names cannot be found in the archive or if
  353. there are multiple archive dump matches for a given requested dump.
  354. '''
  355. requested_dumps = (
  356. {
  357. Dump(
  358. hook_name=(
  359. (
  360. restore_arguments.hook
  361. if restore_arguments.hook.endswith('_databases')
  362. else f'{restore_arguments.hook}_databases'
  363. )
  364. if restore_arguments.hook
  365. else UNSPECIFIED
  366. ),
  367. data_source_name=name,
  368. hostname=restore_arguments.original_hostname or UNSPECIFIED,
  369. port=restore_arguments.original_port,
  370. label=restore_arguments.original_label or UNSPECIFIED,
  371. container=restore_arguments.original_container or UNSPECIFIED,
  372. )
  373. for name in restore_arguments.data_sources or (UNSPECIFIED,)
  374. }
  375. if restore_arguments.hook
  376. or restore_arguments.data_sources
  377. or restore_arguments.original_hostname
  378. or restore_arguments.original_port
  379. or restore_arguments.original_label
  380. or restore_arguments.original_container
  381. else {
  382. Dump(
  383. hook_name=UNSPECIFIED,
  384. data_source_name='all',
  385. hostname=UNSPECIFIED,
  386. port=UNSPECIFIED,
  387. label=UNSPECIFIED,
  388. container=UNSPECIFIED,
  389. ),
  390. }
  391. )
  392. missing_dumps = set()
  393. dumps_to_restore = set()
  394. # If there's a requested "all" dump, add every dump from the archive to the dumps to restore.
  395. if any(dump for dump in requested_dumps if dump.data_source_name == 'all'):
  396. dumps_to_restore.update(dumps_from_archive)
  397. # If any archive dump matches a requested dump, add the archive dump to the dumps to restore.
  398. for requested_dump in requested_dumps:
  399. if requested_dump.data_source_name == 'all':
  400. continue
  401. matching_dumps = tuple(
  402. archive_dump
  403. for archive_dump in dumps_from_archive
  404. if dumps_match(requested_dump, archive_dump)
  405. )
  406. if len(matching_dumps) == 0:
  407. missing_dumps.add(requested_dump)
  408. elif len(matching_dumps) == 1:
  409. dumps_to_restore.add(matching_dumps[0])
  410. else:
  411. raise ValueError(
  412. f'Cannot restore data source {render_dump_metadata(requested_dump)} because there are multiple matching dumps in the archive. Try adding flags to disambiguate.',
  413. )
  414. if missing_dumps:
  415. rendered_dumps = ', '.join(
  416. f'{render_dump_metadata(dump)}' for dump in sorted(missing_dumps)
  417. )
  418. raise ValueError(
  419. f"Cannot restore data source dump{'s' if len(missing_dumps) > 1 else ''} {rendered_dumps} missing from archive",
  420. )
  421. return dumps_to_restore
  422. def ensure_requested_dumps_restored(dumps_to_restore, dumps_actually_restored):
  423. '''
  424. Given a set of requested dumps to restore and a set of dumps actually restored, raise ValueError
  425. if any requested dumps to restore weren't restored, indicating that they were missing from the
  426. configuration.
  427. '''
  428. if not dumps_actually_restored:
  429. raise ValueError('No data source dumps were found to restore')
  430. missing_dumps = sorted(
  431. dumps_to_restore - dumps_actually_restored,
  432. key=lambda dump: dump.data_source_name,
  433. )
  434. if missing_dumps:
  435. rendered_dumps = ', '.join(f'{render_dump_metadata(dump)}' for dump in missing_dumps)
  436. raise ValueError(
  437. f"Cannot restore data source{'s' if len(missing_dumps) > 1 else ''} {rendered_dumps} missing from borgmatic's configuration",
  438. )
def run_restore(
    repository,
    config,
    local_borg_version,
    restore_arguments,
    global_arguments,
    local_path,
    remote_path,
):
    '''
    Run the "restore" action for the given repository, but only if the repository matches the
    requested repository in restore arguments.

    Within a borgmatic runtime directory, this removes any existing data source dumps, resolves
    the archive name, collects the dumps present in the archive, selects the ones requested via
    the restore arguments, and restores each one that has a matching configured data source. A
    dump without an exact configuration match falls back to a configured "all" data source for
    the same hook, if any. Data source dumps are removed again after restoring.

    Raise ValueError if a configured data source could not be found to restore or there's no
    matching dump in the archive.
    '''
    # Skip this repository if a different one was requested.
    if restore_arguments.repository and not borgmatic.config.validate.repositories_match(
        repository,
        restore_arguments.repository,
    ):
        return

    logger.info(f'Restoring data sources from archive {restore_arguments.archive}')

    working_directory = borgmatic.config.paths.get_working_directory(config)

    with borgmatic.config.paths.Runtime_directory(config) as borgmatic_runtime_directory:
        patterns = borgmatic.actions.pattern.process_patterns(
            borgmatic.actions.pattern.collect_patterns(config),
            config,
            working_directory,
        )

        # Remove any existing data source dumps before restoring.
        borgmatic.hooks.dispatch.call_hooks_even_if_unconfigured(
            'remove_data_source_dumps',
            config,
            borgmatic.hooks.dispatch.Hook_type.DATA_SOURCE,
            borgmatic_runtime_directory,
            patterns,
            global_arguments.dry_run,
        )

        archive_name = borgmatic.borg.repo_list.resolve_archive_name(
            repository['path'],
            restore_arguments.archive,
            config,
            local_borg_version,
            global_arguments,
            local_path,
            remote_path,
        )
        dumps_from_archive = collect_dumps_from_archive(
            repository['path'],
            archive_name,
            config,
            local_borg_version,
            global_arguments,
            local_path,
            remote_path,
            borgmatic_runtime_directory,
        )
        dumps_to_restore = get_dumps_to_restore(restore_arguments, dumps_from_archive)

        dumps_actually_restored = set()
        # Connection settings taken from the restore arguments and passed to each restore.
        connection_params = {
            'container': restore_arguments.container,
            'hostname': restore_arguments.hostname,
            'port': restore_arguments.port,
            'username': restore_arguments.username,
            'password': restore_arguments.password,
            'restore_path': restore_arguments.restore_path,
        }

        # Restore each dump.
        for restore_dump in dumps_to_restore:
            found_data_source = get_configured_data_source(
                config,
                restore_dump,
            )

            # For a dump that wasn't found via an exact match in the configuration, try to fallback
            # to an "all" data source.
            if not found_data_source:
                found_data_source = get_configured_data_source(
                    config,
                    Dump(
                        restore_dump.hook_name,
                        'all',
                        restore_dump.hostname,
                        restore_dump.port,
                        restore_dump.label,
                        restore_dump.container,
                    ),
                )

                # No configured data source for this dump, so skip it. It gets reported as
                # missing by ensure_requested_dumps_restored() below.
                if not found_data_source:
                    continue

                # Copy the "all" data source configuration so the configured dict isn't mutated,
                # and give the copy the name of the specific dump being restored.
                found_data_source = dict(found_data_source)
                found_data_source['name'] = restore_dump.data_source_name

            dumps_actually_restored.add(restore_dump)

            restore_single_dump(
                repository,
                config,
                local_borg_version,
                global_arguments,
                local_path,
                remote_path,
                archive_name,
                restore_dump.hook_name,
                # Pass a copy of the data source with the requested schemas added.
                dict(found_data_source, schemas=restore_arguments.schemas),
                connection_params,
                borgmatic_runtime_directory,
            )

        # Remove the data source dumps again now that restoring is done.
        borgmatic.hooks.dispatch.call_hooks_even_if_unconfigured(
            'remove_data_source_dumps',
            config,
            borgmatic.hooks.dispatch.Hook_type.DATA_SOURCE,
            borgmatic_runtime_directory,
            patterns,
            global_arguments.dry_run,
        )

        ensure_requested_dumps_restored(dumps_to_restore, dumps_actually_restored)