zfs.py

import collections
import glob
import hashlib
import logging
import os
import shutil
import subprocess

import borgmatic.borg.pattern
import borgmatic.config.paths
import borgmatic.execute
import borgmatic.hooks.data_source.config
import borgmatic.hooks.data_source.snapshot

logger = logging.getLogger(__name__)


def use_streaming(hook_config, config):  # pragma: no cover
    '''
    Return whether dump streaming is used for this hook. (Spoiler: It isn't.)
    '''
    return False


BORGMATIC_SNAPSHOT_PREFIX = 'borgmatic-'
BORGMATIC_USER_PROPERTY = 'org.torsion.borgmatic:backup'
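# Setting the above user property to "auto" on a dataset (e.g. with `zfs set
# org.torsion.borgmatic:backup=auto pool/dataset`, where "pool/dataset" is a hypothetical dataset
# name) opts it into snapshotting even when no configured source pattern covers its mount point.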


Dataset = collections.namedtuple(
    'Dataset',
    ('name', 'mount_point', 'auto_backup', 'contained_patterns'),
    defaults=(False, ()),
)
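# In a Dataset, auto_backup indicates whether the dataset carries the "auto" user property above,
# and contained_patterns holds the configured patterns whose paths fall within the dataset's mount
# point (plus, for auto-backup datasets, a pattern for the mount point itself).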


def get_datasets_to_backup(zfs_command, patterns):
    '''
    Given a ZFS command to run and a sequence of configured patterns, find the intersection between
    the current ZFS dataset mount points and the paths of any patterns. The idea is that these
    pattern paths represent the requested datasets to snapshot. But also include any datasets tagged
    with a borgmatic-specific user property, whether or not they appear in the patterns.

    Only include datasets that contain at least one root pattern sourced from borgmatic
    configuration (as opposed to generated elsewhere in borgmatic).

    Return the result as a sequence of Dataset instances, sorted by mount point.
    '''
    list_output = borgmatic.execute.execute_command_and_capture_output(
        (
            *zfs_command.split(' '),
            'list',
            '-H',
            '-t',
            'filesystem',
            '-o',
            f'name,mountpoint,canmount,{BORGMATIC_USER_PROPERTY}',
        ),
        close_fds=True,
    )
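
    # With "-H", `zfs list` emits one tab-separated line per dataset and no header row, so the
    # captured output looks something like this (hypothetical pool and dataset names; a "-" in the
    # last column means the borgmatic user property isn't set on that dataset):
    #
    #   rpool/var    /var    on    auto
    #   rpool/srv    /srv    on    -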
    try:
        # Sort from longest to shortest mount points, so longer mount points get a whack at the
        # candidate pattern piñata before their parents do. (Patterns are consumed during the second
        # loop below, so no two datasets end up with the same contained patterns.)
        datasets = sorted(
            (
                Dataset(dataset_name, mount_point, (user_property_value == 'auto'), ())
                for line in list_output.splitlines()
                for (dataset_name, mount_point, can_mount, user_property_value) in (
                    line.rstrip().split('\t'),
                )
                # Skip datasets that are marked "canmount=off", because mounting their snapshots will
                # result in completely empty mount points—thereby preventing us from backing them up.
                if can_mount == 'on'
            ),
            key=lambda dataset: dataset.mount_point,
            reverse=True,
        )
    except ValueError:
        raise ValueError(f'Invalid {zfs_command} list output')

    candidate_patterns = set(patterns)

    return tuple(
        sorted(
            (
                Dataset(
                    dataset.name,
                    dataset.mount_point,
                    dataset.auto_backup,
                    contained_patterns,
                )
                for dataset in datasets
                for contained_patterns in (
                    (
                        (
                            (
                                borgmatic.borg.pattern.Pattern(
                                    dataset.mount_point,
                                    source=borgmatic.borg.pattern.Pattern_source.HOOK,
                                ),
                            )
                            if dataset.auto_backup
                            else ()
                        )
                        + borgmatic.hooks.data_source.snapshot.get_contained_patterns(
                            dataset.mount_point,
                            candidate_patterns,
                        )
                    ),
                )
                if dataset.auto_backup
                or any(
                    pattern.type == borgmatic.borg.pattern.Pattern_type.ROOT
                    and pattern.source == borgmatic.borg.pattern.Pattern_source.CONFIG
                    for pattern in contained_patterns
                )
            ),
            key=lambda dataset: dataset.mount_point,
        ),
    )


def get_all_dataset_mount_points(zfs_command):
    '''
    Given a ZFS command to run, return all ZFS datasets as a sequence of sorted mount points.
    '''
    list_output = borgmatic.execute.execute_command_and_capture_output(
        (
            *zfs_command.split(' '),
            'list',
            '-H',
            '-t',
            'filesystem',
            '-o',
            'mountpoint',
        ),
        close_fds=True,
    )

    return tuple(
        sorted(
            {
                mount_point
                for line in list_output.splitlines()
                for mount_point in (line.rstrip(),)
                if mount_point != 'none'
            },
        ),
    )


def snapshot_dataset(zfs_command, full_snapshot_name):  # pragma: no cover
    '''
    Given a ZFS command to run and a snapshot name of the form "dataset@snapshot", create a new ZFS
    snapshot.
    '''
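    # As a hypothetical example, with a zfs_command of "zfs" and a full_snapshot_name of
    # "rpool/var@borgmatic-1234", this runs: zfs snapshot rpool/var@borgmatic-1234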
    borgmatic.execute.execute_command(
        (
            *zfs_command.split(' '),
            'snapshot',
            full_snapshot_name,
        ),
        output_log_level=logging.DEBUG,
        close_fds=True,
    )


def mount_snapshot(mount_command, full_snapshot_name, snapshot_mount_path):  # pragma: no cover
    '''
    Given a mount command to run, an existing snapshot name of the form "dataset@snapshot", and the
    path where the snapshot should be mounted, mount the snapshot (making any necessary directories
    first).
    '''
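    # With the default mount command, this amounts to running something like the following, where
    # the snapshot name and mount path are hypothetical:
    #
    #   mount -t zfs -o ro rpool/var@borgmatic-1234 /path/to/runtime/zfs_snapshots/<hash>/var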
    os.makedirs(snapshot_mount_path, mode=0o700, exist_ok=True)

    borgmatic.execute.execute_command(
        (
            *mount_command.split(' '),
            '-t',
            'zfs',
            '-o',
            'ro',
            full_snapshot_name,
            snapshot_mount_path,
        ),
        output_log_level=logging.DEBUG,
        close_fds=True,
    )


MOUNT_POINT_HASH_LENGTH = 10


def make_borg_snapshot_pattern(pattern, dataset, normalized_runtime_directory):
    '''
    Given a Borg pattern as a borgmatic.borg.pattern.Pattern instance and the Dataset containing it,
    return a new Pattern with its path rewritten to be in a snapshot directory based on both the
    given runtime directory and the given Dataset's mount point.

    Move any initial caret in a regular expression pattern path to the beginning, so as not to break
    the regular expression.
    '''
    initial_caret = (
        '^'
        if pattern.style == borgmatic.borg.pattern.Pattern_style.REGULAR_EXPRESSION
        and pattern.path.startswith('^')
        else ''
    )

    rewritten_path = initial_caret + os.path.join(
        normalized_runtime_directory,
        'zfs_snapshots',
        # Including this hash prevents conflicts between snapshot patterns for different datasets.
        # For instance, without this, snapshotting a dataset at /var and another at /var/spool would
        # result in overlapping snapshot patterns and therefore colliding mount attempts.
        hashlib.shake_256(dataset.mount_point.encode('utf-8')).hexdigest(MOUNT_POINT_HASH_LENGTH),
        # Use the Borg 1.4+ "slashdot" hack to prevent the snapshot path prefix from getting
        # included in the archive—but only if there's not already a slashdot hack present in the
        # pattern.
        ('' if f'{os.path.sep}.{os.path.sep}' in pattern.path else '.'),
        # Included so that the source directory ends up in the Borg archive at its "original" path.
        pattern.path.lstrip('^').lstrip(os.path.sep),
    )
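
    # As a hypothetical example, a root pattern for a dataset mounted at /var/log ends up rewritten
    # to something like the following, where {runtime} and {hash} stand in for the normalized
    # runtime directory and the mount point hash, and "/./" is the slashdot hack mentioned above:
    #
    #   {runtime}/zfs_snapshots/{hash}/./var/log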
    return borgmatic.borg.pattern.Pattern(
        rewritten_path,
        pattern.type,
        pattern.style,
        pattern.device,
        source=borgmatic.borg.pattern.Pattern_source.HOOK,
    )


def dump_data_sources(
    hook_config,
    config,
    config_paths,
    borgmatic_runtime_directory,
    patterns,
    dry_run,
):
    '''
    Given a ZFS configuration dict, a configuration dict, the borgmatic configuration file paths,
    the borgmatic runtime directory, the configured patterns, and whether this is a dry run,
    auto-detect and snapshot any ZFS dataset mount points listed in the given patterns and any
    dataset with a borgmatic-specific user property. Also update those patterns, replacing dataset
    mount points with corresponding snapshot directories so they get stored in the Borg archive
    instead.

    Return an empty sequence, since there are no ongoing dump processes from this hook.

    If this is a dry run, then don't actually snapshot anything.
    '''
    dry_run_label = ' (dry run; not actually snapshotting anything)' if dry_run else ''
    logger.info(f'Snapshotting ZFS datasets{dry_run_label}')

    # List ZFS datasets to get their mount points, but only consider those patterns that came from
    # actual user configuration (as opposed to, say, other hooks).
    zfs_command = hook_config.get('zfs_command', 'zfs')
    requested_datasets = get_datasets_to_backup(zfs_command, patterns)

    # Snapshot each dataset, rewriting patterns to use the snapshot paths.
    snapshot_name = f'{BORGMATIC_SNAPSHOT_PREFIX}{os.getpid()}'
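    # The snapshot name comes out as, e.g., "borgmatic-1234" for a (hypothetical) borgmatic process
    # ID of 1234. remove_data_source_dumps() below relies on this "borgmatic-" prefix to recognize
    # which snapshots borgmatic created.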
    normalized_runtime_directory = os.path.normpath(borgmatic_runtime_directory)

    if not requested_datasets:
        logger.warning(f'No ZFS datasets found to snapshot{dry_run_label}')

    for dataset in requested_datasets:
        full_snapshot_name = f'{dataset.name}@{snapshot_name}'
        logger.debug(
            f'Creating ZFS snapshot {full_snapshot_name} of {dataset.mount_point}{dry_run_label}',
        )

        if not dry_run:
            snapshot_dataset(zfs_command, full_snapshot_name)

        # Mount the snapshot into a particular named temporary directory so that the snapshot ends
        # up in the Borg archive at the "original" dataset mount point path.
        snapshot_mount_path = os.path.join(
            normalized_runtime_directory,
            'zfs_snapshots',
            hashlib.shake_256(dataset.mount_point.encode('utf-8')).hexdigest(
                MOUNT_POINT_HASH_LENGTH,
            ),
            dataset.mount_point.lstrip(os.path.sep),
        )

        logger.debug(
            f'Mounting ZFS snapshot {full_snapshot_name} at {snapshot_mount_path}{dry_run_label}',
        )

        if dry_run:
            continue

        mount_snapshot(
            hook_config.get('mount_command', 'mount'),
            full_snapshot_name,
            snapshot_mount_path,
        )

        for pattern in dataset.contained_patterns:
            snapshot_pattern = make_borg_snapshot_pattern(
                pattern,
                dataset,
                normalized_runtime_directory,
            )
            borgmatic.hooks.data_source.config.replace_pattern(patterns, pattern, snapshot_pattern)

    return []


def unmount_snapshot(umount_command, snapshot_mount_path):  # pragma: no cover
    '''
    Given a umount command to run and the mount path of a snapshot, unmount it.
    '''
    borgmatic.execute.execute_command(
        (*umount_command.split(' '), snapshot_mount_path),
        output_log_level=logging.DEBUG,
        close_fds=True,
    )


def destroy_snapshot(zfs_command, full_snapshot_name):  # pragma: no cover
    '''
    Given a ZFS command to run and the name of a snapshot in the form "dataset@snapshot", destroy
    it.
    '''
    borgmatic.execute.execute_command(
        (
            *tuple(zfs_command.split(' ')),
            'destroy',
            full_snapshot_name,
        ),
        output_log_level=logging.DEBUG,
        close_fds=True,
    )


def get_all_snapshots(zfs_command):
    '''
    Given a ZFS command to run, return all ZFS snapshots as a sequence of full snapshot names of the
    form "dataset@snapshot".
    '''
    list_output = borgmatic.execute.execute_command_and_capture_output(
        (
            *tuple(zfs_command.split(' ')),
            'list',
            '-H',
            '-t',
            'snapshot',
            '-o',
            'name',
        ),
        close_fds=True,
    )
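
    # The output is one full snapshot name per line, e.g. (hypothetically):
    #
    #   rpool/var@borgmatic-1234
    #   rpool/srv@weekly-2024-01-01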
    return tuple(line.rstrip() for line in list_output.splitlines())


def remove_data_source_dumps(hook_config, config, borgmatic_runtime_directory, patterns, dry_run):  # noqa: PLR0912
    '''
    Given a ZFS configuration dict, a configuration dict, the borgmatic runtime directory, the
    configured patterns, and whether this is a dry run, unmount and destroy any ZFS snapshots
    created by borgmatic. If this is a dry run or ZFS isn't configured in borgmatic's configuration,
    then don't actually remove anything.
    '''
    if hook_config is None:
        return

    dry_run_label = ' (dry run; not actually removing anything)' if dry_run else ''

    # Unmount snapshots.
    zfs_command = hook_config.get('zfs_command', 'zfs')

    try:
        dataset_mount_points = get_all_dataset_mount_points(zfs_command)
    except FileNotFoundError:
        logger.debug(f'Could not find "{zfs_command}" command')
        return
    except subprocess.CalledProcessError as error:
        logger.debug(error)
        return

    snapshots_glob = os.path.join(
        borgmatic.config.paths.replace_temporary_subdirectory_with_glob(
            os.path.normpath(borgmatic_runtime_directory),
        ),
        'zfs_snapshots',
        '*',
    )
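    # Because any temporary subdirectory component in the runtime path gets replaced with a "*",
    # this glob matches zfs_snapshots directories left over from previous borgmatic runs as well as
    # the ones from this run.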
    logger.debug(f'Looking for snapshots to remove in {snapshots_glob}{dry_run_label}')
    umount_command = hook_config.get('umount_command', 'umount')

    for snapshots_directory in glob.glob(snapshots_glob):
        if not os.path.isdir(snapshots_directory):
            continue

        # Reversing the sorted datasets ensures that we unmount the longer mount point paths of
        # child datasets before the shorter mount point paths of parent datasets.
        for mount_point in reversed(dataset_mount_points):
            snapshot_mount_path = os.path.join(snapshots_directory, mount_point.lstrip(os.path.sep))

            # If the snapshot mount path is empty, this is probably just a "shadow" of a nested
            # dataset and therefore there's nothing to unmount.
            if not os.path.isdir(snapshot_mount_path) or not os.listdir(snapshot_mount_path):
                continue

            # This might fail if the path is already mounted, but we swallow errors here since we'll
            # do another recursive delete below. The point of doing it here is that we don't want to
            # try to unmount a non-mounted directory (which *will* fail), and probing for whether a
            # directory is mounted is tough to do in a cross-platform way.
            if not dry_run:
                shutil.rmtree(snapshot_mount_path, ignore_errors=True)

                # If the delete was successful, that means there's nothing to unmount.
                if not os.path.isdir(snapshot_mount_path):
                    continue

            logger.debug(f'Unmounting ZFS snapshot at {snapshot_mount_path}{dry_run_label}')

            if not dry_run:
                try:
                    unmount_snapshot(umount_command, snapshot_mount_path)
                except FileNotFoundError:
                    logger.debug(f'Could not find "{umount_command}" command')
                    return
                except subprocess.CalledProcessError as error:
                    logger.debug(error)
                    continue

            if not dry_run:
                shutil.rmtree(snapshot_mount_path, ignore_errors=True)

    # Destroy snapshots.
    full_snapshot_names = get_all_snapshots(zfs_command)

    for full_snapshot_name in full_snapshot_names:
        # Only destroy snapshots that borgmatic actually created!
        if not full_snapshot_name.split('@')[-1].startswith(BORGMATIC_SNAPSHOT_PREFIX):
            continue

        logger.debug(f'Destroying ZFS snapshot {full_snapshot_name}{dry_run_label}')

        if not dry_run:
            destroy_snapshot(zfs_command, full_snapshot_name)


def make_data_source_dump_patterns(
    hook_config,
    config,
    borgmatic_runtime_directory,
    name=None,
):  # pragma: no cover
    '''
    Restores aren't implemented, because stored files can be extracted directly with "extract".
    '''
    return ()


def restore_data_source_dump(
    hook_config,
    config,
    data_source,
    dry_run,
    extract_process,
    connection_params,
    borgmatic_runtime_directory,
):  # pragma: no cover
    '''
    Restores aren't implemented, because stored files can be extracted directly with "extract".
    '''
    raise NotImplementedError()