# borgmatic ZFS data source hook: snapshots ZFS datasets for backup and cleans them up afterward.
  1. import collections
  2. import glob
  3. import logging
  4. import os
  5. import shutil
  6. import subprocess
  7. import borgmatic.config.paths
  8. import borgmatic.hooks.data_source.snapshot
  9. import borgmatic.execute
  10. logger = logging.getLogger(__name__)
  11. def use_streaming(hook_config, config, log_prefix): # pragma: no cover
  12. '''
  13. Return whether dump streaming is used for this hook. (Spoiler: It isn't.)
  14. '''
  15. return False
# Prefix for the names of snapshots created by borgmatic, so cleanup can identify (and only ever
# destroy) borgmatic's own snapshots.
BORGMATIC_SNAPSHOT_PREFIX = 'borgmatic-'

# ZFS user property that, when set to 'auto' on a dataset, opts that dataset into backups even if
# its mount point isn't listed in the configured source directories.
BORGMATIC_USER_PROPERTY = 'org.torsion.borgmatic:backup'

# A ZFS dataset selected for snapshotting:
#   name: the ZFS dataset name
#   mount_point: the path where the dataset is mounted
#   auto_backup: whether the dataset's borgmatic user property is set to 'auto'
#   contained_source_directories: configured source directories contained within the mount point
Dataset = collections.namedtuple(
    'Dataset', ('name', 'mount_point', 'auto_backup', 'contained_source_directories')
)
  21. def get_datasets_to_backup(zfs_command, source_directories):
  22. '''
  23. Given a ZFS command to run and a sequence of configured source directories, find the
  24. intersection between the current ZFS dataset mount points and the configured borgmatic source
  25. directories. The idea is that these are the requested datasets to snapshot. But also include any
  26. datasets tagged with a borgmatic-specific user property, whether or not they appear in source
  27. directories.
  28. Return the result as a sequence of Dataset instances, sorted by mount point.
  29. '''
  30. list_output = borgmatic.execute.execute_command_and_capture_output(
  31. (
  32. zfs_command,
  33. 'list',
  34. '-H',
  35. '-t',
  36. 'filesystem',
  37. '-o',
  38. f'name,mountpoint,{BORGMATIC_USER_PROPERTY}',
  39. )
  40. )
  41. try:
  42. # Sort from longest to shortest mount points, so longer mount points get a whack at the
  43. # candidate source directory piñata before their parents do. (Source directories are
  44. # consumed during the second loop below, so no two datasets get the same contained source
  45. # directories.)
  46. datasets = sorted(
  47. (
  48. Dataset(dataset_name, mount_point, (user_property_value == 'auto'), ())
  49. for line in list_output.splitlines()
  50. for (dataset_name, mount_point, user_property_value) in (line.rstrip().split('\t'),)
  51. ),
  52. key=lambda dataset: dataset.mount_point,
  53. reverse=True,
  54. )
  55. except ValueError:
  56. raise ValueError('Invalid {zfs_command} list output')
  57. candidate_source_directories = set(source_directories)
  58. return sorted(
  59. tuple(
  60. Dataset(
  61. dataset.name,
  62. dataset.mount_point,
  63. dataset.auto_backup,
  64. contained_source_directories,
  65. )
  66. for dataset in datasets
  67. for contained_source_directories in (
  68. (
  69. ((dataset.mount_point,) if dataset.auto_backup else ())
  70. + borgmatic.hooks.data_source.snapshot.get_contained_directories(
  71. dataset.mount_point, candidate_source_directories
  72. )
  73. ),
  74. )
  75. if contained_source_directories
  76. ),
  77. key=lambda dataset: dataset.mount_point,
  78. )
  79. def get_all_dataset_mount_points(zfs_command):
  80. '''
  81. Given a ZFS command to run, return all ZFS datasets as a sequence of sorted mount points.
  82. '''
  83. list_output = borgmatic.execute.execute_command_and_capture_output(
  84. (
  85. zfs_command,
  86. 'list',
  87. '-H',
  88. '-t',
  89. 'filesystem',
  90. '-o',
  91. 'mountpoint',
  92. )
  93. )
  94. try:
  95. return tuple(sorted(line.rstrip() for line in list_output.splitlines()))
  96. except ValueError:
  97. raise ValueError('Invalid {zfs_command} list output')
  98. def snapshot_dataset(zfs_command, full_snapshot_name): # pragma: no cover
  99. '''
  100. Given a ZFS command to run and a snapshot name of the form "dataset@snapshot", create a new ZFS
  101. snapshot.
  102. '''
  103. borgmatic.execute.execute_command(
  104. (
  105. zfs_command,
  106. 'snapshot',
  107. full_snapshot_name,
  108. ),
  109. output_log_level=logging.DEBUG,
  110. )
  111. def mount_snapshot(mount_command, full_snapshot_name, snapshot_mount_path): # pragma: no cover
  112. '''
  113. Given a mount command to run, an existing snapshot name of the form "dataset@snapshot", and the
  114. path where the snapshot should be mounted, mount the snapshot (making any necessary directories
  115. first).
  116. '''
  117. os.makedirs(snapshot_mount_path, mode=0o700, exist_ok=True)
  118. borgmatic.execute.execute_command(
  119. (
  120. mount_command,
  121. '-t',
  122. 'zfs',
  123. full_snapshot_name,
  124. snapshot_mount_path,
  125. ),
  126. output_log_level=logging.DEBUG,
  127. )
def dump_data_sources(
    hook_config,
    config,
    log_prefix,
    config_paths,
    borgmatic_runtime_directory,
    source_directories,
    dry_run,
):
    '''
    Given a ZFS configuration dict, a configuration dict, a log prefix, the borgmatic configuration
    file paths, the borgmatic runtime directory, the configured source directories, and whether this
    is a dry run, auto-detect and snapshot any ZFS dataset mount points listed in the given source
    directories and any dataset with a borgmatic-specific user property. Also update those source
    directories, replacing dataset mount points with corresponding snapshot directories so they get
    stored in the Borg archive instead. Use the log prefix in any log entries.

    Return an empty sequence, since there are no ongoing dump processes from this hook.

    If this is a dry run, then don't actually snapshot anything.
    '''
    dry_run_label = ' (dry run; not actually snapshotting anything)' if dry_run else ''
    logger.info(f'{log_prefix}: Snapshotting ZFS datasets{dry_run_label}')

    # List ZFS datasets to get their mount points.
    zfs_command = hook_config.get('zfs_command', 'zfs')
    requested_datasets = get_datasets_to_backup(zfs_command, source_directories)

    # Snapshot each dataset, rewriting source directories to use the snapshot paths.
    # The process ID makes the snapshot name unique to this borgmatic run.
    snapshot_name = f'{BORGMATIC_SNAPSHOT_PREFIX}{os.getpid()}'
    normalized_runtime_directory = os.path.normpath(borgmatic_runtime_directory)

    if not requested_datasets:
        logger.warning(f'{log_prefix}: No ZFS datasets found to snapshot{dry_run_label}')

    for dataset in requested_datasets:
        full_snapshot_name = f'{dataset.name}@{snapshot_name}'
        logger.debug(
            f'{log_prefix}: Creating ZFS snapshot {full_snapshot_name} of {dataset.mount_point}{dry_run_label}'
        )

        if not dry_run:
            snapshot_dataset(zfs_command, full_snapshot_name)

        # Mount the snapshot into a particular named temporary directory so that the snapshot ends
        # up in the Borg archive at the "original" dataset mount point path.
        snapshot_mount_path = os.path.join(
            normalized_runtime_directory,
            'zfs_snapshots',
            dataset.mount_point.lstrip(os.path.sep),
        )

        logger.debug(
            f'{log_prefix}: Mounting ZFS snapshot {full_snapshot_name} at {snapshot_mount_path}{dry_run_label}'
        )

        if dry_run:
            continue

        mount_snapshot(
            hook_config.get('mount_command', 'mount'), full_snapshot_name, snapshot_mount_path
        )

        # Replace each contained source directory with its path inside the mounted snapshot, so
        # Borg backs up the snapshot contents rather than the live filesystem.
        for source_directory in dataset.contained_source_directories:
            try:
                source_directories.remove(source_directory)
            except ValueError:
                # The source directory may have already been removed on a prior iteration.
                pass

            source_directories.append(
                os.path.join(
                    normalized_runtime_directory,
                    'zfs_snapshots',
                    '.',  # Borg 1.4+ "slashdot" hack.
                    source_directory.lstrip(os.path.sep),
                )
            )

    return []
  193. def unmount_snapshot(umount_command, snapshot_mount_path): # pragma: no cover
  194. '''
  195. Given a umount command to run and the mount path of a snapshot, unmount it.
  196. '''
  197. borgmatic.execute.execute_command(
  198. (
  199. umount_command,
  200. snapshot_mount_path,
  201. ),
  202. output_log_level=logging.DEBUG,
  203. )
  204. def destroy_snapshot(zfs_command, full_snapshot_name): # pragma: no cover
  205. '''
  206. Given a ZFS command to run and the name of a snapshot in the form "dataset@snapshot", destroy
  207. it.
  208. '''
  209. borgmatic.execute.execute_command(
  210. (
  211. zfs_command,
  212. 'destroy',
  213. full_snapshot_name,
  214. ),
  215. output_log_level=logging.DEBUG,
  216. )
  217. def get_all_snapshots(zfs_command):
  218. '''
  219. Given a ZFS command to run, return all ZFS snapshots as a sequence of full snapshot names of the
  220. form "dataset@snapshot".
  221. '''
  222. list_output = borgmatic.execute.execute_command_and_capture_output(
  223. (
  224. zfs_command,
  225. 'list',
  226. '-H',
  227. '-t',
  228. 'snapshot',
  229. '-o',
  230. 'name',
  231. )
  232. )
  233. return tuple(line.rstrip() for line in list_output.splitlines())
def remove_data_source_dumps(hook_config, config, log_prefix, borgmatic_runtime_directory, dry_run):
    '''
    Given a ZFS configuration dict, a configuration dict, a log prefix, the borgmatic runtime
    directory, and whether this is a dry run, unmount and destroy any ZFS snapshots created by
    borgmatic. Use the log prefix in any log entries. If this is a dry run, then don't actually
    remove anything.
    '''
    dry_run_label = ' (dry run; not actually removing anything)' if dry_run else ''

    # Unmount snapshots.
    zfs_command = hook_config.get('zfs_command', 'zfs')

    try:
        dataset_mount_points = get_all_dataset_mount_points(zfs_command)
    except FileNotFoundError:
        # A missing "zfs" command means there's nothing to clean up; bail without erroring.
        logger.debug(f'{log_prefix}: Could not find "{zfs_command}" command')
        return
    except subprocess.CalledProcessError as error:
        logger.debug(f'{log_prefix}: {error}')
        return

    # Glob over the runtime directory's temporary component so snapshot directories left over from
    # prior runs (with different temporary subdirectories) get cleaned up too.
    snapshots_glob = os.path.join(
        borgmatic.config.paths.replace_temporary_subdirectory_with_glob(
            os.path.normpath(borgmatic_runtime_directory),
        ),
        'zfs_snapshots',
    )
    logger.debug(
        f'{log_prefix}: Looking for snapshots to remove in {snapshots_glob}{dry_run_label}'
    )
    umount_command = hook_config.get('umount_command', 'umount')

    for snapshots_directory in glob.glob(snapshots_glob):
        if not os.path.isdir(snapshots_directory):
            continue

        # This might fail if the directory is already mounted, but we swallow errors here since
        # we'll try again below. The point of doing it here is that we don't want to try to unmount
        # a non-mounted directory (which *will* fail), and probing for whether a directory is
        # mounted is tough to do in a cross-platform way.
        if not dry_run:
            shutil.rmtree(snapshots_directory, ignore_errors=True)

        # Reversing the sorted datasets ensures that we unmount the longer mount point paths of
        # child datasets before the shorter mount point paths of parent datasets.
        for mount_point in reversed(dataset_mount_points):
            snapshot_mount_path = os.path.join(snapshots_directory, mount_point.lstrip(os.path.sep))

            if not os.path.isdir(snapshot_mount_path):
                continue

            logger.debug(
                f'{log_prefix}: Unmounting ZFS snapshot at {snapshot_mount_path}{dry_run_label}'
            )

            if not dry_run:
                try:
                    unmount_snapshot(umount_command, snapshot_mount_path)
                except FileNotFoundError:
                    logger.debug(f'{log_prefix}: Could not find "{umount_command}" command')
                    return
                except subprocess.CalledProcessError as error:
                    logger.debug(f'{log_prefix}: {error}')
                    return

        # Now that snapshots are unmounted, this rmtree should succeed (no ignore_errors).
        if not dry_run:
            shutil.rmtree(snapshots_directory)

    # Destroy snapshots.
    full_snapshot_names = get_all_snapshots(zfs_command)

    for full_snapshot_name in full_snapshot_names:
        # Only destroy snapshots that borgmatic actually created!
        if not full_snapshot_name.split('@')[-1].startswith(BORGMATIC_SNAPSHOT_PREFIX):
            continue

        logger.debug(f'{log_prefix}: Destroying ZFS snapshot {full_snapshot_name}{dry_run_label}')

        if not dry_run:
            destroy_snapshot(zfs_command, full_snapshot_name)
  300. def make_data_source_dump_patterns(
  301. hook_config, config, log_prefix, borgmatic_runtime_directory, name=None
  302. ): # pragma: no cover
  303. '''
  304. Restores aren't implemented, because stored files can be extracted directly with "extract".
  305. '''
  306. return ()
  307. def restore_data_source_dump(
  308. hook_config,
  309. config,
  310. log_prefix,
  311. data_source,
  312. dry_run,
  313. extract_process,
  314. connection_params,
  315. borgmatic_runtime_directory,
  316. ): # pragma: no cover
  317. '''
  318. Restores aren't implemented, because stored files can be extracted directly with "extract".
  319. '''
  320. raise NotImplementedError()