# btrfs.py
  1. import collections
  2. import functools
  3. import glob
  4. import logging
  5. import os
  6. import pathlib
  7. import shutil
  8. import subprocess
  9. import borgmatic.borg.pattern
  10. import borgmatic.config.paths
  11. import borgmatic.execute
  12. import borgmatic.hooks.data_source.snapshot
  13. logger = logging.getLogger(__name__)
  14. def use_streaming(hook_config, config): # pragma: no cover
  15. '''
  16. Return whether dump streaming is used for this hook. (Spoiler: It isn't.)
  17. '''
  18. return False
  19. @functools.cache
  20. def path_is_a_subvolume(btrfs_command, path):
  21. '''
  22. Given a btrfs command and a path, return whether the path is a Btrfs subvolume. Return False if
  23. the btrfs command errors, which probably indicates there isn't a containing Btrfs subvolume for
  24. the given path.
  25. As a performance optimization, multiple calls to this function with the same arguments are
  26. cached.
  27. '''
  28. try:
  29. borgmatic.execute.execute_command(
  30. (
  31. *btrfs_command.split(' '),
  32. 'subvolume',
  33. 'show',
  34. path,
  35. ),
  36. output_log_level=None,
  37. close_fds=True,
  38. )
  39. # An error from the command (probably) indicates that the path is not actually a subvolume.
  40. except subprocess.CalledProcessError:
  41. return False
  42. return True
  43. @functools.cache
  44. def get_subvolume_property(btrfs_command, subvolume_path, property_name):
  45. '''
  46. Given a btrfs command, a subvolume path, and a property name to lookup, return the value of the
  47. corresponding property.
  48. Raise subprocess.CalledProcessError if the btrfs command errors.
  49. As a performance optimization, multiple calls to this function with the same arguments are
  50. cached.
  51. '''
  52. output = borgmatic.execute.execute_command_and_capture_output(
  53. (
  54. *btrfs_command.split(' '),
  55. 'property',
  56. 'get',
  57. '-t', # Type.
  58. 'subvol',
  59. subvolume_path,
  60. property_name,
  61. ),
  62. close_fds=True,
  63. )
  64. try:
  65. value = output.strip().split('=')[1]
  66. except IndexError:
  67. raise ValueError(f'Invalid {btrfs_command} property output')
  68. return {
  69. 'true': True,
  70. 'false': False,
  71. }.get(value, value)
  72. def get_containing_subvolume_path(btrfs_command, path):
  73. '''
  74. Given a btrfs command and a path, return the subvolume path that contains the given path (or is
  75. the same as the path).
  76. If there is no such subvolume path or the containing subvolume is read-only, return None.
  77. '''
  78. # Probe the given pattern's path and all of its parents, grandparents, etc. to try to find a
  79. # Btrfs subvolume.
  80. for candidate_path in (
  81. path,
  82. *tuple(str(ancestor) for ancestor in pathlib.PurePath(path).parents),
  83. ):
  84. if not path_is_a_subvolume(btrfs_command, candidate_path):
  85. continue
  86. try:
  87. if get_subvolume_property(btrfs_command, candidate_path, 'ro'):
  88. logger.debug(f'Ignoring Btrfs subvolume {candidate_path} because it is read-only')
  89. return None
  90. logger.debug(f'Path {candidate_path} is a Btrfs subvolume')
  91. return candidate_path
  92. except subprocess.CalledProcessError as error:
  93. logger.debug(
  94. f'Error determining read-only status of Btrfs subvolume {candidate_path}: {error}',
  95. )
  96. return None
  97. return None
  98. def get_all_subvolume_paths(btrfs_command, patterns):
  99. '''
  100. Given a btrfs command and a sequence of patterns, get the sorted paths for all Btrfs subvolumes
  101. containing those patterns.
  102. '''
  103. return tuple(
  104. sorted(
  105. {
  106. subvolume_path
  107. for pattern in patterns
  108. if pattern.type == borgmatic.borg.pattern.Pattern_type.ROOT
  109. if pattern.source == borgmatic.borg.pattern.Pattern_source.CONFIG
  110. for subvolume_path in (get_containing_subvolume_path(btrfs_command, pattern.path),)
  111. if subvolume_path
  112. }
  113. ),
  114. )
  115. Subvolume = collections.namedtuple('Subvolume', ('path', 'contained_patterns'), defaults=((),))
  116. def get_subvolumes(btrfs_command, patterns):
  117. '''
  118. Given a Btrfs command to run and a sequence of configured patterns, find the intersection
  119. between the current Btrfs filesystem and subvolume paths and the paths of any patterns. The
  120. idea is that these pattern paths represent the requested subvolumes to snapshot.
  121. Only include subvolumes that contain at least one root pattern sourced from borgmatic
  122. configuration (as opposed to generated elsewhere in borgmatic).
  123. Return the result as a sequence of matching Subvolume instances.
  124. '''
  125. candidate_patterns = set(patterns or ())
  126. subvolumes = []
  127. # For each subvolume path, match it against the given patterns to find the subvolumes to
  128. # backup. Sort the subvolumes from longest to shortest mount points, so longer subvolumes get
  129. # a whack at the candidate pattern piñata before their parents do. (Patterns are consumed during
  130. # this process, so no two subvolumes end up with the same contained patterns.)
  131. for subvolume_path in reversed(get_all_subvolume_paths(btrfs_command, patterns)):
  132. subvolumes.extend(
  133. Subvolume(subvolume_path, contained_patterns)
  134. for contained_patterns in (
  135. borgmatic.hooks.data_source.snapshot.get_contained_patterns(
  136. subvolume_path,
  137. candidate_patterns,
  138. ),
  139. )
  140. if any(
  141. pattern.type == borgmatic.borg.pattern.Pattern_type.ROOT
  142. and pattern.source == borgmatic.borg.pattern.Pattern_source.CONFIG
  143. for pattern in contained_patterns
  144. )
  145. )
  146. return tuple(sorted(subvolumes, key=lambda subvolume: subvolume.path))
  147. BORGMATIC_SNAPSHOT_PREFIX = '.borgmatic-snapshot-'
  148. def make_snapshot_path(subvolume_path):
  149. '''
  150. Given the path to a subvolume, make a corresponding snapshot path for it.
  151. '''
  152. return os.path.join(
  153. subvolume_path,
  154. f'{BORGMATIC_SNAPSHOT_PREFIX}{os.getpid()}',
  155. # Included so that the snapshot ends up in the Borg archive at the "original" subvolume path.
  156. ) + subvolume_path.rstrip(os.path.sep)
  157. def make_snapshot_exclude_pattern(subvolume_path): # pragma: no cover
  158. '''
  159. Given the path to a subvolume, make a corresponding exclude pattern for its embedded snapshot
  160. path. This is to work around a quirk of Btrfs: If you make a snapshot path as a child directory
  161. of a subvolume, then the snapshot's own initial directory component shows up as an empty
  162. directory within the snapshot itself. For instance, if you have a Btrfs subvolume at /mnt and
  163. make a snapshot of it at:
  164. /mnt/.borgmatic-snapshot-1234/mnt
  165. ... then the snapshot itself will have an empty directory at:
  166. /mnt/.borgmatic-snapshot-1234/mnt/.borgmatic-snapshot-1234
  167. So to prevent that from ending up in the Borg archive, this function produces an exclude pattern
  168. to exclude that path.
  169. '''
  170. snapshot_directory = f'{BORGMATIC_SNAPSHOT_PREFIX}{os.getpid()}'
  171. return borgmatic.borg.pattern.Pattern(
  172. os.path.join(
  173. subvolume_path,
  174. snapshot_directory,
  175. subvolume_path.lstrip(os.path.sep),
  176. snapshot_directory,
  177. ),
  178. borgmatic.borg.pattern.Pattern_type.NO_RECURSE,
  179. borgmatic.borg.pattern.Pattern_style.FNMATCH,
  180. source=borgmatic.borg.pattern.Pattern_source.HOOK,
  181. )
  182. def make_borg_snapshot_pattern(subvolume_path, pattern):
  183. '''
  184. Given the path to a subvolume and a pattern as a borgmatic.borg.pattern.Pattern instance whose
  185. path is inside the subvolume, return a new Pattern with its path rewritten to be in a snapshot
  186. path intended for giving to Borg.
  187. Move any initial caret in a regular expression pattern path to the beginning, so as not to break
  188. the regular expression.
  189. '''
  190. initial_caret = (
  191. '^'
  192. if pattern.style == borgmatic.borg.pattern.Pattern_style.REGULAR_EXPRESSION
  193. and pattern.path.startswith('^')
  194. else ''
  195. )
  196. rewritten_path = initial_caret + os.path.join(
  197. subvolume_path,
  198. f'{BORGMATIC_SNAPSHOT_PREFIX}{os.getpid()}',
  199. # Use the Borg 1.4+ "slashdot" hack to prevent the snapshot path prefix from getting
  200. # included in the archive—but only if there's not already a slashdot hack present in the
  201. # pattern.
  202. ('' if f'{os.path.sep}.{os.path.sep}' in pattern.path else '.'),
  203. # Included so that the source directory ends up in the Borg archive at its "original" path.
  204. pattern.path.lstrip('^').lstrip(os.path.sep),
  205. )
  206. return borgmatic.borg.pattern.Pattern(
  207. rewritten_path,
  208. pattern.type,
  209. pattern.style,
  210. pattern.device,
  211. source=borgmatic.borg.pattern.Pattern_source.HOOK,
  212. )
  213. def snapshot_subvolume(btrfs_command, subvolume_path, snapshot_path): # pragma: no cover
  214. '''
  215. Given a Btrfs command to run, the path to a subvolume, and the path for a snapshot, create a new
  216. Btrfs snapshot of the subvolume.
  217. '''
  218. os.makedirs(os.path.dirname(snapshot_path), mode=0o700, exist_ok=True)
  219. borgmatic.execute.execute_command(
  220. (
  221. *btrfs_command.split(' '),
  222. 'subvolume',
  223. 'snapshot',
  224. '-r', # Read-only.
  225. subvolume_path,
  226. snapshot_path,
  227. ),
  228. output_log_level=logging.DEBUG,
  229. close_fds=True,
  230. )
  231. def dump_data_sources(
  232. hook_config,
  233. config,
  234. config_paths,
  235. borgmatic_runtime_directory,
  236. patterns,
  237. dry_run,
  238. ):
  239. '''
  240. Given a Btrfs configuration dict, a configuration dict, the borgmatic configuration file paths,
  241. the borgmatic runtime directory, the configured patterns, and whether this is a dry run,
  242. auto-detect and snapshot any Btrfs subvolume paths listed in the given patterns. Also update
  243. those patterns, replacing subvolume paths with corresponding snapshot directories so they get
  244. stored in the Borg archive instead.
  245. Return an empty sequence, since there are no ongoing dump processes from this hook.
  246. If this is a dry run, then don't actually snapshot anything.
  247. '''
  248. dry_run_label = ' (dry run; not actually snapshotting anything)' if dry_run else ''
  249. logger.info(f'Snapshotting Btrfs subvolumes{dry_run_label}')
  250. if 'findmnt_command' in hook_config:
  251. logger.warning(
  252. 'The Btrfs "findmnt_command" option is deprecated and will be removed from a future release; findmnt is no longer used',
  253. )
  254. # Based on the configured patterns, determine Btrfs subvolumes to backup. Only consider those
  255. # patterns that came from actual user configuration (as opposed to, say, other hooks).
  256. btrfs_command = hook_config.get('btrfs_command', 'btrfs')
  257. subvolumes = get_subvolumes(btrfs_command, patterns)
  258. if not subvolumes:
  259. logger.warning(f'No Btrfs subvolumes found to snapshot{dry_run_label}')
  260. # Snapshot each subvolume, rewriting patterns to use their snapshot paths.
  261. for subvolume in subvolumes:
  262. logger.debug(f'Creating Btrfs snapshot for {subvolume.path} subvolume')
  263. snapshot_path = make_snapshot_path(subvolume.path)
  264. if dry_run:
  265. continue
  266. snapshot_subvolume(btrfs_command, subvolume.path, snapshot_path)
  267. for pattern in subvolume.contained_patterns:
  268. snapshot_pattern = make_borg_snapshot_pattern(subvolume.path, pattern)
  269. # Attempt to update the pattern in place, since pattern order matters to Borg.
  270. try:
  271. patterns[patterns.index(pattern)] = snapshot_pattern
  272. except ValueError:
  273. patterns.append(snapshot_pattern)
  274. patterns.append(make_snapshot_exclude_pattern(subvolume.path))
  275. return []
  276. def delete_snapshot(btrfs_command, snapshot_path): # pragma: no cover
  277. '''
  278. Given a Btrfs command to run and the name of a snapshot path, delete it.
  279. '''
  280. borgmatic.execute.execute_command(
  281. (
  282. *btrfs_command.split(' '),
  283. 'subvolume',
  284. 'delete',
  285. snapshot_path,
  286. ),
  287. output_log_level=logging.DEBUG,
  288. close_fds=True,
  289. )
  290. def remove_data_source_dumps(hook_config, config, borgmatic_runtime_directory, patterns, dry_run):
  291. '''
  292. Given a Btrfs configuration dict, a configuration dict, the borgmatic runtime directory, the
  293. configured patterns, and whether this is a dry run, delete any Btrfs snapshots created by
  294. borgmatic. If this is a dry run or Btrfs isn't configured in borgmatic's configuration, then
  295. don't actually remove anything.
  296. '''
  297. if hook_config is None:
  298. return
  299. dry_run_label = ' (dry run; not actually removing anything)' if dry_run else ''
  300. btrfs_command = hook_config.get('btrfs_command', 'btrfs')
  301. try:
  302. all_subvolumes = get_subvolumes(btrfs_command, patterns)
  303. except FileNotFoundError as error:
  304. logger.debug(f'Could not find "{error.filename}" command')
  305. return
  306. except subprocess.CalledProcessError as error:
  307. logger.debug(error)
  308. return
  309. # Reversing the sorted subvolumes ensures that we remove longer paths of child subvolumes before
  310. # the shorter paths of parent subvolumes.
  311. for subvolume in reversed(all_subvolumes):
  312. subvolume_snapshots_glob = borgmatic.config.paths.replace_temporary_subdirectory_with_glob(
  313. os.path.normpath(make_snapshot_path(subvolume.path)),
  314. temporary_directory_prefix=BORGMATIC_SNAPSHOT_PREFIX,
  315. )
  316. logger.debug(
  317. f'Looking for snapshots to remove in {subvolume_snapshots_glob}{dry_run_label}',
  318. )
  319. for snapshot_path in glob.glob(subvolume_snapshots_glob):
  320. if not os.path.isdir(snapshot_path):
  321. continue
  322. logger.debug(f'Deleting Btrfs snapshot {snapshot_path}{dry_run_label}')
  323. if dry_run:
  324. continue
  325. try:
  326. delete_snapshot(btrfs_command, snapshot_path)
  327. except FileNotFoundError:
  328. logger.debug(f'Could not find "{btrfs_command}" command')
  329. return
  330. except subprocess.CalledProcessError as error:
  331. logger.debug(error)
  332. return
  333. # Remove the snapshot parent directory if it still exists. (It might not exist if the
  334. # snapshot was for "/".)
  335. snapshot_parent_dir = snapshot_path.rsplit(subvolume.path, 1)[0]
  336. if os.path.isdir(snapshot_parent_dir):
  337. shutil.rmtree(snapshot_parent_dir)
  338. def make_data_source_dump_patterns(
  339. hook_config,
  340. config,
  341. borgmatic_runtime_directory,
  342. name=None,
  343. ): # pragma: no cover
  344. '''
  345. Restores aren't implemented, because stored files can be extracted directly with "extract".
  346. '''
  347. return ()
  348. def restore_data_source_dump(
  349. hook_config,
  350. config,
  351. data_source,
  352. dry_run,
  353. extract_process,
  354. connection_params,
  355. borgmatic_runtime_directory,
  356. ): # pragma: no cover
  357. '''
  358. Restores aren't implemented, because stored files can be extracted directly with "extract".
  359. '''
  360. raise NotImplementedError()