btrfs.py 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223224225226227228229230231232233234235236237238239240241242243244245246247248249250251252253254255256257258259260261262263264265266267268269270271272273274275276277278279280281282283284285286287288289290291292293294295296297298299300301302303304305306307308309310311312313314315316317318319320321322323324325326327328329330331332333334335336337338339340341342343344345346347348349350351352353354355356357358359360361362363364
  1. import collections
  2. import glob
  3. import json
  4. import logging
  5. import os
  6. import shutil
  7. import subprocess
  8. import borgmatic.borg.pattern
  9. import borgmatic.config.paths
  10. import borgmatic.execute
  11. import borgmatic.hooks.data_source.snapshot
  12. logger = logging.getLogger(__name__)
  13. def use_streaming(hook_config, config, log_prefix): # pragma: no cover
  14. '''
  15. Return whether dump streaming is used for this hook. (Spoiler: It isn't.)
  16. '''
  17. return False
  18. def get_filesystem_mount_points(findmnt_command):
  19. '''
  20. Given a findmnt command to run, get all top-level Btrfs filesystem mount points.
  21. '''
  22. findmnt_output = borgmatic.execute.execute_command_and_capture_output(
  23. tuple(findmnt_command.split(' '))
  24. + (
  25. '-t', # Filesystem type.
  26. 'btrfs',
  27. '--json',
  28. '--list', # Request a flat list instead of a nested subvolume hierarchy.
  29. )
  30. )
  31. try:
  32. return tuple(
  33. filesystem['target'] for filesystem in json.loads(findmnt_output)['filesystems']
  34. )
  35. except json.JSONDecodeError as error:
  36. raise ValueError(f'Invalid {findmnt_command} JSON output: {error}')
  37. except KeyError as error:
  38. raise ValueError(f'Invalid {findmnt_command} output: Missing key "{error}"')
  39. def get_subvolumes_for_filesystem(btrfs_command, filesystem_mount_point):
  40. '''
  41. Given a Btrfs command to run and a Btrfs filesystem mount point, get the sorted subvolumes for
  42. that filesystem. Include the filesystem itself.
  43. '''
  44. btrfs_output = borgmatic.execute.execute_command_and_capture_output(
  45. tuple(btrfs_command.split(' '))
  46. + (
  47. 'subvolume',
  48. 'list',
  49. filesystem_mount_point,
  50. )
  51. )
  52. if not filesystem_mount_point.strip():
  53. return ()
  54. return (filesystem_mount_point,) + tuple(
  55. sorted(
  56. subvolume_path
  57. for line in btrfs_output.splitlines()
  58. for subvolume_subpath in (line.rstrip().split(' ')[-1],)
  59. for subvolume_path in (os.path.join(filesystem_mount_point, subvolume_subpath),)
  60. if subvolume_subpath.strip()
  61. )
  62. )
  63. Subvolume = collections.namedtuple('Subvolume', ('path', 'contained_patterns'), defaults=((),))
  64. def get_subvolumes(btrfs_command, findmnt_command, patterns=None):
  65. '''
  66. Given a Btrfs command to run and a sequence of configured patterns, find the intersection
  67. between the current Btrfs filesystem and subvolume mount points and the paths of any patterns.
  68. The idea is that these pattern paths represent the requested subvolumes to snapshot.
  69. If patterns is None, then return all subvolumes, sorted by path.
  70. Return the result as a sequence of matching subvolume mount points.
  71. '''
  72. candidate_patterns = set(patterns or ())
  73. subvolumes = []
  74. # For each filesystem mount point, find its subvolumes and match them against the given patterns
  75. # to find the subvolumes to backup. And within this loop, sort the subvolumes from longest to
  76. # shortest mount points, so longer mount points get a whack at the candidate pattern piñata
  77. # before their parents do. (Patterns are consumed during this process, so no two subvolumes end
  78. # up with the same contained patterns.)
  79. for mount_point in get_filesystem_mount_points(findmnt_command):
  80. subvolumes.extend(
  81. Subvolume(subvolume_path, contained_patterns)
  82. for subvolume_path in reversed(
  83. get_subvolumes_for_filesystem(btrfs_command, mount_point)
  84. )
  85. for contained_patterns in (
  86. borgmatic.hooks.data_source.snapshot.get_contained_patterns(
  87. subvolume_path, candidate_patterns
  88. ),
  89. )
  90. if patterns is None or contained_patterns
  91. )
  92. return tuple(sorted(subvolumes, key=lambda subvolume: subvolume.path))
  93. BORGMATIC_SNAPSHOT_PREFIX = '.borgmatic-snapshot-'
  94. def make_snapshot_path(subvolume_path):
  95. '''
  96. Given the path to a subvolume, make a corresponding snapshot path for it.
  97. '''
  98. return os.path.join(
  99. subvolume_path,
  100. f'{BORGMATIC_SNAPSHOT_PREFIX}{os.getpid()}',
  101. # Included so that the snapshot ends up in the Borg archive at the "original" subvolume path.
  102. ) + subvolume_path.rstrip(os.path.sep)
  103. def make_snapshot_exclude_pattern(subvolume_path): # pragma: no cover
  104. '''
  105. Given the path to a subvolume, make a corresponding exclude pattern for its embedded snapshot
  106. path. This is to work around a quirk of Btrfs: If you make a snapshot path as a child directory
  107. of a subvolume, then the snapshot's own initial directory component shows up as an empty
  108. directory within the snapshot itself. For instance, if you have a Btrfs subvolume at /mnt and
  109. make a snapshot of it at:
  110. /mnt/.borgmatic-snapshot-1234/mnt
  111. ... then the snapshot itself will have an empty directory at:
  112. /mnt/.borgmatic-snapshot-1234/mnt/.borgmatic-snapshot-1234
  113. So to prevent that from ending up in the Borg archive, this function produces an exclude pattern
  114. to exclude that path.
  115. '''
  116. snapshot_directory = f'{BORGMATIC_SNAPSHOT_PREFIX}{os.getpid()}'
  117. return borgmatic.borg.pattern.Pattern(
  118. os.path.join(
  119. subvolume_path,
  120. snapshot_directory,
  121. subvolume_path.lstrip(os.path.sep),
  122. snapshot_directory,
  123. ),
  124. borgmatic.borg.pattern.Pattern_type.NO_RECURSE,
  125. borgmatic.borg.pattern.Pattern_style.FNMATCH,
  126. )
  127. def make_borg_snapshot_pattern(subvolume_path, pattern):
  128. '''
  129. Given the path to a subvolume and a pattern as a borgmatic.borg.pattern.Pattern instance whose
  130. path is inside the subvolume, return a new Pattern with its path rewritten to be in a snapshot
  131. path intended for giving to Borg.
  132. Move any initial caret in a regular expression pattern path to the beginning, so as not to break
  133. the regular expression.
  134. '''
  135. initial_caret = (
  136. '^'
  137. if pattern.style == borgmatic.borg.pattern.Pattern_style.REGULAR_EXPRESSION
  138. and pattern.path.startswith('^')
  139. else ''
  140. )
  141. rewritten_path = initial_caret + os.path.join(
  142. subvolume_path,
  143. f'{BORGMATIC_SNAPSHOT_PREFIX}{os.getpid()}',
  144. '.', # Borg 1.4+ "slashdot" hack.
  145. # Included so that the source directory ends up in the Borg archive at its "original" path.
  146. pattern.path.lstrip('^').lstrip(os.path.sep),
  147. )
  148. return borgmatic.borg.pattern.Pattern(
  149. rewritten_path,
  150. pattern.type,
  151. pattern.style,
  152. pattern.device,
  153. )
  154. def snapshot_subvolume(btrfs_command, subvolume_path, snapshot_path): # pragma: no cover
  155. '''
  156. Given a Btrfs command to run, the path to a subvolume, and the path for a snapshot, create a new
  157. Btrfs snapshot of the subvolume.
  158. '''
  159. os.makedirs(os.path.dirname(snapshot_path), mode=0o700, exist_ok=True)
  160. borgmatic.execute.execute_command(
  161. tuple(btrfs_command.split(' '))
  162. + (
  163. 'subvolume',
  164. 'snapshot',
  165. '-r', # Read-only.
  166. subvolume_path,
  167. snapshot_path,
  168. ),
  169. output_log_level=logging.DEBUG,
  170. )
  171. def dump_data_sources(
  172. hook_config,
  173. config,
  174. log_prefix,
  175. config_paths,
  176. borgmatic_runtime_directory,
  177. patterns,
  178. dry_run,
  179. ):
  180. '''
  181. Given a Btrfs configuration dict, a configuration dict, a log prefix, the borgmatic
  182. configuration file paths, the borgmatic runtime directory, the configured patterns, and whether
  183. this is a dry run, auto-detect and snapshot any Btrfs subvolume mount points listed in the given
  184. patterns. Also update those patterns, replacing subvolume mount points with corresponding
  185. snapshot directories so they get stored in the Borg archive instead. Use the log prefix in any
  186. log entries.
  187. Return an empty sequence, since there are no ongoing dump processes from this hook.
  188. If this is a dry run, then don't actually snapshot anything.
  189. '''
  190. dry_run_label = ' (dry run; not actually snapshotting anything)' if dry_run else ''
  191. logger.info(f'{log_prefix}: Snapshotting Btrfs subvolumes{dry_run_label}')
  192. # Based on the configured patterns, determine Btrfs subvolumes to backup.
  193. btrfs_command = hook_config.get('btrfs_command', 'btrfs')
  194. findmnt_command = hook_config.get('findmnt_command', 'findmnt')
  195. subvolumes = get_subvolumes(btrfs_command, findmnt_command, patterns)
  196. if not subvolumes:
  197. logger.warning(f'{log_prefix}: No Btrfs subvolumes found to snapshot{dry_run_label}')
  198. # Snapshot each subvolume, rewriting patterns to use their snapshot paths.
  199. for subvolume in subvolumes:
  200. logger.debug(f'{log_prefix}: Creating Btrfs snapshot for {subvolume.path} subvolume')
  201. snapshot_path = make_snapshot_path(subvolume.path)
  202. if dry_run:
  203. continue
  204. snapshot_subvolume(btrfs_command, subvolume.path, snapshot_path)
  205. for pattern in subvolume.contained_patterns:
  206. snapshot_pattern = make_borg_snapshot_pattern(subvolume.path, pattern)
  207. # Attempt to update the pattern in place, since pattern order matters to Borg.
  208. try:
  209. patterns[patterns.index(pattern)] = snapshot_pattern
  210. except ValueError:
  211. patterns.append(snapshot_pattern)
  212. patterns.append(make_snapshot_exclude_pattern(subvolume.path))
  213. return []
  214. def delete_snapshot(btrfs_command, snapshot_path): # pragma: no cover
  215. '''
  216. Given a Btrfs command to run and the name of a snapshot path, delete it.
  217. '''
  218. borgmatic.execute.execute_command(
  219. tuple(btrfs_command.split(' '))
  220. + (
  221. 'subvolume',
  222. 'delete',
  223. snapshot_path,
  224. ),
  225. output_log_level=logging.DEBUG,
  226. )
  227. def remove_data_source_dumps(hook_config, config, log_prefix, borgmatic_runtime_directory, dry_run):
  228. '''
  229. Given a Btrfs configuration dict, a configuration dict, a log prefix, the borgmatic runtime
  230. directory, and whether this is a dry run, delete any Btrfs snapshots created by borgmatic. Use
  231. the log prefix in any log entries. If this is a dry run or Btrfs isn't configured in borgmatic's
  232. configuration, then don't actually remove anything.
  233. '''
  234. if hook_config is None:
  235. return
  236. dry_run_label = ' (dry run; not actually removing anything)' if dry_run else ''
  237. btrfs_command = hook_config.get('btrfs_command', 'btrfs')
  238. findmnt_command = hook_config.get('findmnt_command', 'findmnt')
  239. try:
  240. all_subvolumes = get_subvolumes(btrfs_command, findmnt_command)
  241. except FileNotFoundError as error:
  242. logger.debug(f'{log_prefix}: Could not find "{error.filename}" command')
  243. return
  244. except subprocess.CalledProcessError as error:
  245. logger.debug(f'{log_prefix}: {error}')
  246. return
  247. # Reversing the sorted subvolumes ensures that we remove longer mount point paths of child
  248. # subvolumes before the shorter mount point paths of parent subvolumes.
  249. for subvolume in reversed(all_subvolumes):
  250. subvolume_snapshots_glob = borgmatic.config.paths.replace_temporary_subdirectory_with_glob(
  251. os.path.normpath(make_snapshot_path(subvolume.path)),
  252. temporary_directory_prefix=BORGMATIC_SNAPSHOT_PREFIX,
  253. )
  254. logger.debug(
  255. f'{log_prefix}: Looking for snapshots to remove in {subvolume_snapshots_glob}{dry_run_label}'
  256. )
  257. for snapshot_path in glob.glob(subvolume_snapshots_glob):
  258. if not os.path.isdir(snapshot_path):
  259. continue
  260. logger.debug(f'{log_prefix}: Deleting Btrfs snapshot {snapshot_path}{dry_run_label}')
  261. if dry_run:
  262. continue
  263. try:
  264. delete_snapshot(btrfs_command, snapshot_path)
  265. except FileNotFoundError:
  266. logger.debug(f'{log_prefix}: Could not find "{btrfs_command}" command')
  267. return
  268. except subprocess.CalledProcessError as error:
  269. logger.debug(f'{log_prefix}: {error}')
  270. return
  271. # Strip off the subvolume path from the end of the snapshot path and then delete the
  272. # resulting directory.
  273. shutil.rmtree(snapshot_path.rsplit(subvolume.path, 1)[0])
  274. def make_data_source_dump_patterns(
  275. hook_config, config, log_prefix, borgmatic_runtime_directory, name=None
  276. ): # pragma: no cover
  277. '''
  278. Restores aren't implemented, because stored files can be extracted directly with "extract".
  279. '''
  280. return ()
  281. def restore_data_source_dump(
  282. hook_config,
  283. config,
  284. log_prefix,
  285. data_source,
  286. dry_run,
  287. extract_process,
  288. connection_params,
  289. borgmatic_runtime_directory,
  290. ): # pragma: no cover
  291. '''
  292. Restores aren't implemented, because stored files can be extracted directly with "extract".
  293. '''
  294. raise NotImplementedError()