create.py 39 KB

12345678910111213141516171819202122232425262728293031323334353637383940414243444546474849505152535455565758596061626364656667686970717273747576777879808182838485868788899091929394959697989910010110210310410510610710810911011111211311411511611711811912012112212312412512612712812913013113213313413513613713813914014114214314414514614714814915015115215315415515615715815916016116216316416516616716816917017117217317417517617717817918018118218318418518618718818919019119219319419519619719819920020120220320420520620720820921021121221321421521621721821922022122222322422522622722822923023123223323423523623723823924024124224324424524624724824925025125225325425525625725825926026126226326426526626726826927027127227327427527627727827928028128228328428528628728828929029129229329429529629729829930030130230330430530630730830931031131231331431531631731831932032132232332432532632732832933033133233333433533633733833934034134234334434534634734834935035135235335435535635735835936036136236336436536636736836937037137237337437537637737837938038138238338438538638738838939039139239339439539639739839940040140240340440540640740840941041141241341441541641741841942042142242342442542642742842943043143243343443543643743843944044144244344444544644744844945045145245345445545645745845946046146246346446546646746846947047147247347447547647747847948048148248348448548648748848949049149249349449549649749849950050150250350450550650750850951051151251351451551651751851952052152252352452552652752852953053153253353453553653753853954054154254354454554654754854955055155255355455555655755855956056156256356456556656756856957057157257357457557657757857958058158258358458558658758858959059159259359459559659759859960060160260360460560660760860961061161261361461561661761861962062162262362462562662762862963063163263363463563663763863964064164264364464564664764864965065165265365465565665765865966066166266366466566666766866967067167267367467567667767867968068168268368468568668768868969069169269369469569669769869970070170270
3704705706707708709710711712713714715716717718719720721722723724725726727728729730731732733734735736737738739740741742743744745746747748749750751752753754755756757758759760761762763764765766767768769770771772773774775776777778779780781782783784785786787788789790791792793794795796797798799800801802803804805806807808809810811812813814815816817818819820821822823824825826827828829830831832833834835836837838839840841842843844845846847848849850851852853854855856857
  1. import sys
  2. import argparse
  3. import logging
  4. import os
  5. import stat
  6. import subprocess
  7. import time
  8. from datetime import datetime
  9. from io import TextIOWrapper
  10. from .common import with_repository, Highlander
  11. from .. import helpers
  12. from ..archive import Archive, is_special
  13. from ..archive import BackupError, BackupOSError, backup_io, OsOpen, stat_update_check
  14. from ..archive import FilesystemObjectProcessors, MetadataCollector, ChunksProcessor
  15. from ..cache import Cache
  16. from ..constants import * # NOQA
  17. from ..compress import CompressionSpec
  18. from ..helpers import ChunkerParams
  19. from ..helpers import NameSpec, CommentSpec, FilesCacheMode
  20. from ..helpers import eval_escapes
  21. from ..helpers import timestamp
  22. from ..helpers import get_cache_dir, os_stat
  23. from ..helpers import Manifest
  24. from ..helpers import dir_is_tagged
  25. from ..helpers import log_multi
  26. from ..helpers import basic_json_data, json_print
  27. from ..helpers import flags_root, flags_dir, flags_special_follow, flags_special
  28. from ..helpers import sig_int
  29. from ..helpers import iter_separated
  30. from ..patterns import PatternMatcher
  31. from ..platform import get_flags
  32. from ..platform import uid2user, gid2group
  33. from ..logger import create_logger
  34. logger = create_logger()
  35. class CreateMixIn:
    @with_repository(fake="dry_run", exclusive=True, compatibility=(Manifest.Operation.WRITE,))
    def do_create(self, args, repository, manifest=None, key=None):
        """Create new archive"""
        # note: the one-line docstring above is also user-visible - build_parser_create()
        # uses it as the argparse description - so it is kept short on purpose.
        # matcher decides include/exclude per path; fallback=True means paths not
        # matched by any pattern are included.
        matcher = PatternMatcher(fallback=True)
        matcher.add_inclexcl(args.patterns)

        def create_inner(archive, cache, fso):
            # Does the actual backup work. Called either with real archive/cache/fso
            # objects, or with (None, None, None) for a dry run (see bottom of do_create).
            # Closes over args, matcher, self and dry_run (assigned below, before the call).
            # Add cache dir to inode_skip list
            skip_inodes = set()
            try:
                st = os.stat(get_cache_dir())
                skip_inodes.add((st.st_ino, st.st_dev))
            except OSError:
                pass
            # Add local repository dir to inode_skip list
            if not args.location.host:
                try:
                    st = os.stat(args.location.path)
                    skip_inodes.add((st.st_ino, st.st_dev))
                except OSError:
                    pass
            logger.debug("Processing files ...")
            if args.content_from_command:
                # input mode 1: run the given command and archive its stdout as one file.
                path = args.stdin_name
                mode = args.stdin_mode
                user = args.stdin_user
                group = args.stdin_group
                if not dry_run:
                    try:
                        try:
                            proc = subprocess.Popen(args.paths, stdout=subprocess.PIPE)
                        except (FileNotFoundError, PermissionError) as e:
                            self.print_error("Failed to execute command: %s", e)
                            return self.exit_code
                        status = fso.process_pipe(
                            path=path, cache=cache, fd=proc.stdout, mode=mode, user=user, group=group
                        )
                        rc = proc.wait()
                        if rc != 0:
                            # command failed -> error out instead of keeping possibly truncated data,
                            # see the "Reading from stdin" section of the command epilog.
                            self.print_error("Command %r exited with status %d", args.paths[0], rc)
                            return self.exit_code
                    except BackupOSError as e:
                        self.print_error("%s: %s", path, e)
                        return self.exit_code
                else:
                    status = "-"
                self.print_file_status(status, path)
            elif args.paths_from_command or args.paths_from_stdin:
                # input mode 2: read a DELIM-separated list of paths (from a command's
                # stdout or from stdin) and archive exactly those paths, no recursion.
                paths_sep = eval_escapes(args.paths_delimiter) if args.paths_delimiter is not None else "\n"
                if args.paths_from_command:
                    try:
                        proc = subprocess.Popen(args.paths, stdout=subprocess.PIPE)
                    except (FileNotFoundError, PermissionError) as e:
                        self.print_error("Failed to execute command: %s", e)
                        return self.exit_code
                    pipe_bin = proc.stdout
                else:  # args.paths_from_stdin == True
                    pipe_bin = sys.stdin.buffer
                # surrogateescape keeps undecodable bytes in path names round-trippable
                pipe = TextIOWrapper(pipe_bin, errors="surrogateescape")
                for path in iter_separated(pipe, paths_sep):
                    path = os.path.normpath(path)
                    try:
                        with backup_io("stat"):
                            st = os_stat(path=path, parent_fd=None, name=None, follow_symlinks=False)
                        status = self._process_any(
                            path=path,
                            parent_fd=None,
                            name=None,
                            st=st,
                            fso=fso,
                            cache=cache,
                            read_special=args.read_special,
                            dry_run=dry_run,
                        )
                    except (BackupOSError, BackupError) as e:
                        self.print_warning("%s: %s", path, e)
                        status = "E"
                    if status == "C":
                        self.print_warning("%s: file changed while we backed it up", path)
                    self.print_file_status(status, path)
                if args.paths_from_command:
                    rc = proc.wait()
                    if rc != 0:
                        self.print_error("Command %r exited with status %d", args.paths[0], rc)
                        return self.exit_code
            else:
                # input mode 3 (default): archive the given paths, recursing into directories.
                for path in args.paths:
                    if path == "-":  # stdin
                        path = args.stdin_name
                        mode = args.stdin_mode
                        user = args.stdin_user
                        group = args.stdin_group
                        if not dry_run:
                            try:
                                status = fso.process_pipe(
                                    path=path, cache=cache, fd=sys.stdin.buffer, mode=mode, user=user, group=group
                                )
                            except BackupOSError as e:
                                status = "E"
                                self.print_warning("%s: %s", path, e)
                        else:
                            status = "-"
                        self.print_file_status(status, path)
                        continue
                    path = os.path.normpath(path)
                    parent_dir = os.path.dirname(path) or "."
                    name = os.path.basename(path)
                    try:
                        # note: for path == '/': name == '' and parent_dir == '/'.
                        # the empty name will trigger a fall-back to path-based processing in os_stat and os_open.
                        with OsOpen(path=parent_dir, flags=flags_root, noatime=True, op="open_root") as parent_fd:
                            try:
                                st = os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=False)
                            except OSError as e:
                                self.print_warning("%s: %s", path, e)
                                continue
                            if args.one_file_system:
                                # remember the device of the recursion root, so _rec_walk
                                # can refuse to cross into other file systems.
                                restrict_dev = st.st_dev
                            else:
                                restrict_dev = None
                            self._rec_walk(
                                path=path,
                                parent_fd=parent_fd,
                                name=name,
                                fso=fso,
                                cache=cache,
                                matcher=matcher,
                                exclude_caches=args.exclude_caches,
                                exclude_if_present=args.exclude_if_present,
                                keep_exclude_tags=args.keep_exclude_tags,
                                skip_inodes=skip_inodes,
                                restrict_dev=restrict_dev,
                                read_special=args.read_special,
                                dry_run=dry_run,
                            )
                            # if we get back here, we've finished recursing into <path>,
                            # we do not ever want to get back in there (even if path is given twice as recursion root)
                            skip_inodes.add((st.st_ino, st.st_dev))
                    except (BackupOSError, BackupError) as e:
                        # this comes from OsOpen, self._rec_walk has own exception handler
                        self.print_warning("%s: %s", path, e)
                        continue
            if not dry_run:
                if args.progress:
                    archive.stats.show_progress(final=True)
                archive.stats += fso.stats
                if sig_int:
                    # do not save the archive if the user ctrl-c-ed - it is valid, but incomplete.
                    # we already have a checkpoint archive in this case.
                    self.print_error("Got Ctrl-C / SIGINT.")
                else:
                    archive.save(comment=args.comment, timestamp=args.timestamp, stats=archive.stats)
                    args.stats |= args.json  # --json implies --stats
                    if args.stats:
                        if args.json:
                            json_print(basic_json_data(manifest, cache=cache, extra={"archive": archive}))
                        else:
                            log_multi(str(archive), str(archive.stats), logger=logging.getLogger("borg.output.stats"))

        # copy frequently needed options onto self so helper methods can read them
        self.output_filter = args.output_filter
        self.output_list = args.output_list
        self.noflags = args.noflags
        self.noacls = args.noacls
        self.noxattrs = args.noxattrs
        self.exclude_nodump = args.exclude_nodump
        dry_run = args.dry_run
        # NOTE(review): datetime.utcnow() is deprecated since Python 3.12; switching to an
        # aware datetime.now(timezone.utc) would change the value handed to Archive - confirm
        # Archive's expectations before modernizing.
        t0 = datetime.utcnow()
        t0_monotonic = time.monotonic()
        logger.info('Creating archive at "%s"' % args.location.processed)
        if not dry_run:
            with Cache(
                repository,
                key,
                manifest,
                progress=args.progress,
                lock_wait=self.lock_wait,
                permit_adhoc_cache=args.no_cache_sync,
                cache_mode=args.files_cache_mode,
                iec=args.iec,
            ) as cache:
                archive = Archive(
                    repository,
                    key,
                    manifest,
                    args.name,
                    cache=cache,
                    create=True,
                    checkpoint_interval=args.checkpoint_interval,
                    numeric_ids=args.numeric_ids,
                    noatime=not args.atime,
                    noctime=args.noctime,
                    progress=args.progress,
                    chunker_params=args.chunker_params,
                    start=t0,
                    start_monotonic=t0_monotonic,
                    log_json=args.log_json,
                    iec=args.iec,
                )
                metadata_collector = MetadataCollector(
                    noatime=not args.atime,
                    noctime=args.noctime,
                    noflags=args.noflags,
                    noacls=args.noacls,
                    noxattrs=args.noxattrs,
                    numeric_ids=args.numeric_ids,
                    nobirthtime=args.nobirthtime,
                )
                cp = ChunksProcessor(
                    cache=cache,
                    key=key,
                    add_item=archive.add_item,
                    write_checkpoint=archive.write_checkpoint,
                    checkpoint_interval=args.checkpoint_interval,
                    rechunkify=False,
                )
                fso = FilesystemObjectProcessors(
                    metadata_collector=metadata_collector,
                    cache=cache,
                    key=key,
                    process_file_chunks=cp.process_file_chunks,
                    add_item=archive.add_item,
                    chunker_params=args.chunker_params,
                    show_progress=args.progress,
                    sparse=args.sparse,
                    log_json=args.log_json,
                    iec=args.iec,
                    file_status_printer=self.print_file_status,
                )
                create_inner(archive, cache, fso)
        else:
            # dry run: walk the file system and print statuses, but touch no repository objects
            create_inner(None, None, None)
        return self.exit_code
  266. def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, dry_run):
  267. """
  268. Call the right method on the given FilesystemObjectProcessor.
  269. """
  270. if dry_run:
  271. return "-"
  272. elif stat.S_ISREG(st.st_mode):
  273. return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache)
  274. elif stat.S_ISDIR(st.st_mode):
  275. return fso.process_dir(path=path, parent_fd=parent_fd, name=name, st=st)
  276. elif stat.S_ISLNK(st.st_mode):
  277. if not read_special:
  278. return fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st)
  279. else:
  280. try:
  281. st_target = os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=True)
  282. except OSError:
  283. special = False
  284. else:
  285. special = is_special(st_target.st_mode)
  286. if special:
  287. return fso.process_file(
  288. path=path, parent_fd=parent_fd, name=name, st=st_target, cache=cache, flags=flags_special_follow
  289. )
  290. else:
  291. return fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st)
  292. elif stat.S_ISFIFO(st.st_mode):
  293. if not read_special:
  294. return fso.process_fifo(path=path, parent_fd=parent_fd, name=name, st=st)
  295. else:
  296. return fso.process_file(
  297. path=path, parent_fd=parent_fd, name=name, st=st, cache=cache, flags=flags_special
  298. )
  299. elif stat.S_ISCHR(st.st_mode):
  300. if not read_special:
  301. return fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type="c")
  302. else:
  303. return fso.process_file(
  304. path=path, parent_fd=parent_fd, name=name, st=st, cache=cache, flags=flags_special
  305. )
  306. elif stat.S_ISBLK(st.st_mode):
  307. if not read_special:
  308. return fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type="b")
  309. else:
  310. return fso.process_file(
  311. path=path, parent_fd=parent_fd, name=name, st=st, cache=cache, flags=flags_special
  312. )
  313. elif stat.S_ISSOCK(st.st_mode):
  314. # Ignore unix sockets
  315. return
  316. elif stat.S_ISDOOR(st.st_mode):
  317. # Ignore Solaris doors
  318. return
  319. elif stat.S_ISPORT(st.st_mode):
  320. # Ignore Solaris event ports
  321. return
  322. else:
  323. self.print_warning("Unknown file type: %s", path)
  324. return
    def _rec_walk(
        self,
        *,
        path,
        parent_fd,
        name,
        fso,
        cache,
        matcher,
        exclude_caches,
        exclude_if_present,
        keep_exclude_tags,
        skip_inodes,
        restrict_dev,
        read_special,
        dry_run,
    ):
        """
        Process *path* (or, preferably, parent_fd/name) recursively according to the various parameters.

        Most keyword arguments mirror ``borg create`` command line options.
        ``skip_inodes`` is a set of (st_ino, st_dev) pairs that must never be entered;
        ``restrict_dev``, if not None, limits recursion to items on that st_dev
        (i.e. to one file system, for --one-file-system).

        This should only raise on critical errors. Per-item errors must be handled within this method.
        """
        if sig_int and sig_int.action_done():
            # the user says "get out of here!" and we have already completed the desired action.
            return
        status = None
        try:
            recurse_excluded_dir = False
            if matcher.match(path):
                with backup_io("stat"):
                    st = os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=False)
            else:
                # item is excluded; 'x' = excluded, not backed up
                self.print_file_status("x", path)
                # get out here as quickly as possible:
                # we only need to continue if we shall recurse into an excluded directory.
                # if we shall not recurse, then do not even touch (stat()) the item, it
                # could trigger an error, e.g. if access is forbidden, see #3209.
                if not matcher.recurse_dir:
                    return
                recurse_excluded_dir = True
                with backup_io("stat"):
                    st = os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=False)
                if not stat.S_ISDIR(st.st_mode):
                    # excluded non-directories are never entered, we are done here
                    return
            if (st.st_ino, st.st_dev) in skip_inodes:
                # e.g. the cache dir, the repository dir or an already-visited recursion root
                return
            # if restrict_dev is given, we do not want to recurse into a new filesystem,
            # but we WILL save the mountpoint directory (or more precise: the root
            # directory of the mounted filesystem that shadows the mountpoint dir).
            recurse = restrict_dev is None or st.st_dev == restrict_dev
            if self.exclude_nodump:
                # Ignore if nodump flag is set
                with backup_io("flags"):
                    if get_flags(path=path, st=st) & stat.UF_NODUMP:
                        self.print_file_status("x", path)
                        return
            if not stat.S_ISDIR(st.st_mode):
                # directories cannot go in this branch because they can be excluded based on tag
                # files they might contain
                status = self._process_any(
                    path=path,
                    parent_fd=parent_fd,
                    name=name,
                    st=st,
                    fso=fso,
                    cache=cache,
                    read_special=read_special,
                    dry_run=dry_run,
                )
            else:
                with OsOpen(
                    path=path, parent_fd=parent_fd, name=name, flags=flags_dir, noatime=True, op="dir_open"
                ) as child_fd:
                    # child_fd is None for directories on windows, in that case a race condition check is not possible.
                    if child_fd is not None:
                        with backup_io("fstat"):
                            # re-stat via the open fd to detect the dir changing under us
                            st = stat_update_check(st, os.fstat(child_fd))
                    if recurse:
                        tag_names = dir_is_tagged(path, exclude_caches, exclude_if_present)
                        if tag_names:
                            # if we are already recursing in an excluded dir, we do not need to do anything else than
                            # returning (we do not need to archive or recurse into tagged directories), see #3991:
                            if not recurse_excluded_dir:
                                if keep_exclude_tags:
                                    # archive the tagged directory itself and its tag file(s),
                                    # but nothing else from inside it
                                    if not dry_run:
                                        fso.process_dir_with_fd(path=path, fd=child_fd, st=st)
                                    for tag_name in tag_names:
                                        tag_path = os.path.join(path, tag_name)
                                        self._rec_walk(
                                            path=tag_path,
                                            parent_fd=child_fd,
                                            name=tag_name,
                                            fso=fso,
                                            cache=cache,
                                            matcher=matcher,
                                            exclude_caches=exclude_caches,
                                            exclude_if_present=exclude_if_present,
                                            keep_exclude_tags=keep_exclude_tags,
                                            skip_inodes=skip_inodes,
                                            restrict_dev=restrict_dev,
                                            read_special=read_special,
                                            dry_run=dry_run,
                                        )
                                self.print_file_status("x", path)
                            return
                    if not recurse_excluded_dir and not dry_run:
                        status = fso.process_dir_with_fd(path=path, fd=child_fd, st=st)
                    if recurse:
                        with backup_io("scandir"):
                            entries = helpers.scandir_inorder(path=path, fd=child_fd)
                        for dirent in entries:
                            normpath = os.path.normpath(os.path.join(path, dirent.name))
                            self._rec_walk(
                                path=normpath,
                                parent_fd=child_fd,
                                name=dirent.name,
                                fso=fso,
                                cache=cache,
                                matcher=matcher,
                                exclude_caches=exclude_caches,
                                exclude_if_present=exclude_if_present,
                                keep_exclude_tags=keep_exclude_tags,
                                skip_inodes=skip_inodes,
                                restrict_dev=restrict_dev,
                                read_special=read_special,
                                dry_run=dry_run,
                            )
        except (BackupOSError, BackupError) as e:
            # per-item errors produce a warning + 'E' status, they do not abort the walk
            self.print_warning("%s: %s", path, e)
            status = "E"
        if status == "C":
            self.print_warning("%s: file changed while we backed it up", path)
        if not recurse_excluded_dir:
            self.print_file_status(status, path)
  458. def build_parser_create(self, subparsers, common_parser, mid_common_parser):
  459. from .common import process_epilog
  460. from .common import define_exclusion_group
  461. create_epilog = process_epilog(
  462. """
  463. This command creates a backup archive containing all files found while recursively
  464. traversing all paths specified. Paths are added to the archive as they are given,
  465. that means if relative paths are desired, the command has to be run from the correct
  466. directory.
  467. When giving '-' as path, borg will read data from standard input and create a
  468. file 'stdin' in the created archive from that data. In some cases it's more
  469. appropriate to use --content-from-command, however. See section *Reading from
  470. stdin* below for details.
  471. The archive will consume almost no disk space for files or parts of files that
  472. have already been stored in other archives.
  473. The archive name needs to be unique. It must not end in '.checkpoint' or
  474. '.checkpoint.N' (with N being a number), because these names are used for
  475. checkpoints and treated in special ways.
  476. In the archive name, you may use the following placeholders:
  477. {now}, {utcnow}, {fqdn}, {hostname}, {user} and some others.
  478. Backup speed is increased by not reprocessing files that are already part of
  479. existing archives and weren't modified. The detection of unmodified files is
  480. done by comparing multiple file metadata values with previous values kept in
  481. the files cache.
  482. This comparison can operate in different modes as given by ``--files-cache``:
  483. - ctime,size,inode (default)
  484. - mtime,size,inode (default behaviour of borg versions older than 1.1.0rc4)
  485. - ctime,size (ignore the inode number)
  486. - mtime,size (ignore the inode number)
  487. - rechunk,ctime (all files are considered modified - rechunk, cache ctime)
  488. - rechunk,mtime (all files are considered modified - rechunk, cache mtime)
  489. - disabled (disable the files cache, all files considered modified - rechunk)
  490. inode number: better safety, but often unstable on network filesystems
  491. Normally, detecting file modifications will take inode information into
  492. consideration to improve the reliability of file change detection.
  493. This is problematic for files located on sshfs and similar network file
  494. systems which do not provide stable inode numbers, such files will always
  495. be considered modified. You can use modes without `inode` in this case to
  496. improve performance, but reliability of change detection might be reduced.
  497. ctime vs. mtime: safety vs. speed
  498. - ctime is a rather safe way to detect changes to a file (metadata and contents)
  499. as it can not be set from userspace. But, a metadata-only change will already
  500. update the ctime, so there might be some unnecessary chunking/hashing even
  501. without content changes. Some filesystems do not support ctime (change time).
  502. E.g. doing a chown or chmod to a file will change its ctime.
  503. - mtime usually works and only updates if file contents were changed. But mtime
  504. can be arbitrarily set from userspace, e.g. to set mtime back to the same value
  505. it had before a content change happened. This can be used maliciously as well as
  506. well-meant, but in both cases mtime based cache modes can be problematic.
  507. The mount points of filesystems or filesystem snapshots should be the same for every
  508. creation of a new archive to ensure fast operation. This is because the file cache that
  509. is used to determine changed files quickly uses absolute filenames.
  510. If this is not possible, consider creating a bind mount to a stable location.
  511. The ``--progress`` option shows (from left to right) Original, Compressed and Deduplicated
  512. (O, C and D, respectively), then the Number of files (N) processed so far, followed by
  513. the currently processed path.
  514. When using ``--stats``, you will get some statistics about how much data was
  515. added - the "This Archive" deduplicated size there is most interesting as that is
  516. how much your repository will grow. Please note that the "All archives" stats refer to
  517. the state after creation. Also, the ``--stats`` and ``--dry-run`` options are mutually
  518. exclusive because the data is not actually compressed and deduplicated during a dry run.
  519. For more help on include/exclude patterns, see the :ref:`borg_patterns` command output.
  520. For more help on placeholders, see the :ref:`borg_placeholders` command output.
  521. .. man NOTES
  522. The ``--exclude`` patterns are not like tar. In tar ``--exclude`` .bundler/gems will
  523. exclude foo/.bundler/gems. In borg it will not, you need to use ``--exclude``
  524. '\\*/.bundler/gems' to get the same effect.
  525. In addition to using ``--exclude`` patterns, it is possible to use
  526. ``--exclude-if-present`` to specify the name of a filesystem object (e.g. a file
  527. or folder name) which, when contained within another folder, will prevent the
  528. containing folder from being backed up. By default, the containing folder and
  529. all of its contents will be omitted from the backup. If, however, you wish to
  530. only include the objects specified by ``--exclude-if-present`` in your backup,
  531. and not include any other contents of the containing folder, this can be enabled
  532. through using the ``--keep-exclude-tags`` option.
  533. The ``-x`` or ``--one-file-system`` option excludes directories, that are mountpoints (and everything in them).
  534. It detects mountpoints by comparing the device number from the output of ``stat()`` of the directory and its
  535. parent directory. Specifically, it excludes directories for which ``stat()`` reports a device number different
  536. from the device number of their parent. Be aware that in Linux (and possibly elsewhere) there are directories
  537. with device number different from their parent, which the kernel does not consider a mountpoint and also the
  538. other way around. Examples are bind mounts (possibly same device number, but always a mountpoint) and ALL
  539. subvolumes of a btrfs (different device number from parent but not necessarily a mountpoint). Therefore when
  540. using ``--one-file-system``, one should make doubly sure that the backup works as intended especially when using
  541. btrfs. This is even more important, if the btrfs layout was created by someone else, e.g. a distribution
  542. installer.
  543. .. _list_item_flags:
  544. Item flags
  545. ++++++++++
  546. ``--list`` outputs a list of all files, directories and other
  547. file system items it considered (no matter whether they had content changes
  548. or not). For each item, it prefixes a single-letter flag that indicates type
  549. and/or status of the item.
  550. If you are interested only in a subset of that output, you can give e.g.
  551. ``--filter=AME`` and it will only show regular files with A, M or E status (see
  552. below).
  553. A uppercase character represents the status of a regular file relative to the
  554. "files" cache (not relative to the repo -- this is an issue if the files cache
  555. is not used). Metadata is stored in any case and for 'A' and 'M' also new data
  556. chunks are stored. For 'U' all data chunks refer to already existing chunks.
  557. - 'A' = regular file, added (see also :ref:`a_status_oddity` in the FAQ)
  558. - 'M' = regular file, modified
  559. - 'U' = regular file, unchanged
  560. - 'C' = regular file, it changed while we backed it up
  561. - 'E' = regular file, an error happened while accessing/reading *this* file
  562. A lowercase character means a file type other than a regular file,
  563. borg usually just stores their metadata:
  564. - 'd' = directory
  565. - 'b' = block device
  566. - 'c' = char device
  567. - 'h' = regular file, hardlink (to already seen inodes)
  568. - 's' = symlink
  569. - 'f' = fifo
  570. Other flags used include:
  571. - 'i' = backup data was read from standard input (stdin)
  572. - '-' = dry run, item was *not* backed up
  573. - 'x' = excluded, item was *not* backed up
  574. - '?' = missing status code (if you see this, please file a bug report!)
  575. Reading from stdin
  576. ++++++++++++++++++
  577. There are two methods to read from stdin. Either specify ``-`` as path and
  578. pipe directly to borg::
  579. backup-vm --id myvm --stdout | borg create REPO::ARCHIVE -
  580. Or use ``--content-from-command`` to have Borg manage the execution of the
  581. command and piping. If you do so, the first PATH argument is interpreted
  582. as command to execute and any further arguments are treated as arguments
  583. to the command::
  584. borg create --content-from-command REPO::ARCHIVE -- backup-vm --id myvm --stdout
  585. ``--`` is used to ensure ``--id`` and ``--stdout`` are **not** considered
  586. arguments to ``borg`` but rather ``backup-vm``.
  587. The difference between the two approaches is that piping to borg creates an
  588. archive even if the command piping to borg exits with a failure. In this case,
  589. **one can end up with truncated output being backed up**. Using
  590. ``--content-from-command``, in contrast, borg is guaranteed to fail without
  591. creating an archive should the command fail. The command is considered failed
  592. when it returned a non-zero exit code.
  593. Reading from stdin yields just a stream of data without file metadata
  594. associated with it, and the files cache is not needed at all. So it is
  595. safe to disable it via ``--files-cache disabled`` and speed up backup
  596. creation a bit.
  597. By default, the content read from stdin is stored in a file called 'stdin'.
  598. Use ``--stdin-name`` to change the name.
  599. """
  600. )
  601. subparser = subparsers.add_parser(
  602. "create",
  603. parents=[common_parser],
  604. add_help=False,
  605. description=self.do_create.__doc__,
  606. epilog=create_epilog,
  607. formatter_class=argparse.RawDescriptionHelpFormatter,
  608. help="create backup",
  609. )
  610. subparser.set_defaults(func=self.do_create)
  611. # note: --dry-run and --stats are mutually exclusive, but we do not want to abort when
  612. # parsing, but rather proceed with the dry-run, but without stats (see run() method).
  613. subparser.add_argument(
  614. "-n", "--dry-run", dest="dry_run", action="store_true", help="do not create a backup archive"
  615. )
  616. subparser.add_argument(
  617. "-s", "--stats", dest="stats", action="store_true", help="print statistics for the created archive"
  618. )
  619. subparser.add_argument(
  620. "--list", dest="output_list", action="store_true", help="output verbose list of items (files, dirs, ...)"
  621. )
  622. subparser.add_argument(
  623. "--filter",
  624. metavar="STATUSCHARS",
  625. dest="output_filter",
  626. action=Highlander,
  627. help="only display items with the given status characters (see description)",
  628. )
  629. subparser.add_argument("--json", action="store_true", help="output stats as JSON. Implies ``--stats``.")
  630. subparser.add_argument(
  631. "--no-cache-sync",
  632. dest="no_cache_sync",
  633. action="store_true",
  634. help="experimental: do not synchronize the cache. Implies not using the files cache.",
  635. )
  636. subparser.add_argument(
  637. "--stdin-name",
  638. metavar="NAME",
  639. dest="stdin_name",
  640. default="stdin",
  641. help="use NAME in archive for stdin data (default: %(default)r)",
  642. )
  643. subparser.add_argument(
  644. "--stdin-user",
  645. metavar="USER",
  646. dest="stdin_user",
  647. default=uid2user(0),
  648. help="set user USER in archive for stdin data (default: %(default)r)",
  649. )
  650. subparser.add_argument(
  651. "--stdin-group",
  652. metavar="GROUP",
  653. dest="stdin_group",
  654. default=gid2group(0),
  655. help="set group GROUP in archive for stdin data (default: %(default)r)",
  656. )
  657. subparser.add_argument(
  658. "--stdin-mode",
  659. metavar="M",
  660. dest="stdin_mode",
  661. type=lambda s: int(s, 8),
  662. default=STDIN_MODE_DEFAULT,
  663. help="set mode to M in archive for stdin data (default: %(default)04o)",
  664. )
  665. subparser.add_argument(
  666. "--content-from-command",
  667. action="store_true",
  668. help="interpret PATH as command and store its stdout. See also section Reading from" " stdin below.",
  669. )
  670. subparser.add_argument(
  671. "--paths-from-stdin",
  672. action="store_true",
  673. help="read DELIM-separated list of paths to backup from stdin. Will not " "recurse into directories.",
  674. )
  675. subparser.add_argument(
  676. "--paths-from-command",
  677. action="store_true",
  678. help="interpret PATH as command and treat its output as ``--paths-from-stdin``",
  679. )
  680. subparser.add_argument(
  681. "--paths-delimiter",
  682. metavar="DELIM",
  683. help="set path delimiter for ``--paths-from-stdin`` and ``--paths-from-command`` (default: \\n) ",
  684. )
  685. exclude_group = define_exclusion_group(subparser, tag_files=True)
  686. exclude_group.add_argument(
  687. "--exclude-nodump", dest="exclude_nodump", action="store_true", help="exclude files flagged NODUMP"
  688. )
  689. fs_group = subparser.add_argument_group("Filesystem options")
  690. fs_group.add_argument(
  691. "-x",
  692. "--one-file-system",
  693. dest="one_file_system",
  694. action="store_true",
  695. help="stay in the same file system and do not store mount points of other file systems. This might behave different from your expectations, see the docs.",
  696. )
  697. fs_group.add_argument(
  698. "--numeric-ids",
  699. dest="numeric_ids",
  700. action="store_true",
  701. help="only store numeric user and group identifiers",
  702. )
  703. fs_group.add_argument("--atime", dest="atime", action="store_true", help="do store atime into archive")
  704. fs_group.add_argument("--noctime", dest="noctime", action="store_true", help="do not store ctime into archive")
  705. fs_group.add_argument(
  706. "--nobirthtime",
  707. dest="nobirthtime",
  708. action="store_true",
  709. help="do not store birthtime (creation date) into archive",
  710. )
  711. fs_group.add_argument(
  712. "--noflags",
  713. dest="noflags",
  714. action="store_true",
  715. help="do not read and store flags (e.g. NODUMP, IMMUTABLE) into archive",
  716. )
  717. fs_group.add_argument(
  718. "--noacls", dest="noacls", action="store_true", help="do not read and store ACLs into archive"
  719. )
  720. fs_group.add_argument(
  721. "--noxattrs", dest="noxattrs", action="store_true", help="do not read and store xattrs into archive"
  722. )
  723. fs_group.add_argument(
  724. "--sparse",
  725. dest="sparse",
  726. action="store_true",
  727. help="detect sparse holes in input (supported only by fixed chunker)",
  728. )
  729. fs_group.add_argument(
  730. "--files-cache",
  731. metavar="MODE",
  732. dest="files_cache_mode",
  733. action=Highlander,
  734. type=FilesCacheMode,
  735. default=FILES_CACHE_MODE_UI_DEFAULT,
  736. help="operate files cache in MODE. default: %s" % FILES_CACHE_MODE_UI_DEFAULT,
  737. )
  738. fs_group.add_argument(
  739. "--read-special",
  740. dest="read_special",
  741. action="store_true",
  742. help="open and read block and char device files as well as FIFOs as if they were "
  743. "regular files. Also follows symlinks pointing to these kinds of files.",
  744. )
  745. archive_group = subparser.add_argument_group("Archive options")
  746. archive_group.add_argument(
  747. "--comment",
  748. dest="comment",
  749. metavar="COMMENT",
  750. type=CommentSpec,
  751. default="",
  752. help="add a comment text to the archive",
  753. )
  754. archive_group.add_argument(
  755. "--timestamp",
  756. metavar="TIMESTAMP",
  757. dest="timestamp",
  758. type=timestamp,
  759. default=None,
  760. help="manually specify the archive creation date/time (UTC, yyyy-mm-ddThh:mm:ss format). "
  761. "Alternatively, give a reference file/directory.",
  762. )
  763. archive_group.add_argument(
  764. "-c",
  765. "--checkpoint-interval",
  766. metavar="SECONDS",
  767. dest="checkpoint_interval",
  768. type=int,
  769. default=1800,
  770. help="write checkpoint every SECONDS seconds (Default: 1800)",
  771. )
  772. archive_group.add_argument(
  773. "--chunker-params",
  774. metavar="PARAMS",
  775. dest="chunker_params",
  776. type=ChunkerParams,
  777. default=CHUNKER_PARAMS,
  778. action=Highlander,
  779. help="specify the chunker parameters (ALGO, CHUNK_MIN_EXP, CHUNK_MAX_EXP, "
  780. "HASH_MASK_BITS, HASH_WINDOW_SIZE). default: %s,%d,%d,%d,%d" % CHUNKER_PARAMS,
  781. )
  782. archive_group.add_argument(
  783. "-C",
  784. "--compression",
  785. metavar="COMPRESSION",
  786. dest="compression",
  787. type=CompressionSpec,
  788. default=CompressionSpec("lz4"),
  789. help="select compression algorithm, see the output of the " '"borg help compression" command for details.',
  790. )
  791. subparser.add_argument("name", metavar="NAME", type=NameSpec, help="specify the archive name")
  792. subparser.add_argument("paths", metavar="PATH", nargs="*", type=str, help="paths to archive")