Browse Source

create: auto-exclude items based on xattrs or NODUMP, see #4972

no options yet; the macOS and Linux xattr names are just hardcoded.
removed the --exclude-nodump option; NODUMP-flagged items are now excluded automatically, too.

also: create: call stat_ext_attrs early

this reads bsdflags, xattrs and ACLs from the
filesystem, unless the user chose to disable that.

notable:
- borg always reads these, even for unchanged files
- if we read them early, borg can now behave differently
  based on, e.g., an xattr value (for example, exclude the file)
Thomas Waldmann 4 months ago
parent
commit
2129d81fdb

+ 16 - 2
src/borg/archive.py

@@ -27,7 +27,7 @@ from .crypto.key import key_factory, UnsupportedPayloadError
 from .compress import CompressionSpec
 from .constants import *  # NOQA
 from .crypto.low_level import IntegrityError as IntegrityErrorBase
-from .helpers import BackupError, BackupRaceConditionError
+from .helpers import BackupError, BackupRaceConditionError, BackupItemExcluded
 from .helpers import BackupOSError, BackupPermissionError, BackupFileNotFoundError, BackupIOError
 from .hashindex import ChunkIndex, ChunkIndexEntry
 from .helpers import HardLinkManager
@@ -1185,6 +1185,18 @@ class ChunksProcessor:
                 stats.show_progress(item=item, dt=0.2)
 
 
+def maybe_exclude_by_attr(item):
+    if xattrs := item.get("xattrs"):
+        apple_excluded = xattrs.get(b"com.apple.metadata:com_apple_backup_excludeItem")
+        linux_excluded = xattrs.get(b"user.xdg.robots.backup")
+        if apple_excluded is not None or linux_excluded == b"true":
+            raise BackupItemExcluded
+
+    if flags := item.get("bsdflags"):
+        if flags & stat.UF_NODUMP:
+            raise BackupItemExcluded
+
+
 class FilesystemObjectProcessors:
     # When ported to threading, then this doesn't need chunker, cache, key any more.
     # process_file becomes a callback passed to __init__.
@@ -1243,6 +1255,7 @@ class FilesystemObjectProcessors:
                 hl_chunks = chunks
             item.hlid = self.hlm.hardlink_id_from_inode(ino=st.st_ino, dev=st.st_dev)
         yield item, status, hardlinked, hl_chunks
+        maybe_exclude_by_attr(item)
         self.add_item(item, stats=self.stats)
         if update_map:
             # remember the hlid of this fs object and if the item has chunks,
@@ -1370,6 +1383,8 @@ class FilesystemObjectProcessors:
                 with backup_io("fstat"):
                     st = stat_update_check(st, os.fstat(fd))
                 item.update(self.metadata_collector.stat_simple_attrs(st, path, fd=fd))
+                item.update(self.metadata_collector.stat_ext_attrs(st, path, fd=fd))
+                maybe_exclude_by_attr(item)  # check early, before processing all the file content
                 is_special_file = is_special(st.st_mode)
                 if is_special_file:
                     # we process a special file like a regular file. reflect that in mode,
@@ -1461,7 +1476,6 @@ class FilesystemObjectProcessors:
                     if not changed_while_backup:
                         status = None  # we already called print_file_status
                 self.stats.nfiles += 1
-                item.update(self.metadata_collector.stat_ext_attrs(st, path, fd=fd))
                 item.get_size(memorize=True)
                 return status
 

+ 4 - 14
src/borg/archiver/create_cmd.py

@@ -11,7 +11,7 @@ from io import TextIOWrapper
 from ._common import with_repository, Highlander
 from .. import helpers
 from ..archive import Archive, is_special
-from ..archive import BackupError, BackupOSError, backup_io, OsOpen, stat_update_check
+from ..archive import BackupError, BackupOSError, BackupItemExcluded, backup_io, OsOpen, stat_update_check
 from ..archive import FilesystemObjectProcessors, MetadataCollector, ChunksProcessor
 from ..cache import Cache
 from ..constants import *  # NOQA
@@ -33,7 +33,6 @@ from ..helpers import Error, CommandError, BackupWarning, FileChangedWarning
 from ..manifest import Manifest
 from ..patterns import PatternMatcher
 from ..platform import is_win32
-from ..platform import get_flags
 
 from ..logger import create_logger
 
@@ -212,7 +211,6 @@ class CreateMixIn:
         self.noflags = args.noflags
         self.noacls = args.noacls
         self.noxattrs = args.noxattrs
-        self.exclude_nodump = args.exclude_nodump
         dry_run = args.dry_run
         self.start_backup = time.time_ns()
         t0 = archive_ts_now()
@@ -379,6 +377,8 @@ class CreateMixIn:
                 else:
                     self.print_warning("Unknown file type: %s", path)
                     return
+            except BackupItemExcluded:
+                return "-"
             except BackupError as err:
                 if isinstance(err, BackupOSError):
                     if err.errno in (errno.EPERM, errno.EACCES):
@@ -454,13 +454,6 @@ class CreateMixIn:
             # directory of the mounted filesystem that shadows the mountpoint dir).
             recurse = restrict_dev is None or st.st_dev == restrict_dev
 
-            if self.exclude_nodump:
-                # Ignore if nodump flag is set
-                with backup_io("flags"):
-                    if get_flags(path=path, st=st) & stat.UF_NODUMP:
-                        self.print_file_status("-", path)  # excluded
-                        return
-
             if not stat.S_ISDIR(st.st_mode):
                 # directories cannot go in this branch because they can be excluded based on tag
                 # files they might contain
@@ -843,10 +836,7 @@ class CreateMixIn:
             help="set path delimiter for ``--paths-from-stdin`` and ``--paths-from-command`` (default: ``\\n``) ",
         )
 
-        exclude_group = define_exclusion_group(subparser, tag_files=True)
-        exclude_group.add_argument(
-            "--exclude-nodump", dest="exclude_nodump", action="store_true", help="exclude files flagged NODUMP"
-        )
+        define_exclusion_group(subparser, tag_files=True)
 
         fs_group = subparser.add_argument_group("Filesystem options")
         fs_group.add_argument(

+ 1 - 1
src/borg/helpers/__init__.py

@@ -16,7 +16,7 @@ from .datastruct import StableDict, Buffer, EfficientCollectionQueue
 from .errors import Error, ErrorWithTraceback, IntegrityError, DecompressionError, CancelledByUser, CommandError
 from .errors import RTError, modern_ec
 from .errors import BorgWarning, FileChangedWarning, BackupWarning, IncludePatternNeverMatchedWarning
-from .errors import BackupError, BackupOSError, BackupRaceConditionError
+from .errors import BackupError, BackupOSError, BackupRaceConditionError, BackupItemExcluded
 from .errors import BackupPermissionError, BackupIOError, BackupFileNotFoundError
 from .fs import ensure_dir, join_base_dir, get_socket_filename
 from .fs import get_security_dir, get_keys_dir, get_base_dir, get_cache_dir, get_config_dir, get_runtime_dir

+ 4 - 0
src/borg/helpers/errors.py

@@ -182,3 +182,7 @@ class BackupFileNotFoundError(BackupOSError):
     """{}: {}"""
 
     exit_mcode = 107
+
+
+class BackupItemExcluded(Exception):
+    """used internally to skip an item from processing, when it is excluded."""

+ 4 - 4
src/borg/testsuite/archiver/create_cmd_test.py

@@ -57,8 +57,8 @@ def test_basic_functionality(archivers, request):
     assert "borgbackup version" in output
     assert "terminating with success status, rc 0" in output
 
-    cmd(archiver, "create", "--exclude-nodump", "test", "input")
-    output = cmd(archiver, "create", "--exclude-nodump", "--stats", "test.2", "input")
+    cmd(archiver, "create", "test", "input")
+    output = cmd(archiver, "create", "--stats", "test.2", "input")
     assert "Archive name: test.2" in output
 
     with changedir("output"):
@@ -723,13 +723,13 @@ def test_file_status_excluded(archivers, request):
         create_regular_file(archiver.input_path, "file3", size=1024 * 80)
         platform.set_flags(os.path.join(archiver.input_path, "file3"), stat.UF_NODUMP)
     cmd(archiver, "repo-create", RK_ENCRYPTION)
-    output = cmd(archiver, "create", "--list", "--exclude-nodump", "test", "input")
+    output = cmd(archiver, "create", "--list", "test", "input")
     assert "A input/file1" in output
     assert "A input/file2" in output
     if has_lchflags:
         assert "- input/file3" in output
     # should find second file as excluded
-    output = cmd(archiver, "create", "test", "input", "--list", "--exclude-nodump", "--exclude", "*/file2")
+    output = cmd(archiver, "create", "test", "input", "--list", "--exclude", "*/file2")
     assert "U input/file1" in output
     assert "- input/file2" in output
     if has_lchflags:

+ 2 - 2
src/borg/testsuite/archiver/mount_cmds_test.py

@@ -78,8 +78,8 @@ def test_fuse(archivers, request):
     cmd(archiver, "repo-create", RK_ENCRYPTION)
     create_test_files(archiver.input_path)
     have_noatime = has_noatime("input/file1")
-    cmd(archiver, "create", "--exclude-nodump", "--atime", "archive", "input")
-    cmd(archiver, "create", "--exclude-nodump", "--atime", "archive2", "input")
+    cmd(archiver, "create", "--atime", "archive", "input")
+    cmd(archiver, "create", "--atime", "archive2", "input")
     if has_lchflags:
         # remove the file that we did not back up, so input and output become equal
         os.remove(os.path.join("input", "flagfile"))

+ 1 - 0
src/borg/testsuite/archiver/tar_cmds_test.py

@@ -214,6 +214,7 @@ def test_import_concatenated_tar_without_ignore_zeros(archivers, request):
 def test_roundtrip_pax_borg(archivers, request):
     archiver = request.getfixturevalue(archivers)
     create_test_files(archiver.input_path)
+    os.remove("input/flagfile")  # this would be automagically excluded due to NODUMP
     cmd(archiver, "repo-create", "--encryption=none")
     cmd(archiver, "create", "src", "input")
     cmd(archiver, "export-tar", "src", "simple.tar", "--tar-format=BORG")