Browse Source

Merge pull request #8895 from ThomasWaldmann/exclude-based-on-xattrs

Exclude based on xattrs or NODUMP flag
TW 2 days ago
parent
commit
7ec02dce46

+ 16 - 2
src/borg/archive.py

@@ -27,7 +27,7 @@ from .crypto.key import key_factory, UnsupportedPayloadError
 from .compress import CompressionSpec
 from .compress import CompressionSpec
 from .constants import *  # NOQA
 from .constants import *  # NOQA
 from .crypto.low_level import IntegrityError as IntegrityErrorBase
 from .crypto.low_level import IntegrityError as IntegrityErrorBase
-from .helpers import BackupError, BackupRaceConditionError
+from .helpers import BackupError, BackupRaceConditionError, BackupItemExcluded
 from .helpers import BackupOSError, BackupPermissionError, BackupFileNotFoundError, BackupIOError
 from .helpers import BackupOSError, BackupPermissionError, BackupFileNotFoundError, BackupIOError
 from .hashindex import ChunkIndex, ChunkIndexEntry
 from .hashindex import ChunkIndex, ChunkIndexEntry
 from .helpers import HardLinkManager
 from .helpers import HardLinkManager
@@ -1185,6 +1185,18 @@ class ChunksProcessor:
                 stats.show_progress(item=item, dt=0.2)
                 stats.show_progress(item=item, dt=0.2)
 
 
 
 
+def maybe_exclude_by_attr(item):
+    """Raise BackupItemExcluded if the item's metadata marks it as "do not back up".
+
+    Three markers are checked on the already collected item metadata:
+
+    - xattr ``com.apple.metadata:com_apple_backup_excludeItem``: its mere presence excludes the item.
+    - xattr ``user.xdg.robots.backup``: the value ``false`` excludes the item
+      ("true" means the file should be included in backups, so it must not exclude).
+    - the ``UF_NODUMP`` bit in the item's ``bsdflags``.
+
+    :param item: mapping-like item offering ``.get()``; only the "xattrs" and "bsdflags" keys are read.
+    :raises BackupItemExcluded: if any of the markers above is found.
+    """
+    if xattrs := item.get("xattrs"):
+        apple_excluded = xattrs.get(b"com.apple.metadata:com_apple_backup_excludeItem")
+        linux_excluded = xattrs.get(b"user.xdg.robots.backup")
+        # xattr keys and values are compared as bytes.
+        if apple_excluded is not None or linux_excluded == b"false":
+            raise BackupItemExcluded
+
+    if flags := item.get("bsdflags"):
+        if flags & stat.UF_NODUMP:
+            raise BackupItemExcluded
+
+
 class FilesystemObjectProcessors:
 class FilesystemObjectProcessors:
     # When ported to threading, then this doesn't need chunker, cache, key any more.
     # When ported to threading, then this doesn't need chunker, cache, key any more.
     # process_file becomes a callback passed to __init__.
     # process_file becomes a callback passed to __init__.
@@ -1243,6 +1255,7 @@ class FilesystemObjectProcessors:
                 hl_chunks = chunks
                 hl_chunks = chunks
             item.hlid = self.hlm.hardlink_id_from_inode(ino=st.st_ino, dev=st.st_dev)
             item.hlid = self.hlm.hardlink_id_from_inode(ino=st.st_ino, dev=st.st_dev)
         yield item, status, hardlinked, hl_chunks
         yield item, status, hardlinked, hl_chunks
+        maybe_exclude_by_attr(item)
         self.add_item(item, stats=self.stats)
         self.add_item(item, stats=self.stats)
         if update_map:
         if update_map:
             # remember the hlid of this fs object and if the item has chunks,
             # remember the hlid of this fs object and if the item has chunks,
@@ -1370,6 +1383,8 @@ class FilesystemObjectProcessors:
                 with backup_io("fstat"):
                 with backup_io("fstat"):
                     st = stat_update_check(st, os.fstat(fd))
                     st = stat_update_check(st, os.fstat(fd))
                 item.update(self.metadata_collector.stat_simple_attrs(st, path, fd=fd))
                 item.update(self.metadata_collector.stat_simple_attrs(st, path, fd=fd))
+                item.update(self.metadata_collector.stat_ext_attrs(st, path, fd=fd))
+                maybe_exclude_by_attr(item)  # check early, before processing all the file content
                 is_special_file = is_special(st.st_mode)
                 is_special_file = is_special(st.st_mode)
                 if is_special_file:
                 if is_special_file:
                     # we process a special file like a regular file. reflect that in mode,
                     # we process a special file like a regular file. reflect that in mode,
@@ -1461,7 +1476,6 @@ class FilesystemObjectProcessors:
                     if not changed_while_backup:
                     if not changed_while_backup:
                         status = None  # we already called print_file_status
                         status = None  # we already called print_file_status
                 self.stats.nfiles += 1
                 self.stats.nfiles += 1
-                item.update(self.metadata_collector.stat_ext_attrs(st, path, fd=fd))
                 item.get_size(memorize=True)
                 item.get_size(memorize=True)
                 return status
                 return status
 
 

+ 4 - 14
src/borg/archiver/create_cmd.py

@@ -11,7 +11,7 @@ from io import TextIOWrapper
 from ._common import with_repository, Highlander
 from ._common import with_repository, Highlander
 from .. import helpers
 from .. import helpers
 from ..archive import Archive, is_special
 from ..archive import Archive, is_special
-from ..archive import BackupError, BackupOSError, backup_io, OsOpen, stat_update_check
+from ..archive import BackupError, BackupOSError, BackupItemExcluded, backup_io, OsOpen, stat_update_check
 from ..archive import FilesystemObjectProcessors, MetadataCollector, ChunksProcessor
 from ..archive import FilesystemObjectProcessors, MetadataCollector, ChunksProcessor
 from ..cache import Cache
 from ..cache import Cache
 from ..constants import *  # NOQA
 from ..constants import *  # NOQA
@@ -33,7 +33,6 @@ from ..helpers import Error, CommandError, BackupWarning, FileChangedWarning
 from ..manifest import Manifest
 from ..manifest import Manifest
 from ..patterns import PatternMatcher
 from ..patterns import PatternMatcher
 from ..platform import is_win32
 from ..platform import is_win32
-from ..platform import get_flags
 
 
 from ..logger import create_logger
 from ..logger import create_logger
 
 
@@ -212,7 +211,6 @@ class CreateMixIn:
         self.noflags = args.noflags
         self.noflags = args.noflags
         self.noacls = args.noacls
         self.noacls = args.noacls
         self.noxattrs = args.noxattrs
         self.noxattrs = args.noxattrs
-        self.exclude_nodump = args.exclude_nodump
         dry_run = args.dry_run
         dry_run = args.dry_run
         self.start_backup = time.time_ns()
         self.start_backup = time.time_ns()
         t0 = archive_ts_now()
         t0 = archive_ts_now()
@@ -379,6 +377,8 @@ class CreateMixIn:
                 else:
                 else:
                     self.print_warning("Unknown file type: %s", path)
                     self.print_warning("Unknown file type: %s", path)
                     return
                     return
+            except BackupItemExcluded:
+                return "-"
             except BackupError as err:
             except BackupError as err:
                 if isinstance(err, BackupOSError):
                 if isinstance(err, BackupOSError):
                     if err.errno in (errno.EPERM, errno.EACCES):
                     if err.errno in (errno.EPERM, errno.EACCES):
@@ -454,13 +454,6 @@ class CreateMixIn:
             # directory of the mounted filesystem that shadows the mountpoint dir).
             # directory of the mounted filesystem that shadows the mountpoint dir).
             recurse = restrict_dev is None or st.st_dev == restrict_dev
             recurse = restrict_dev is None or st.st_dev == restrict_dev
 
 
-            if self.exclude_nodump:
-                # Ignore if nodump flag is set
-                with backup_io("flags"):
-                    if get_flags(path=path, st=st) & stat.UF_NODUMP:
-                        self.print_file_status("-", path)  # excluded
-                        return
-
             if not stat.S_ISDIR(st.st_mode):
             if not stat.S_ISDIR(st.st_mode):
                 # directories cannot go in this branch because they can be excluded based on tag
                 # directories cannot go in this branch because they can be excluded based on tag
                 # files they might contain
                 # files they might contain
@@ -843,10 +836,7 @@ class CreateMixIn:
             help="set path delimiter for ``--paths-from-stdin`` and ``--paths-from-command`` (default: ``\\n``) ",
             help="set path delimiter for ``--paths-from-stdin`` and ``--paths-from-command`` (default: ``\\n``) ",
         )
         )
 
 
-        exclude_group = define_exclusion_group(subparser, tag_files=True)
-        exclude_group.add_argument(
-            "--exclude-nodump", dest="exclude_nodump", action="store_true", help="exclude files flagged NODUMP"
-        )
+        define_exclusion_group(subparser, tag_files=True)
 
 
         fs_group = subparser.add_argument_group("Filesystem options")
         fs_group = subparser.add_argument_group("Filesystem options")
         fs_group.add_argument(
         fs_group.add_argument(

+ 1 - 1
src/borg/helpers/__init__.py

@@ -16,7 +16,7 @@ from .datastruct import StableDict, Buffer, EfficientCollectionQueue
 from .errors import Error, ErrorWithTraceback, IntegrityError, DecompressionError, CancelledByUser, CommandError
 from .errors import Error, ErrorWithTraceback, IntegrityError, DecompressionError, CancelledByUser, CommandError
 from .errors import RTError, modern_ec
 from .errors import RTError, modern_ec
 from .errors import BorgWarning, FileChangedWarning, BackupWarning, IncludePatternNeverMatchedWarning
 from .errors import BorgWarning, FileChangedWarning, BackupWarning, IncludePatternNeverMatchedWarning
-from .errors import BackupError, BackupOSError, BackupRaceConditionError
+from .errors import BackupError, BackupOSError, BackupRaceConditionError, BackupItemExcluded
 from .errors import BackupPermissionError, BackupIOError, BackupFileNotFoundError
 from .errors import BackupPermissionError, BackupIOError, BackupFileNotFoundError
 from .fs import ensure_dir, join_base_dir, get_socket_filename
 from .fs import ensure_dir, join_base_dir, get_socket_filename
 from .fs import get_security_dir, get_keys_dir, get_base_dir, get_cache_dir, get_config_dir, get_runtime_dir
 from .fs import get_security_dir, get_keys_dir, get_base_dir, get_cache_dir, get_config_dir, get_runtime_dir

+ 4 - 0
src/borg/helpers/errors.py

@@ -182,3 +182,7 @@ class BackupFileNotFoundError(BackupOSError):
     """{}: {}"""
     """{}: {}"""
 
 
     exit_mcode = 107
     exit_mcode = 107
+
+
+# Control-flow signal rather than an error: it derives from Exception directly (not from BackupError),
+# and the create command handles it in its own "except BackupItemExcluded" clause, reporting status "-".
+class BackupItemExcluded(Exception):
+    """used internally to skip an item from processing, when it is excluded."""

+ 4 - 4
src/borg/testsuite/archiver/create_cmd_test.py

@@ -57,8 +57,8 @@ def test_basic_functionality(archivers, request):
     assert "borgbackup version" in output
     assert "borgbackup version" in output
     assert "terminating with success status, rc 0" in output
     assert "terminating with success status, rc 0" in output
 
 
-    cmd(archiver, "create", "--exclude-nodump", "test", "input")
-    output = cmd(archiver, "create", "--exclude-nodump", "--stats", "test.2", "input")
+    cmd(archiver, "create", "test", "input")
+    output = cmd(archiver, "create", "--stats", "test.2", "input")
     assert "Archive name: test.2" in output
     assert "Archive name: test.2" in output
 
 
     with changedir("output"):
     with changedir("output"):
@@ -723,13 +723,13 @@ def test_file_status_excluded(archivers, request):
         create_regular_file(archiver.input_path, "file3", size=1024 * 80)
         create_regular_file(archiver.input_path, "file3", size=1024 * 80)
         platform.set_flags(os.path.join(archiver.input_path, "file3"), stat.UF_NODUMP)
         platform.set_flags(os.path.join(archiver.input_path, "file3"), stat.UF_NODUMP)
     cmd(archiver, "repo-create", RK_ENCRYPTION)
     cmd(archiver, "repo-create", RK_ENCRYPTION)
-    output = cmd(archiver, "create", "--list", "--exclude-nodump", "test", "input")
+    output = cmd(archiver, "create", "--list", "test", "input")
     assert "A input/file1" in output
     assert "A input/file1" in output
     assert "A input/file2" in output
     assert "A input/file2" in output
     if has_lchflags:
     if has_lchflags:
         assert "- input/file3" in output
         assert "- input/file3" in output
     # should find second file as excluded
     # should find second file as excluded
-    output = cmd(archiver, "create", "test", "input", "--list", "--exclude-nodump", "--exclude", "*/file2")
+    output = cmd(archiver, "create", "test", "input", "--list", "--exclude", "*/file2")
     assert "U input/file1" in output
     assert "U input/file1" in output
     assert "- input/file2" in output
     assert "- input/file2" in output
     if has_lchflags:
     if has_lchflags:

+ 2 - 2
src/borg/testsuite/archiver/mount_cmds_test.py

@@ -78,8 +78,8 @@ def test_fuse(archivers, request):
     cmd(archiver, "repo-create", RK_ENCRYPTION)
     cmd(archiver, "repo-create", RK_ENCRYPTION)
     create_test_files(archiver.input_path)
     create_test_files(archiver.input_path)
     have_noatime = has_noatime("input/file1")
     have_noatime = has_noatime("input/file1")
-    cmd(archiver, "create", "--exclude-nodump", "--atime", "archive", "input")
-    cmd(archiver, "create", "--exclude-nodump", "--atime", "archive2", "input")
+    cmd(archiver, "create", "--atime", "archive", "input")
+    cmd(archiver, "create", "--atime", "archive2", "input")
     if has_lchflags:
     if has_lchflags:
         # remove the file that we did not back up, so input and output become equal
         # remove the file that we did not back up, so input and output become equal
         os.remove(os.path.join("input", "flagfile"))
         os.remove(os.path.join("input", "flagfile"))

+ 1 - 0
src/borg/testsuite/archiver/tar_cmds_test.py

@@ -214,6 +214,7 @@ def test_import_concatenated_tar_without_ignore_zeros(archivers, request):
 def test_roundtrip_pax_borg(archivers, request):
 def test_roundtrip_pax_borg(archivers, request):
     archiver = request.getfixturevalue(archivers)
     archiver = request.getfixturevalue(archivers)
     create_test_files(archiver.input_path)
     create_test_files(archiver.input_path)
+    os.remove("input/flagfile")  # this would be automagically excluded due to NODUMP
     cmd(archiver, "repo-create", "--encryption=none")
     cmd(archiver, "repo-create", "--encryption=none")
     cmd(archiver, "create", "src", "input")
     cmd(archiver, "create", "src", "input")
     cmd(archiver, "export-tar", "src", "simple.tar", "--tar-format=BORG")
     cmd(archiver, "export-tar", "src", "simple.tar", "--tar-format=BORG")