
Merge pull request #4043 from ThomasWaldmann/use-more-fds

use more FDs, avoid race conditions on active fs
TW 6 years ago
parent commit
d644323333

+ 98 - 77
src/borg/archive.py

@@ -38,6 +38,7 @@ from .helpers import StableDict
 from .helpers import bin_to_hex
 from .helpers import safe_ns
 from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi
+from .helpers import os_open, flags_normal
 from .helpers import msgpack
 from .patterns import PathPrefixPattern, FnmatchPattern, IECommand
 from .item import Item, ArchiveItem, ItemDiff
@@ -47,9 +48,6 @@ from .repository import Repository, LIST_SCAN_LIMIT
 
 has_lchmod = hasattr(os, 'lchmod')
 
-flags_normal = os.O_RDONLY | getattr(os, 'O_BINARY', 0)
-flags_noatime = flags_normal | getattr(os, 'O_NOATIME', 0)
-
 
 class Statistics:
 
@@ -197,6 +195,42 @@ def backup_io_iter(iterator):
         yield item
 
 
+def stat_update_check(st_old, st_curr):
+    """
+    this checks for some race conditions between the first filename-based stat()
+    we did before dispatching to the (hopefully correct) file type backup handler
+    and the (hopefully) fd-based fstat() we did in the handler.
+
+    if there is a problematic difference (e.g. file type changed), we would rather
+    skip the file than be tricked into a security problem.
+
+    such races should only happen if:
+    - we are backing up a live filesystem (no snapshot, not inactive)
+    - files change due to normal fs activity at an unfortunate time
+    - somebody is doing an attack against us
+    """
+    # note: assuming that a file type change implies an inode change AND that inode numbers
+    # are not reused within a short timeframe, the type check is redundant with the ino check:
+    if stat.S_IFMT(st_old.st_mode) != stat.S_IFMT(st_curr.st_mode):
+        # in this case, we dispatched to wrong handler - abort
+        raise BackupError('file type changed (race condition), skipping file')
+    if st_old.st_ino != st_curr.st_ino:
+        # in this case, the hardlinks-related code in create_helper has the wrong inode - abort!
+        raise BackupError('file inode changed (race condition), skipping file')
+    # looks ok, we are still dealing with the same thing - return current stat:
+    return st_curr
+
+
+@contextmanager
+def OsOpen(*, flags, path=None, parent_fd=None, name=None, noatime=False, op='open'):
+    with backup_io(op):
+        fd = os_open(path=path, parent_fd=parent_fd, name=name, flags=flags, noatime=noatime)
+    try:
+        yield fd
+    finally:
+        os.close(fd)
+
+
 class DownloadPipeline:
 
     def __init__(self, repository, key):
@@ -826,18 +860,6 @@ Utilization of max. archive size: {csize_max:.0%}
             logger.warning('forced deletion succeeded, but the deleted archive was corrupted.')
             logger.warning('borg check --repair is required to free all space.')
 
-    @staticmethod
-    def _open_rb(path):
-        try:
-            # if we have O_NOATIME, this likely will succeed if we are root or owner of file:
-            return os.open(path, flags_noatime)
-        except PermissionError:
-            if flags_noatime == flags_normal:
-                # we do not have O_NOATIME, no need to try again:
-                raise
-            # Was this EPERM due to the O_NOATIME flag? Try again without it:
-            return os.open(path, flags_normal)
-
     @staticmethod
     def compare_archives_iter(archive1, archive2, matcher=None, can_compare_chunk_ids=False):
         """
@@ -958,9 +980,9 @@ class MetadataCollector:
         attrs = {}
         bsdflags = 0
         with backup_io('extended stat'):
-            xattrs = xattr.get_all(fd or path, follow_symlinks=False)
             if not self.nobsdflags:
                 bsdflags = get_flags(path, st, fd=fd)
+            xattrs = xattr.get_all(fd or path, follow_symlinks=False)
             acl_get(path, attrs, st, self.numeric_owner, fd=fd)
         if xattrs:
             attrs['xattrs'] = StableDict(xattrs)
@@ -1080,34 +1102,41 @@ class FilesystemObjectProcessors:
         if hardlink_master:
             self.hard_links[(st.st_ino, st.st_dev)] = safe_path
 
-    def process_dir(self, path, st):
+    def process_dir(self, *, path, fd, st):
         with self.create_helper(path, st, 'd', hardlinkable=False) as (item, status, hardlinked, hardlink_master):
-            item.update(self.metadata_collector.stat_attrs(st, path))
+            item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
             return status
 
-    def process_fifo(self, path, st):
+    def process_fifo(self, *, path, parent_fd, name, st):
         with self.create_helper(path, st, 'f') as (item, status, hardlinked, hardlink_master):  # fifo
-            item.update(self.metadata_collector.stat_attrs(st, path))
-            return status
+            with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd:
+                with backup_io('fstat'):
+                    st = stat_update_check(st, os.fstat(fd))
+                item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
+                return status
 
-    def process_dev(self, path, st, dev_type):
+    def process_dev(self, *, path, parent_fd, name, st, dev_type):
         with self.create_helper(path, st, dev_type) as (item, status, hardlinked, hardlink_master):  # char/block device
+            # looks like we can not work fd-based here without causing issues when trying to open/close the device
+            with backup_io('stat'):
+                st = stat_update_check(st, os.stat(name, dir_fd=parent_fd, follow_symlinks=False))
             item.rdev = st.st_rdev
             item.update(self.metadata_collector.stat_attrs(st, path))
             return status
 
-    def process_symlink(self, path, st):
+    def process_symlink(self, *, path, parent_fd, name, st):
         # note: using hardlinkable=False because we can not support hardlinked symlinks,
         #       due to the dual-use of item.source, see issue #2343:
         # hardlinked symlinks will be archived [and extracted] as non-hardlinked symlinks.
         with self.create_helper(path, st, 's', hardlinkable=False) as (item, status, hardlinked, hardlink_master):
+            fname = name if name is not None and parent_fd is not None else path
             with backup_io('readlink'):
-                source = os.readlink(path)
+                source = os.readlink(fname, dir_fd=parent_fd)
             item.source = source
-            item.update(self.metadata_collector.stat_attrs(st, path))
+            item.update(self.metadata_collector.stat_attrs(st, path))  # can't use FD here?
             return status
 
-    def process_stdin(self, path, cache):
+    def process_stdin(self, *, path, cache):
         uid, gid = 0, 0
         t = int(time.time()) * 1000000000
         item = Item(
@@ -1124,63 +1153,55 @@ class FilesystemObjectProcessors:
         self.add_item(item, stats=self.stats)
         return 'i'  # stdin
 
-    def process_file(self, path, st, cache):
+    def process_file(self, *, path, parent_fd, name, st, cache, flags=flags_normal):
         with self.create_helper(path, st, None) as (item, status, hardlinked, hardlink_master):  # no status yet
-            md = None
-            is_special_file = is_special(st.st_mode)
-            if not hardlinked or hardlink_master:
-                if not is_special_file:
-                    path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path)))
-                    known, ids = cache.file_known_and_unchanged(path_hash, st)
-                else:
-                    # in --read-special mode, we may be called for special files.
-                    # there should be no information in the cache about special files processed in
-                    # read-special mode, but we better play safe as this was wrong in the past:
-                    path_hash = None
-                    known, ids = False, None
-                chunks = None
-                if ids is not None:
-                    # Make sure all ids are available
-                    for id_ in ids:
-                        if not cache.seen_chunk(id_):
-                            status = 'M'  # cache said it is unmodified, but we lost a chunk: process file like modified
-                            break
+            with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags, noatime=True) as fd:
+                with backup_io('fstat'):
+                    st = stat_update_check(st, os.fstat(fd))
+                is_special_file = is_special(st.st_mode)
+                if not hardlinked or hardlink_master:
+                    if not is_special_file:
+                        path_hash = self.key.id_hash(safe_encode(os.path.join(self.cwd, path)))
+                        known, ids = cache.file_known_and_unchanged(path_hash, st)
                     else:
-                        chunks = [cache.chunk_incref(id_, self.stats) for id_ in ids]
-                        status = 'U'  # regular file, unchanged
-                else:
-                    status = 'M' if known else 'A'  # regular file, modified or added
-                item.hardlink_master = hardlinked
-                item.update(self.metadata_collector.stat_simple_attrs(st))
-                # Only chunkify the file if needed
-                if chunks is not None:
-                    item.chunks = chunks
-                else:
-                    with backup_io('open'):
-                        fh = Archive._open_rb(path)
-                        try:
-                            self.process_file_chunks(item, cache, self.stats, self.show_progress, backup_io_iter(self.chunker.chunkify(None, fh)))
-                            md = self.metadata_collector.stat_attrs(st, path, fd=fh)
-                        finally:
-                            os.close(fh)
+                        # in --read-special mode, we may be called for special files.
+                        # there should be no information in the cache about special files processed in
+                        # read-special mode, but we better play safe as this was wrong in the past:
+                        path_hash = None
+                        known, ids = False, None
+                    chunks = None
+                    if ids is not None:
+                        # Make sure all ids are available
+                        for id_ in ids:
+                            if not cache.seen_chunk(id_):
+                                status = 'M'  # cache said it is unmodified, but we lost a chunk: process file like modified
+                                break
+                        else:
+                            chunks = [cache.chunk_incref(id_, self.stats) for id_ in ids]
+                            status = 'U'  # regular file, unchanged
+                    else:
+                        status = 'M' if known else 'A'  # regular file, modified or added
+                    item.hardlink_master = hardlinked
+                    item.update(self.metadata_collector.stat_simple_attrs(st))
+                    # Only chunkify the file if needed
+                    if chunks is not None:
+                        item.chunks = chunks
+                    else:
+                        with backup_io('read'):
+                            self.process_file_chunks(item, cache, self.stats, self.show_progress, backup_io_iter(self.chunker.chunkify(None, fd)))
                         if not is_special_file:
                             # we must not memorize special files, because the contents of e.g. a
                             # block or char device will change without its mtime/size/inode changing.
                             cache.memorize_file(path_hash, st, [c.id for c in item.chunks])
-                self.stats.nfiles += 1
-            if md is None:
-                fh = Archive._open_rb(path)
-                try:
-                    md = self.metadata_collector.stat_attrs(st, path, fd=fh)
-                finally:
-                    os.close(fh)
-            item.update(md)
-            item.get_size(memorize=True)
-            if is_special_file:
-                # we processed a special file like a regular file. reflect that in mode,
-                # so it can be extracted / accessed in FUSE mount like a regular file:
-                item.mode = stat.S_IFREG | stat.S_IMODE(item.mode)
-            return status
+                    self.stats.nfiles += 1
+                md = self.metadata_collector.stat_attrs(st, path, fd=fd)
+                item.update(md)
+                item.get_size(memorize=True)
+                if is_special_file:
+                    # we processed a special file like a regular file. reflect that in mode,
+                    # so it can be extracted / accessed in FUSE mount like a regular file:
+                    item.mode = stat.S_IFREG | stat.S_IMODE(item.mode)
+                return status
 
 
 def valid_msgpacked_dict(d, keys_serialized):

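Taken together, the archive.py changes replace path re-resolution with an open-then-verify pattern: open the item openat-style (relative to an already-open parent directory fd), fstat() the resulting fd, and compare against the earlier stat() before trusting it. A minimal standalone sketch of that pattern, assuming a POSIX platform (open_and_recheck is an illustrative name, not a helper from this PR):

import os
import stat

def open_and_recheck(parent_fd, name, st_expected):
    # open by name relative to an already-open directory fd (openat semantics);
    # O_NOFOLLOW refuses a symlink that may have been swapped in:
    fd = os.open(name, os.O_RDONLY | os.O_NOFOLLOW, dir_fd=parent_fd)
    try:
        st_now = os.fstat(fd)  # describes what we actually opened
        if stat.S_IFMT(st_expected.st_mode) != stat.S_IFMT(st_now.st_mode):
            raise RuntimeError('file type changed (race condition)')
        if st_expected.st_ino != st_now.st_ino:
            raise RuntimeError('file inode changed (race condition)')
    except BaseException:
        os.close(fd)
        raise
    return fd, st_now

Everything afterwards (metadata collection, chunking) works on the fd, so a later rename of the path cannot redirect the backup to a different object.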
+ 76 - 63
src/borg/archiver.py

@@ -34,7 +34,7 @@ from . import __version__
 from . import helpers
 from .algorithms.checksums import crc32
 from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics, is_special
-from .archive import BackupError, BackupOSError, backup_io
+from .archive import BackupError, BackupOSError, backup_io, OsOpen, stat_update_check
 from .archive import FilesystemObjectProcessors, MetadataCollector, ChunksProcessor
 from .cache import Cache, assert_secure, SecurityManager
 from .constants import *  # NOQA
@@ -66,6 +66,7 @@ from .helpers import ChunkIteratorFileWrapper
 from .helpers import popen_with_error_handling, prepare_subprocess_env
 from .helpers import dash_open
 from .helpers import umount
+from .helpers import flags_root, flags_dir, flags_follow
 from .helpers import msgpack
 from .nanorst import rst_to_terminal
 from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern
@@ -470,7 +471,7 @@ class Archiver:
                     path = args.stdin_name
                     if not dry_run:
                         try:
-                            status = fso.process_stdin(path, cache)
+                            status = fso.process_stdin(path=path, cache=cache)
                         except BackupOSError as e:
                             status = 'E'
                             self.print_warning('%s: %s', path, e)
@@ -479,18 +480,23 @@ class Archiver:
                     self.print_file_status(status, path)
                     continue
                 path = os.path.normpath(path)
-                try:
-                    st = os.stat(path, follow_symlinks=False)
-                except OSError as e:
-                    self.print_warning('%s: %s', path, e)
-                    continue
-                if args.one_file_system:
-                    restrict_dev = st.st_dev
-                else:
-                    restrict_dev = None
-                self._process(fso, cache, matcher, args.exclude_caches, args.exclude_if_present,
-                              args.keep_exclude_tags, skip_inodes, path, restrict_dev,
-                              read_special=args.read_special, dry_run=dry_run, st=st)
+                parent_dir = os.path.dirname(path) or '.'
+                name = os.path.basename(path)
+                with OsOpen(path=parent_dir, flags=flags_root, noatime=True, op='open_root') as parent_fd:
+                    try:
+                        st = os.stat(name, dir_fd=parent_fd, follow_symlinks=False)
+                    except OSError as e:
+                        self.print_warning('%s: %s', path, e)
+                        continue
+                    if args.one_file_system:
+                        restrict_dev = st.st_dev
+                    else:
+                        restrict_dev = None
+                    self._process(path=path, parent_fd=parent_fd, name=name,
+                                  fso=fso, cache=cache, matcher=matcher,
+                                  exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present,
+                                  keep_exclude_tags=args.keep_exclude_tags, skip_inodes=skip_inodes,
+                                  restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run)
             if not dry_run:
                 archive.save(comment=args.comment, timestamp=args.timestamp)
                 if args.progress:
@@ -542,22 +548,20 @@ class Archiver:
             create_inner(None, None, None)
         return self.exit_code
 
-    def _process(self, fso, cache, matcher, exclude_caches, exclude_if_present,
-                 keep_exclude_tags, skip_inodes, path, restrict_dev,
-                 read_special=False, dry_run=False, st=None):
+    def _process(self, *, path, parent_fd=None, name=None,
+                 fso, cache, matcher,
+                 exclude_caches, exclude_if_present, keep_exclude_tags, skip_inodes,
+                 restrict_dev, read_special=False, dry_run=False):
         """
         """
-        Process *path* recursively according to the various parameters.
-
-        *st* (if given) is a *os.stat_result* object for *path*.
+        Process *path* (or, preferably, parent_fd/name) recursively according to the various parameters.
 
         This should only raise on critical errors. Per-item errors must be handled within this method.
         """
         try:
             recurse_excluded_dir = False
             if matcher.match(path):
-                if st is None:
-                    with backup_io('stat'):
-                        st = os.stat(path, follow_symlinks=False)
+                with backup_io('stat'):
+                    st = os.stat(name, dir_fd=parent_fd, follow_symlinks=False)
             else:
                 self.print_file_status('x', path)
                 # get out here as quickly as possible:
@@ -566,9 +570,8 @@ class Archiver:
                 # could trigger an error, e.g. if access is forbidden, see #3209.
                 if not matcher.recurse_dir:
                     return
-                if st is None:
-                    with backup_io('stat'):
-                        st = os.stat(path, follow_symlinks=False)
+                with backup_io('stat'):
+                    st = os.stat(name, dir_fd=parent_fd, follow_symlinks=False)
                 recurse_excluded_dir = stat.S_ISDIR(st.st_mode)
                 if not recurse_excluded_dir:
                     return
@@ -583,71 +586,81 @@ class Archiver:
             if self.exclude_nodump:
                 # Ignore if nodump flag is set
                 with backup_io('flags'):
-                    if get_flags(path, st) & stat.UF_NODUMP:
+                    if get_flags(path=path, st=st) & stat.UF_NODUMP:
                         self.print_file_status('x', path)
                         return
             if stat.S_ISREG(st.st_mode):
                 if not dry_run:
-                    status = fso.process_file(path, st, cache)
+                    status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache)
             elif stat.S_ISDIR(st.st_mode):
-                if recurse:
-                    tag_paths = dir_is_tagged(path, exclude_caches, exclude_if_present)
-                    if tag_paths:
-                        # if we are already recursing in an excluded dir, we do not need to do anything else than
-                        # returning (we do not need to archive or recurse into tagged directories), see #3991:
+                with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_dir,
+                            noatime=True, op='dir_open') as child_fd:
+                    with backup_io('fstat'):
+                        st = stat_update_check(st, os.fstat(child_fd))
+                    if recurse:
+                        tag_names = dir_is_tagged(path, exclude_caches, exclude_if_present)
+                        if tag_names:
+                            # if we are already recursing in an excluded dir, we do not need to do anything else than
+                            # returning (we do not need to archive or recurse into tagged directories), see #3991:
+                            if not recurse_excluded_dir:
+                                if keep_exclude_tags and not dry_run:
+                                    fso.process_dir(path=path, fd=child_fd, st=st)
+                                    for tag_name in tag_names:
+                                        tag_path = os.path.join(path, tag_name)
+                                        self._process(path=tag_path, parent_fd=child_fd, name=tag_name,
+                                                      fso=fso, cache=cache, matcher=matcher,
+                                                      exclude_caches=exclude_caches, exclude_if_present=exclude_if_present,
+                                                      keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes,
+                                                      restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run)
+                                self.print_file_status('x', path)
+                            return
+                    if not dry_run:
                         if not recurse_excluded_dir:
-                            if keep_exclude_tags and not dry_run:
-                                fso.process_dir(path, st)
-                                for tag_path in tag_paths:
-                                    self._process(fso, cache, matcher, exclude_caches, exclude_if_present,
-                                                  keep_exclude_tags, skip_inodes, tag_path, restrict_dev,
-                                                  read_special=read_special, dry_run=dry_run)
-                            self.print_file_status('x', path)
-                        return
-                if not dry_run:
-                    if not recurse_excluded_dir:
-                        status = fso.process_dir(path, st)
-                if recurse:
-                    with backup_io('scandir'):
-                        entries = helpers.scandir_inorder(path)
-                    for dirent in entries:
-                        normpath = os.path.normpath(dirent.path)
-                        self._process(fso, cache, matcher, exclude_caches, exclude_if_present,
-                                      keep_exclude_tags, skip_inodes, normpath, restrict_dev,
-                                      read_special=read_special, dry_run=dry_run)
+                            status = fso.process_dir(path=path, fd=child_fd, st=st)
+                    if recurse:
+                        with backup_io('scandir'):
+                            entries = helpers.scandir_inorder(path=path, fd=child_fd)
+                        for dirent in entries:
+                            normpath = os.path.normpath(os.path.join(path, dirent.name))
+                            self._process(path=normpath, parent_fd=child_fd, name=dirent.name,
+                                          fso=fso, cache=cache, matcher=matcher,
+                                          exclude_caches=exclude_caches, exclude_if_present=exclude_if_present,
+                                          keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes,
+                                          restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run)
             elif stat.S_ISLNK(st.st_mode):
                 if not dry_run:
                     if not read_special:
-                        status = fso.process_symlink(path, st)
+                        status = fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st)
                     else:
                         try:
-                            st_target = os.stat(path)
+                            st_target = os.stat(name, dir_fd=parent_fd, follow_symlinks=True)
                         except OSError:
                             special = False
                         else:
                             special = is_special(st_target.st_mode)
                         if special:
-                            status = fso.process_file(path, st_target, cache)
+                            status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st_target,
+                                                      cache=cache, flags=flags_follow)
                         else:
-                            status = fso.process_symlink(path, st)
+                            status = fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st)
             elif stat.S_ISFIFO(st.st_mode):
                 if not dry_run:
                     if not read_special:
-                        status = fso.process_fifo(path, st)
+                        status = fso.process_fifo(path=path, parent_fd=parent_fd, name=name, st=st)
                     else:
-                        status = fso.process_file(path, st, cache)
+                        status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache)
             elif stat.S_ISCHR(st.st_mode):
                 if not dry_run:
                     if not read_special:
-                        status = fso.process_dev(path, st, 'c')
+                        status = fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='c')
                     else:
-                        status = fso.process_file(path, st, cache)
+                        status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache)
             elif stat.S_ISBLK(st.st_mode):
                 if not dry_run:
                     if not read_special:
-                        status = fso.process_dev(path, st, 'b')
+                        status = fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='b')
                     else:
-                        status = fso.process_file(path, st, cache)
+                        status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache)
             elif stat.S_ISSOCK(st.st_mode):
                 # Ignore unix sockets
                 return
@@ -660,7 +673,7 @@ class Archiver:
             else:
                 self.print_warning('Unknown file type: %s', path)
                 return
-        except BackupOSError as e:
+        except (BackupOSError, BackupError) as e:
             self.print_warning('%s: %s', path, e)
             status = 'E'
         # Status output

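The reworked _process keeps an open fd for every directory on the recursion stack and resolves each child by name relative to that fd, instead of re-walking the full path for every item. A toy version of that traversal, assuming Python 3.7+ (os.scandir() only accepts a file descriptor since 3.7, which is why helpers/fs.py grows a py_37_plus switch); walk is an illustrative name, not borg code:

import os
import stat

def walk(parent_fd, name, path):
    # the path string is carried along only for display/matching;
    # the kernel never re-resolves it once traversal has started:
    st = os.stat(name, dir_fd=parent_fd, follow_symlinks=False)
    print(path, stat.filemode(st.st_mode))
    if stat.S_ISDIR(st.st_mode):
        flags = os.O_RDONLY | os.O_NOFOLLOW | getattr(os, 'O_DIRECTORY', 0)
        child_fd = os.open(name, flags, dir_fd=parent_fd)
        try:
            with os.scandir(child_fd) as entries:  # fd-based scandir, py37+
                for entry in sorted(entries, key=lambda e: e.name):
                    walk(child_fd, entry.name, os.path.join(path, entry.name))
        finally:
            os.close(child_fd)

root_fd = os.open('.', os.O_RDONLY)
try:
    walk(root_fd, '.', '.')
finally:
    os.close(root_fd)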
+ 60 - 7
src/borg/helpers/fs.py

@@ -15,6 +15,9 @@ from ..logger import create_logger
 logger = create_logger()
 
 
+py_37_plus = sys.version_info >= (3, 7)
+
+
 def get_base_dir():
     """Get home directory / base directory for borg:
 
@@ -103,18 +106,19 @@ def dir_is_cachedir(path):
 def dir_is_tagged(path, exclude_caches, exclude_if_present):
     """Determines whether the specified path is excluded by being a cache
     directory or containing user-specified tag files/directories. Returns a
-    list of the paths of the tag files/directories (either CACHEDIR.TAG or the
+    list of the names of the tag files/directories (either CACHEDIR.TAG or the
     matching user-specified files/directories).
     """
-    tag_paths = []
+    # TODO: do operations based on the directory fd
+    tag_names = []
     if exclude_caches and dir_is_cachedir(path):
-        tag_paths.append(os.path.join(path, CACHE_TAG_NAME))
+        tag_names.append(CACHE_TAG_NAME)
     if exclude_if_present is not None:
         for tag in exclude_if_present:
             tag_path = os.path.join(path, tag)
             if os.path.exists(tag_path):
-                tag_paths.append(tag_path)
-    return tag_paths
+                tag_names.append(tag)
+    return tag_names
 
 
 _safe_re = re.compile(r'^((\.\.)?/+)+')
@@ -144,8 +148,10 @@ def scandir_keyfunc(dirent):
         return (1, dirent.name)
 
 
-def scandir_inorder(path='.'):
-    return sorted(os.scandir(path), key=scandir_keyfunc)
+def scandir_inorder(*, path, fd=None):
+    # py37+ supports giving an fd instead of a path (no full entry.path in DirEntry in that case!)
+    arg = fd if fd is not None and py_37_plus else path
+    return sorted(os.scandir(arg), key=scandir_keyfunc)
 
 
 def secure_erase(path):
@@ -189,6 +195,53 @@ def dash_open(path, mode):
         return open(path, mode)
 
 
+def O_(*flags):
+    result = 0
+    for flag in flags:
+        result |= getattr(os, 'O_' + flag, 0)
+    return result
+
+
+flags_base = O_('BINARY', 'NONBLOCK', 'NOCTTY')
+flags_follow = flags_base | O_('RDONLY')
+flags_normal = flags_base | O_('RDONLY', 'NOFOLLOW')
+flags_noatime = flags_normal | O_('NOATIME')
+flags_root = O_('RDONLY')
+flags_dir = O_('DIRECTORY', 'RDONLY', 'NOFOLLOW')
+
+
+def os_open(*, flags, path=None, parent_fd=None, name=None, noatime=False):
+    """
+    Use os.open to open a fs item.
+
+    If parent_fd and name are given, they are preferred and openat will be used,
+    path is not used in this case.
+
+    :param path: full (but not necessarily absolute) path
+    :param parent_fd: open directory file descriptor
+    :param name: name relative to parent_fd
+    :param flags: open flags for os.open() (int)
+    :param noatime: True if access time shall be preserved
+    :return: file descriptor
+    """
+    fname = name if name is not None and parent_fd is not None else path
+    _flags_normal = flags
+    if noatime:
+        _flags_noatime = _flags_normal | O_('NOATIME')
+        try:
+            # if we have O_NOATIME, this likely will succeed if we are root or owner of file:
+            fd = os.open(fname, _flags_noatime, dir_fd=parent_fd)
+        except PermissionError:
+            if _flags_noatime == _flags_normal:
+                # we do not have O_NOATIME, no need to try again:
+                raise
+            # Was this EPERM due to the O_NOATIME flag? Try again without it:
+            fd = os.open(fname, _flags_normal, dir_fd=parent_fd)
+    else:
+        fd = os.open(fname, _flags_normal, dir_fd=parent_fd)
+    return fd
+
+
 def umount(mountpoint):
     env = prepare_subprocess_env(system=True)
     try:

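os_open centralizes the O_NOATIME fallback that the removed Archive._open_rb used to implement: the flag is only honored for root or the file's owner, so an EPERM is retried without it. A condensed sketch of just that fallback, assuming a platform where os.O_NOATIME may or may not exist (open_noatime is an illustrative name):

import os

def open_noatime(name, dir_fd=None):
    flags = os.O_RDONLY | getattr(os, 'O_BINARY', 0)  # O_BINARY only exists on Windows
    noatime = getattr(os, 'O_NOATIME', 0)             # only exists on Linux
    if noatime:
        try:
            # likely succeeds if we are root or own the file:
            return os.open(name, flags | noatime, dir_fd=dir_fd)
        except PermissionError:
            pass  # EPERM was probably caused by O_NOATIME; retry without it
    return os.open(name, flags, dir_fd=dir_fd)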
+ 7 - 10
src/borg/platform/linux.pyx

@@ -233,10 +233,11 @@ def acl_get(path, item, st, numeric_owner=False, fd=None):
     cdef char *default_text = NULL
     cdef char *access_text = NULL
 
-    if fd is None and isinstance(path, str):
-        path = os.fsencode(path)
     if stat.S_ISLNK(st.st_mode):
+        # symlinks can not have ACLs
         return
+    if isinstance(path, str):
+        path = os.fsencode(path)
     if (fd is not None and acl_extended_fd(fd) <= 0
         or
         fd is None and acl_extended_file(path) <= 0):
@@ -247,12 +248,11 @@ def acl_get(path, item, st, numeric_owner=False, fd=None):
         converter = acl_append_numeric_ids
     try:
         if fd is not None:
-            # we only have a fd for FILES (not other fs objects), so we can get the access_acl:
-            assert stat.S_ISREG(st.st_mode)
             access_acl = acl_get_fd(fd)
         else:
-            # if we have no fd, it can be anything
             access_acl = acl_get_file(path, ACL_TYPE_ACCESS)
+        if stat.S_ISDIR(st.st_mode):
+            # only directories can have a default ACL. there is no fd-based api to get it.
             default_acl = acl_get_file(path, ACL_TYPE_DEFAULT)
         if access_acl:
             access_text = acl_to_text(access_acl, NULL)
@@ -299,11 +299,8 @@ def acl_set(path, item, numeric_owner=False, fd=None):
         try:
             default_acl = acl_from_text(<bytes>converter(default_text))
             if default_acl:
-                # default acls apply only to directories
-                if False and fd is not None:  # Linux API seems to not support this
-                    acl_set_fd(fd, default_acl)
-                else:
-                    acl_set_file(path, ACL_TYPE_DEFAULT, default_acl)
+                # only directories can get a default ACL. there is no fd-based api to set it.
+                acl_set_file(path, ACL_TYPE_DEFAULT, default_acl)
         finally:
             acl_free(default_acl)
 

+ 16 - 1
src/borg/testsuite/archiver.py

@@ -32,7 +32,7 @@ except ImportError:
 
 import borg
 from .. import xattr, helpers, platform
-from ..archive import Archive, ChunkBuffer, flags_noatime, flags_normal
+from ..archive import Archive, ChunkBuffer
 from ..archiver import Archiver, parse_storage_quota
 from ..cache import Cache, LocalCache
 from ..constants import *  # NOQA
@@ -46,6 +46,7 @@ from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
 from ..helpers import bin_to_hex
 from ..helpers import MAX_S
 from ..helpers import msgpack
+from ..helpers import flags_noatime, flags_normal
 from ..nanorst import RstToTextLazy, rst_to_terminal
 from ..patterns import IECommand, PatternMatcher, parse_pattern
 from ..item import Item, ItemDiff
@@ -1779,6 +1780,20 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         output = self.cmd('create', '--list', '--filter=AM', self.repository_location + '::test3', 'input')
         self.assert_in('file1', output)
 
+    def test_create_read_special(self):
+        self.create_regular_file('regular', size=1024)
+        os.symlink(os.path.join(self.input_path, 'regular'), os.path.join(self.input_path, 'link_regular'))
+        if are_fifos_supported():
+            os.mkfifo(os.path.join(self.input_path, 'fifo'))
+            os.symlink(os.path.join(self.input_path, 'fifo'), os.path.join(self.input_path, 'link_fifo'))
+        self.cmd('init', '--encryption=repokey', self.repository_location)
+        archive = self.repository_location + '::test'
+        self.cmd('create', '--read-special', archive, 'input')
+        output = self.cmd('list', archive)
+        assert 'input/link_regular -> ' in output  # not pointing to special file: archived as symlink
+        if are_fifos_supported():
+            assert 'input/link_fifo\n' in output  # pointing to a special file: archived following symlink
+
     def test_create_read_special_broken_symlink(self):
         os.symlink('somewhere doesnt exist', os.path.join(self.input_path, 'link'))
         self.cmd('init', '--encryption=repokey', self.repository_location)