use *at style functions (e.g. openat, fstatat) to avoid races

Races via changing path components can be avoided by opening the
parent directory and using the parent_fd + file_name combination with
*at style functions to access the directory's contents.
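
To make the pattern concrete, here is a minimal standalone sketch
(hypothetical directory and file names, not code from this commit):

import os

# Open the parent directory once; every later open is relative to this fd,
# so renaming or replacing path components in between cannot redirect us.
parent_fd = os.open('/srv/data', os.O_RDONLY)
try:
    # O_NOFOLLOW guards against the entry itself being swapped for a symlink.
    fd = os.open('report.txt', os.O_RDONLY | os.O_NOFOLLOW, dir_fd=parent_fd)
    try:
        st = os.fstat(fd)  # guaranteed to describe the object we just opened
    finally:
        os.close(fd)
finally:
    os.close(parent_fd)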
Thomas Waldmann, 7 years ago
parent commit 833c49f834
3 changed files with 138 additions and 87 deletions
  1. src/borg/archive.py  +32 -17
  2. src/borg/archiver.py  +73 -59
  3. src/borg/helpers/fs.py  +33 -11

src/borg/archive.py  +32 -17

@@ -196,9 +196,9 @@ def backup_io_iter(iterator):
 
 
 @contextmanager
-def OsOpen(path, flags, noatime=False, op='open'):
+def OsOpen(*, flags, path=None, parent_fd=None, name=None, noatime=False, op='open'):
     with backup_io(op):
-        fd = os_open(path, flags, noatime)
+        fd = os_open(path=path, parent_fd=parent_fd, name=name, flags=flags, noatime=noatime)
     try:
         yield fd
     finally:
@@ -1076,31 +1076,46 @@ class FilesystemObjectProcessors:
         if hardlink_master:
             self.hard_links[(st.st_ino, st.st_dev)] = safe_path
 
-    def process_dir(self, *, path, st):
+    def process_dir(self, *, path, fd, st):
         with self.create_helper(path, st, 'd', hardlinkable=False) as (item, status, hardlinked, hardlink_master):
-            item.update(self.metadata_collector.stat_attrs(st, path))
+            item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
             return status
 
-    def process_fifo(self, *, path, st):
+    def process_fifo(self, *, path, parent_fd, name, st):
         with self.create_helper(path, st, 'f') as (item, status, hardlinked, hardlink_master):  # fifo
-            item.update(self.metadata_collector.stat_attrs(st, path))
-            return status
+            with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd:
+                with backup_io('fstat'):
+                    curr_st = os.fstat(fd)
+                # XXX do some checks here: st vs. curr_st
+                assert stat.S_ISFIFO(curr_st.st_mode)
+                # make sure stats refer to same object that we are processing below
+                st = curr_st
+                item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
+                return status
 
-    def process_dev(self, *, path, st, dev_type):
+    def process_dev(self, *, path, parent_fd, name, st, dev_type):
         with self.create_helper(path, st, dev_type) as (item, status, hardlinked, hardlink_master):  # char/block device
-            item.rdev = st.st_rdev
-            item.update(self.metadata_collector.stat_attrs(st, path))
-            return status
+            with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd:
+                with backup_io('fstat'):
+                    curr_st = os.fstat(fd)
+                # XXX do some checks here: st vs. curr_st
+                assert stat.S_ISBLK(curr_st.st_mode) or stat.S_ISCHR(curr_st.st_mode)
+                # make sure stats refer to same object that we are processing below
+                st = curr_st
+                item.rdev = st.st_rdev
+                item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
+                return status
 
-    def process_symlink(self, *, path, st):
+    def process_symlink(self, *, path, parent_fd, name, st):
         # note: using hardlinkable=False because we can not support hardlinked symlinks,
         #       due to the dual-use of item.source, see issue #2343:
         # hardlinked symlinks will be archived [and extracted] as non-hardlinked symlinks.
         with self.create_helper(path, st, 's', hardlinkable=False) as (item, status, hardlinked, hardlink_master):
+            fname = name if name is not None and parent_fd is not None else path
             with backup_io('readlink'):
-                source = os.readlink(path)
+                source = os.readlink(fname, dir_fd=parent_fd)
             item.source = source
-            item.update(self.metadata_collector.stat_attrs(st, path))
+            item.update(self.metadata_collector.stat_attrs(st, path))  # can't use FD here?
             return status
 
     def process_stdin(self, *, path, cache):
@@ -1120,9 +1135,9 @@ class FilesystemObjectProcessors:
         self.add_item(item, stats=self.stats)
         return 'i'  # stdin
 
-    def process_file(self, *, path, st, cache):
+    def process_file(self, *, path, parent_fd, name, st, cache):
         with self.create_helper(path, st, None) as (item, status, hardlinked, hardlink_master):  # no status yet
-            with OsOpen(path, flags_normal, noatime=True) as fd:
+            with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd:
                 with backup_io('fstat'):
                     curr_st = os.fstat(fd)
                 # XXX do some checks here: st vs. curr_st
@@ -1172,7 +1187,7 @@ class FilesystemObjectProcessors:
                     # we processed a special file like a regular file. reflect that in mode,
                     # so it can be extracted / accessed in FUSE mount like a regular file:
                     item.mode = stat.S_IFREG | stat.S_IMODE(item.mode)
-            return status
+                return status
 
 
 def valid_msgpacked_dict(d, keys_serialized):
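
For illustration, one way a caller might use the new keyword-only OsOpen and
fill in the "XXX do some checks here" spots. This is a hedged sketch; the
inode/device comparison and the error message are assumptions, not part of
this commit:

with OsOpen(path=path, parent_fd=parent_fd, name=name,
            flags=flags_normal, noatime=True) as fd:
    with backup_io('fstat'):
        curr_st = os.fstat(fd)
    # did the object change between the earlier stat and our open?
    if (curr_st.st_ino, curr_st.st_dev) != (st.st_ino, st.st_dev):
        raise BackupError('%s: file changed while we processed it' % path)
    st = curr_st  # from here on, st describes the object behind fd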

src/borg/archiver.py  +73 -59

@@ -34,7 +34,7 @@ from . import __version__
 from . import helpers
 from .algorithms.checksums import crc32
 from .archive import Archive, ArchiveChecker, ArchiveRecreater, Statistics, is_special
-from .archive import BackupError, BackupOSError, backup_io
+from .archive import BackupError, BackupOSError, backup_io, OsOpen
 from .archive import FilesystemObjectProcessors, MetadataCollector, ChunksProcessor
 from .cache import Cache, assert_secure, SecurityManager
 from .constants import *  # NOQA
@@ -66,6 +66,7 @@ from .helpers import ChunkIteratorFileWrapper
 from .helpers import popen_with_error_handling, prepare_subprocess_env
 from .helpers import dash_open
 from .helpers import umount
+from .helpers import flags_root, flags_dir
 from .helpers import msgpack
 from .nanorst import rst_to_terminal
 from .patterns import ArgparsePatternAction, ArgparseExcludeFileAction, ArgparsePatternFileAction, parse_exclude_pattern
@@ -479,20 +480,23 @@ class Archiver:
                     self.print_file_status(status, path)
                     continue
                 path = os.path.normpath(path)
-                try:
-                    st = os.stat(path, follow_symlinks=False)
-                except OSError as e:
-                    self.print_warning('%s: %s', path, e)
-                    continue
-                if args.one_file_system:
-                    restrict_dev = st.st_dev
-                else:
-                    restrict_dev = None
-                self._process(path=path,
-                              fso=fso, cache=cache, matcher=matcher,
-                              exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present,
-                              keep_exclude_tags=args.keep_exclude_tags, skip_inodes=skip_inodes,
-                              restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run)
+                parent_dir = os.path.dirname(path) or '.'
+                name = os.path.basename(path)
+                with OsOpen(path=parent_dir, flags=flags_root, noatime=True, op='open_root') as parent_fd:
+                    try:
+                        st = os.stat(name, dir_fd=parent_fd, follow_symlinks=False)
+                    except OSError as e:
+                        self.print_warning('%s: %s', path, e)
+                        continue
+                    if args.one_file_system:
+                        restrict_dev = st.st_dev
+                    else:
+                        restrict_dev = None
+                    self._process(path=path, parent_fd=parent_fd, name=name,
+                                  fso=fso, cache=cache, matcher=matcher,
+                                  exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present,
+                                  keep_exclude_tags=args.keep_exclude_tags, skip_inodes=skip_inodes,
+                                  restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run)
             if not dry_run:
                 archive.save(comment=args.comment, timestamp=args.timestamp)
                 if args.progress:
@@ -544,12 +548,12 @@ class Archiver:
             create_inner(None, None, None)
         return self.exit_code
 
-    def _process(self, *, path,
+    def _process(self, *, path, parent_fd=None, name=None,
                  fso, cache, matcher,
                  exclude_caches, exclude_if_present, keep_exclude_tags, skip_inodes,
                  restrict_dev, read_special=False, dry_run=False):
         """
-        Process *path* recursively according to the various parameters.
+        Process *path* (or, preferably, parent_fd/name) recursively according to the various parameters.
 
         This should only raise on critical errors. Per-item errors must be handled within this method.
         """
@@ -557,7 +561,7 @@ class Archiver:
             recurse_excluded_dir = False
             if matcher.match(path):
                 with backup_io('stat'):
-                    st = os.stat(path, follow_symlinks=False)
+                    st = os.stat(name, dir_fd=parent_fd, follow_symlinks=False)
             else:
                 self.print_file_status('x', path)
                 # get out here as quickly as possible:
@@ -567,7 +571,7 @@ class Archiver:
                 if not matcher.recurse_dir:
                     return
                 with backup_io('stat'):
-                    st = os.stat(path, follow_symlinks=False)
+                    st = os.stat(name, dir_fd=parent_fd, follow_symlinks=False)
                 recurse_excluded_dir = stat.S_ISDIR(st.st_mode)
                 if not recurse_excluded_dir:
                     return
@@ -582,75 +586,85 @@ class Archiver:
             if self.exclude_nodump:
                 # Ignore if nodump flag is set
                 with backup_io('flags'):
-                    if get_flags(path, st) & stat.UF_NODUMP:
+                    if get_flags(path=path, st=st) & stat.UF_NODUMP:
                         self.print_file_status('x', path)
                         return
             if stat.S_ISREG(st.st_mode):
                 if not dry_run:
-                    status = fso.process_file(path=path, st=st, cache=cache)
+                    status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache)
             elif stat.S_ISDIR(st.st_mode):
-                if recurse:
-                    tag_paths = dir_is_tagged(path, exclude_caches, exclude_if_present)
-                    if tag_paths:
-                        # if we are already recursing in an excluded dir, we do not need to do anything else than
-                        # returning (we do not need to archive or recurse into tagged directories), see #3991:
+                with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_dir,
+                            noatime=True, op='dir_open') as child_fd:
+                    with backup_io('fstat'):
+                        curr_st = os.fstat(child_fd)
+                    # XXX do some checks here: st vs. curr_st
+                    assert stat.S_ISDIR(curr_st.st_mode)
+                    # make sure stats refer to same object that we are processing below
+                    st = curr_st
+                    if recurse:
+                        tag_names = dir_is_tagged(path, exclude_caches, exclude_if_present)
+                        if tag_names:
+                            # if we are already recursing in an excluded dir, we do not need to do anything else than
+                            # returning (we do not need to archive or recurse into tagged directories), see #3991:
+                            if not recurse_excluded_dir:
+                                if keep_exclude_tags and not dry_run:
+                                    fso.process_dir(path=path, fd=child_fd, st=st)
+                                    for tag_name in tag_names:
+                                        tag_path = os.path.join(path, tag_name)
+                                        self._process(path=tag_path, parent_fd=child_fd, name=tag_name,
+                                                      fso=fso, cache=cache, matcher=matcher,
+                                                      exclude_caches=exclude_caches, exclude_if_present=exclude_if_present,
+                                                      keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes,
+                                                      restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run)
+                                self.print_file_status('x', path)
+                            return
+                    if not dry_run:
                         if not recurse_excluded_dir:
-                            if keep_exclude_tags and not dry_run:
-                                fso.process_dir(path=path, st=st)
-                                for tag_path in tag_paths:
-                                    self._process(path=tag_path,
-                                                  fso=fso, cache=cache, matcher=matcher,
-                                                  exclude_caches=exclude_caches, exclude_if_present=exclude_if_present,
-                                                  keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes,
-                                                  restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run)
-                            self.print_file_status('x', path)
-                        return
-                if not dry_run:
-                    if not recurse_excluded_dir:
-                        status = fso.process_dir(path=path, st=st)
-                if recurse:
-                    with backup_io('scandir'):
-                        entries = helpers.scandir_inorder(path)
-                    for dirent in entries:
-                        normpath = os.path.normpath(dirent.path)
-                        self._process(path=normpath,
-                                      fso=fso, cache=cache, matcher=matcher,
-                                      exclude_caches=exclude_caches, exclude_if_present=exclude_if_present,
-                                      keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes,
-                                      restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run)
+                            status = fso.process_dir(path=path, fd=child_fd, st=st)
+                    if recurse:
+                        with backup_io('scandir'):
+                            entries = helpers.scandir_inorder(path=path, fd=child_fd)
+                        for dirent in entries:
+                            normpath = os.path.normpath(dirent.path)
+                            self._process(path=normpath, parent_fd=child_fd, name=dirent.name,
+                                          fso=fso, cache=cache, matcher=matcher,
+                                          exclude_caches=exclude_caches, exclude_if_present=exclude_if_present,
+                                          keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes,
+                                          restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run)
             elif stat.S_ISLNK(st.st_mode):
                 if not dry_run:
                     if not read_special:
-                        status = fso.process_symlink(path=path, st=st)
+                        status = fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st)
                     else:
                         try:
-                            st_target = os.stat(path)
+                            st_target = os.stat(name, dir_fd=parent_fd, follow_symlinks=True)
                         except OSError:
                             special = False
                         else:
                             special = is_special(st_target.st_mode)
                         if special:
-                            status = fso.process_file(path=path, st=st_target, cache=cache)
+                            # XXX must FOLLOW symlinks!
+                            status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st_target, cache=cache)
                         else:
-                            status = fso.process_symlink(path=path, st=st)
+                            status = fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st)
             elif stat.S_ISFIFO(st.st_mode):
                 if not dry_run:
                     if not read_special:
-                        status = fso.process_fifo(path=path, st=st)
+                        status = fso.process_fifo(path=path, parent_fd=parent_fd, name=name, st=st)
                     else:
-                        status = fso.process_file(path=path, st=st, cache=cache)
+                        status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache)
             elif stat.S_ISCHR(st.st_mode):
                 if not dry_run:
                     if not read_special:
-                        status = fso.process_dev(path=path, st=st, dev_type='c')
+                        status = fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='c')
                     else:
-                        status = fso.process_file(path=path, st=st, cache=cache)
+                        status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache)
             elif stat.S_ISBLK(st.st_mode):
                 if not dry_run:
                     if not read_special:
-                        status = fso.process_dev(path=path, st=st, dev_type='b')
+                        status = fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='b')
                     else:
-                        status = fso.process_file(path=path, st=st, cache=cache)
+                        status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache)
             elif stat.S_ISSOCK(st.st_mode):
                 # Ignore unix sockets
                 return
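
A condensed, self-contained sketch of the traversal pattern this hunk adopts
(not borg's code; Linux/POSIX open flags assumed). Note that when os.scandir()
is given a directory fd (Python 3.7+), dirent.path degenerates to just the
entry name, so a full path for reporting has to be rebuilt with os.path.join():

import os
import stat

def walk(parent_fd, name, path):
    """Recurse using (parent_fd, name); path is only kept for reporting."""
    st = os.stat(name, dir_fd=parent_fd, follow_symlinks=False)
    if stat.S_ISDIR(st.st_mode):
        child_fd = os.open(name, os.O_RDONLY | os.O_DIRECTORY | os.O_NOFOLLOW,
                           dir_fd=parent_fd)
        try:
            for dirent in os.scandir(child_fd):  # fd argument needs Python 3.7+
                walk(child_fd, dirent.name, os.path.join(path, dirent.name))
        finally:
            os.close(child_fd)
    else:
        print(path)  # a real implementation would process the item here

root_fd = os.open('/srv/data', os.O_RDONLY)  # hypothetical root
try:
    walk(root_fd, '.', '/srv/data')
finally:
    os.close(root_fd)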

src/borg/helpers/fs.py  +33 -11

@@ -15,6 +15,9 @@ from ..logger import create_logger
 logger = create_logger()
 
 
+py_37_plus = sys.version_info >= (3, 7)
+
+
 def get_base_dir():
     """Get home directory / base directory for borg:
 
@@ -103,18 +106,19 @@ def dir_is_cachedir(path):
 def dir_is_tagged(path, exclude_caches, exclude_if_present):
     """Determines whether the specified path is excluded by being a cache
     directory or containing user-specified tag files/directories. Returns a
-    list of the paths of the tag files/directories (either CACHEDIR.TAG or the
+    list of the names of the tag files/directories (either CACHEDIR.TAG or the
     matching user-specified files/directories).
     """
-    tag_paths = []
+    # TODO: do operations based on the directory fd
+    tag_names = []
     if exclude_caches and dir_is_cachedir(path):
-        tag_paths.append(os.path.join(path, CACHE_TAG_NAME))
+        tag_names.append(CACHE_TAG_NAME)
     if exclude_if_present is not None:
         for tag in exclude_if_present:
             tag_path = os.path.join(path, tag)
             if os.path.exists(tag_path):
-                tag_paths.append(tag_path)
-    return tag_paths
+                tag_names.append(tag)
+    return tag_names
 
 
 _safe_re = re.compile(r'^((\.\.)?/+)+')
@@ -144,8 +148,10 @@ def scandir_keyfunc(dirent):
         return (1, dirent.name)
 
 
-def scandir_inorder(path='.'):
-    return sorted(os.scandir(path), key=scandir_keyfunc)
+def scandir_inorder(*, path, fd=None):
+    # py37+ supports giving a fd instead of a path
+    arg = fd if fd is not None and py_37_plus else path
+    return sorted(os.scandir(arg), key=scandir_keyfunc)
 
 
 def secure_erase(path):
@@ -199,23 +205,39 @@ def O_(*flags):
 flags_base = O_('BINARY', 'NONBLOCK', 'NOCTTY')  # later: add 'NOFOLLOW'
 flags_normal = flags_base | O_('RDONLY')
 flags_noatime = flags_normal | O_('NOATIME')
+flags_root = O_('RDONLY')
+flags_dir = O_('DIRECTORY', 'RDONLY', 'NOFOLLOW')
+
 
+def os_open(*, flags, path=None, parent_fd=None, name=None, noatime=False):
+    """
+    Use os.open to open a fs item.
+
+    If parent_fd and name are given, they are preferred and openat will be used;
+    path is not used in this case.
 
-def os_open(path, flags, noatime=False):
+    :param path: full (but not necessarily absolute) path
+    :param parent_fd: open directory file descriptor
+    :param name: name relative to parent_fd
+    :param flags: open flags for os.open() (int)
+    :param noatime: True if access time shall be preserved
+    :return: file descriptor
+    """
+    fname = name if name is not None and parent_fd is not None else path
     _flags_normal = flags
     if noatime:
         _flags_noatime = _flags_normal | O_('NOATIME')
         try:
             # if we have O_NOATIME, this likely will succeed if we are root or owner of file:
-            fd = os.open(path, _flags_noatime)
+            fd = os.open(fname, _flags_noatime, dir_fd=parent_fd)
         except PermissionError:
             if _flags_noatime == _flags_normal:
                 # we do not have O_NOATIME, no need to try again:
                 raise
             # Was this EPERM due to the O_NOATIME flag? Try again without it:
-            fd = os.open(path, _flags_normal)
+            fd = os.open(fname, _flags_normal, dir_fd=parent_fd)
     else:
-        fd = os.open(path, _flags_normal)
+        fd = os.open(fname, _flags_normal, dir_fd=parent_fd)
     return fd
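
Usage sketch for the revised os_open (hypothetical paths; flags_root and
flags_normal as defined above). The PermissionError fallback exists because
O_NOATIME is only honored if the caller owns the file or is privileged;
otherwise the open is retried without the flag:

# Preferred: openat-style access relative to an already-open parent directory.
parent_fd = os_open(path='/srv/data', flags=flags_root, noatime=True)
fd = os_open(parent_fd=parent_fd, name='report.txt',
             flags=flags_normal, noatime=True)

# Legacy fallback: plain path-based open (racy; used when no parent fd exists).
fd2 = os_open(path='/srv/data/report.txt', flags=flags_normal)

os.close(fd)
os.close(fd2)
os.close(parent_fd)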