浏览代码

Outsource recursive directory walking (#5492)

Split recursive directory walking/processing into walking and item processing.
Lapinot 4 年之前
父节点
当前提交
34f6cfcd81
共有 2 个文件被更改,包括 105 次插入88 次删除
  1. 13 2
      src/borg/archive.py
  2. 92 86
      src/borg/archiver.py

+ 13 - 2
src/borg/archive.py

@@ -38,7 +38,7 @@ from .helpers import StableDict
 from .helpers import bin_to_hex
 from .helpers import safe_ns
 from .helpers import ellipsis_truncate, ProgressIndicatorPercent, log_multi
-from .helpers import os_open, flags_normal
+from .helpers import os_open, flags_normal, flags_dir
 from .helpers import msgpack
 from .helpers import sig_int
 from .patterns import PathPrefixPattern, FnmatchPattern, IECommand
@@ -1200,11 +1200,22 @@ class FilesystemObjectProcessors:
         if hardlink_master:
             self.hard_links[(st.st_ino, st.st_dev)] = safe_path
 
-    def process_dir(self, *, path, fd, st):
+    def process_dir_with_fd(self, *, path, fd, st):
         with self.create_helper(path, st, 'd', hardlinkable=False) as (item, status, hardlinked, hardlink_master):
             item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
             return status
 
+    def process_dir(self, *, path, parent_fd, name, st):
+        with self.create_helper(path, st, 'd', hardlinkable=False) as (item, status, hardlinked, hardlink_master):
+            with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_dir,
+                        noatime=True, op='dir_open') as fd:
+                # fd is None for directories on windows, in that case a race condition check is not possible.
+                if fd is not None:
+                    with backup_io('fstat'):
+                        st = stat_update_check(st, os.fstat(fd))
+                item.update(self.metadata_collector.stat_attrs(st, path, fd=fd))
+                return status
+
     def process_fifo(self, *, path, parent_fd, name, st):
         with self.create_helper(path, st, 'f') as (item, status, hardlinked, hardlink_master):  # fifo
             with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_normal, noatime=True) as fd:

+ 92 - 86
src/borg/archiver.py

@@ -565,11 +565,11 @@ class Archiver:
                             restrict_dev = st.st_dev
                         else:
                             restrict_dev = None
-                        self._process(path=path, parent_fd=parent_fd, name=name,
-                                      fso=fso, cache=cache, matcher=matcher,
-                                      exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present,
-                                      keep_exclude_tags=args.keep_exclude_tags, skip_inodes=skip_inodes,
-                                      restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run)
+                        self._rec_walk(path=path, parent_fd=parent_fd, name=name,
+                                       fso=fso, cache=cache, matcher=matcher,
+                                       exclude_caches=args.exclude_caches, exclude_if_present=args.exclude_if_present,
+                                       keep_exclude_tags=args.keep_exclude_tags, skip_inodes=skip_inodes,
+                                       restrict_dev=restrict_dev, read_special=args.read_special, dry_run=dry_run)
             if not dry_run:
                 if args.progress:
                     archive.stats.show_progress(final=True)
@@ -626,10 +626,66 @@ class Archiver:
             create_inner(None, None, None)
         return self.exit_code
 
-    def _process(self, *, path, parent_fd=None, name=None,
-                 fso, cache, matcher,
-                 exclude_caches, exclude_if_present, keep_exclude_tags, skip_inodes,
-                 restrict_dev, read_special=False, dry_run=False):
+    def _process_any(self, *, path, parent_fd, name, st, fso, cache, read_special, dry_run):
+        """
+        Call the right method on the given FilesystemObjectProcessor.
+        """
+
+        if dry_run:
+            return '-'
+        elif stat.S_ISREG(st.st_mode):
+            return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache)
+        elif stat.S_ISDIR(st.st_mode):
+            return fso.process_dir(path=path, parent_fd=parent_fd, name=name, st=st)
+        elif stat.S_ISLNK(st.st_mode):
+            if not read_special:
+                return fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st)
+            else:
+                try:
+                    st_target = os.stat(name, dir_fd=parent_fd, follow_symlinks=True)
+                except OSError:
+                    special = False
+                else:
+                    special = is_special(st_target.st_mode)
+                if special:
+                    return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st_target,
+                                              cache=cache, flags=flags_special_follow)
+                else:
+                    return fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st)
+        elif stat.S_ISFIFO(st.st_mode):
+            if not read_special:
+                return fso.process_fifo(path=path, parent_fd=parent_fd, name=name, st=st)
+            else:
+                return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st,
+                                        cache=cache, flags=flags_special)
+        elif stat.S_ISCHR(st.st_mode):
+            if not read_special:
+                return fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='c')
+            else:
+                return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st,
+                                        cache=cache, flags=flags_special)
+        elif stat.S_ISBLK(st.st_mode):
+            if not read_special:
+                return fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='b')
+            else:
+                return fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st,
+                                        cache=cache, flags=flags_special)
+        elif stat.S_ISSOCK(st.st_mode):
+            # Ignore unix sockets
+            return
+        elif stat.S_ISDOOR(st.st_mode):
+            # Ignore Solaris doors
+            return
+        elif stat.S_ISPORT(st.st_mode):
+            # Ignore Solaris event ports
+            return
+        else:
+            self.print_warning('Unknown file type: %s', path)
+            return
+
+    def _rec_walk(self, *, path, parent_fd, name, fso, cache, matcher,
+                  exclude_caches, exclude_if_present, keep_exclude_tags,
+                  skip_inodes, restrict_dev, read_special, dry_run):
         """
         Process *path* (or, preferably, parent_fd/name) recursively according to the various parameters.
 
@@ -639,6 +695,7 @@ class Archiver:
             # the user says "get out of here!" and we have already completed the desired action.
             return
 
+        status = None
         try:
             recurse_excluded_dir = False
             if matcher.match(path):
@@ -652,10 +709,10 @@ class Archiver:
                 # could trigger an error, e.g. if access is forbidden, see #3209.
                 if not matcher.recurse_dir:
                     return
+                recurse_excluded_dir = True
                 with backup_io('stat'):
                     st = os_stat(path=path, parent_fd=parent_fd, name=name, follow_symlinks=False)
-                recurse_excluded_dir = stat.S_ISDIR(st.st_mode)
-                if not recurse_excluded_dir:
+                if not stat.S_ISDIR(st.st_mode):
                     return
 
             if (st.st_ino, st.st_dev) in skip_inodes:
@@ -664,17 +721,20 @@ class Archiver:
             # but we WILL save the mountpoint directory (or more precise: the root
             # directory of the mounted filesystem that shadows the mountpoint dir).
             recurse = restrict_dev is None or st.st_dev == restrict_dev
-            status = None
+
             if self.exclude_nodump:
                 # Ignore if nodump flag is set
                 with backup_io('flags'):
                     if get_flags(path=path, st=st) & stat.UF_NODUMP:
                         self.print_file_status('x', path)
                         return
-            if stat.S_ISREG(st.st_mode):
-                if not dry_run:
-                    status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st, cache=cache)
-            elif stat.S_ISDIR(st.st_mode):
+
+            if not stat.S_ISDIR(st.st_mode):
+                # directories cannot go in this branch because they can be excluded based on tag
+                # files they might contain
+                status = self._process_any(path=path, parent_fd=parent_fd, name=name, st=st, fso=fso, cache=cache,
+                                           read_special=read_special, dry_run=dry_run)
+            else:
                 with OsOpen(path=path, parent_fd=parent_fd, name=name, flags=flags_dir,
                             noatime=True, op='dir_open') as child_fd:
                     # child_fd is None for directories on windows, in that case a race condition check is not possible.
@@ -688,90 +748,36 @@ class Archiver:
                             # returning (we do not need to archive or recurse into tagged directories), see #3991:
                             if not recurse_excluded_dir:
                                 if keep_exclude_tags and not dry_run:
-                                    fso.process_dir(path=path, fd=child_fd, st=st)
+                                    fso.process_dir_with_fd(path=path, fd=child_fd, st=st)
                                     for tag_name in tag_names:
                                         tag_path = os.path.join(path, tag_name)
-                                        self._process(path=tag_path, parent_fd=child_fd, name=tag_name,
-                                                      fso=fso, cache=cache, matcher=matcher,
-                                                      exclude_caches=exclude_caches, exclude_if_present=exclude_if_present,
-                                                      keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes,
-                                                      restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run)
+                                        self._rec_walk(
+                                                path=tag_path, parent_fd=child_fd, name=tag_name, fso=fso, cache=cache,
+                                                matcher=matcher, exclude_caches=exclude_caches, exclude_if_present=exclude_if_present,
+                                                keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes,
+                                                restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run)
                                 self.print_file_status('x', path)
                             return
-                    if not dry_run:
-                        if not recurse_excluded_dir:
-                            status = fso.process_dir(path=path, fd=child_fd, st=st)
+                    if not recurse_excluded_dir and not dry_run:
+                        status = fso.process_dir_with_fd(path=path, fd=child_fd, st=st)
                     if recurse:
                         with backup_io('scandir'):
                             entries = helpers.scandir_inorder(path=path, fd=child_fd)
                         for dirent in entries:
                             normpath = os.path.normpath(os.path.join(path, dirent.name))
-                            self._process(path=normpath, parent_fd=child_fd, name=dirent.name,
-                                          fso=fso, cache=cache, matcher=matcher,
-                                          exclude_caches=exclude_caches, exclude_if_present=exclude_if_present,
-                                          keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes,
-                                          restrict_dev=restrict_dev, read_special=read_special, dry_run=dry_run)
-            elif stat.S_ISLNK(st.st_mode):
-                if not dry_run:
-                    if not read_special:
-                        status = fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st)
-                    else:
-                        try:
-                            st_target = os.stat(name, dir_fd=parent_fd, follow_symlinks=True)
-                        except OSError:
-                            special = False
-                        else:
-                            special = is_special(st_target.st_mode)
-                        if special:
-                            status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st_target,
-                                                      cache=cache, flags=flags_special_follow)
-                        else:
-                            status = fso.process_symlink(path=path, parent_fd=parent_fd, name=name, st=st)
-            elif stat.S_ISFIFO(st.st_mode):
-                if not dry_run:
-                    if not read_special:
-                        status = fso.process_fifo(path=path, parent_fd=parent_fd, name=name, st=st)
-                    else:
-                        status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st,
-                                                  cache=cache, flags=flags_special)
-            elif stat.S_ISCHR(st.st_mode):
-                if not dry_run:
-                    if not read_special:
-                        status = fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='c')
-                    else:
-                        status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st,
-                                                  cache=cache, flags=flags_special)
-            elif stat.S_ISBLK(st.st_mode):
-                if not dry_run:
-                    if not read_special:
-                        status = fso.process_dev(path=path, parent_fd=parent_fd, name=name, st=st, dev_type='b')
-                    else:
-                        status = fso.process_file(path=path, parent_fd=parent_fd, name=name, st=st,
-                                                  cache=cache, flags=flags_special)
-            elif stat.S_ISSOCK(st.st_mode):
-                # Ignore unix sockets
-                return
-            elif stat.S_ISDOOR(st.st_mode):
-                # Ignore Solaris doors
-                return
-            elif stat.S_ISPORT(st.st_mode):
-                # Ignore Solaris event ports
-                return
-            else:
-                self.print_warning('Unknown file type: %s', path)
-                return
+                            self._rec_walk(
+                                    path=normpath, parent_fd=child_fd, name=dirent.name, fso=fso, cache=cache, matcher=matcher,
+                                    exclude_caches=exclude_caches, exclude_if_present=exclude_if_present,
+                                    keep_exclude_tags=keep_exclude_tags, skip_inodes=skip_inodes, restrict_dev=restrict_dev,
+                                    read_special=read_special, dry_run=dry_run)
+
         except (BackupOSError, BackupError) as e:
             self.print_warning('%s: %s', path, e)
             status = 'E'
         if status == 'C':
             self.print_warning('%s: file changed while we backed it up', path)
-        # Status output
         if status is None:
-            if not dry_run:
-                status = '?'  # need to add a status code somewhere
-            else:
-                status = '-'  # dry run, item was not backed up
-
+            status = '?'  # need to add a status code somewhere
         if not recurse_excluded_dir:
             self.print_file_status(status, path)
 
@@ -4592,7 +4598,7 @@ def sig_info_handler(sig_no, stack):  # pragma: no cover
     with signal_handler(sig_no, signal.SIG_IGN):
         for frame in inspect.getouterframes(stack):
             func, loc = frame[3], frame[0].f_locals
-            if func in ('process_file', '_process', ):  # create op
+            if func in ('process_file', '_rec_walk', ):  # create op
                 path = loc['path']
                 try:
                     pos = loc['fd'].tell()