Răsfoiți Sursa

unify scanning and listing of segment dirs / segment files and apply good practices

+ os.scandir instead of os.listdir
  Improved speed and added flexibility with attributes (name,path,is_dir(),is_file())
+ use is_dir / is_file to make sure  we're reading only dirs / files respectively
+ Filtering to particular start, end index range built in
+ Have seperate generators to avoid unnecessary less_than / greater_than checking for every dir/file when not required

Resolves #7597
nain 2 ani în urmă
părinte
comite
c9f35a16e9
1 a modificat fișierele cu 35 adăugiri și 11 ștergeri
  1. 35 11
      src/borg/repository.py

+ 35 - 11
src/borg/repository.py

@@ -1319,28 +1319,52 @@ class LoggedIO:
         safe_fadvise(fd.fileno(), 0, 0, 'DONTNEED')
         fd.close()
 
+    def get_segment_dirs(self, start_index=None, end_index=None):
+        """Returns generator yeilding required segment dirs as `os.DirEntry` objects. Start and end are inclusive.
+        """
+        data_dir = os.path.join(self.path, 'data')
+        if start_index is None and end_index is None:
+            segment_dirs = (f for f in os.scandir(data_dir) if f.is_dir() and f.name.isdigit())
+        elif start_index is not None and end_index is None:
+            segment_dirs = (f for f in os.scandir(data_dir) if f.is_dir() and f.name.isdigit() and start_index <= int(f.name))
+        elif start_index is None and end_index is not None:
+            segment_dirs = (f for f in os.scandir(data_dir) if f.is_dir() and f.name.isdigit() and int(f.name) <= end_index)
+        elif start_index is not None and end_index is not None:
+            segment_dirs = (f for f in os.scandir(data_dir) if f.is_dir() and f.name.isdigit() and start_index <= int(f.name) <= end_index)
+        return segment_dirs
+
+    def get_segment_files(self, segment_dir, start_index=None, end_index=None):
+        """Returns generator yeilding required segment files in segment_dir as `os.DirEntry` objects. Start and end are inclusive.
+        """
+        if start_index is None and end_index is None:
+            segment_files = (f for f in os.scandir(segment_dir) if f.is_file() and f.name.isdigit())
+        elif start_index is not None and end_index is None:
+            segment_files = (f for f in os.scandir(segment_dir) if f.is_file() and f.name.isdigit() and start_index <= int(f.name))
+        elif start_index is None and end_index is not None:
+            segment_files = (f for f in os.scandir(segment_dir) if f.is_file() and f.name.isdigit() and int(f.name) <= end_index)
+        elif start_index is not None and end_index is not None:
+            segment_files = (f for f in os.scandir(segment_dir) if f.is_file() and f.name.isdigit() and start_index <= int(f.name) <= end_index)
+        return segment_files
+
     def segment_iterator(self, segment=None, reverse=False):
         if segment is None:
             segment = 0 if not reverse else 2 ** 32 - 1
-        data_path = os.path.join(self.path, 'data')
         start_segment_dir = segment // self.segments_per_dir
-        dirs = os.listdir(data_path)
         if not reverse:
-            dirs = [dir for dir in dirs if dir.isdigit() and int(dir) >= start_segment_dir]
+            dirs = self.get_segment_dirs(start_index=start_segment_dir)
         else:
-            dirs = [dir for dir in dirs if dir.isdigit() and int(dir) <= start_segment_dir]
-        dirs = sorted(dirs, key=int, reverse=reverse)
+            dirs = self.get_segment_dirs(end_index=start_segment_dir)
+        dirs = sorted(dirs, key=lambda dir: int(dir.name), reverse=reverse)
         for dir in dirs:
-            filenames = os.listdir(os.path.join(data_path, dir))
             if not reverse:
-                filenames = [filename for filename in filenames if filename.isdigit() and int(filename) >= segment]
+                files = self.get_segment_files(dir, start_index=segment)
             else:
-                filenames = [filename for filename in filenames if filename.isdigit() and int(filename) <= segment]
-            filenames = sorted(filenames, key=int, reverse=reverse)
-            for filename in filenames:
+                files = self.get_segment_files(dir, end_index=segment)
+            files = sorted(files, key=lambda file: int(file.name), reverse=reverse)
+            for file in files:
                 # Note: Do not filter out logically deleted segments  (see "File system interaction" above),
                 # since this is used by cleanup and txn state detection as well.
-                yield int(filename), os.path.join(data_path, dir, filename)
+                yield int(file.name), file.path
 
     def get_latest_segment(self):
         for segment, filename in self.segment_iterator(reverse=True):