2 سال پیش · cca8280393
--- a/src/borg/archive.py
+++ b/src/borg/archive.py
@@ -783,14 +783,14 @@ Duration: {0.duration}
 
				     def extract_item(
			
 
				         self,
			
 
				         item,
			
 
				+        *,
			
 
				         restore_attrs=True,
			
 
				         dry_run=False,
			
 
				         stdout=False,
			
 
				         sparse=False,
			
 
				         hlm=None,
			
 
				-        stripped_components=0,
			
 
				-        original_path=None,
			
 
				         pi=None,
			
 
				+        continue_extraction=False,
			
 
				     ):
			
 
				         """
			
 
				         Extract archive item.
			
@@ -801,10 +801,28 @@ Duration: {0.duration}
 
				         :param stdout: write extracted data to stdout
			
 
				         :param sparse: write sparse files (chunk-granularity, independent of the original being sparse)
			
 
				         :param hlm: maps hlid to link_target for extracting subtrees with hardlinks correctly
			
 
				-        :param stripped_components: stripped leading path components to correct hard link extraction
			
 
				-        :param original_path: 'path' key as stored in archive
			
 
				         :param pi: ProgressIndicatorPercent (or similar) for file extraction progress (in bytes)
			
 
				+        :param continue_extraction: continue a previously interrupted extraction of the same archive, skipping regular files that were already fully extracted
			
 
				         """
			
 
				+
			
 
				+        def same_item(item, st):
			
 
				+            """does the fs item at the same path (stat result st) match the archived item, judged by file type, mode, size and mtime?"""
			
 
				+            if not stat.S_ISREG(st.st_mode):
			
 
				+                # we only "optimize" for regular files.
			
 
				+                # other file types are less frequent and have no content extraction we could "optimize away".
			
 
				+                return False
			
 
				+            if item.mode != st.st_mode or item.size != st.st_size:
			
 
				+                # the size check catches a file that was only partially written by an interrupted previous extraction
			
 
				+                return False
			
 
				+            if item.get("mtime") != st.st_mtime_ns:
			
 
				+                # note: mtime is "extracted" late, after xattrs and ACLs, but before flags.
			
 
				+                return False
			
 
				+            # this is good enough for the intended use case:
			
 
				+            # continuing an extraction of the same archive that initially started in an empty directory.
			
 
				+            # there is a very small risk that "bsdflags" of one file are wrong:
			
 
				+            # if a previous extraction was interrupted between setting the mtime and setting non-default flags.
			
 
				+            return True
			
 
				+
			
 
				         has_damaged_chunks = "chunks_healthy" in item
			
 
				         if dry_run or stdout:
			
 
				             with self.extract_helper(item, "", hlm, dry_run=dry_run or stdout) as hardlink_set:
			
@@ -834,7 +852,6 @@ Duration: {0.duration}
 
				                 raise BackupError("File has damaged (all-zero) chunks. Try running borg check --repair.")
			
 
				             return
			
 
				 
			
 
				-        original_path = original_path or item.path
			
 
				         dest = self.cwd
			
 
				         if item.path.startswith(("/", "../")):
			
 
				             raise Exception("Path should be relative and local")
			
@@ -842,7 +859,9 @@ Duration: {0.duration}
 
				         # Attempt to remove existing files, ignore errors on failure
			
 
				         try:
			
 
				             st = os.stat(path, follow_symlinks=False)
			
 
				-            if stat.S_ISDIR(st.st_mode):
			
 
				+            if continue_extraction and same_item(item, st):
			
 
				+                return  # done! we already have fully extracted this file in a previous run.
			
 
				+            elif stat.S_ISDIR(st.st_mode):
			
 
				                 os.rmdir(path)
			
 
				             else:
			
 
				                 os.unlink(path)
			
@@ -998,6 +1017,16 @@ Duration: {0.duration}
 
				                     set_flags(path, item.bsdflags, fd=fd)
			
 
				                 except OSError:
			
 
				                     pass
			
 
				+        else:  # win32
			
 
				+            # set timestamps rather late
			
 
				+            mtime = item.mtime
			
 
				+            atime = item.atime if "atime" in item else mtime
			
 
				+            try:
			
 
				+                # note: no fd support on win32
			
 
				+                os.utime(path, None, ns=(atime, mtime))
			
 
				+            except OSError:
			
 
				+                # some systems don't support calling utime on a symlink
			
 
				+                pass
			
 
				 
			
 
				     def set_meta(self, key, value):
			
 
				         metadata = self._load_meta(self.id)
			
--- a/src/borg/archiver/extract_cmd.py
+++ b/src/borg/archiver/extract_cmd.py
@@ -42,6 +42,7 @@ class ExtractMixIn:
 
				         stdout = args.stdout
			
 
				         sparse = args.sparse
			
 
				         strip_components = args.strip_components
			
 
				+        continue_extraction = args.continue_extraction
			
 
				         dirs = []
			
 
				         hlm = HardLinkManager(id_type=bytes, info_type=str)  # hlid -> path
			
 
				 
			
@@ -76,13 +77,7 @@ class ExtractMixIn:
 
				                         archive.extract_item(item, stdout=stdout, restore_attrs=False)
			
 
				                     else:
			
 
				                         archive.extract_item(
			
 
				-                            item,
			
 
				-                            stdout=stdout,
			
 
				-                            sparse=sparse,
			
 
				-                            hlm=hlm,
			
 
				-                            stripped_components=strip_components,
			
 
				-                            original_path=orig_path,
			
 
				-                            pi=pi,
			
 
				+                            item, stdout=stdout, sparse=sparse, hlm=hlm, pi=pi, continue_extraction=continue_extraction
			
 
				                         )
			
 
				             except (BackupOSError, BackupError) as e:
			
 
				                 self.print_warning("%s: %s", remove_surrogates(orig_path), e)
			
@@ -174,6 +169,12 @@ class ExtractMixIn:
 
				             action="store_true",
			
 
				             help="create holes in output sparse file from all-zero chunks",
			
 
				         )
			
 
				+        subparser.add_argument(
			
 
				+            "--continue",
			
 
				+            dest="continue_extraction",
			
 
				+            action="store_true",
			
 
				+            help="continue a previously interrupted extraction of same archive",
			
 
				+        )
			
 
				         subparser.add_argument("name", metavar="NAME", type=archivename_validator, help="specify the archive name")
			
 
				         subparser.add_argument(
			
 
				             "paths", metavar="PATH", nargs="*", type=str, help="paths to extract; patterns are supported"
			
--- a/src/borg/testsuite/archiver/extract_cmd.py
+++ b/src/borg/testsuite/archiver/extract_cmd.py
@@ -13,7 +13,7 @@ from ...helpers import EXIT_WARNING
 
				 from ...helpers import flags_noatime, flags_normal
			
 
				 from .. import changedir, same_ts_ns
			
 
				 from .. import are_symlinks_supported, are_hardlinks_supported, is_utime_fully_supported, is_birthtime_fully_supported
			
 
				-from ..platform import is_darwin
			
 
				+from ..platform import is_darwin, is_win32
			
 
				 from . import (
			
 
				     ArchiverTestCaseBase,
			
 
				     ArchiverTestCaseBinaryBase,
			
@@ -621,6 +621,49 @@ class ArchiverTestCase(ArchiverTestCaseBase):
 
				             with patch.object(xattr, "setxattr", patched_setxattr_EACCES):
			
 
				                 self.cmd(f"--repo={self.repository_location}", "extract", "test", exit_code=EXIT_WARNING)
			
 
				 
			
 
				+    def test_extract_continue(self):
			
 
				+        CONTENTS1, CONTENTS2, CONTENTS3 = b"contents1" * 100, b"contents2" * 200, b"contents3" * 300
			
 
				+        self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
			
 
				+        self.create_regular_file("file1", contents=CONTENTS1)
			
 
				+        self.create_regular_file("file2", contents=CONTENTS2)
			
 
				+        self.create_regular_file("file3", contents=CONTENTS3)
			
 
				+        self.cmd(f"--repo={self.repository_location}", "create", "arch", "input")
			
 
				+        with changedir("output"):
			
 
				+            # we simulate an interrupted/partial extraction:
			
 
				+            self.cmd(f"--repo={self.repository_location}", "extract", "arch")
			
 
				+            # do not modify file1, it stands for a successfully extracted file
			
 
				+            file1_st = os.stat("input/file1")
			
 
				+            # simulate a partially extracted file2 (smaller size, archived mtime not yet set)
			
 
				+            file2_st = os.stat("input/file2")
			
 
				+            os.truncate("input/file2", 123)  # -> incorrect size, incorrect mtime
			
 
				+            # simulate file3 has not yet been extracted
			
 
				+            file3_st = os.stat("input/file3")
			
 
				+            os.remove("input/file3")
			
 
				+        with changedir("output"):
			
 
				+            # now try to continue extracting, using the same archive, same output dir:
			
 
				+            self.cmd(f"--repo={self.repository_location}", "extract", "arch", "--continue")
			
 
				+            now_file1_st = os.stat("input/file1")
			
 
				+            assert file1_st.st_ino == now_file1_st.st_ino  # file1 was NOT extracted again
			
 
				+            assert file1_st.st_mtime_ns == now_file1_st.st_mtime_ns  # has correct mtime
			
 
				+            new_file2_st = os.stat("input/file2")
			
 
				+            assert file2_st.st_ino != new_file2_st.st_ino  # file2 was extracted again
			
 
				+            assert file2_st.st_mtime_ns == new_file2_st.st_mtime_ns  # has correct mtime
			
 
				+            new_file3_st = os.stat("input/file3")
			
 
				+            assert file3_st.st_ino != new_file3_st.st_ino  # file3 was extracted again
			
 
				+            assert file3_st.st_mtime_ns == new_file3_st.st_mtime_ns  # has correct mtime
			
 
				+            # windows has a strange ctime behaviour when deleting and recreating a file
			
 
				+            if not is_win32:
			
 
				+                assert file1_st.st_ctime_ns == now_file1_st.st_ctime_ns  # file not extracted again
			
 
				+                assert file2_st.st_ctime_ns != new_file2_st.st_ctime_ns  # file extracted again
			
 
				+                assert file3_st.st_ctime_ns != new_file3_st.st_ctime_ns  # file extracted again
			
 
				+            # check if all contents (and thus also file sizes) are correct:
			
 
				+            with open("input/file1", "rb") as f:
			
 
				+                assert f.read() == CONTENTS1
			
 
				+            with open("input/file2", "rb") as f:
			
 
				+                assert f.read() == CONTENTS2
			
 
				+            with open("input/file3", "rb") as f:
			
 
				+                assert f.read() == CONTENTS3
			
 
				+
			
 
				 
			
 
				 class RemoteArchiverTestCase(RemoteArchiverTestCaseBase, ArchiverTestCase):
			
 
				     """run the same tests, but with a remote repository"""