Procházet zdrojové kódy

Add --ignore-zeros flag to import-tar

Fixes #7432.

Backport of #7460.
Artem Sheremet před 2 roky
rodič
revize
3b7c0605af
Změněny 2 soubory: 70 přidání a 3 odebrání
  1. 9 1
      src/borg/archiver.py
  2. 61 2
      src/borg/testsuite/archiver.py

+ 9 - 1
src/borg/archiver.py

@@ -1763,7 +1763,8 @@ class Archiver:
                                       log_json=args.log_json, iec=args.iec,
                                       file_status_printer=self.print_file_status)
 
-        tar = tarfile.open(fileobj=tarstream, mode='r|')
+        tar = tarfile.open(fileobj=tarstream, mode='r|',
+                           ignore_zeros=args.ignore_zeros)
 
         while True:
             tarinfo = tar.next()
@@ -4932,6 +4933,10 @@ class Archiver:
 
         import-tar reads POSIX.1-1988 (ustar), POSIX.1-2001 (pax), GNU tar, UNIX V7 tar
         and SunOS tar with extended attributes.
+
+        To import multiple tarballs into a single archive, they can be simply
+        concatenated (e.g. using "cat") into a single file, and imported with an
+        ``--ignore-zeros`` option to skip through the stop markers between them.
         """)
         subparser = subparsers.add_parser('import-tar', parents=[common_parser], add_help=False,
                                           description=self.do_import_tar.__doc__,
@@ -4951,6 +4956,9 @@ class Archiver:
                                help='only display items with the given status characters')
         subparser.add_argument('--json', action='store_true',
                                help='output stats as JSON (implies --stats)')
+        subparser.add_argument('--ignore-zeros', dest='ignore_zeros',
+                               action='store_true', default=False,
+                               help='ignore zero-filled blocks in the input tarball')
 
         archive_group = subparser.add_argument_group('Archive options')
         archive_group.add_argument('--comment', dest='comment', metavar='COMMENT', default='',

+ 61 - 2
src/borg/testsuite/archiver.py

@@ -322,8 +322,12 @@ class ArchiverTestCaseBase(BaseTestCase):
                 contents = b'X' * size
             fd.write(contents)
 
-    def create_test_files(self):
+    def create_test_files(self, create_hardlinks=True):
         """Create a minimal test case including all supported file types
+
+        Args:
+          create_hardlinks: whether to create a sample hardlink. When set to
+                            False, the hardlink file will not be created at all.
         """
         # File
         self.create_regular_file('file1', size=1024 * 80)
@@ -333,7 +337,7 @@ class ArchiverTestCaseBase(BaseTestCase):
         # File mode
         os.chmod('input/file1', 0o4755)
         # Hard link
-        if are_hardlinks_supported():
+        if create_hardlinks and are_hardlinks_supported():
             os.link(os.path.join(self.input_path, 'file1'),
                     os.path.join(self.input_path, 'hardlink'))
         # Symlink
@@ -3588,6 +3592,61 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
             self.cmd('extract', self.repository_location + '::dst')
         self.assert_dirs_equal('input', 'output/input', ignore_ns=True, ignore_xattrs=True)
 
+    @requires_gnutar
+    def test_import_concatenated_tar_with_ignore_zeros(self):
+        # file1 has a hardlink reference to it, but we put it in a separate
+        # tarball, breaking the link during import-tar. It could be any other
+        # file though, so we won't take chances and just avoid hardlinks.
+        self.create_test_files(create_hardlinks=False)
+        os.unlink('input/flagfile')
+
+        with changedir('input'):
+            subprocess.check_call(['tar', 'cf', 'file1.tar', 'file1'])
+            subprocess.check_call(['tar', 'cf', 'the_rest.tar', '--exclude', 'file1*', '.'])
+            with open('concatenated.tar', 'wb') as concatenated:
+                with open('file1.tar', 'rb') as file1:
+                    concatenated.write(file1.read())
+                # Clean up for assert_dirs_equal.
+                os.unlink('file1.tar')
+
+                with open('the_rest.tar', 'rb') as the_rest:
+                    concatenated.write(the_rest.read())
+                # Clean up for assert_dirs_equal.
+                os.unlink('the_rest.tar')
+
+        self.cmd('init', '--encryption=none', self.repository_location)
+        self.cmd('import-tar', '--ignore-zeros', self.repository_location + '::dst', 'input/concatenated.tar')
+        os.unlink('input/concatenated.tar')
+
+        with changedir(self.output_path):
+            self.cmd('extract', self.repository_location + '::dst')
+        self.assert_dirs_equal('input', 'output', ignore_ns=True, ignore_xattrs=True)
+
+    @requires_gnutar
+    def test_import_concatenated_tar_without_ignore_zeros(self):
+        self.create_test_files()
+        os.unlink('input/flagfile')
+
+        with changedir('input'):
+            subprocess.check_call(['tar', 'cf', 'file1.tar', 'file1'])
+            subprocess.check_call(['tar', 'cf', 'the_rest.tar', '--exclude', 'file1*', '.'])
+            with open('concatenated.tar', 'wb') as concatenated:
+                with open('file1.tar', 'rb') as file1:
+                    concatenated.write(file1.read())
+
+                with open('the_rest.tar', 'rb') as the_rest:
+                    concatenated.write(the_rest.read())
+
+        self.cmd('init', '--encryption=none', self.repository_location)
+        self.cmd('import-tar', self.repository_location + '::dst', 'input/concatenated.tar')
+
+        with changedir(self.output_path):
+            self.cmd('extract', self.repository_location + '::dst')
+
+        # Negative test -- assert that only file1 has been extracted, and the_rest has been ignored
+        # due to zero-filled block marker.
+        self.assert_equal(os.listdir('output'), ['file1'])
+
     def test_detect_attic_repo(self):
         path = make_attic_repo(self.repository_path)
         cmds = [