浏览代码

Merge pull request #6522 from ThomasWaldmann/tar-pax-master

import/export-tar: PAX format, ctime and atime support
TW 3 年之前
父节点
当前提交
22fc6d1bdd
共有 3 个文件被更改,包括 53 次插入和 19 次删除
  1. 12 1
      src/borg/archive.py
  2. 29 9
      src/borg/archiver.py
  3. 12 9
      src/borg/testsuite/archiver.py

+ 12 - 1
src/borg/archive.py

@@ -1445,9 +1445,20 @@ class TarfileObjectProcessors:
 
     @contextmanager
     def create_helper(self, tarinfo, status=None, type=None):
+        def s_to_ns(s):
+            return safe_ns(int(float(s) * 1e9))
+
         item = Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type,
                     uid=tarinfo.uid, gid=tarinfo.gid, user=tarinfo.uname or None, group=tarinfo.gname or None,
-                    mtime=safe_ns(int(tarinfo.mtime * 1000**3)))
+                    mtime=s_to_ns(tarinfo.mtime))
+        if tarinfo.pax_headers:
+            ph = tarinfo.pax_headers
+            # note: for mtime this is a bit redundant as it is already done by tarfile module,
+            #       but we just do it in our way to be consistent for sure.
+            for name in 'atime', 'ctime', 'mtime':
+                if name in ph:
+                    ns = s_to_ns(ph[name])
+                    setattr(item, name, ns)
         yield item, status
         # if we get here, "with"-block worked ok without error/exception, the item was processed ok...
         self.add_item(item, stats=self.stats)

+ 29 - 9
src/borg/archiver.py

@@ -1137,7 +1137,8 @@ class Archiver:
 
         # The | (pipe) symbol instructs tarfile to use a streaming mode of operation
         # where it never seeks on the passed fileobj.
-        tar = tarfile.open(fileobj=tarstream, mode='w|', format=tarfile.GNU_FORMAT)
+        tar_format = dict(GNU=tarfile.GNU_FORMAT, PAX=tarfile.PAX_FORMAT)[args.tar_format]
+        tar = tarfile.open(fileobj=tarstream, mode='w|', format=tar_format)
 
         if progress:
             pi = ProgressIndicatorPercent(msg='%5.1f%% Processing: %s', step=0.1, msgid='extract')
@@ -1168,13 +1169,6 @@ class Archiver:
             the file contents, if any, and is None otherwise. When *tarinfo* is None, the *item*
             cannot be represented as a TarInfo object and should be skipped.
             """
-
-            # If we would use the PAX (POSIX) format (which we currently don't),
-            # we can support most things that aren't possible with classic tar
-            # formats, including GNU tar, such as:
-            # atime, ctime, possibly Linux capabilities (security.* xattrs)
-            # and various additions supported by GNU tar in POSIX mode.
-
             stream = None
             tarinfo = tarfile.TarInfo()
             tarinfo.name = item.path
@@ -1236,6 +1230,24 @@ class Archiver:
                 return None, stream
             return tarinfo, stream
 
+        def item_to_paxheaders(item):
+            """
+            Transform (parts of) a Borg *item* into a pax_headers dict.
+            """
+            # When using the PAX (POSIX) format, we can support some things that aren't possible
+            # with classic tar formats, including GNU tar, such as:
+            # - atime, ctime (DONE)
+            # - possibly Linux capabilities, security.* xattrs (TODO)
+            # - various additions supported by GNU tar in POSIX mode (TODO)
+            ph = {}
+            # note: for mtime this is a bit redundant as it is already done by tarfile module,
+            #       but we just do it in our way to be consistent for sure.
+            for name in 'atime', 'ctime', 'mtime':
+                if hasattr(item, name):
+                    ns = getattr(item, name)
+                    ph[name] = str(ns / 1e9)
+            return ph
+
         for item in archive.iter_items(filter, partial_extract=partial_extract,
                                        preload=True, hardlink_masters=hardlink_masters):
             orig_path = item.path
@@ -1243,6 +1255,8 @@ class Archiver:
                 item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
             tarinfo, stream = item_to_tarinfo(item, orig_path)
             if tarinfo:
+                if args.tar_format == 'PAX':
+                    tarinfo.pax_headers = item_to_paxheaders(item)
                 if output_list:
                     logging.getLogger('borg.output.list').info(remove_surrogates(orig_path))
                 tar.addfile(tarinfo, stream)
@@ -4043,7 +4057,10 @@ class Archiver:
         read the uncompressed tar stream from stdin and write a compressed/filtered
         tar stream to stdout.
 
-        The generated tarball uses the GNU tar format.
+        Depending on the ``--tar-format`` option, the generated tarball uses this format:
+
+        - PAX: POSIX.1-2001 (pax) format
+        - GNU: GNU tar format
 
         export-tar is a lossy conversion:
         BSD flags, ACLs, extended attributes (xattrs), atime and ctime are not exported.
@@ -4071,6 +4088,9 @@ class Archiver:
                                help='filter program to pipe data through')
         subparser.add_argument('--list', dest='output_list', action='store_true',
                                help='output verbose list of items (files, dirs, ...)')
+        subparser.add_argument('--tar-format', metavar='FMT', dest='tar_format', default='GNU',
+                               choices=('PAX', 'GNU'),
+                               help='select tar format: PAX or GNU')
         subparser.add_argument('location', metavar='ARCHIVE',
                                type=location_validator(archive=True),
                                help='archive to export')

+ 12 - 9
src/borg/testsuite/archiver.py

@@ -3409,7 +3409,7 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
         os.unlink('input/flagfile')
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.cmd('create', self.repository_location + '::test', 'input')
-        self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--progress')
+        self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--progress', '--tar-format=GNU')
         with changedir('output'):
             # This probably assumes GNU tar. Note -p switch to extract permissions regardless of umask.
             subprocess.check_call(['tar', 'xpf', '../simple.tar', '--warning=no-timestamp'])
@@ -3424,7 +3424,8 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
         os.unlink('input/flagfile')
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.cmd('create', self.repository_location + '::test', 'input')
-        list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar.gz', '--list')
+        list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar.gz',
+                        '--list', '--tar-format=GNU')
         assert 'input/file1\n' in list
         assert 'input/dir2\n' in list
         with changedir('output'):
@@ -3439,7 +3440,8 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
         os.unlink('input/flagfile')
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.cmd('create', self.repository_location + '::test', 'input')
-        list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--strip-components=1', '--list')
+        list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar',
+                        '--strip-components=1', '--list', '--tar-format=GNU')
         # --list's path are those before processing with --strip-components
         assert 'input/file1\n' in list
         assert 'input/dir2\n' in list
@@ -3451,7 +3453,8 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
     @requires_gnutar
     def test_export_tar_strip_components_links(self):
         self._extract_hardlinks_setup()
-        self.cmd('export-tar', self.repository_location + '::test', 'output.tar', '--strip-components=2')
+        self.cmd('export-tar', self.repository_location + '::test', 'output.tar',
+                 '--strip-components=2', '--tar-format=GNU')
         with changedir('output'):
             subprocess.check_call(['tar', 'xpf', '../output.tar', '--warning=no-timestamp'])
             assert os.stat('hardlink').st_nlink == 2
@@ -3463,7 +3466,7 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
     @requires_gnutar
     def test_extract_hardlinks_tar(self):
         self._extract_hardlinks_setup()
-        self.cmd('export-tar', self.repository_location + '::test', 'output.tar', 'input/dir1')
+        self.cmd('export-tar', self.repository_location + '::test', 'output.tar', 'input/dir1', '--tar-format=GNU')
         with changedir('output'):
             subprocess.check_call(['tar', 'xpf', '../output.tar', '--warning=no-timestamp'])
             assert os.stat('input/dir1/hardlink').st_nlink == 2
@@ -3471,26 +3474,26 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
             assert os.stat('input/dir1/aaaa').st_nlink == 2
             assert os.stat('input/dir1/source2').st_nlink == 2
 
-    def test_import_tar(self):
+    def test_import_tar(self, tar_format='PAX'):
         self.create_test_files()
         os.unlink('input/flagfile')
         self.cmd('init', '--encryption=none', self.repository_location)
         self.cmd('create', self.repository_location + '::src', 'input')
-        self.cmd('export-tar', self.repository_location + '::src', 'simple.tar')
+        self.cmd('export-tar', self.repository_location + '::src', 'simple.tar', f'--tar-format={tar_format}')
         self.cmd('import-tar', self.repository_location + '::dst', 'simple.tar')
         with changedir(self.output_path):
             self.cmd('extract', self.repository_location + '::dst')
         self.assert_dirs_equal('input', 'output/input', ignore_ns=True, ignore_xattrs=True)
 
     @requires_gzip
-    def test_import_tar_gz(self):
+    def test_import_tar_gz(self, tar_format='GNU'):
         if not shutil.which('gzip'):
             pytest.skip('gzip is not installed')
         self.create_test_files()
         os.unlink('input/flagfile')
         self.cmd('init', '--encryption=none', self.repository_location)
         self.cmd('create', self.repository_location + '::src', 'input')
-        self.cmd('export-tar', self.repository_location + '::src', 'simple.tgz')
+        self.cmd('export-tar', self.repository_location + '::src', 'simple.tgz', f'--tar-format={tar_format}')
         self.cmd('import-tar', self.repository_location + '::dst', 'simple.tgz')
         with changedir(self.output_path):
             self.cmd('extract', self.repository_location + '::dst')