Browse Source

import/export-tar: --tar-format, support ctime/atime

--tar-format=GNU|PAX (default: GNU)

changed the tests which use GNU tar cli tool to use --tar-format=GNU
explicitly, so they don't break in case we change the default.

atime timestamp is only present in output if the archive item has it
(which is not the case by default, needs "borg create --atime ...").
Thomas Waldmann 3 years ago
parent
commit
78e92fa9e1
3 changed files with 53 additions and 19 deletions
  1. 12 1
      src/borg/archive.py
  2. 29 9
      src/borg/archiver.py
  3. 12 9
      src/borg/testsuite/archiver.py

+ 12 - 1
src/borg/archive.py

@@ -1445,9 +1445,20 @@ class TarfileObjectProcessors:
 
     @contextmanager
     def create_helper(self, tarinfo, status=None, type=None):
+        # Builds an Item from the metadata of *tarinfo*, yields (item, status) to the
+        # "with"-block and, if that block finishes without raising, passes the item
+        # to self.add_item().
+
+        # seconds (a number or a decimal string) -> integer nanoseconds, passed through safe_ns()
+        # NOTE(review): the value goes through a float; for present-day timestamps a float
+        #               cannot resolve single nanoseconds - confirm this loss is acceptable.
+        def s_to_ns(s):
+            return safe_ns(int(float(s) * 1e9))
+
         item = Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type,
                     uid=tarinfo.uid, gid=tarinfo.gid, user=tarinfo.uname or None, group=tarinfo.gname or None,
-                    mtime=safe_ns(int(tarinfo.mtime * 1000**3)))
+                    mtime=s_to_ns(tarinfo.mtime))
+        if tarinfo.pax_headers:
+            ph = tarinfo.pax_headers
+            # note: for mtime this is a bit redundant as it is already done by tarfile module,
+            #       but we just do it in our way to be consistent for sure.
+            # timestamps found in the pax headers replace the mtime set above and
+            # add atime / ctime to the item; names missing from the headers are left alone.
+            for name in 'atime', 'ctime', 'mtime':
+                if name in ph:
+                    ns = s_to_ns(ph[name])
+                    setattr(item, name, ns)
         yield item, status
         # if we get here, "with"-block worked ok without error/exception, the item was processed ok...
         self.add_item(item, stats=self.stats)

+ 29 - 9
src/borg/archiver.py

@@ -1137,7 +1137,8 @@ class Archiver:
 
         # The | (pipe) symbol instructs tarfile to use a streaming mode of operation
         # where it never seeks on the passed fileobj.
-        tar = tarfile.open(fileobj=tarstream, mode='w|', format=tarfile.GNU_FORMAT)
+        tar_format = dict(GNU=tarfile.GNU_FORMAT, PAX=tarfile.PAX_FORMAT)[args.tar_format]
+        tar = tarfile.open(fileobj=tarstream, mode='w|', format=tar_format)
 
         if progress:
             pi = ProgressIndicatorPercent(msg='%5.1f%% Processing: %s', step=0.1, msgid='extract')
@@ -1168,13 +1169,6 @@ class Archiver:
             the file contents, if any, and is None otherwise. When *tarinfo* is None, the *item*
             cannot be represented as a TarInfo object and should be skipped.
             """
-
-            # If we would use the PAX (POSIX) format (which we currently don't),
-            # we can support most things that aren't possible with classic tar
-            # formats, including GNU tar, such as:
-            # atime, ctime, possibly Linux capabilities (security.* xattrs)
-            # and various additions supported by GNU tar in POSIX mode.
-
             stream = None
             tarinfo = tarfile.TarInfo()
             tarinfo.name = item.path
@@ -1236,6 +1230,24 @@ class Archiver:
                 return None, stream
             return tarinfo, stream
 
+        def item_to_paxheaders(item):
+            """
+            Transform (parts of) a Borg *item* into a pax_headers dict.
+
+            For each of atime, ctime and mtime that *item* has, the returned dict maps the
+            name to the timestamp as a decimal "seconds.nanoseconds" string (always 9
+            fraction digits). Timestamps the item does not have are left out, so the
+            returned dict may be empty.
+            """
+            # When using the PAX (POSIX) format, we can support some things that aren't possible
+            # with classic tar formats, including GNU tar, such as:
+            # - atime, ctime (DONE)
+            # - possibly Linux capabilities, security.* xattrs (TODO)
+            # - various additions supported by GNU tar in POSIX mode (TODO)
+            ph = {}
+            # note: for mtime this is a bit redundant as it is already done by tarfile module,
+            #       but we just do it in our way to be consistent for sure.
+            for name in 'atime', 'ctime', 'mtime':
+                if hasattr(item, name):
+                    ns = getattr(item, name)
+                    # Format with integer arithmetic rather than str(ns / 1e9): a float cannot
+                    # hold present-day timestamps at nanosecond resolution, and its repr turns
+                    # into exponent notation (e.g. "1e-07") for very small values.
+                    # divmod() works on abs(ns) so the fraction stays correct for negative
+                    # (pre-1970) timestamps; the sign is prepended separately.
+                    secs, frac = divmod(abs(ns), 10**9)
+                    ph[name] = '%s%d.%09d' % ('-' if ns < 0 else '', secs, frac)
+            return ph
+
         for item in archive.iter_items(filter, partial_extract=partial_extract,
                                        preload=True, hardlink_masters=hardlink_masters):
             orig_path = item.path
@@ -1243,6 +1255,8 @@ class Archiver:
                 item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
             tarinfo, stream = item_to_tarinfo(item, orig_path)
             if tarinfo:
+                if args.tar_format == 'PAX':
+                    tarinfo.pax_headers = item_to_paxheaders(item)
                 if output_list:
                     logging.getLogger('borg.output.list').info(remove_surrogates(orig_path))
                 tar.addfile(tarinfo, stream)
@@ -4043,7 +4057,10 @@ class Archiver:
         read the uncompressed tar stream from stdin and write a compressed/filtered
         tar stream to stdout.
 
-        The generated tarball uses the GNU tar format.
+        Depending on the ``--tar-format`` option, the generated tarball uses one of these formats:
+
+        - PAX: POSIX.1-2001 (pax) format
+        - GNU: GNU tar format
 
         export-tar is a lossy conversion:
         BSD flags, ACLs and extended attributes (xattrs) are not exported; atime and ctime are only exported with ``--tar-format=PAX``.
@@ -4071,6 +4088,9 @@ class Archiver:
                                help='filter program to pipe data through')
         subparser.add_argument('--list', dest='output_list', action='store_true',
                                help='output verbose list of items (files, dirs, ...)')
+        subparser.add_argument('--tar-format', metavar='FMT', dest='tar_format', default='GNU',
+                               choices=('PAX', 'GNU'),
+                               help='select tar format: PAX or GNU')
         subparser.add_argument('location', metavar='ARCHIVE',
                                type=location_validator(archive=True),
                                help='archive to export')

+ 12 - 9
src/borg/testsuite/archiver.py

@@ -3409,7 +3409,7 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
         os.unlink('input/flagfile')
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.cmd('create', self.repository_location + '::test', 'input')
-        self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--progress')
+        self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--progress', '--tar-format=GNU')
         with changedir('output'):
             # This probably assumes GNU tar. Note -p switch to extract permissions regardless of umask.
             subprocess.check_call(['tar', 'xpf', '../simple.tar', '--warning=no-timestamp'])
@@ -3424,7 +3424,8 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
         os.unlink('input/flagfile')
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.cmd('create', self.repository_location + '::test', 'input')
-        list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar.gz', '--list')
+        list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar.gz',
+                        '--list', '--tar-format=GNU')
         assert 'input/file1\n' in list
         assert 'input/dir2\n' in list
         with changedir('output'):
@@ -3439,7 +3440,8 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
         os.unlink('input/flagfile')
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.cmd('create', self.repository_location + '::test', 'input')
-        list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--strip-components=1', '--list')
+        list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar',
+                        '--strip-components=1', '--list', '--tar-format=GNU')
         # --list's path are those before processing with --strip-components
         assert 'input/file1\n' in list
         assert 'input/dir2\n' in list
@@ -3451,7 +3453,8 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
     @requires_gnutar
     def test_export_tar_strip_components_links(self):
         self._extract_hardlinks_setup()
-        self.cmd('export-tar', self.repository_location + '::test', 'output.tar', '--strip-components=2')
+        self.cmd('export-tar', self.repository_location + '::test', 'output.tar',
+                 '--strip-components=2', '--tar-format=GNU')
         with changedir('output'):
             subprocess.check_call(['tar', 'xpf', '../output.tar', '--warning=no-timestamp'])
             assert os.stat('hardlink').st_nlink == 2
@@ -3463,7 +3466,7 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
     @requires_gnutar
     def test_extract_hardlinks_tar(self):
         self._extract_hardlinks_setup()
-        self.cmd('export-tar', self.repository_location + '::test', 'output.tar', 'input/dir1')
+        self.cmd('export-tar', self.repository_location + '::test', 'output.tar', 'input/dir1', '--tar-format=GNU')
         with changedir('output'):
             subprocess.check_call(['tar', 'xpf', '../output.tar', '--warning=no-timestamp'])
             assert os.stat('input/dir1/hardlink').st_nlink == 2
@@ -3471,26 +3474,26 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
             assert os.stat('input/dir1/aaaa').st_nlink == 2
             assert os.stat('input/dir1/source2').st_nlink == 2
 
-    def test_import_tar(self):
+    def test_import_tar(self, tar_format='PAX'):
+        # Round trip: create archive "src" from input, export it as a tarball in *tar_format*,
+        # import that tarball as archive "dst", extract "dst" and compare with the input.
         self.create_test_files()
         os.unlink('input/flagfile')
         self.cmd('init', '--encryption=none', self.repository_location)
         self.cmd('create', self.repository_location + '::src', 'input')
-        self.cmd('export-tar', self.repository_location + '::src', 'simple.tar')
+        self.cmd('export-tar', self.repository_location + '::src', 'simple.tar', f'--tar-format={tar_format}')
         self.cmd('import-tar', self.repository_location + '::dst', 'simple.tar')
         with changedir(self.output_path):
             self.cmd('extract', self.repository_location + '::dst')
+        # the comparison is told to ignore nanoseconds and xattrs
         self.assert_dirs_equal('input', 'output/input', ignore_ns=True, ignore_xattrs=True)
 
     @requires_gzip
-    def test_import_tar_gz(self):
+    def test_import_tar_gz(self, tar_format='GNU'):
         if not shutil.which('gzip'):
             pytest.skip('gzip is not installed')
         self.create_test_files()
         os.unlink('input/flagfile')
         self.cmd('init', '--encryption=none', self.repository_location)
         self.cmd('create', self.repository_location + '::src', 'input')
-        self.cmd('export-tar', self.repository_location + '::src', 'simple.tgz')
+        self.cmd('export-tar', self.repository_location + '::src', 'simple.tgz', f'--tar-format={tar_format}')
         self.cmd('import-tar', self.repository_location + '::dst', 'simple.tgz')
         with changedir(self.output_path):
             self.cmd('extract', self.repository_location + '::dst')