Quellcode durchsuchen

import/export-tar: --tar-format, support ctime/atime

--tar-format=GNU|PAX (default: GNU)

Changed the tests that use the GNU tar CLI tool to pass --tar-format=GNU
explicitly, so they don't break if we change the default.

The atime timestamp is only present in the output if the archive item has it
(which is not the case by default; it needs "borg create --atime ...").
Thomas Waldmann vor 3 Jahren
Ursprung
Commit
78e92fa9e1
3 geänderte Dateien mit 53 neuen und 19 gelöschten Zeilen
  1. 12 1
      src/borg/archive.py
  2. 29 9
      src/borg/archiver.py
  3. 12 9
      src/borg/testsuite/archiver.py

+ 12 - 1
src/borg/archive.py

@@ -1445,9 +1445,20 @@ class TarfileObjectProcessors:
 
 
     @contextmanager
     @contextmanager
     def create_helper(self, tarinfo, status=None, type=None):
     def create_helper(self, tarinfo, status=None, type=None):
+        def s_to_ns(s):
+            return safe_ns(int(float(s) * 1e9))
+
         item = Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type,
         item = Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type,
                     uid=tarinfo.uid, gid=tarinfo.gid, user=tarinfo.uname or None, group=tarinfo.gname or None,
                     uid=tarinfo.uid, gid=tarinfo.gid, user=tarinfo.uname or None, group=tarinfo.gname or None,
-                    mtime=safe_ns(int(tarinfo.mtime * 1000**3)))
+                    mtime=s_to_ns(tarinfo.mtime))
+        if tarinfo.pax_headers:
+            ph = tarinfo.pax_headers
+            # note: for mtime this is a bit redundant as it is already done by tarfile module,
+            #       but we just do it in our way to be consistent for sure.
+            for name in 'atime', 'ctime', 'mtime':
+                if name in ph:
+                    ns = s_to_ns(ph[name])
+                    setattr(item, name, ns)
         yield item, status
         yield item, status
         # if we get here, "with"-block worked ok without error/exception, the item was processed ok...
         # if we get here, "with"-block worked ok without error/exception, the item was processed ok...
         self.add_item(item, stats=self.stats)
         self.add_item(item, stats=self.stats)

+ 29 - 9
src/borg/archiver.py

@@ -1137,7 +1137,8 @@ class Archiver:
 
 
         # The | (pipe) symbol instructs tarfile to use a streaming mode of operation
         # The | (pipe) symbol instructs tarfile to use a streaming mode of operation
         # where it never seeks on the passed fileobj.
         # where it never seeks on the passed fileobj.
-        tar = tarfile.open(fileobj=tarstream, mode='w|', format=tarfile.GNU_FORMAT)
+        tar_format = dict(GNU=tarfile.GNU_FORMAT, PAX=tarfile.PAX_FORMAT)[args.tar_format]
+        tar = tarfile.open(fileobj=tarstream, mode='w|', format=tar_format)
 
 
         if progress:
         if progress:
             pi = ProgressIndicatorPercent(msg='%5.1f%% Processing: %s', step=0.1, msgid='extract')
             pi = ProgressIndicatorPercent(msg='%5.1f%% Processing: %s', step=0.1, msgid='extract')
@@ -1168,13 +1169,6 @@ class Archiver:
             the file contents, if any, and is None otherwise. When *tarinfo* is None, the *item*
             the file contents, if any, and is None otherwise. When *tarinfo* is None, the *item*
             cannot be represented as a TarInfo object and should be skipped.
             cannot be represented as a TarInfo object and should be skipped.
             """
             """
-
-            # If we would use the PAX (POSIX) format (which we currently don't),
-            # we can support most things that aren't possible with classic tar
-            # formats, including GNU tar, such as:
-            # atime, ctime, possibly Linux capabilities (security.* xattrs)
-            # and various additions supported by GNU tar in POSIX mode.
-
             stream = None
             stream = None
             tarinfo = tarfile.TarInfo()
             tarinfo = tarfile.TarInfo()
             tarinfo.name = item.path
             tarinfo.name = item.path
@@ -1236,6 +1230,24 @@ class Archiver:
                 return None, stream
                 return None, stream
             return tarinfo, stream
             return tarinfo, stream
 
 
+        def item_to_paxheaders(item):
+            """
+            Transform (parts of) a Borg *item* into a pax_headers dict.
+            """
+            # When using the PAX (POSIX) format, we can support some things that aren't possible
+            # with classic tar formats, including GNU tar, such as:
+            # - atime, ctime (DONE)
+            # - possibly Linux capabilities, security.* xattrs (TODO)
+            # - various additions supported by GNU tar in POSIX mode (TODO)
+            ph = {}
+            # note: for mtime this is a bit redundant as it is already done by tarfile module,
+            #       but we just do it in our way to be consistent for sure.
+            for name in 'atime', 'ctime', 'mtime':
+                if hasattr(item, name):
+                    ns = getattr(item, name)
+                    ph[name] = str(ns / 1e9)
+            return ph
+
         for item in archive.iter_items(filter, partial_extract=partial_extract,
         for item in archive.iter_items(filter, partial_extract=partial_extract,
                                        preload=True, hardlink_masters=hardlink_masters):
                                        preload=True, hardlink_masters=hardlink_masters):
             orig_path = item.path
             orig_path = item.path
@@ -1243,6 +1255,8 @@ class Archiver:
                 item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
                 item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
             tarinfo, stream = item_to_tarinfo(item, orig_path)
             tarinfo, stream = item_to_tarinfo(item, orig_path)
             if tarinfo:
             if tarinfo:
+                if args.tar_format == 'PAX':
+                    tarinfo.pax_headers = item_to_paxheaders(item)
                 if output_list:
                 if output_list:
                     logging.getLogger('borg.output.list').info(remove_surrogates(orig_path))
                     logging.getLogger('borg.output.list').info(remove_surrogates(orig_path))
                 tar.addfile(tarinfo, stream)
                 tar.addfile(tarinfo, stream)
@@ -4043,7 +4057,10 @@ class Archiver:
         read the uncompressed tar stream from stdin and write a compressed/filtered
         read the uncompressed tar stream from stdin and write a compressed/filtered
         tar stream to stdout.
         tar stream to stdout.
 
 
-        The generated tarball uses the GNU tar format.
+        Depending on the ``--tar-format`` option, the generated tarball uses this format:
+
+        - PAX: POSIX.1-2001 (pax) format
+        - GNU: GNU tar format
 
 
         export-tar is a lossy conversion:
         export-tar is a lossy conversion:
         BSD flags, ACLs, extended attributes (xattrs), atime and ctime are not exported.
         BSD flags, ACLs, extended attributes (xattrs), atime and ctime are not exported.
@@ -4071,6 +4088,9 @@ class Archiver:
                                help='filter program to pipe data through')
                                help='filter program to pipe data through')
         subparser.add_argument('--list', dest='output_list', action='store_true',
         subparser.add_argument('--list', dest='output_list', action='store_true',
                                help='output verbose list of items (files, dirs, ...)')
                                help='output verbose list of items (files, dirs, ...)')
+        subparser.add_argument('--tar-format', metavar='FMT', dest='tar_format', default='GNU',
+                               choices=('PAX', 'GNU'),
+                               help='select tar format: PAX or GNU')
         subparser.add_argument('location', metavar='ARCHIVE',
         subparser.add_argument('location', metavar='ARCHIVE',
                                type=location_validator(archive=True),
                                type=location_validator(archive=True),
                                help='archive to export')
                                help='archive to export')

+ 12 - 9
src/borg/testsuite/archiver.py

@@ -3409,7 +3409,7 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
         os.unlink('input/flagfile')
         os.unlink('input/flagfile')
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.cmd('create', self.repository_location + '::test', 'input')
         self.cmd('create', self.repository_location + '::test', 'input')
-        self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--progress')
+        self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--progress', '--tar-format=GNU')
         with changedir('output'):
         with changedir('output'):
             # This probably assumes GNU tar. Note -p switch to extract permissions regardless of umask.
             # This probably assumes GNU tar. Note -p switch to extract permissions regardless of umask.
             subprocess.check_call(['tar', 'xpf', '../simple.tar', '--warning=no-timestamp'])
             subprocess.check_call(['tar', 'xpf', '../simple.tar', '--warning=no-timestamp'])
@@ -3424,7 +3424,8 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
         os.unlink('input/flagfile')
         os.unlink('input/flagfile')
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.cmd('create', self.repository_location + '::test', 'input')
         self.cmd('create', self.repository_location + '::test', 'input')
-        list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar.gz', '--list')
+        list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar.gz',
+                        '--list', '--tar-format=GNU')
         assert 'input/file1\n' in list
         assert 'input/file1\n' in list
         assert 'input/dir2\n' in list
         assert 'input/dir2\n' in list
         with changedir('output'):
         with changedir('output'):
@@ -3439,7 +3440,8 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
         os.unlink('input/flagfile')
         os.unlink('input/flagfile')
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.cmd('init', '--encryption=repokey', self.repository_location)
         self.cmd('create', self.repository_location + '::test', 'input')
         self.cmd('create', self.repository_location + '::test', 'input')
-        list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar', '--strip-components=1', '--list')
+        list = self.cmd('export-tar', self.repository_location + '::test', 'simple.tar',
+                        '--strip-components=1', '--list', '--tar-format=GNU')
         # --list's path are those before processing with --strip-components
         # --list's path are those before processing with --strip-components
         assert 'input/file1\n' in list
         assert 'input/file1\n' in list
         assert 'input/dir2\n' in list
         assert 'input/dir2\n' in list
@@ -3451,7 +3453,8 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
     @requires_gnutar
     @requires_gnutar
     def test_export_tar_strip_components_links(self):
     def test_export_tar_strip_components_links(self):
         self._extract_hardlinks_setup()
         self._extract_hardlinks_setup()
-        self.cmd('export-tar', self.repository_location + '::test', 'output.tar', '--strip-components=2')
+        self.cmd('export-tar', self.repository_location + '::test', 'output.tar',
+                 '--strip-components=2', '--tar-format=GNU')
         with changedir('output'):
         with changedir('output'):
             subprocess.check_call(['tar', 'xpf', '../output.tar', '--warning=no-timestamp'])
             subprocess.check_call(['tar', 'xpf', '../output.tar', '--warning=no-timestamp'])
             assert os.stat('hardlink').st_nlink == 2
             assert os.stat('hardlink').st_nlink == 2
@@ -3463,7 +3466,7 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
     @requires_gnutar
     @requires_gnutar
     def test_extract_hardlinks_tar(self):
     def test_extract_hardlinks_tar(self):
         self._extract_hardlinks_setup()
         self._extract_hardlinks_setup()
-        self.cmd('export-tar', self.repository_location + '::test', 'output.tar', 'input/dir1')
+        self.cmd('export-tar', self.repository_location + '::test', 'output.tar', 'input/dir1', '--tar-format=GNU')
         with changedir('output'):
         with changedir('output'):
             subprocess.check_call(['tar', 'xpf', '../output.tar', '--warning=no-timestamp'])
             subprocess.check_call(['tar', 'xpf', '../output.tar', '--warning=no-timestamp'])
             assert os.stat('input/dir1/hardlink').st_nlink == 2
             assert os.stat('input/dir1/hardlink').st_nlink == 2
@@ -3471,26 +3474,26 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
             assert os.stat('input/dir1/aaaa').st_nlink == 2
             assert os.stat('input/dir1/aaaa').st_nlink == 2
             assert os.stat('input/dir1/source2').st_nlink == 2
             assert os.stat('input/dir1/source2').st_nlink == 2
 
 
-    def test_import_tar(self):
+    def test_import_tar(self, tar_format='PAX'):
         self.create_test_files()
         self.create_test_files()
         os.unlink('input/flagfile')
         os.unlink('input/flagfile')
         self.cmd('init', '--encryption=none', self.repository_location)
         self.cmd('init', '--encryption=none', self.repository_location)
         self.cmd('create', self.repository_location + '::src', 'input')
         self.cmd('create', self.repository_location + '::src', 'input')
-        self.cmd('export-tar', self.repository_location + '::src', 'simple.tar')
+        self.cmd('export-tar', self.repository_location + '::src', 'simple.tar', f'--tar-format={tar_format}')
         self.cmd('import-tar', self.repository_location + '::dst', 'simple.tar')
         self.cmd('import-tar', self.repository_location + '::dst', 'simple.tar')
         with changedir(self.output_path):
         with changedir(self.output_path):
             self.cmd('extract', self.repository_location + '::dst')
             self.cmd('extract', self.repository_location + '::dst')
         self.assert_dirs_equal('input', 'output/input', ignore_ns=True, ignore_xattrs=True)
         self.assert_dirs_equal('input', 'output/input', ignore_ns=True, ignore_xattrs=True)
 
 
     @requires_gzip
     @requires_gzip
-    def test_import_tar_gz(self):
+    def test_import_tar_gz(self, tar_format='GNU'):
         if not shutil.which('gzip'):
         if not shutil.which('gzip'):
             pytest.skip('gzip is not installed')
             pytest.skip('gzip is not installed')
         self.create_test_files()
         self.create_test_files()
         os.unlink('input/flagfile')
         os.unlink('input/flagfile')
         self.cmd('init', '--encryption=none', self.repository_location)
         self.cmd('init', '--encryption=none', self.repository_location)
         self.cmd('create', self.repository_location + '::src', 'input')
         self.cmd('create', self.repository_location + '::src', 'input')
-        self.cmd('export-tar', self.repository_location + '::src', 'simple.tgz')
+        self.cmd('export-tar', self.repository_location + '::src', 'simple.tgz', f'--tar-format={tar_format}')
         self.cmd('import-tar', self.repository_location + '::dst', 'simple.tgz')
         self.cmd('import-tar', self.repository_location + '::dst', 'simple.tgz')
         with changedir(self.output_path):
         with changedir(self.output_path):
             self.cmd('extract', self.repository_location + '::dst')
             self.cmd('extract', self.repository_location + '::dst')