Parcourir la source

import/export-tar: --tar-format=BORG: roundtrip ALL item metadata, fixes #5830

export-tar: just msgpack and b64encode all item metadata and
            put that into a BORG specific PAX header.
            this is *additional* to the standard tar metadata.

import-tar: when detecting the BORG specific PAX header, just get
            all metadata from there (and ignore the standard tar
            metadata).
Thomas Waldmann il y a 3 ans
Parent
commit
e8069a8f80
4 fichiers modifiés avec 115 ajouts et 41 suppressions
  1. 40 6
      docs/usage/tar.rst
  2. 21 14
      src/borg/archive.py
  3. 44 21
      src/borg/archiver.py
  4. 10 0
      src/borg/testsuite/archiver.py

+ 40 - 6
docs/usage/tar.rst

@@ -1,7 +1,7 @@
-.. include:: import-tar.rst.inc
-
 .. include:: export-tar.rst.inc
 
+.. include:: import-tar.rst.inc
+
 Examples
 ~~~~~~~~
 ::
@@ -9,15 +9,49 @@ Examples
     # export as uncompressed tar
     $ borg export-tar /path/to/repo::Monday Monday.tar
 
-    # exclude some types, compress using gzip
+    # import an uncompressed tar
+    $ borg import-tar /path/to/repo::Monday Monday.tar
+
+    # exclude some file types, compress using gzip
     $ borg export-tar /path/to/repo::Monday Monday.tar.gz --exclude '*.so'
 
     # use higher compression level with gzip
-    $ borg export-tar --tar-filter="gzip -9" testrepo::linux Monday.tar.gz
+    $ borg export-tar --tar-filter="gzip -9" repo::Monday Monday.tar.gz
 
-    # export a tar, but instead of storing it on disk,
-    # upload it to a remote site using curl.
+    # copy an archive from repoA to repoB
+    $ borg export-tar --tar-format=BORG repoA::archive - | borg import-tar repoB::archive -
+
+    # export a tar, but instead of storing it on disk, upload it to a remote site using curl
     $ borg export-tar /path/to/repo::Monday - | curl --data-binary @- https://somewhere/to/POST
 
     # remote extraction via "tarpipe"
     $ borg export-tar /path/to/repo::Monday - | ssh somewhere "cd extracted; tar x"
+
+Archives transfer script
+~~~~~~~~~~~~~~~~~~~~~~~~
+
+Outputs a script that copies all archives from repo1 to repo2:
+
+::
+
+    for A T in `borg list --format='{archive} {time:%Y-%m-%dT%H:%M:%S}{LF}' repo1`
+    do
+      echo "borg export-tar --tar-format=BORG repo1::$A - | borg import-tar --timestamp=$T repo2::$A -"
+    done
+
+Kept:
+
+- archive name, archive timestamp
+- archive contents (all items with metadata and data)
+
+Lost:
+
+- some archive metadata (like the original commandline, execution time, etc.)
+
+Please note:
+
+- all data goes over that pipe, again and again for every archive
+- the pipe is dumb, there is no data or transfer time reduction there due to deduplication
+- maybe add compression
+- pipe over ssh for remote transfer
+- no special sparse file support

+ 21 - 14
src/borg/archive.py

@@ -1,3 +1,4 @@
+import base64
 import json
 import os
 import socket
@@ -1445,20 +1446,26 @@ class TarfileObjectProcessors:
 
     @contextmanager
     def create_helper(self, tarinfo, status=None, type=None):
-        def s_to_ns(s):
-            return safe_ns(int(float(s) * 1e9))
-
-        item = Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type,
-                    uid=tarinfo.uid, gid=tarinfo.gid, user=tarinfo.uname or None, group=tarinfo.gname or None,
-                    mtime=s_to_ns(tarinfo.mtime))
-        if tarinfo.pax_headers:
-            ph = tarinfo.pax_headers
-            # note: for mtime this is a bit redundant as it is already done by tarfile module,
-            #       but we just do it in our way to be consistent for sure.
-            for name in 'atime', 'ctime', 'mtime':
-                if name in ph:
-                    ns = s_to_ns(ph[name])
-                    setattr(item, name, ns)
+        ph = tarinfo.pax_headers
+        if ph and 'BORG.item.version' in ph:
+            assert ph['BORG.item.version'] == '1'
+            meta_bin = base64.b64decode(ph['BORG.item.meta'])
+            meta_dict = msgpack.unpackb(meta_bin, object_hook=StableDict)
+            item = Item(internal_dict=meta_dict)
+        else:
+            def s_to_ns(s):
+                return safe_ns(int(float(s) * 1e9))
+
+            item = Item(path=make_path_safe(tarinfo.name), mode=tarinfo.mode | type,
+                        uid=tarinfo.uid, gid=tarinfo.gid, user=tarinfo.uname or None, group=tarinfo.gname or None,
+                        mtime=s_to_ns(tarinfo.mtime))
+            if ph:
+                # note: for mtime this is a bit redundant as it is already done by tarfile module,
+                #       but we just do it in our way to be consistent for sure.
+                for name in 'atime', 'ctime', 'mtime':
+                    if name in ph:
+                        ns = s_to_ns(ph[name])
+                        setattr(item, name, ns)
         yield item, status
         # if we get here, "with"-block worked ok without error/exception, the item was processed ok...
         self.add_item(item, stats=self.stats)

+ 44 - 21
src/borg/archiver.py

@@ -5,6 +5,7 @@ import traceback
 
 try:
     import argparse
+    import base64
     import collections
     import configparser
     import faulthandler
@@ -1137,7 +1138,7 @@ class Archiver:
 
         # The | (pipe) symbol instructs tarfile to use a streaming mode of operation
         # where it never seeks on the passed fileobj.
-        tar_format = dict(GNU=tarfile.GNU_FORMAT, PAX=tarfile.PAX_FORMAT)[args.tar_format]
+        tar_format = dict(GNU=tarfile.GNU_FORMAT, PAX=tarfile.PAX_FORMAT, BORG=tarfile.PAX_FORMAT)[args.tar_format]
         tar = tarfile.open(fileobj=tarstream, mode='w|', format=tar_format)
 
         if progress:
@@ -1230,15 +1231,24 @@ class Archiver:
                 return None, stream
             return tarinfo, stream
 
-        def item_to_paxheaders(item):
+        def item_to_paxheaders(format, item):
             """
             Transform (parts of) a Borg *item* into a pax_headers dict.
             """
+            # PAX format
+            # ----------
             # When using the PAX (POSIX) format, we can support some things that aren't possible
             # with classic tar formats, including GNU tar, such as:
             # - atime, ctime (DONE)
             # - possibly Linux capabilities, security.* xattrs (TODO)
             # - various additions supported by GNU tar in POSIX mode (TODO)
+            #
+            # BORG format
+            # -----------
+            # This is based on PAX, but additionally adds BORG.* pax headers.
+            # In addition to the standard tar / PAX metadata and data, it transfers
+            # ALL borg item metadata in a BORG specific way.
+            #
             ph = {}
             # note: for mtime this is a bit redundant as it is already done by tarfile module,
             #       but we just do it in our way to be consistent for sure.
@@ -1246,6 +1256,12 @@ class Archiver:
                 if hasattr(item, name):
                     ns = getattr(item, name)
                     ph[name] = str(ns / 1e9)
+            if format == 'BORG':  # BORG format additions
+                ph['BORG.item.version'] = '1'
+                # BORG.item.meta - just serialize all metadata we have:
+                meta_bin = msgpack.packb(item.as_dict())
+                meta_text = base64.b64encode(meta_bin).decode()
+                ph['BORG.item.meta'] = meta_text
             return ph
 
         for item in archive.iter_items(filter, partial_extract=partial_extract,
@@ -1255,8 +1271,8 @@ class Archiver:
                 item.path = os.sep.join(orig_path.split(os.sep)[strip_components:])
             tarinfo, stream = item_to_tarinfo(item, orig_path)
             if tarinfo:
-                if args.tar_format == 'PAX':
-                    tarinfo.pax_headers = item_to_paxheaders(item)
+                if args.tar_format in ('BORG', 'PAX'):
+                    tarinfo.pax_headers = item_to_paxheaders(args.tar_format, item)
                 if output_list:
                     logging.getLogger('borg.output.list').info(remove_surrogates(orig_path))
                 tar.addfile(tarinfo, stream)
@@ -4057,15 +4073,18 @@ class Archiver:
         read the uncompressed tar stream from stdin and write a compressed/filtered
         tar stream to stdout.
 
-        Depending on the ```-tar-format``option, the generated tarball uses this format:
+        Depending on the ``--tar-format`` option, these formats are created:
 
-        - PAX: POSIX.1-2001 (pax) format
-        - GNU: GNU tar format
-
-        export-tar is a lossy conversion:
-        BSD flags, ACLs, extended attributes (xattrs), atime and ctime are not exported.
-        Timestamp resolution is limited to whole seconds, not the nanosecond resolution
-        otherwise supported by Borg.
+        +--------------+---------------------------+----------------------------+
+        | --tar-format | Specification             | Metadata                   |
+        +--------------+---------------------------+----------------------------+
+        | BORG         | BORG specific, like PAX   | all as supported by borg   |
+        +--------------+---------------------------+----------------------------+
+        | PAX          | POSIX.1-2001 (pax) format | GNU + atime/ctime/mtime ns |
+        +--------------+---------------------------+----------------------------+
+        | GNU          | GNU tar format            | mtime s, no atime/ctime,   |
+        |              |                           | no ACLs/xattrs/bsdflags    |
+        +--------------+---------------------------+----------------------------+
 
         A ``--sparse`` option (as found in borg extract) is not supported.
 
@@ -4089,8 +4108,8 @@ class Archiver:
         subparser.add_argument('--list', dest='output_list', action='store_true',
                                help='output verbose list of items (files, dirs, ...)')
         subparser.add_argument('--tar-format', metavar='FMT', dest='tar_format', default='GNU',
-                               choices=('PAX', 'GNU'),
-                               help='select tar format: PAX or GNU')
+                               choices=('BORG', 'PAX', 'GNU'),
+                               help='select tar format: BORG, PAX or GNU')
         subparser.add_argument('location', metavar='ARCHIVE',
                                type=location_validator(archive=True),
                                help='archive to export')
@@ -4939,15 +4958,19 @@ class Archiver:
         Most documentation of borg create applies. Note that this command does not
         support excluding files.
 
-        import-tar is a lossy conversion:
-        BSD flags, ACLs, extended attributes (xattrs), atime and ctime are not exported.
-        Timestamp resolution is limited to whole seconds, not the nanosecond resolution
-        otherwise supported by Borg.
-
         A ``--sparse`` option (as found in borg create) is not supported.
 
-        import-tar reads POSIX.1-1988 (ustar), POSIX.1-2001 (pax), GNU tar, UNIX V7 tar
-        and SunOS tar with extended attributes.
+        For details on tar formats and metadata preservation or loss, please see ``borg export-tar``.
+
+        import-tar reads these tar formats:
+
+        - BORG: borg specific (PAX-based)
+        - PAX: POSIX.1-2001
+        - GNU: GNU tar
+        - POSIX.1-1988 (ustar)
+        - UNIX V7 tar
+        - SunOS tar with extended attributes
+
         """)
         subparser = subparsers.add_parser('import-tar', parents=[common_parser], add_help=False,
                                           description=self.do_import_tar.__doc__,

+ 10 - 0
src/borg/testsuite/archiver.py

@@ -3499,6 +3499,16 @@ id: 2 / e29442 3506da 4e1ea7 / 25f62a 5a3d41 - 02
             self.cmd('extract', self.repository_location + '::dst')
         self.assert_dirs_equal('input', 'output/input', ignore_ns=True, ignore_xattrs=True)
 
+    def test_roundtrip_pax_borg(self):
+        self.create_test_files()
+        self.cmd('init', '--encryption=none', self.repository_location)
+        self.cmd('create', self.repository_location + '::src', 'input')
+        self.cmd('export-tar', self.repository_location + '::src', 'simple.tar', '--tar-format=BORG')
+        self.cmd('import-tar', self.repository_location + '::dst', 'simple.tar')
+        with changedir(self.output_path):
+            self.cmd('extract', self.repository_location + '::dst')
+        self.assert_dirs_equal('input', 'output/input')
+
     # derived from test_extract_xattrs_errors()
     @pytest.mark.skipif(not xattr.XATTR_FAKEROOT, reason='xattr not supported on this system or on this version of'
                                                          'fakeroot')