浏览代码

ArchiveItem.cmdline list-of-str -> .command_line str, fixes #7246

The same change applies to .recreate_cmdline -> .recreate_command_line.

JSON output key "command_line":
borg 1.x: sys.argv [list of str]
borg 2: shlex.join(sys.argv) [str]
Thomas Waldmann 2 年之前
父节点
当前提交
bf667170a7

+ 1 - 1
docs/internals/data-structures.rst

@@ -542,7 +542,7 @@ The archive object itself further contains some metadata:
   in the manifest, but leaves the *name* field of the archives as it was.
 * *item_ptrs*, a list of "pointer chunk" IDs.
   Each "pointer chunk" contains a list of chunk IDs of item metadata.
-* *cmdline*, the command line which was used to create the archive
+* *command_line*, the command line which was used to create the archive
 * *hostname*
 * *username*
 * *time* and *time_end* are the start and end timestamps, respectively

+ 9 - 9
src/borg/archive.py

@@ -32,7 +32,7 @@ from .helpers import Error, IntegrityError, set_ec
 from .platform import uid2user, user2uid, gid2group, group2gid
 from .helpers import parse_timestamp, archive_ts_now
 from .helpers import OutputTimestamp, format_timedelta, format_file_size, file_status, FileSize
-from .helpers import safe_encode, make_path_safe, remove_surrogates, text_to_json
+from .helpers import safe_encode, make_path_safe, remove_surrogates, text_to_json, join_cmd
 from .helpers import StableDict
 from .helpers import bin_to_hex
 from .helpers import safe_ns
@@ -597,11 +597,11 @@ class Archive:
             "stats": stats.as_dict(),
         }
         if self.create:
-            info["command_line"] = sys.argv
+            info["command_line"] = join_cmd(sys.argv)
         else:
             info.update(
                 {
-                    "command_line": self.metadata.cmdline,
+                    "command_line": self.metadata.command_line,
                     "hostname": self.metadata.hostname,
                     "username": self.metadata.username,
                     "comment": self.metadata.get("comment", ""),
@@ -676,7 +676,7 @@ Duration: {0.duration}
             "name": name,
             "comment": comment or "",
             "item_ptrs": item_ptrs,  # see #1473
-            "cmdline": sys.argv,
+            "command_line": join_cmd(sys.argv),
             "hostname": hostname,
             "username": getuser(),
             "time": start.isoformat(timespec="microseconds"),
@@ -1902,7 +1902,7 @@ class ArchiveChecker:
                 continue
             if not valid_msgpacked_dict(data, archive_keys_serialized):
                 continue
-            if b"cmdline" not in data or b"\xa7version\x02" not in data:
+            if b"command_line" not in data or b"\xa7version\x02" not in data:
                 continue
             try:
                 archive = msgpack.unpackb(data)
@@ -2360,15 +2360,15 @@ class ArchiveRecreater:
             additional_metadata = {
                 "time": archive.metadata.time,
                 "time_end": archive.metadata.get("time_end") or archive.metadata.time,
-                "cmdline": archive.metadata.cmdline,
+                "command_line": archive.metadata.command_line,
                 # but also remember recreate metadata:
-                "recreate_cmdline": sys.argv,
+                "recreate_command_line": join_cmd(sys.argv),
             }
         else:
             additional_metadata = {
-                "cmdline": archive.metadata.cmdline,
+                "command_line": archive.metadata.command_line,
                 # but also remember recreate metadata:
-                "recreate_cmdline": sys.argv,
+                "recreate_command_line": join_cmd(sys.argv),
             }
 
         target.save(comment=comment, timestamp=self.timestamp, additional_metadata=additional_metadata)

+ 1 - 6
src/borg/archiver/info_cmd.py

@@ -1,12 +1,11 @@
 import argparse
-import shlex
 import textwrap
 from datetime import timedelta
 
 from ._common import with_repository
 from ..archive import Archive
 from ..constants import *  # NOQA
-from ..helpers import remove_surrogates, format_timedelta, json_print, basic_json_data
+from ..helpers import format_timedelta, json_print, basic_json_data
 from ..manifest import Manifest
 
 from ..logger import create_logger
@@ -19,9 +18,6 @@ class InfoMixIn:
     def do_info(self, args, repository, manifest, cache):
         """Show archive details such as disk space used"""
 
-        def format_cmdline(cmdline):
-            return remove_surrogates(" ".join(shlex.quote(x) for x in cmdline))
-
         args.consider_checkpoints = True
         archive_names = tuple(x.name for x in manifest.archives.list_considering(args))
 
@@ -36,7 +32,6 @@ class InfoMixIn:
                 output_data.append(info)
             else:
                 info["duration"] = format_timedelta(timedelta(seconds=info["duration"]))
-                info["command_line"] = format_cmdline(info["command_line"])
                 print(
                     textwrap.dedent(
                         """

+ 4 - 3
src/borg/constants.py

@@ -11,17 +11,18 @@ REQUIRED_ITEM_KEYS = frozenset(["path", "mtime"])
 
 # this set must be kept complete, otherwise rebuild_manifest might malfunction:
 # fmt: off
-ARCHIVE_KEYS = frozenset(['version', 'name', 'cmdline', 'hostname', 'username', 'time', 'time_end',
+ARCHIVE_KEYS = frozenset(['version', 'name', 'hostname', 'username', 'time', 'time_end',
                           'items',  # legacy v1 archives
                           'item_ptrs',  # v2+ archives
                           'comment', 'chunker_params',
-                          'recreate_cmdline',
+                          'command_line', 'recreate_command_line',  # v2+ archives
+                          'cmdline', 'recreate_cmdline',  # legacy
                           'recreate_source_id', 'recreate_args', 'recreate_partial_chunks',  # used in 1.1.0b1 .. b2
                           'size', 'nfiles', 'size_parts', 'nfiles_parts'])
 # fmt: on
 
 # this is the set of keys that are always present in archives:
-REQUIRED_ARCHIVE_KEYS = frozenset(["version", "name", "item_ptrs", "cmdline", "time"])
+REQUIRED_ARCHIVE_KEYS = frozenset(["version", "name", "item_ptrs", "command_line", "time"])
 
 # default umask, overridden by --umask, defaults to read/write only for owner
 UMASK_DEFAULT = 0o077

+ 1 - 1
src/borg/helpers/__init__.py

@@ -19,7 +19,7 @@ from .fs import HardLinkManager
 from .misc import sysinfo, log_multi, consume
 from .misc import ChunkIteratorFileWrapper, open_item, chunkit, iter_separated, ErrorIgnoringTextIOWrapper
 from .parseformat import bin_to_hex, safe_encode, safe_decode
-from .parseformat import text_to_json, binary_to_json, remove_surrogates
+from .parseformat import text_to_json, binary_to_json, remove_surrogates, join_cmd
 from .parseformat import eval_escapes, decode_dict, positive_int_validator, interval
 from .parseformat import SortBySpec, ChunkerParams, FilesCacheMode, partial_format, DatetimeWrapper
 from .parseformat import format_file_size, parse_file_size, FileSize, parse_storage_quota

+ 8 - 10
src/borg/helpers/parseformat.py

@@ -85,6 +85,11 @@ def text_to_json(key, value):
     return data
 
 
+def join_cmd(argv, rs=False):
+    cmd = shlex.join(argv)
+    return remove_surrogates(cmd) if rs else cmd
+
+
 def eval_escapes(s):
     """Evaluate literal escape sequences in a string (eg `\\n` -> `\n`)."""
     return s.encode("ascii", "backslashreplace").decode("unicode-escape")
@@ -717,8 +722,8 @@ class ArchiveFormatter(BaseFormatter):
             "hostname": partial(self.get_meta, "hostname"),
             "username": partial(self.get_meta, "username"),
             "comment": partial(self.get_meta, "comment"),
+            "command_line": partial(self.get_meta, "command_line"),
             "end": self.get_ts_end,
-            "command_line": self.get_cmdline,
         }
         self.used_call_keys = set(self.call_keys) & self.format_keys
         if self.json:
@@ -748,8 +753,8 @@ class ArchiveFormatter(BaseFormatter):
             item_data[key] = self.call_keys[key]()
 
         # Note: name and comment are validated, should never contain surrogate escapes.
-        # But unsure whether hostname, username could contain surrogate escapes, play safe:
-        for key in "hostname", "username":
+        # But unsure whether hostname, username, command_line could contain surrogate escapes, play safe:
+        for key in "hostname", "username", "command_line":
             if key in item_data:
                 item_data.update(text_to_json(key, item_data[key]))
         return item_data
@@ -766,13 +771,6 @@ class ArchiveFormatter(BaseFormatter):
     def get_meta(self, key):
         return self.archive.metadata.get(key, "")
 
-    def get_cmdline(self):
-        cmdline = map(remove_surrogates, self.archive.metadata.get("cmdline", []))
-        if self.json:
-            return list(cmdline)
-        else:
-            return " ".join(map(shlex.quote, cmdline))
-
     def get_ts_end(self):
         return self.format_time(self.archive.ts_end)
 

+ 7 - 3
src/borg/item.pyx

@@ -500,14 +500,16 @@ cdef class ArchiveItem(PropDict):
     name = PropDictProperty(str, 'surrogate-escaped str')
     items = PropDictProperty(list)  # list of chunk ids of item metadata stream (only in memory)
     item_ptrs = PropDictProperty(list)  # list of blocks with list of chunk ids of ims, arch v2
-    cmdline = PropDictProperty(list)  # list of s-e-str
+    cmdline = PropDictProperty(list)  # legacy, list of s-e-str
+    command_line = PropDictProperty(str, 'surrogate-escaped str')
     hostname = PropDictProperty(str, 'surrogate-escaped str')
     username = PropDictProperty(str, 'surrogate-escaped str')
     time = PropDictProperty(str)
     time_end = PropDictProperty(str)
     comment = PropDictProperty(str, 'surrogate-escaped str')
     chunker_params = PropDictProperty(tuple)
-    recreate_cmdline = PropDictProperty(list)  # list of s-e-str
+    recreate_cmdline = PropDictProperty(list)  # legacy, list of s-e-str
+    recreate_command_line = PropDictProperty(str, 'surrogate-escaped str')
     # recreate_source_id, recreate_args, recreate_partial_chunks were used in 1.1.0b1 .. b2
     recreate_source_id = PropDictProperty(bytes)
     recreate_args = PropDictProperty(list)  # list of s-e-str
@@ -529,7 +531,9 @@ cdef class ArchiveItem(PropDict):
                 v = fix_str_value(d, k, 'replace')
             if k == 'chunker_params':
                 v = fix_tuple_of_str_and_int(v)
-            if k in ('cmdline', 'recreate_cmdline'):
+            if k in ('command_line', 'recreate_command_line'):
+                v = fix_str_value(d, k)
+            if k in ('cmdline', 'recreate_cmdline'):  # legacy
                 v = fix_list_of_str(v)
             if k == 'items':  # legacy
                 v = fix_list_of_bytes(v)

+ 1 - 1
src/borg/testsuite/archiver/check_cmd.py

@@ -177,7 +177,7 @@ class ArchiverCheckTestCase(ArchiverTestCaseBase):
 
             archive = msgpack.packb(
                 {
-                    "cmdline": [],
+                    "command_line": "",
                     "item_ptrs": [],
                     "hostname": "foo",
                     "username": "bar",

+ 1 - 1
src/borg/testsuite/archiver/create_cmd.py

@@ -692,7 +692,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
 
         archive = create_info["archive"]
         assert archive["name"] == "test"
-        assert isinstance(archive["command_line"], list)
+        assert isinstance(archive["command_line"], str)
         assert isinstance(archive["duration"], float)
         assert len(archive["id"]) == 64
         assert "stats" in archive

+ 1 - 1
src/borg/testsuite/archiver/info_cmd.py

@@ -33,7 +33,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         assert len(archives) == 1
         archive = archives[0]
         assert archive["name"] == "test"
-        assert isinstance(archive["command_line"], list)
+        assert isinstance(archive["command_line"], str)
         assert isinstance(archive["duration"], float)
         assert len(archive["id"]) == 64
         assert "stats" in archive

+ 10 - 4
src/borg/upgrade.py

@@ -2,7 +2,7 @@ from struct import Struct
 
 from .constants import REQUIRED_ITEM_KEYS, CH_BUZHASH
 from .compress import ZLIB, ZLIB_legacy, ObfuscateSize
-from .helpers import HardLinkManager
+from .helpers import HardLinkManager, join_cmd
 from .item import Item
 from .logger import create_logger
 
@@ -26,14 +26,14 @@ class UpgraderNoOp:
         new_metadata = {}
         # keep all metadata except archive version and stats.
         for attr in (
-            "cmdline",
+            "command_line",
             "hostname",
             "username",
             "time",
             "time_end",
             "comment",
             "chunker_params",
-            "recreate_cmdline",
+            "recreate_command_line",
         ):
             if hasattr(metadata, attr):
                 new_metadata[attr] = getattr(metadata, attr)
@@ -144,7 +144,7 @@ class UpgraderFrom12To20:
         new_metadata = {}
         # keep all metadata except archive version and stats. also do not keep
         # recreate_source_id, recreate_args, recreate_partial_chunks which were used only in 1.1.0b1 .. b2.
-        for attr in ("cmdline", "hostname", "username", "comment", "chunker_params", "recreate_cmdline"):
+        for attr in ("hostname", "username", "comment", "chunker_params"):
             if hasattr(metadata, attr):
                 new_metadata[attr] = getattr(metadata, attr)
         if chunker_params := new_metadata.get("chunker_params"):
@@ -155,4 +155,10 @@ class UpgraderFrom12To20:
         for attr in ("time", "time_end"):
             if hasattr(metadata, attr):
                 new_metadata[attr] = getattr(metadata, attr) + "+00:00"
+        # borg 1: cmdline, recreate_cmdline: a copy of sys.argv
+        # borg 2: command_line, recreate_command_line: a single string
+        if hasattr(metadata, "cmdline"):
+            new_metadata["command_line"] = join_cmd(getattr(metadata, "cmdline"))
+        if hasattr(metadata, "recreate_cmdline"):
+            new_metadata["recreate_command_line"] = join_cmd(getattr(metadata, "recreate_cmdline"))
         return new_metadata