Browse Source

archive names: validate more strictly, fixes #2290

we want to be able to use an archive name as a directory name,
e.g. for the FUSE fs built by borg mount.

thus we can not allow "/" in an archive name on linux.

on windows, the rules are more restrictive, disallowing
quite a few more characters (':<>"|*?' plus some more).
we do not have FUSE fs / borg mount on windows yet, but
we had better avoid any issues.
we can not disallow ":" though, as our {now} placeholder
generates ISO-8601 timestamps, which include ":" chars.

also, we do not want to have leading/trailing blanks in
archive names, nor surrogate-escapes.

control chars are also disallowed, including chr(0).
we have a python str here, thus chr(0) is not expected in there
(it is not used to terminate a string, like it is in C).
Thomas Waldmann 2 years ago
parent
commit
fe2b2bc007
2 changed files with 71 additions and 2 deletions
  1. 27 2
      src/borg/helpers/parseformat.py
  2. 44 0
      src/borg/testsuite/helpers.py

+ 27 - 2
src/borg/helpers/parseformat.py

@@ -540,9 +540,34 @@ def location_validator(proto=None, other=False):
 
 def archivename_validator():
     def validator(text):
+        assert isinstance(text, str)
+        # we make sure that the archive name can be used as directory name (for borg mount)
         text = replace_placeholders(text)
-        if "/" in text or "::" in text or not text:
-            raise argparse.ArgumentTypeError('Invalid archive name: "%s"' % text)
+        MAX_PATH = 260  # Windows default. Since Win10, there is a registry setting LongPathsEnabled to get more.
+        MAX_DIRNAME = MAX_PATH - len("12345678.123")
+        SAFETY_MARGIN = 48  # borgfs path: mountpoint / archivename / dir / dir / ... / file
+        MAX_ARCHIVENAME = MAX_DIRNAME - SAFETY_MARGIN
+        if not (0 < len(text) <= MAX_ARCHIVENAME):
+            raise argparse.ArgumentTypeError(f'Invalid archive name: "{text}" [0 < length <= {MAX_ARCHIVENAME}]')
+        # note: ":" is also a invalid path char on windows, but we can not blacklist it,
+        # because e.g. our {now} placeholder creates ISO-8601 like output like 2022-12-10T20:47:42 .
+        invalid_chars = r"/" + r"\"<|>?*"  # posix + windows
+        if re.search(f"[{re.escape(invalid_chars)}]", text):
+            raise argparse.ArgumentTypeError(
+                f'Invalid archive name: "{text}" [invalid chars detected matching "{invalid_chars}"]'
+            )
+        invalid_ctrl_chars = "".join(chr(i) for i in range(32))
+        if re.search(f"[{re.escape(invalid_ctrl_chars)}]", text):
+            raise argparse.ArgumentTypeError(
+                f'Invalid archive name: "{text}" [invalid control chars detected, ASCII < 32]'
+            )
+        if text.startswith(" ") or text.endswith(" "):
+            raise argparse.ArgumentTypeError(f'Invalid archive name: "{text}" [leading or trailing blanks]')
+        try:
+            text.encode("utf-8", errors="strict")
+        except UnicodeEncodeError:
+            # looks like text contains surrogate-escapes
+            raise argparse.ArgumentTypeError(f'Invalid archive name: "{text}" [contains non-unicode characters]')
         return text
 
     return validator

+ 44 - 0
src/borg/testsuite/helpers.py

@@ -32,6 +32,7 @@ from ..helpers import msgpack
 from ..helpers import yes, TRUISH, FALSISH, DEFAULTISH
 from ..helpers import StableDict, bin_to_hex
 from ..helpers import parse_timestamp, ChunkIteratorFileWrapper, ChunkerParams
+from ..helpers import archivename_validator
 from ..helpers import ProgressIndicatorPercent, ProgressIndicatorEndless
 from ..helpers import swidth_slice
 from ..helpers import chunkit
@@ -246,6 +247,49 @@ class TestLocationWithoutEnv:
             Location("ssh://user@host:/path")
 
 
+@pytest.mark.parametrize(
+    "name",
+    [
+        "foobar",
+        # placeholders
+        "foobar-{now}",
+    ],
+)
+def test_archivename_ok(name):
+    av = archivename_validator()
+    av(name)  # must not raise an exception
+
+
+@pytest.mark.parametrize(
+    "name",
+    [
+        "",  # too short
+        "x" * 201,  # too long
+        # invalid chars:
+        "foo/bar",
+        "foo\\bar",
+        ">foo",
+        "<foo",
+        "|foo",
+        'foo"bar',
+        "foo?",
+        "*bar",
+        "foo\nbar",
+        "foo\0bar",
+        # leading/trailing blanks
+        " foo",
+        "bar  ",
+        # contains surrogate-escapes
+        "foo\udc80bar",
+        "foo\udcffbar",
+    ],
+)
+def test_archivename_invalid(name):
+    av = archivename_validator()
+    with pytest.raises(ArgumentTypeError):
+        av(name)
+
+
 class FormatTimedeltaTestCase(BaseTestCase):
     def test(self):
         t0 = datetime(2001, 1, 1, 10, 20, 3, 0)