Browse Source

Sanitize paths during archive creation/extraction/...

Paths are not always sanitized when creating an archive and,
more importantly, never when extracting one. The following example
shows how this can be used to attempt to write a file outside the
extraction directory:

$ echo abcdef | borg create -r ~/borg/a --stdin-name x/../../../../../etc/shadow archive-1 -
$ borg list -r ~/borg/a archive-1
-rw-rw---- root   root          7 Sun, 2022-10-23 19:14:27  x/../../../../../etc/shadow
$ mkdir borg/target
$ cd borg/target
$ borg extract -r ~/borg/a archive-1
x/../../../../../etc/shadow: makedirs: [Errno 13] Permission denied: '/home/user/borg/target/x/../../../../../etc'

Note that Borg tries to extract the file to /etc/shadow and the
permission error is a result of the user not having access.

This patch ensures file names are sanitized before archiving.
As for files extracted from the archive, paths are sanitized
by making all paths relative, removing '.' elements, and removing
superfluous slashes (as in '//'). '..' elements, however, are
rejected outright. The reasoning here is that it is easy to start
a path with './' or insert a '//' by accident (e.g. via --stdin-name
or import-tar). '..', however, seem unlikely to be the result
of an accident and could indicate a tampered repository.

With paths being sanitized as they are being read, this "errors"
will be corrected during the `borg transfer` required when upgrading
to Borg 2. Hence, the sanitation, when reading the archive,
can be removed once support for reading v1 repositories is dropped.
V2 repository will not contain non-sanitized paths. Of course,
a check for absolute paths and '..' elements needs to kept in
place to detect tempered archives.

I recommend treating this as a security issue. I see the following
cases where extracting a file outside the extraction path could
constitute a security risk:

a) When extraction is done as a different user than archive
creation. The user that created the archive may be able to
get a file overwritten as a different user.
b) When the archive is created on one host and extracted on
another. The user that created the archive may be able to
get a file overwritten on another host.
c) When an archive is created and extracted after a OS reinstall.
When a host is suspected compromised, it is common to reinstall
(or set up a new machine), extract the backups and then evaluate
their integrity. A user that manipulates the archive before such
a reinstall may be able to get a file overwritten outside the
extraction path and may evade integrity checks.

Notably absent is the creation and extraction on the same host as
the same user. In such case, an adversary must be assumed to be able
to replace any file directly.

This also (partially) fixes #7099.
Peter Gerber 2 năm trước cách đây
mục cha
commit
438cf2e7ef

+ 3 - 5
src/borg/archive.py

@@ -32,7 +32,7 @@ from .helpers import Error, IntegrityError, set_ec
 from .platform import uid2user, user2uid, gid2group, group2gid
 from .helpers import parse_timestamp, archive_ts_now
 from .helpers import OutputTimestamp, format_timedelta, format_file_size, file_status, FileSize
-from .helpers import safe_encode, make_path_safe, remove_surrogates, text_to_json, join_cmd
+from .helpers import safe_encode, make_path_safe, remove_surrogates, text_to_json, join_cmd, remove_dotdot_prefixes
 from .helpers import StableDict
 from .helpers import bin_to_hex
 from .helpers import safe_ns
@@ -853,8 +853,6 @@ Duration: {0.duration}
             return
 
         dest = self.cwd
-        if item.path.startswith(("/", "../")):
-            raise Exception("Path should be relative and local")
         path = os.path.join(dest, item.path)
         # Attempt to remove existing files, ignore errors on failure
         try:
@@ -1376,8 +1374,8 @@ class FilesystemObjectProcessors:
 
     @contextmanager
     def create_helper(self, path, st, status=None, hardlinkable=True):
-        safe_path = make_path_safe(path)
-        item = Item(path=safe_path)
+        sanitized_path = remove_dotdot_prefixes(path)
+        item = Item(path=sanitized_path)
         hardlinked = hardlinkable and st.st_nlink > 1
         hl_chunks = None
         update_map = False

+ 2 - 1
src/borg/archiver/create_cmd.py

@@ -27,6 +27,7 @@ from ..helpers import basic_json_data, json_print
 from ..helpers import flags_root, flags_dir, flags_special_follow, flags_special
 from ..helpers import sig_int, ignore_sigint
 from ..helpers import iter_separated
+from ..helpers import MakePathSafeAction
 from ..manifest import Manifest
 from ..patterns import PatternMatcher
 from ..platform import is_win32
@@ -766,7 +767,7 @@ class CreateMixIn:
             metavar="NAME",
             dest="stdin_name",
             default="stdin",
-            action=Highlander,
+            action=MakePathSafeAction,  # MakePathSafeAction maybe should subclass from Highlander.
             help="use NAME in archive for stdin data (default: %(default)r)",
         )
         subparser.add_argument(

+ 2 - 1
src/borg/helpers/__init__.py

@@ -13,7 +13,7 @@ from .datastruct import StableDict, Buffer, EfficientCollectionQueue
 from .errors import Error, ErrorWithTraceback, IntegrityError, DecompressionError
 from .fs import ensure_dir, join_base_dir, get_socket_filename
 from .fs import get_security_dir, get_keys_dir, get_base_dir, get_cache_dir, get_config_dir, get_runtime_dir
-from .fs import dir_is_tagged, dir_is_cachedir, make_path_safe, scandir_inorder
+from .fs import dir_is_tagged, dir_is_cachedir, remove_dotdot_prefixes, make_path_safe, scandir_inorder
 from .fs import secure_erase, safe_unlink, dash_open, os_open, os_stat, umount
 from .fs import O_, flags_root, flags_dir, flags_special_follow, flags_special, flags_base, flags_normal, flags_noatime
 from .fs import HardLinkManager
@@ -31,6 +31,7 @@ from .parseformat import location_validator, archivename_validator, comment_vali
 from .parseformat import BaseFormatter, ArchiveFormatter, ItemFormatter, file_status
 from .parseformat import swidth_slice, ellipsis_truncate
 from .parseformat import BorgJsonEncoder, basic_json_data, json_print, json_dump, prepare_dump_dict
+from .parseformat import MakePathSafeAction
 from .process import daemonize, daemonizing
 from .process import signal_handler, raising_signal_handler, sig_int, ignore_sigint, SigHup, SigTerm
 from .process import popen_with_error_handling, is_terminal, prepare_subprocess_env, create_filter_process

+ 56 - 4
src/borg/helpers/fs.py

@@ -217,12 +217,64 @@ def dir_is_tagged(path, exclude_caches, exclude_if_present):
     return tag_names
 
 
-_safe_re = re.compile(r"^((\.\.)?/+)+")
+def make_path_safe(path):
+    """
+    Make path safe by making it relative and normalized.
 
+    `path` is sanitized by making it relative, removing
+    consecutive slashes (e.g. '//'), removing '.' elements,
+    and removing trailing slashes.
 
-def make_path_safe(path):
-    """Make path safe by making it relative and local"""
-    return _safe_re.sub("", path) or "."
+    For reasons of security, a ValueError is raised should
+    `path` contain any '..' elements.
+    """
+    path = path.lstrip("/")
+    if path.startswith("../") or "/../" in path or path.endswith("/..") or path == "..":
+        raise ValueError(f"unexpected '..' element in path {path!r}")
+    path = os.path.normpath(path)
+    return path
+
+
+_dotdot_re = re.compile(r"^(\.\./)+")
+
+
+def remove_dotdot_prefixes(path):
+    """
+    Remove '../'s at the beginning of `path`. Additionally,
+    the path is made relative.
+
+    `path` is expected to be normalized already (e.g. via `os.path.normpath()`).
+    """
+    path = path.lstrip("/")
+    path = _dotdot_re.sub("", path)
+    if path in ["", ".."]:
+        return "."
+    return path
+
+
+def assert_sanitized_path(path):
+    assert isinstance(path, str)
+    # `path` should have been sanitized earlier. Some features,
+    # like pattern matching rely on a sanitized path. As a
+    # precaution we check here again.
+    if make_path_safe(path) != path:
+        raise ValueError(f"path {path!r} is not sanitized")
+    return path
+
+
+def to_sanitized_path(path):
+    assert isinstance(path, str)
+    # Legacy versions of Borg still allowed non-sanitized paths
+    # to be stored. So, we sanitize them when reading.
+    #
+    # Borg 2 ensures paths are safe before storing them. Thus, when
+    # support for reading Borg 1 archives is dropped, this should be
+    # changed to a simple check to verify paths aren't malicious.
+    # Namely, absolute paths and paths containing '..' elements must
+    # be rejected.
+    #
+    # Also checks for '..' elements in `path` for reasons of security.
+    return make_path_safe(path)
 
 
 class HardLinkManager:

+ 13 - 2
src/borg/helpers/parseformat.py

@@ -19,7 +19,7 @@ from ..logger import create_logger
 logger = create_logger()
 
 from .errors import Error
-from .fs import get_keys_dir
+from .fs import get_keys_dir, make_path_safe
 from .msgpack import Timestamp
 from .time import OutputTimestamp, format_time, safe_timestamp
 from .. import __version__ as borg_version
@@ -840,7 +840,7 @@ class ItemFormatter(BaseFormatter):
 
         from ..item import Item
 
-        fake_item = Item(mode=0, path="", user="", group="", mtime=0, uid=0, gid=0)
+        fake_item = Item(mode=0, path="foo", user="", group="", mtime=0, uid=0, gid=0)
         formatter = cls(FakeArchive, "")
         keys = []
         keys.extend(formatter.call_keys.keys())
@@ -1147,3 +1147,14 @@ def prepare_dump_dict(d):
         return res
 
     return decode(d)
+
+
+class MakePathSafeAction(argparse.Action):
+    def __call__(self, parser, namespace, path, option_string=None):
+        try:
+            sanitized_path = make_path_safe(path)
+        except ValueError as e:
+            raise argparse.ArgumentError(self, e)
+        if sanitized_path == ".":
+            raise argparse.ArgumentError(self, f"{path!r} is not a valid file name")
+        setattr(namespace, self.dest, sanitized_path)

+ 2 - 1
src/borg/item.pyx

@@ -7,6 +7,7 @@ from cpython.bytes cimport PyBytes_AsStringAndSize
 from .constants import ITEM_KEYS, ARCHIVE_KEYS
 from .helpers import StableDict
 from .helpers import format_file_size
+from .helpers.fs import assert_sanitized_path, to_sanitized_path
 from .helpers.msgpack import timestamp_to_int, int_to_timestamp, Timestamp
 from .helpers.time import OutputTimestamp, safe_timestamp
 
@@ -262,7 +263,7 @@ cdef class Item(PropDict):
 
     # properties statically defined, so that IDEs can know their names:
 
-    path = PropDictProperty(str, 'surrogate-escaped str')
+    path = PropDictProperty(str, 'surrogate-escaped str', encode=assert_sanitized_path, decode=to_sanitized_path)
     source = PropDictProperty(str, 'surrogate-escaped str')  # legacy borg 1.x. borg 2: see .target
     target = PropDictProperty(str, 'surrogate-escaped str')
     user = PropDictProperty(str, 'surrogate-escaped str')

+ 19 - 0
src/borg/testsuite/__init__.py

@@ -23,6 +23,7 @@ from ..helpers import umount
 from ..helpers import EXIT_SUCCESS, EXIT_WARNING, EXIT_ERROR
 from .. import platform
 
+
 # Note: this is used by borg.selftest, do not use or import py.test functionality here.
 
 from ..fuse_impl import llfuse, has_pyfuse3, has_llfuse
@@ -61,6 +62,24 @@ def same_ts_ns(ts_ns1, ts_ns2):
     return diff_ts <= diff_max
 
 
+rejected_dotdot_paths = (
+    "..",
+    "../",
+    "../etc/shadow",
+    "/..",
+    "/../",
+    "/../etc",
+    "/../etc/",
+    "etc/..",
+    "/etc/..",
+    "/etc/../etc/shadow",
+    "//etc/..",
+    "etc//..",
+    "etc/..//",
+    "foo/../bar",
+)
+
+
 @contextmanager
 def unopened_tempfile():
     with tempfile.TemporaryDirectory() as tempdir:

+ 7 - 0
src/borg/testsuite/archive.py

@@ -8,6 +8,7 @@ from unittest.mock import Mock
 import pytest
 
 from . import BaseTestCase
+from . import rejected_dotdot_paths
 from ..crypto.key import PlaintextKey
 from ..archive import Archive, CacheChunkBuffer, RobustUnpacker, valid_msgpacked_dict, ITEM_KEYS, Statistics
 from ..archive import BackupOSError, backup_io, backup_io_iter, get_item_uid_gid
@@ -394,3 +395,9 @@ def test_get_item_uid_gid():
     # as there is nothing, it'll fall back to uid_default/gid_default.
     assert uid == 0
     assert gid == 16
+
+
+def test_reject_non_sanitized_item():
+    for path in rejected_dotdot_paths:
+        with pytest.raises(ValueError, match="unexpected '..' element in path"):
+            Item(path=path, user="root", group="root")

+ 29 - 2
src/borg/testsuite/archiver/create_cmd.py

@@ -279,6 +279,33 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         assert "input/file2" not in out  # it skipped file2
         assert "input/file3" in out
 
+    def test_sanitized_stdin_name(self):
+        self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
+        self.cmd(f"--repo={self.repository_location}", "create", "--stdin-name", "./a//path", "test", "-", input=b"")
+        item = json.loads(self.cmd(f"--repo={self.repository_location}", "list", "test", "--json-lines"))
+        assert item["path"] == "a/path"
+
+    def test_dotdot_stdin_name(self):
+        self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
+        output = self.cmd(
+            f"--repo={self.repository_location}",
+            "create",
+            "--stdin-name",
+            "foo/../bar",
+            "test",
+            "-",
+            input=b"",
+            exit_code=2,
+        )
+        assert output.endswith("'..' element in path 'foo/../bar'" + os.linesep)
+
+    def test_dot_stdin_name(self):
+        self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
+        output = self.cmd(
+            f"--repo={self.repository_location}", "create", "--stdin-name", "./", "test", "-", input=b"", exit_code=2
+        )
+        assert output.endswith("'./' is not a valid file name" + os.linesep)
+
     def test_create_content_from_command(self):
         self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
         input_data = "some test content"
@@ -586,7 +613,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         )
         self._assert_test_keep_tagged()
 
-    def test_path_normalization(self):
+    def test_path_sanitation(self):
         self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
         self.create_regular_file("dir1/dir2/file", size=1024 * 80)
         with changedir("input/dir1/dir2"):
@@ -595,7 +622,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.assert_not_in("..", output)
         self.assert_in(" input/dir1/dir2/file", output)
 
-    def test_exclude_normalization(self):
+    def test_exclude_sanitation(self):
         self.cmd(f"--repo={self.repository_location}", "rcreate", RK_ENCRYPTION)
         self.create_regular_file("file1", size=1024 * 80)
         self.create_regular_file("file2", size=1024 * 80)

BIN
src/borg/testsuite/archiver/dotdot_path.tar


+ 23 - 0
src/borg/testsuite/archiver/tar_cmds.py

@@ -129,6 +129,29 @@ class ArchiverTestCase(ArchiverTestCaseBase):
             self.cmd(f"--repo={self.repository_location}", "extract", "dst")
         self.assert_dirs_equal("input", "output/input", ignore_ns=True, ignore_xattrs=True)
 
+    def test_import_unusual_tar(self):
+        # Contains these, unusual entries:
+        # /foobar
+        # ./bar
+        # ./foo2/
+        # ./foo//bar
+        # ./
+        tar_archive = os.path.join(os.path.dirname(__file__), "unusual_paths.tar")
+
+        self.cmd(f"--repo={self.repository_location}", "rcreate", "--encryption=none")
+        self.cmd(f"--repo={self.repository_location}", "import-tar", "dst", tar_archive)
+        files = self.cmd(f"--repo={self.repository_location}", "list", "dst", "--format", "{path}{NL}").splitlines()
+        self.assert_equal(set(files), {"foobar", "bar", "foo2", "foo/bar", "."})
+
+    def test_import_tar_with_dotdot(self):
+        # Contains this file:
+        # ../../../../etc/shadow
+        tar_archive = os.path.join(os.path.dirname(__file__), "dotdot_path.tar")
+
+        self.cmd(f"--repo={self.repository_location}", "rcreate", "--encryption=none")
+        with pytest.raises(ValueError, match="unexpected '..' element in path '../../../../etc/shadow'"):
+            self.cmd(f"--repo={self.repository_location}", "import-tar", "dst", tar_archive, exit_code=2)
+
     @requires_gzip
     def test_import_tar_gz(self, tar_format="GNU"):
         if not shutil.which("gzip"):

BIN
src/borg/testsuite/archiver/unusual_paths.tar


+ 30 - 8
src/borg/testsuite/helpers.py

@@ -25,7 +25,7 @@ from ..helpers import (
     PlaceholderError,
     replace_placeholders,
 )
-from ..helpers import make_path_safe, clean_lines
+from ..helpers import remove_dotdot_prefixes, make_path_safe, clean_lines
 from ..helpers import interval
 from ..helpers import get_base_dir, get_cache_dir, get_keys_dir, get_security_dir, get_config_dir, get_runtime_dir
 from ..helpers import is_slow_msgpack
@@ -48,6 +48,7 @@ from ..helpers.passphrase import Passphrase, PasswordRetriesExceeded
 from ..platform import is_cygwin, is_win32, is_darwin, swidth
 
 from . import BaseTestCase, FakeInputs, are_hardlinks_supported
+from . import rejected_dotdot_paths
 
 
 def test_bin_to_hex():
@@ -393,16 +394,37 @@ def test_chunkerparams():
         ChunkerParams("fixed,%d,%d" % (4096, MAX_DATA_SIZE + 1))  # too big header size
 
 
+class RemoveDotdotPrefixesTestCase(BaseTestCase):
+    def test(self):
+        self.assert_equal(remove_dotdot_prefixes("."), ".")
+        self.assert_equal(remove_dotdot_prefixes(".."), ".")
+        self.assert_equal(remove_dotdot_prefixes("/"), ".")
+        self.assert_equal(remove_dotdot_prefixes("//"), ".")
+        self.assert_equal(remove_dotdot_prefixes("foo"), "foo")
+        self.assert_equal(remove_dotdot_prefixes("foo/bar"), "foo/bar")
+        self.assert_equal(remove_dotdot_prefixes("/foo/bar"), "foo/bar")
+        self.assert_equal(remove_dotdot_prefixes("../foo/bar"), "foo/bar")
+
+
 class MakePathSafeTestCase(BaseTestCase):
     def test(self):
+        self.assert_equal(make_path_safe("."), ".")
+        self.assert_equal(make_path_safe("./"), ".")
+        self.assert_equal(make_path_safe("./foo"), "foo")
+        self.assert_equal(make_path_safe(".//foo"), "foo")
+        self.assert_equal(make_path_safe(".//foo//bar//"), "foo/bar")
         self.assert_equal(make_path_safe("/foo/bar"), "foo/bar")
-        self.assert_equal(make_path_safe("/foo/bar"), "foo/bar")
-        self.assert_equal(make_path_safe("/f/bar"), "f/bar")
-        self.assert_equal(make_path_safe("fo/bar"), "fo/bar")
-        self.assert_equal(make_path_safe("../foo/bar"), "foo/bar")
-        self.assert_equal(make_path_safe("../../foo/bar"), "foo/bar")
-        self.assert_equal(make_path_safe("/"), ".")
-        self.assert_equal(make_path_safe("/"), ".")
+        self.assert_equal(make_path_safe("//foo/bar"), "foo/bar")
+        self.assert_equal(make_path_safe("//foo/./bar"), "foo/bar")
+        self.assert_equal(make_path_safe(".test"), ".test")
+        self.assert_equal(make_path_safe(".test."), ".test.")
+        self.assert_equal(make_path_safe("..test.."), "..test..")
+        self.assert_equal(make_path_safe("/te..st/foo/bar"), "te..st/foo/bar")
+        self.assert_equal(make_path_safe("/..test../abc//"), "..test../abc")
+
+        for path in rejected_dotdot_paths:
+            with pytest.raises(ValueError, match="unexpected '..' element in path"):
+                make_path_safe(path)
 
 
 class MockArchive:

+ 12 - 12
src/borg/testsuite/item.py

@@ -38,8 +38,8 @@ def test_item_empty():
 @pytest.mark.parametrize(
     "item_dict, path, mode",
     [  # does not matter whether we get str or bytes keys
-        ({b"path": "/a/b/c", b"mode": 0o666}, "/a/b/c", 0o666),
-        ({"path": "/a/b/c", "mode": 0o666}, "/a/b/c", 0o666),
+        ({b"path": "a/b/c", b"mode": 0o666}, "a/b/c", 0o666),
+        ({"path": "a/b/c", "mode": 0o666}, "a/b/c", 0o666),
     ],
 )
 def test_item_from_dict(item_dict, path, mode):
@@ -64,8 +64,8 @@ def test_item_invalid(invalid_item, error):
 
 
 def test_item_from_kw():
-    item = Item(path="/a/b/c", mode=0o666)
-    assert item.path == "/a/b/c"
+    item = Item(path="a/b/c", mode=0o666)
+    assert item.path == "a/b/c"
     assert item.mode == 0o666
 
 
@@ -91,22 +91,22 @@ def test_item_mptimestamp_property(atime):
 def test_item_se_str_property():
     # start simple
     item = Item()
-    item.path = "/a/b/c"
-    assert item.path == "/a/b/c"
-    assert item.as_dict() == {"path": "/a/b/c"}
+    item.path = "a/b/c"
+    assert item.path == "a/b/c"
+    assert item.as_dict() == {"path": "a/b/c"}
     del item.path
     assert item.as_dict() == {}
     with pytest.raises(TypeError):
         item.path = 42
 
     # non-utf-8 path, needing surrogate-escaping for latin-1 u-umlaut
-    item = Item(internal_dict={"path": b"/a/\xfc/c"})
-    assert item.path == "/a/\udcfc/c"  # getting a surrogate-escaped representation
-    assert item.as_dict() == {"path": "/a/\udcfc/c"}
+    item = Item(internal_dict={"path": b"a/\xfc/c"})
+    assert item.path == "a/\udcfc/c"  # getting a surrogate-escaped representation
+    assert item.as_dict() == {"path": "a/\udcfc/c"}
     del item.path
     assert "path" not in item
-    item.path = "/a/\udcfc/c"  # setting using a surrogate-escaped representation
-    assert item.as_dict() == {"path": "/a/\udcfc/c"}
+    item.path = "a/\udcfc/c"  # setting using a surrogate-escaped representation
+    assert item.as_dict() == {"path": "a/\udcfc/c"}
 
 
 def test_item_list_property():