瀏覽代碼

Merge pull request #9143 from ThomasWaldmann/feature/8753-linux-acl-any-text

linux ACL: use acl_to_any_text to avoid libacl name lookups, fixes #8753
TW 2 周之前
父節點
當前提交
1ede79bdec

+ 34 - 6
src/borg/platform/__init__.py

@@ -4,12 +4,16 @@ Platform-specific APIs.
 Public APIs are documented in platform.base.
 """
 
+from types import ModuleType
+
 from ..platformflags import is_win32, is_linux, is_freebsd, is_netbsd, is_darwin, is_cygwin
 
 from .base import ENOATTR, API_VERSION
 from .base import SaveFile, sync_dir, fdatasync, safe_fadvise
 from .base import get_process_id, fqdn, hostname, hostid
 
+platform_ug: ModuleType | None = None  # make mypy happy
+
 if is_linux:  # pragma: linux only
     from .linux import API_VERSION as OS_API_VERSION
     from .linux import listxattr, getxattr, setxattr
@@ -19,7 +23,8 @@ if is_linux:  # pragma: linux only
     from .posix import process_alive, local_pid_alive
     from .posix import swidth
     from .posix import get_errno
-    from .posix import uid2user, user2uid, gid2group, group2gid, getosusername
+    from .posix import getosusername
+    from . import posix_ug as platform_ug
 elif is_freebsd:  # pragma: freebsd only
     from .freebsd import API_VERSION as OS_API_VERSION
     from .freebsd import listxattr, getxattr, setxattr
@@ -30,7 +35,8 @@ elif is_freebsd:  # pragma: freebsd only
     from .posix import process_alive, local_pid_alive
     from .posix import swidth
     from .posix import get_errno
-    from .posix import uid2user, user2uid, gid2group, group2gid, getosusername
+    from .posix import getosusername
+    from . import posix_ug as platform_ug
 elif is_netbsd:  # pragma: netbsd only
     from .netbsd import API_VERSION as OS_API_VERSION
     from .netbsd import listxattr, getxattr, setxattr
@@ -40,7 +46,8 @@ elif is_netbsd:  # pragma: netbsd only
     from .posix import process_alive, local_pid_alive
     from .posix import swidth
     from .posix import get_errno
-    from .posix import uid2user, user2uid, gid2group, group2gid, getosusername
+    from .posix import getosusername
+    from . import posix_ug as platform_ug
 elif is_darwin:  # pragma: darwin only
     from .darwin import API_VERSION as OS_API_VERSION
     from .darwin import listxattr, getxattr, setxattr
@@ -52,7 +59,8 @@ elif is_darwin:  # pragma: darwin only
     from .posix import process_alive, local_pid_alive
     from .posix import swidth
     from .posix import get_errno
-    from .posix import uid2user, user2uid, gid2group, group2gid, getosusername
+    from .posix import getosusername
+    from . import posix_ug as platform_ug
 elif not is_win32:  # pragma: posix only
     # Generic code for all other POSIX OSes
     OS_API_VERSION = API_VERSION
@@ -63,7 +71,8 @@ elif not is_win32:  # pragma: posix only
     from .posix import process_alive, local_pid_alive
     from .posix import swidth
     from .posix import get_errno
-    from .posix import uid2user, user2uid, gid2group, group2gid, getosusername
+    from .posix import getosusername
+    from . import posix_ug as platform_ug
 else:  # pragma: win32 only
     # Win32-specific stuff
     OS_API_VERSION = API_VERSION
@@ -73,7 +82,8 @@ else:  # pragma: win32 only
     from .base import SyncFile
     from .windows import process_alive, local_pid_alive
     from .base import swidth
-    from .windows import uid2user, user2uid, gid2group, group2gid, getosusername
+    from .windows import getosusername
+    from . import windows_ug as platform_ug
 
 
 def get_birthtime_ns(st, path, fd=None):
@@ -86,3 +96,21 @@ def get_birthtime_ns(st, path, fd=None):
         return int(st.st_birthtime * 10**9)
     else:
         return None
+
+
+# Wrapper functions that resolve platform_ug at call time, so its functions can be monkeypatched.
+# For normal usage from outside the platform package, always import these wrappers:
+def uid2user(uid, default=None):
+    return platform_ug._uid2user(uid, default)
+
+
+def gid2group(gid, default=None):
+    return platform_ug._gid2group(gid, default)
+
+
+def user2uid(user, default=None):
+    return platform_ug._user2uid(user, default)
+
+
+def group2gid(group, default=None):
+    return platform_ug._group2gid(group, default)

+ 3 - 3
src/borg/platform/darwin.pyx

@@ -4,7 +4,7 @@ from libc.stdint cimport uint32_t
 from libc cimport errno
 from posix.time cimport timespec
 
-from .posix import user2uid, group2gid
+from . import posix_ug
 from ..helpers import safe_decode, safe_encode
 from .xattr import _listxattr_inner, _getxattr_inner, _setxattr_inner, split_string0
 
@@ -108,10 +108,10 @@ def _remove_numeric_id_if_possible(acl):
         if entry:
             fields = entry.split(':')
             if fields[0] == 'user':
-                if user2uid(fields[2]) is not None:
+                if posix_ug._user2uid(fields[2]) is not None:
                     fields[1] = fields[3] = ''
             elif fields[0] == 'group':
-                if group2gid(fields[2]) is not None:
+                if posix_ug._group2gid(fields[2]) is not None:
                     fields[1] = fields[3] = ''
             entries.append(':'.join(fields))
     return safe_encode('\n'.join(entries))

+ 4 - 0
src/borg/platform/freebsd.pyx

@@ -147,6 +147,10 @@ def acl_get(path, item, st, numeric_ids=False, fd=None):
     If `numeric_ids` is True, the user/group field is not preserved, only the uid/gid.
     """
     cdef int flags = ACL_TEXT_APPEND_ID
+    # Note: this could likely be faster if we always used ACL_TEXT_NUMERIC_IDS
+    # and then used uid2user() and gid2group() to translate the numeric ids to names
+    # inside borg (borg caches these lookups via functools.lru_cache).
+    # See how the Linux implementation does it.
     flags |= ACL_TEXT_NUMERIC_IDS if numeric_ids else 0
     if isinstance(path, str):
         path = os.fsencode(path)

+ 50 - 29
src/borg/platform/linux.pyx

@@ -3,7 +3,7 @@ import re
 import stat
 
 from .posix import posix_acl_use_stored_uid_gid
-from .posix import user2uid, group2gid
+from . import posix_ug
 from ..helpers import workarounds
 from ..helpers import safe_decode, safe_encode
 from .base import SyncFile as BaseSyncFile
@@ -47,11 +47,12 @@ cdef extern from "sys/acl.h":
     int acl_set_file(const char *path, int type, acl_t acl)
     int acl_set_fd(int fd, acl_t acl)
     acl_t acl_from_text(const char *buf)
-    char *acl_to_text(acl_t acl, ssize_t *len)
 
 cdef extern from "acl/libacl.h":
     int acl_extended_file_nofollow(const char *path)
     int acl_extended_fd(int fd)
+    char *acl_to_any_text(acl_t acl, const char *prefix, char separator, int options)
+    int TEXT_NUMERIC_IDS
 
 cdef extern from "linux/fs.h":
     # ioctls
@@ -203,46 +204,66 @@ def acl_use_local_uid_gid(acl):
         if entry:
             fields = entry.split(':')
             if fields[0] == 'user' and fields[1]:
-                fields[1] = str(user2uid(fields[1], fields[3]))
+                fields[1] = str(posix_ug._user2uid(fields[1], fields[3]))
             elif fields[0] == 'group' and fields[1]:
-                fields[1] = str(group2gid(fields[1], fields[3]))
+                fields[1] = str(posix_ug._group2gid(fields[1], fields[3]))
             entries.append(':'.join(fields[:3]))
     return safe_encode('\n'.join(entries))
 
 
-cdef acl_append_numeric_ids(acl):
-    """Extend the "POSIX 1003.1e draft standard 17" format with an additional uid/gid field
+def _acl_from_numeric_to_named_with_id(acl):
+    """Convert numeric-id ACL entries to name entries and append numeric id as 4th field.
+
+    Input format (Linux libacl): lines like 'user:1000:rwx' or 'group:100:r-x' or 'user::rwx'.
+    Output format: entries with a non-empty id field become 'user:uname:rwx:uid' or 'group:gname:r-x:gid'.
     """
     assert isinstance(acl, bytes)
     entries = []
     for entry in _comment_re.sub('', safe_decode(acl)).split('\n'):
-        if entry:
-            type, name, permission = entry.split(':')
-            if name and type == 'user':
-                entries.append(':'.join([type, name, permission, str(user2uid(name, name))]))
-            elif name and type == 'group':
-                entries.append(':'.join([type, name, permission, str(group2gid(name, name))]))
+        if not entry:
+            continue
+        fields = entry.split(':')
+        # Expected 3 fields: type, ugid_or_empty, perms
+        if len(fields) >= 3:
+            typ, ugid_str, perm = fields[0], fields[1], fields[2]
+            if ugid_str and typ == 'user':
+                try:
+                    uid = int(ugid_str)
+                except ValueError:
+                    uid = None
+                uname = posix_ug._uid2user(uid, ugid_str) if uid is not None else ugid_str
+                entries.append(':'.join([typ, uname, perm, str(uid if uid is not None else ugid_str)]))
+            elif ugid_str and typ == 'group':
+                try:
+                    gid = int(ugid_str)
+                except ValueError:
+                    gid = None
+                gname = posix_ug._gid2group(gid, ugid_str) if gid is not None else ugid_str
+                entries.append(':'.join([typ, gname, perm, str(gid if gid is not None else ugid_str)]))
             else:
-                entries.append(entry)
+                # user_obj, group_obj, mask, other (empty ugid_str field) are re-emitted as 'type::perm'
+                entries.append(':'.join([typ, '', perm]))
+        else:
+            entries.append(entry)
     return safe_encode('\n'.join(entries))
 
 
-cdef acl_numeric_ids(acl):
-    """Replace the "POSIX 1003.1e draft standard 17" user/group field with uid/gid
-    """
+def _acl_from_numeric_to_numeric_with_id(acl):
+    """Keep numeric ids in name field and append the same id as 4th field where applicable."""
     assert isinstance(acl, bytes)
     entries = []
     for entry in _comment_re.sub('', safe_decode(acl)).split('\n'):
-        if entry:
-            type, name, permission = entry.split(':')
-            if name and type == 'user':
-                uid = str(user2uid(name, name))
-                entries.append(':'.join([type, uid, permission, uid]))
-            elif name and type == 'group':
-                gid = str(group2gid(name, name))
-                entries.append(':'.join([type, gid, permission, gid]))
+        if not entry:
+            continue
+        fields = entry.split(':')
+        if len(fields) >= 3:
+            typ, ugid, perm = fields[0], fields[1], fields[2]
+            if ugid and (typ == 'user' or typ == 'group'):
+                entries.append(':'.join([typ, ugid, perm, ugid]))
             else:
-                entries.append(entry)
+                entries.append(':'.join([typ, '', perm]))
+        else:
+            entries.append(entry)
     return safe_encode('\n'.join(entries))
 
 
@@ -266,9 +287,9 @@ def acl_get(path, item, st, numeric_ids=False, fd=None):
         # note: this should also be the case for symlink fs objects, as they can not have ACLs.
         return
     if numeric_ids:
-        converter = acl_numeric_ids
+        converter = _acl_from_numeric_to_numeric_with_id
     else:
-        converter = acl_append_numeric_ids
+        converter = _acl_from_numeric_to_named_with_id
     try:
         if fd is not None:
             access_acl = acl_get_fd(fd)
@@ -276,7 +297,7 @@ def acl_get(path, item, st, numeric_ids=False, fd=None):
             access_acl = acl_get_file(path, ACL_TYPE_ACCESS)
         if access_acl == NULL:
             raise OSError(errno.errno, os.strerror(errno.errno), os.fsdecode(path))
-        access_text = acl_to_text(access_acl, NULL)
+        access_text = acl_to_any_text(access_acl, NULL, '\n', TEXT_NUMERIC_IDS)
         if access_text == NULL:
             raise OSError(errno.errno, os.strerror(errno.errno), os.fsdecode(path))
         item['acl_access'] = converter(access_text)
@@ -289,7 +310,7 @@ def acl_get(path, item, st, numeric_ids=False, fd=None):
             default_acl = acl_get_file(path, ACL_TYPE_DEFAULT)
             if default_acl == NULL:
                 raise OSError(errno.errno, os.strerror(errno.errno), os.fsdecode(path))
-            default_text = acl_to_text(default_acl, NULL)
+            default_text = acl_to_any_text(default_acl, NULL, '\n', TEXT_NUMERIC_IDS)
             if default_text == NULL:
                 raise OSError(errno.errno, os.strerror(errno.errno), os.fsdecode(path))
             item['acl_default'] = converter(default_text)

+ 3 - 40
src/borg/platform/posix.pyx

@@ -1,8 +1,7 @@
 import errno
 import os
-import grp
-import pwd
-from functools import lru_cache
+
+from . import posix_ug
 
 from libc.errno cimport errno as c_errno
 
@@ -77,42 +76,6 @@ def local_pid_alive(pid):
         return True
 
 
-@lru_cache(maxsize=None)
-def uid2user(uid, default=None):
-    try:
-        return pwd.getpwuid(uid).pw_name
-    except KeyError:
-        return default
-
-
-@lru_cache(maxsize=None)
-def user2uid(user, default=None):
-    if not user:
-        return default
-    try:
-        return pwd.getpwnam(user).pw_uid
-    except KeyError:
-        return default
-
-
-@lru_cache(maxsize=None)
-def gid2group(gid, default=None):
-    try:
-        return grp.getgrgid(gid).gr_name
-    except KeyError:
-        return default
-
-
-@lru_cache(maxsize=None)
-def group2gid(group, default=None):
-    if not group:
-        return default
-    try:
-        return grp.getgrnam(group).gr_gid
-    except KeyError:
-        return default
-
-
 def posix_acl_use_stored_uid_gid(acl):
     """Replace the user/group field with the stored uid/gid."""
     assert isinstance(acl, bytes)
@@ -131,4 +94,4 @@ def posix_acl_use_stored_uid_gid(acl):
 def getosusername():
     """Return the OS username."""
     uid = os.getuid()
-    return uid2user(uid, uid)
+    return posix_ug._uid2user(uid, uid)

+ 39 - 0
src/borg/platform/posix_ug.py

@@ -0,0 +1,39 @@
+import grp
+import pwd
+from functools import lru_cache
+
+
+@lru_cache(maxsize=None)
+def _uid2user(uid, default=None):
+    try:
+        return pwd.getpwuid(uid).pw_name
+    except KeyError:
+        return default
+
+
+@lru_cache(maxsize=None)
+def _user2uid(user, default=None):
+    if not user:
+        return default
+    try:
+        return pwd.getpwnam(user).pw_uid
+    except KeyError:
+        return default
+
+
+@lru_cache(maxsize=None)
+def _gid2group(gid, default=None):
+    try:
+        return grp.getgrgid(gid).gr_name
+    except KeyError:
+        return default
+
+
+@lru_cache(maxsize=None)
+def _group2gid(group, default=None):
+    if not group:
+        return default
+    try:
+        return grp.getgrnam(group).gr_gid
+    except KeyError:
+        return default

+ 0 - 27
src/borg/platform/windows.pyx

@@ -1,6 +1,5 @@
 import os
 import platform
-from functools import lru_cache
 
 
 cdef extern from 'windows.h':
@@ -14,32 +13,6 @@ cdef extern from 'windows.h':
     cdef extern int PROCESS_QUERY_INFORMATION
 
 
-@lru_cache(maxsize=None)
-def uid2user(uid, default=None):
-    return "root"
-
-
-@lru_cache(maxsize=None)
-def user2uid(user, default=None):
-    if not user:
-        # user is either None or the empty string
-        return default
-    return 0
-
-
-@lru_cache(maxsize=None)
-def gid2group(gid, default=None):
-    return "root"
-
-
-@lru_cache(maxsize=None)
-def group2gid(group, default=None):
-    if not group:
-        # group is either None or the empty string
-        return default
-    return 0
-
-
 def getosusername():
     """Return the OS username."""
     return os.getlogin()

+ 33 - 0
src/borg/platform/windows_ug.py

@@ -0,0 +1,33 @@
+from functools import lru_cache
+
+
+@lru_cache(maxsize=None)
+def _uid2user(uid, default=None):
+    # On Windows, Borg uses a simplified mapping for ownership fields.
+    # Return a stable placeholder name.
+    return "root"
+
+
+@lru_cache(maxsize=None)
+def _user2uid(user, default=None):
+    if not user:
+        # user is either None or the empty string
+        return default
+    # Use 0 as the canonical uid placeholder on Windows.
+    return 0
+
+
+@lru_cache(maxsize=None)
+def _gid2group(gid, default=None):
+    # On Windows, Borg uses a simplified mapping for ownership fields.
+    # Return a stable placeholder name.
+    return "root"
+
+
+@lru_cache(maxsize=None)
+def _group2gid(group, default=None):
+    if not group:
+        # group is either None or the empty string
+        return default
+    # Use 0 as the canonical gid placeholder on Windows.
+    return 0

+ 1 - 1
src/borg/testsuite/archive_test.py

@@ -379,7 +379,7 @@ def test_get_item_uid_gid():
     assert gid == 8
 
     if not is_win32:
-        # Due to the hack in borg.platform.windows, user2uid/group2gid always return 0
+        # Due to the hack in borg.platform.windows_ug, user2uid/group2gid always return 0
         # (no matter which username we ask for), and they never raise a KeyError (e.g., for
         # a non-existing user/group name). Thus, these tests can currently not succeed on win32.
 

+ 67 - 0
src/borg/testsuite/platform/linux_test.py

@@ -122,3 +122,70 @@ def test_utils():
     assert acl_use_local_uid_gid(b"group:nonexistent1234:rw-:1234") == b"group:1234:rw-"
     assert acl_use_local_uid_gid(b"user:root:rw-:0") == b"user:0:rw-"
     assert acl_use_local_uid_gid(b"group:root:rw-:0") == b"group:0:rw-"
+
+
+def test_numeric_to_named_with_id_simple(monkeypatch):
+    # Import here to ensure skip marker is applied before any platform-specific import side effects.
+    from ...platform.linux import _acl_from_numeric_to_named_with_id
+
+    # Pretend uid 1000 -> 'alice', gid 100 -> 'staff'
+    from ...platform import platform_ug
+
+    def _uid2user(uid, default=None):
+        if uid == 1000:
+            return "alice"
+        return default
+
+    def _gid2group(gid, default=None):
+        if gid == 100:
+            return "staff"
+        return default
+
+    monkeypatch.setattr(platform_ug, "_uid2user", _uid2user)
+    monkeypatch.setattr(platform_ug, "_gid2group", _gid2group)
+
+    src = b"\n".join([b"user::rwx", b"user:1000:r-x", b"group::r--", b"group:100:r--", b"mask::r-x", b"other::r--"])
+    out = _acl_from_numeric_to_named_with_id(src)
+    lines = set(out.split(b"\n"))
+    assert b"user::rwx" in lines
+    assert b"user:alice:r-x:1000" in lines
+    assert b"group::r--" in lines
+    assert b"group:staff:r--:100" in lines
+    assert b"mask::r-x" in lines
+    assert b"other::r--" in lines
+
+
+def test_numeric_to_named_with_id_nonexistent_ids(monkeypatch):
+    from ...platform.linux import _acl_from_numeric_to_named_with_id
+
+    # The lookup functions return the given default, so names stay numeric, but the 4th field is still appended.
+    from ...platform import platform_ug
+
+    def _uid2user(uid, default=None):
+        return default
+
+    def _gid2group(gid, default=None):
+        return default
+
+    monkeypatch.setattr(platform_ug, "_uid2user", _uid2user)
+    monkeypatch.setattr(platform_ug, "_gid2group", _gid2group)
+
+    src = b"user:9999:r--\ngroup:8888:r--\n"
+    out = _acl_from_numeric_to_named_with_id(src)
+    lines = out.split(b"\n")
+    assert lines[0] == b"user:9999:r--:9999"
+    assert lines[1] == b"group:8888:r--:8888"
+
+
+def test_numeric_to_numeric_with_id_simple():
+    from ...platform.linux import _acl_from_numeric_to_numeric_with_id
+
+    src = b"\n".join([b"user::rwx", b"user:1000:r-x", b"group::r--", b"group:100:r--", b"mask::r-x", b"other::r--"])
+    out = _acl_from_numeric_to_numeric_with_id(src)
+    lines = set(out.split(b"\n"))
+    assert b"user::rwx" in lines
+    assert b"user:1000:r-x:1000" in lines
+    assert b"group::r--" in lines
+    assert b"group:100:r--:100" in lines
+    assert b"mask::r-x" in lines
+    assert b"other::r--" in lines