Bladeren bron

Merge commit 'feb7e2517ef7ec07cc638953a86c726aada7d37e'

Thomas Waldmann 8 jaren geleden
bovenliggende
commit
8be6761c26
12 gewijzigde bestanden met toevoegingen van 413 en 172 verwijderingen
  1. 9 0
      docs/changes.rst
  2. 11 0
      docs/faq.rst
  3. 30 2
      docs/internals.rst
  4. 6 6
      docs/usage.rst
  5. 7 12
      src/borg/compress.pyx
  6. 1 1
      src/borg/constants.py
  7. 46 4
      src/borg/helpers.py
  8. 10 0
      src/borg/remote.py
  9. 1 0
      src/borg/repository.py
  10. 66 0
      src/borg/testsuite/helpers.py
  11. 21 1
      src/borg/testsuite/xattr.py
  12. 205 146
      src/borg/xattr.py

+ 9 - 0
docs/changes.rst

@@ -130,6 +130,10 @@ Security fixes:
 
 - fix security issue with remote repository access, #1428
 
+
+Version 1.0.7rc2 (not released yet)
+-----------------------------------
+
 Bug fixes:
 
 - do not write objects to repository that are bigger than the allowed size,
@@ -137,6 +141,11 @@ Bug fixes:
   IMPORTANT: if you created archives with many millions of files or
              directories, please verify if you can open them successfully,
              e.g. try a "borg list REPO::ARCHIVE".
+- fixed a race condition in extended attributes querying that led to the
+  entire file not being backed up (while logging the error, exit code = 1),
+  #1469
+- fixed a race condition in extended attributes querying that led to a crash,
+  #1462
 
 
 Version 1.0.7rc1 (2016-08-05)

+ 11 - 0
docs/faq.rst

@@ -62,6 +62,17 @@ Which file types, attributes, etc. are *not* preserved?
       holes in a sparse file.
     * filesystem specific attributes, like ext4 immutable bit, see :issue:`618`.
 
+Are there other known limitations?
+----------------------------------
+
+- A single archive can only reference a limited volume of file/dir metadata,
+  usually corresponding to tens or hundreds of millions of files/dirs.
+  When trying to go beyond that limit, you will get a fatal IntegrityError
+  exception telling that the (archive) object is too big.
+  An easy workaround is to create multiple archives with less items each.
+  See also the :ref:`archive_limitation` and :issue:`1452`.
+
+
 Why is my backup bigger than with attic? Why doesn't |project_name| do compression by default?
 ----------------------------------------------------------------------------------------------
 

+ 30 - 2
docs/internals.rst

@@ -160,12 +160,40 @@ object that contains:
 
 * version
 * name
-* list of chunks containing item metadata
+* list of chunks containing item metadata (size: count * ~40B)
 * cmdline
 * hostname
 * username
 * time
 
+.. _archive_limitation:
+
+Note about archive limitations
+~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+The archive is currently stored as a single object in the repository
+and thus limited in size to MAX_OBJECT_SIZE (20MiB).
+
+As one chunk list entry is ~40B, that means we can reference ~500.000 item
+metadata stream chunks per archive.
+
+Each item metadata stream chunk is ~128kiB (see hardcoded ITEMS_CHUNKER_PARAMS).
+
+So that means the whole item metadata stream is limited to ~64GiB chunks.
+If compression is used, the amount of storable metadata is bigger - by the
+compression factor.
+
+If the medium size of an item entry is 100B (small size file, no ACLs/xattrs),
+that means a limit of ~640 million files/directories per archive.
+
+If the medium size of an item entry is 2kB (~100MB size files or more
+ACLs/xattrs), the limit will be ~32 million files/directories per archive.
+
+If one tries to create an archive object bigger than MAX_OBJECT_SIZE, a fatal
+IntegrityError will be raised.
+
+A workaround is to create multiple archives with less items each, see
+also :issue:`1452`.
 
 The Item
 --------
@@ -174,7 +202,7 @@ Each item represents a file, directory or other fs item and is stored as an
 ``item`` dictionary that contains:
 
 * path
-* list of data chunks
+* list of data chunks (size: count * ~40B)
 * user
 * group
 * uid

+ 6 - 6
docs/usage.rst

@@ -439,8 +439,8 @@ you restrict its operation to a subset of the archives using ``--prefix``.
 When using ``--prefix``, be careful to choose a good prefix - e.g. do not use a
 prefix "foo" if you do not also want to match "foobar".
 
-It is strongly recommended to always run ``prune --dry-run ...`` first so you
-will see what it would do without it actually doing anything.
+It is strongly recommended to always run ``prune -v --list --dry-run ...``
+first so you will see what it would do without it actually doing anything.
 
 There is also a visualized prune example in ``docs/misc/prune-example.txt``.
 
@@ -448,19 +448,19 @@ There is also a visualized prune example in ``docs/misc/prune-example.txt``.
 
     # Keep 7 end of day and 4 additional end of week archives.
     # Do a dry-run without actually deleting anything.
-    $ borg prune --dry-run --keep-daily=7 --keep-weekly=4 /path/to/repo
+    $ borg prune -v --list --dry-run --keep-daily=7 --keep-weekly=4 /path/to/repo
 
     # Same as above but only apply to archive names starting with the hostname
     # of the machine followed by a "-" character:
-    $ borg prune --keep-daily=7 --keep-weekly=4 --prefix='{hostname}-' /path/to/repo
+    $ borg prune -v --list --keep-daily=7 --keep-weekly=4 --prefix='{hostname}-' /path/to/repo
 
     # Keep 7 end of day, 4 additional end of week archives,
     # and an end of month archive for every month:
-    $ borg prune --keep-daily=7 --keep-weekly=4 --keep-monthly=-1 /path/to/repo
+    $ borg prune -v --list --keep-daily=7 --keep-weekly=4 --keep-monthly=-1 /path/to/repo
 
     # Keep all backups in the last 10 days, 4 additional end of week archives,
     # and an end of month archive for every month:
-    $ borg prune --keep-within=10d --keep-weekly=4 --keep-monthly=-1 /path/to/repo
+    $ borg prune -v --list --keep-within=10d --keep-weekly=4 --keep-monthly=-1 /path/to/repo
 
 
 .. include:: usage/info.rst.inc

+ 7 - 12
src/borg/compress.pyx

@@ -1,25 +1,18 @@
-import threading
 import zlib
 try:
     import lzma
 except ImportError:
     lzma = None
 
+from .helpers import Buffer
+
 cdef extern from "lz4.h":
     int LZ4_compress_limitedOutput(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
     int LZ4_decompress_safe(const char* source, char* dest, int inputSize, int maxOutputSize) nogil
     int LZ4_compressBound(int inputSize) nogil
 
 
-thread_local = threading.local()
-thread_local.buffer = bytes()
-
-
-cdef char *get_buffer(size):
-    size = int(size)
-    if len(thread_local.buffer) < size:
-        thread_local.buffer = bytes(size)
-    return <char *> thread_local.buffer
+buffer = Buffer(bytearray, size=0)
 
 
 cdef class CompressorBase:
@@ -88,7 +81,8 @@ class LZ4(CompressorBase):
         cdef char *source = idata
         cdef char *dest
         osize = LZ4_compressBound(isize)
-        dest = get_buffer(osize)
+        buf = buffer.get(osize)
+        dest = <char *> buf
         with nogil:
             osize = LZ4_compress_limitedOutput(source, dest, isize, osize)
         if not osize:
@@ -108,7 +102,8 @@ class LZ4(CompressorBase):
         # allocate more if isize * 3 is already bigger, to avoid having to resize often.
         osize = max(int(1.1 * 2**23), isize * 3)
         while True:
-            dest = get_buffer(osize)
+            buf = buffer.get(osize)
+            dest = <char *> buf
             with nogil:
                 rsize = LZ4_decompress_safe(source, dest, isize, osize)
             if rsize >= 0:

+ 1 - 1
src/borg/constants.py

@@ -41,7 +41,7 @@ HASH_MASK_BITS = 21  # results in ~2MiB chunks statistically
 CHUNKER_PARAMS = (CHUNK_MIN_EXP, CHUNK_MAX_EXP, HASH_MASK_BITS, HASH_WINDOW_SIZE)
 
 # chunker params for the items metadata stream, finer granularity
-ITEMS_CHUNKER_PARAMS = (12, 16, 14, HASH_WINDOW_SIZE)
+ITEMS_CHUNKER_PARAMS = (15, 19, 17, HASH_WINDOW_SIZE)
 
 # return codes returned by borg command
 # when borg is killed by signal N, rc = 128 + N

+ 46 - 4
src/borg/helpers.py

@@ -10,9 +10,10 @@ import pwd
 import re
 import signal
 import socket
-import sys
 import stat
+import sys
 import textwrap
+import threading
 import time
 import unicodedata
 import uuid
@@ -680,6 +681,47 @@ def memoize(function):
     return decorated_function
 
 
+class Buffer:
+    """
+    provide a thread-local buffer
+    """
+    def __init__(self, allocator, size=4096, limit=None):
+        """
+        Initialize the buffer: use allocator(size) call to allocate a buffer.
+        Optionally, set the upper <limit> for the buffer size.
+        """
+        assert callable(allocator), 'must give alloc(size) function as first param'
+        assert limit is None or size <= limit, 'initial size must be <= limit'
+        self._thread_local = threading.local()
+        self.allocator = allocator
+        self.limit = limit
+        self.resize(size, init=True)
+
+    def __len__(self):
+        return len(self._thread_local.buffer)
+
+    def resize(self, size, init=False):
+        """
+        resize the buffer - to avoid frequent reallocation, we usually always grow (if needed).
+        giving init=True it is possible to first-time initialize or shrink the buffer.
+        if a buffer size beyond the limit is requested, raise ValueError.
+        """
+        size = int(size)
+        if self.limit is not None and size > self.limit:
+            raise ValueError('Requested buffer size %d is above the limit of %d.' % (size, self.limit))
+        if init or len(self) < size:
+            self._thread_local.buffer = self.allocator(size)
+
+    def get(self, size=None, init=False):
+        """
+        return a buffer of at least the requested size (None: any current size).
+        init=True can be given to trigger shrinking of the buffer to the given size.
+        """
+        if size is not None:
+            self.resize(size, init)
+        return self._thread_local.buffer
+
+
 @memoize
 def uid2user(uid, default=None):
     try:
@@ -945,7 +987,7 @@ DEFAULTISH = ('Default', 'DEFAULT', 'default', 'D', 'd', '', )
 
 
 def yes(msg=None, false_msg=None, true_msg=None, default_msg=None,
-        retry_msg=None, invalid_msg=None, env_msg=None,
+        retry_msg=None, invalid_msg=None, env_msg='{} (from {})',
         falsish=FALSISH, truish=TRUISH, defaultish=DEFAULTISH,
         default=False, retry=True, env_var_override=None, ofile=None, input=input):
     """Output <msg> (usually a question) and let user input an answer.
@@ -966,8 +1008,8 @@ def yes(msg=None, false_msg=None, true_msg=None, default_msg=None,
     :param true_msg: message to output before returning True [None]
     :param default_msg: message to output before returning a <default> [None]
     :param invalid_msg: message to output after a invalid answer was given [None]
-    :param env_msg: message to output when using input from env_var_override [None],
-           needs to have 2 placeholders for answer and env var name, e.g.: "{} (from {})"
+    :param env_msg: message to output when using input from env_var_override ['{} (from {})'],
+           needs to have 2 placeholders for answer and env var name
     :param falsish: sequence of answers qualifying as False
     :param truish: sequence of answers qualifying as True
     :param defaultish: sequence of answers qualifying as <default>

+ 10 - 0
src/borg/remote.py

@@ -189,6 +189,16 @@ class RemoteRepository:
             except self.RPCError as err:
                 if err.remote_type != 'TypeError':
                     raise
+                msg = """\
+Please note:
+If you see a TypeError complaining about the number of positional arguments
+given to open(), you can ignore it if it comes from a borg version < 1.0.7.
+This TypeError is a cosmetic side effect of the compatibility code borg
+clients >= 1.0.7 have to support older borg servers.
+This problem will go away as soon as the server has been upgraded to 1.0.7+.
+"""
+                # emit this msg in the same way as the "Remote: ..." lines that show the remote TypeError
+                sys.stderr.write(msg)
                 if append_only:
                     raise self.NoAppendOnlyOnServer()
                 self.id = self.call('open', self.location.path, create, lock_wait, lock)

+ 1 - 0
src/borg/repository.py

@@ -344,6 +344,7 @@ class Repository:
                  b'segments': self.segments,
                  b'compact': self.compact}
         transaction_id = self.io.get_segments_transaction_id()
+        assert transaction_id is not None
         hints_file = os.path.join(self.path, 'hints.%d' % transaction_id)
         with open(hints_file + '.tmp', 'wb') as fd:
             msgpack.pack(hints, fd)

+ 66 - 0
src/borg/testsuite/helpers.py

@@ -10,6 +10,7 @@ import msgpack
 import msgpack.fallback
 
 from ..helpers import Location
+from ..helpers import Buffer
 from ..helpers import partial_format, format_file_size, parse_file_size, format_timedelta, format_line, PlaceholderError
 from ..helpers import make_path_safe, clean_lines
 from ..helpers import prune_within, prune_split
@@ -713,6 +714,61 @@ def test_is_slow_msgpack():
     assert not is_slow_msgpack()
 
 
+class TestBuffer:
+    def test_type(self):
+        buffer = Buffer(bytearray)
+        assert isinstance(buffer.get(), bytearray)
+        buffer = Buffer(bytes)  # don't do that in practice
+        assert isinstance(buffer.get(), bytes)
+
+    def test_len(self):
+        buffer = Buffer(bytearray, size=0)
+        b = buffer.get()
+        assert len(buffer) == len(b) == 0
+        buffer = Buffer(bytearray, size=1234)
+        b = buffer.get()
+        assert len(buffer) == len(b) == 1234
+
+    def test_resize(self):
+        buffer = Buffer(bytearray, size=100)
+        assert len(buffer) == 100
+        b1 = buffer.get()
+        buffer.resize(200)
+        assert len(buffer) == 200
+        b2 = buffer.get()
+        assert b2 is not b1  # new, bigger buffer
+        buffer.resize(100)
+        assert len(buffer) >= 100
+        b3 = buffer.get()
+        assert b3 is b2  # still same buffer (200)
+        buffer.resize(100, init=True)
+        assert len(buffer) == 100  # except on init
+        b4 = buffer.get()
+        assert b4 is not b3  # new, smaller buffer
+
+    def test_limit(self):
+        buffer = Buffer(bytearray, size=100, limit=200)
+        buffer.resize(200)
+        assert len(buffer) == 200
+        with pytest.raises(ValueError):
+            buffer.resize(201)
+        assert len(buffer) == 200
+
+    def test_get(self):
+        buffer = Buffer(bytearray, size=100, limit=200)
+        b1 = buffer.get(50)
+        assert len(b1) >= 50  # == 100
+        b2 = buffer.get(100)
+        assert len(b2) >= 100  # == 100
+        assert b2 is b1  # did not need resizing yet
+        b3 = buffer.get(200)
+        assert len(b3) == 200
+        assert b3 is not b2  # new, resized buffer
+        with pytest.raises(ValueError):
+            buffer.get(201)  # beyond limit
+        assert len(buffer) == 200
+
+
 def test_yes_input():
     inputs = list(TRUISH)
     input = FakeInputs(inputs)
@@ -804,6 +860,16 @@ def test_yes_output(capfd):
     assert 'false-msg' in err
 
 
+def test_yes_env_output(capfd, monkeypatch):
+    env_var = 'OVERRIDE_SOMETHING'
+    monkeypatch.setenv(env_var, 'yes')
+    assert yes(env_var_override=env_var)
+    out, err = capfd.readouterr()
+    assert out == ''
+    assert env_var in err
+    assert 'yes' in err
+
+
 def test_progress_percentage_multiline(capfd):
     pi = ProgressIndicatorPercent(1000, step=5, start=0, same_line=False, msg="%3.0f%%")
     pi.show(0)

+ 21 - 1
src/borg/testsuite/xattr.py

@@ -2,7 +2,7 @@ import os
 import tempfile
 import unittest
 
-from ..xattr import is_enabled, getxattr, setxattr, listxattr
+from ..xattr import is_enabled, getxattr, setxattr, listxattr, buffer
 from . import BaseTestCase
 
 
@@ -38,3 +38,23 @@ class XattrTestCase(BaseTestCase):
         self.assert_equal(getxattr(self.tmpfile.fileno(), 'user.foo'), b'bar')
         self.assert_equal(getxattr(self.symlink, 'user.foo'), b'bar')
         self.assert_equal(getxattr(self.tmpfile.name, 'user.empty'), None)
+
+    def test_listxattr_buffer_growth(self):
+        # make it work even with ext4, which imposes rather low limits
+        buffer.resize(size=64, init=True)
+        # xattr raw key list will be size 9 * (10 + 1), which is > 64
+        keys = ['user.attr%d' % i for i in range(9)]
+        for key in keys:
+            setxattr(self.tmpfile.name, key, b'x')
+        got_keys = listxattr(self.tmpfile.name)
+        self.assert_equal_se(got_keys, keys)
+        self.assert_equal(len(buffer), 128)
+
+    def test_getxattr_buffer_growth(self):
+        # make it work even with ext4, which imposes rather low limits
+        buffer.resize(size=64, init=True)
+        value = b'x' * 126
+        setxattr(self.tmpfile.name, 'user.big', value)
+        got_value = getxattr(self.tmpfile.name, 'user.big')
+        self.assert_equal(value, got_value)
+        self.assert_equal(len(buffer), 128)

+ 205 - 146
src/borg/xattr.py

@@ -1,5 +1,5 @@
-"""A basic extended attributes (xattr) implementation for Linux and MacOS X
-"""
+"""A basic extended attributes (xattr) implementation for Linux, FreeBSD and MacOS X."""
+
 import errno
 import os
 import re
@@ -10,10 +10,22 @@ from ctypes import CDLL, create_string_buffer, c_ssize_t, c_size_t, c_char_p, c_
 from ctypes.util import find_library
 from distutils.version import LooseVersion
 
+from .helpers import Buffer
+
 from .logger import create_logger
 logger = create_logger()
 
 
+try:
+    ENOATTR = errno.ENOATTR
+except AttributeError:
+    # on some platforms, ENOATTR is missing, use ENODATA there
+    ENOATTR = errno.ENODATA
+
+
+buffer = Buffer(create_string_buffer, limit=2**24)
+
+
 def is_enabled(path=None):
     """Determine if xattr is enabled on the filesystem
     """
@@ -27,12 +39,22 @@ def is_enabled(path=None):
 
 def get_all(path, follow_symlinks=True):
     try:
-        return dict((name, getxattr(path, name, follow_symlinks=follow_symlinks))
-                    for name in listxattr(path, follow_symlinks=follow_symlinks))
+        result = {}
+        names = listxattr(path, follow_symlinks=follow_symlinks)
+        for name in names:
+            try:
+                result[name] = getxattr(path, name, follow_symlinks=follow_symlinks)
+            except OSError as e:
+                # if we get ENOATTR, a race has happened: xattr names were deleted after list.
+                # we just ignore the now missing ones. if you want consistency, do snapshots.
+                if e.errno != ENOATTR:
+                    raise
+        return result
     except OSError as e:
         if e.errno in (errno.ENOTSUP, errno.EPERM):
             return {}
 
+
 libc_name = find_library('c')
 if libc_name is None:
     # find_library didn't work, maybe we are on some minimal system that misses essential
@@ -75,11 +97,88 @@ except OSError as e:
     raise Exception(msg)
 
 
-def _check(rv, path=None):
+def split_string0(buf):
+    """split a list of zero-terminated strings into python not-zero-terminated bytes"""
+    return buf.split(b'\0')[:-1]
+
+
+def split_lstring(buf):
+    """split a list of length-prefixed strings into python not-length-prefixed bytes"""
+    result = []
+    mv = memoryview(buf)
+    while mv:
+        length = mv[0]
+        result.append(bytes(mv[1:1 + length]))
+        mv = mv[1 + length:]
+    return result
+
+
+class BufferTooSmallError(Exception):
+    """the buffer given to an xattr function was too small for the result"""
+
+
+def _check(rv, path=None, detect_buffer_too_small=False):
     if rv < 0:
-        raise OSError(get_errno(), path)
+        e = get_errno()
+        if detect_buffer_too_small and e == errno.ERANGE:
+            # listxattr and getxattr signal with ERANGE that they need a bigger result buffer.
+            # setxattr signals this way that e.g. a xattr key name is too long / inacceptable.
+            raise BufferTooSmallError
+        else:
+            try:
+                msg = os.strerror(e)
+            except ValueError:
+                msg = ''
+            if isinstance(path, int):
+                path = '<FD %d>' % path
+            raise OSError(e, msg, path)
+    if detect_buffer_too_small and rv >= len(buffer):
+        # freebsd does not error with ERANGE if the buffer is too small,
+        # it just fills the buffer, truncates and returns.
+        # so, we play sure and just assume that result is truncated if
+        # it happens to be a full buffer.
+        raise BufferTooSmallError
     return rv
 
+
+def _listxattr_inner(func, path):
+    if isinstance(path, str):
+        path = os.fsencode(path)
+    size = len(buffer)
+    while True:
+        buf = buffer.get(size)
+        try:
+            n = _check(func(path, buf, size), path, detect_buffer_too_small=True)
+        except BufferTooSmallError:
+            size *= 2
+        else:
+            return n, buf.raw
+
+
+def _getxattr_inner(func, path, name):
+    if isinstance(path, str):
+        path = os.fsencode(path)
+    name = os.fsencode(name)
+    size = len(buffer)
+    while True:
+        buf = buffer.get(size)
+        try:
+            n = _check(func(path, name, buf, size), path, detect_buffer_too_small=True)
+        except BufferTooSmallError:
+            size *= 2
+        else:
+            return n, buf.raw
+
+
+def _setxattr_inner(func, path, name, value):
+    if isinstance(path, str):
+        path = os.fsencode(path)
+    name = os.fsencode(name)
+    value = value and os.fsencode(value)
+    size = len(value) if value else 0
+    _check(func(path, name, value, size), path, detect_buffer_too_small=False)
+
+
 if sys.platform.startswith('linux'):  # pragma: linux only
     libc.llistxattr.argtypes = (c_char_p, c_char_p, c_size_t)
     libc.llistxattr.restype = c_ssize_t
@@ -95,54 +194,44 @@ if sys.platform.startswith('linux'):  # pragma: linux only
     libc.fgetxattr.restype = c_ssize_t
 
     def listxattr(path, *, follow_symlinks=True):
-        if isinstance(path, str):
-            path = os.fsencode(path)
-        if isinstance(path, int):
-            func = libc.flistxattr
-        elif follow_symlinks:
-            func = libc.listxattr
-        else:
-            func = libc.llistxattr
-        n = _check(func(path, None, 0), path)
-        if n == 0:
-            return []
-        namebuf = create_string_buffer(n)
-        n2 = _check(func(path, namebuf, n), path)
-        if n2 != n:
-            raise Exception('listxattr failed')
-        return [os.fsdecode(name) for name in namebuf.raw.split(b'\0')[:-1] if not name.startswith(b'system.posix_acl_')]
+        def func(path, buf, size):
+            if isinstance(path, int):
+                return libc.flistxattr(path, buf, size)
+            else:
+                if follow_symlinks:
+                    return libc.listxattr(path, buf, size)
+                else:
+                    return libc.llistxattr(path, buf, size)
+
+        n, buf = _listxattr_inner(func, path)
+        return [os.fsdecode(name) for name in split_string0(buf[:n])
+                if not name.startswith(b'system.posix_acl_')]
 
     def getxattr(path, name, *, follow_symlinks=True):
-        name = os.fsencode(name)
-        if isinstance(path, str):
-            path = os.fsencode(path)
-        if isinstance(path, int):
-            func = libc.fgetxattr
-        elif follow_symlinks:
-            func = libc.getxattr
-        else:
-            func = libc.lgetxattr
-        n = _check(func(path, name, None, 0))
-        if n == 0:
-            return
-        valuebuf = create_string_buffer(n)
-        n2 = _check(func(path, name, valuebuf, n), path)
-        if n2 != n:
-            raise Exception('getxattr failed')
-        return valuebuf.raw
+        def func(path, name, buf, size):
+            if isinstance(path, int):
+                return libc.fgetxattr(path, name, buf, size)
+            else:
+                if follow_symlinks:
+                    return libc.getxattr(path, name, buf, size)
+                else:
+                    return libc.lgetxattr(path, name, buf, size)
+
+        n, buf = _getxattr_inner(func, path, name)
+        return buf[:n] or None
 
     def setxattr(path, name, value, *, follow_symlinks=True):
-        name = os.fsencode(name)
-        value = value and os.fsencode(value)
-        if isinstance(path, str):
-            path = os.fsencode(path)
-        if isinstance(path, int):
-            func = libc.fsetxattr
-        elif follow_symlinks:
-            func = libc.setxattr
-        else:
-            func = libc.lsetxattr
-        _check(func(path, name, value, len(value) if value else 0, 0), path)
+        def func(path, name, value, size):
+            flags = 0
+            if isinstance(path, int):
+                return libc.fsetxattr(path, name, value, size, flags)
+            else:
+                if follow_symlinks:
+                    return libc.setxattr(path, name, value, size, flags)
+                else:
+                    return libc.lsetxattr(path, name, value, size, flags)
+
+        _setxattr_inner(func, path, name, value)
 
 elif sys.platform == 'darwin':  # pragma: darwin only
     libc.listxattr.argtypes = (c_char_p, c_char_p, c_size_t, c_int)
@@ -158,60 +247,48 @@ elif sys.platform == 'darwin':  # pragma: darwin only
     libc.fgetxattr.argtypes = (c_int, c_char_p, c_char_p, c_size_t, c_uint32, c_int)
     libc.fgetxattr.restype = c_ssize_t
 
+    XATTR_NOFLAGS = 0x0000
     XATTR_NOFOLLOW = 0x0001
 
     def listxattr(path, *, follow_symlinks=True):
-        func = libc.listxattr
-        flags = 0
-        if isinstance(path, str):
-            path = os.fsencode(path)
-        if isinstance(path, int):
-            func = libc.flistxattr
-        elif not follow_symlinks:
-            flags = XATTR_NOFOLLOW
-        n = _check(func(path, None, 0, flags), path)
-        if n == 0:
-            return []
-        namebuf = create_string_buffer(n)
-        n2 = _check(func(path, namebuf, n, flags), path)
-        if n2 != n:
-            raise Exception('listxattr failed')
-        return [os.fsdecode(name) for name in namebuf.raw.split(b'\0')[:-1]]
+        def func(path, buf, size):
+            if isinstance(path, int):
+                return libc.flistxattr(path, buf, size, XATTR_NOFLAGS)
+            else:
+                if follow_symlinks:
+                    return libc.listxattr(path, buf, size, XATTR_NOFLAGS)
+                else:
+                    return libc.listxattr(path, buf, size, XATTR_NOFOLLOW)
+
+        n, buf = _listxattr_inner(func, path)
+        return [os.fsdecode(name) for name in split_string0(buf[:n])]
 
     def getxattr(path, name, *, follow_symlinks=True):
-        name = os.fsencode(name)
-        func = libc.getxattr
-        flags = 0
-        if isinstance(path, str):
-            path = os.fsencode(path)
-        if isinstance(path, int):
-            func = libc.fgetxattr
-        elif not follow_symlinks:
-            flags = XATTR_NOFOLLOW
-        n = _check(func(path, name, None, 0, 0, flags))
-        if n == 0:
-            return
-        valuebuf = create_string_buffer(n)
-        n2 = _check(func(path, name, valuebuf, n, 0, flags), path)
-        if n2 != n:
-            raise Exception('getxattr failed')
-        return valuebuf.raw
+        def func(path, name, buf, size):
+            if isinstance(path, int):
+                return libc.fgetxattr(path, name, buf, size, 0, XATTR_NOFLAGS)
+            else:
+                if follow_symlinks:
+                    return libc.getxattr(path, name, buf, size, 0, XATTR_NOFLAGS)
+                else:
+                    return libc.getxattr(path, name, buf, size, 0, XATTR_NOFOLLOW)
+
+        n, buf = _getxattr_inner(func, path, name)
+        return buf[:n] or None
 
     def setxattr(path, name, value, *, follow_symlinks=True):
-        name = os.fsencode(name)
-        value = value and os.fsencode(value)
-        func = libc.setxattr
-        flags = 0
-        if isinstance(path, str):
-            path = os.fsencode(path)
-        if isinstance(path, int):
-            func = libc.fsetxattr
-        elif not follow_symlinks:
-            flags = XATTR_NOFOLLOW
-        _check(func(path, name, value, len(value) if value else 0, 0, flags), path)
+        def func(path, name, value, size):
+            if isinstance(path, int):
+                return libc.fsetxattr(path, name, value, size, 0, XATTR_NOFLAGS)
+            else:
+                if follow_symlinks:
+                    return libc.setxattr(path, name, value, size, 0, XATTR_NOFLAGS)
+                else:
+                    return libc.setxattr(path, name, value, size, 0, XATTR_NOFOLLOW)
+
+        _setxattr_inner(func, path, name, value)
 
 elif sys.platform.startswith('freebsd'):  # pragma: freebsd only
-    EXTATTR_NAMESPACE_USER = 0x0001
     libc.extattr_list_fd.argtypes = (c_int, c_int, c_char_p, c_size_t)
     libc.extattr_list_fd.restype = c_ssize_t
     libc.extattr_list_link.argtypes = (c_char_p, c_int, c_char_p, c_size_t)
@@ -230,63 +307,45 @@ elif sys.platform.startswith('freebsd'):  # pragma: freebsd only
     libc.extattr_set_link.restype = c_int
     libc.extattr_set_file.argtypes = (c_char_p, c_int, c_char_p, c_char_p, c_size_t)
     libc.extattr_set_file.restype = c_int
+    ns = EXTATTR_NAMESPACE_USER = 0x0001
 
     def listxattr(path, *, follow_symlinks=True):
-        ns = EXTATTR_NAMESPACE_USER
-        if isinstance(path, str):
-            path = os.fsencode(path)
-        if isinstance(path, int):
-            func = libc.extattr_list_fd
-        elif follow_symlinks:
-            func = libc.extattr_list_file
-        else:
-            func = libc.extattr_list_link
-        n = _check(func(path, ns, None, 0), path)
-        if n == 0:
-            return []
-        namebuf = create_string_buffer(n)
-        n2 = _check(func(path, ns, namebuf, n), path)
-        if n2 != n:
-            raise Exception('listxattr failed')
-        names = []
-        mv = memoryview(namebuf.raw)
-        while mv:
-            length = mv[0]
-            names.append(os.fsdecode(bytes(mv[1:1 + length])))
-            mv = mv[1 + length:]
-        return names
+        def func(path, buf, size):
+            if isinstance(path, int):
+                return libc.extattr_list_fd(path, ns, buf, size)
+            else:
+                if follow_symlinks:
+                    return libc.extattr_list_file(path, ns, buf, size)
+                else:
+                    return libc.extattr_list_link(path, ns, buf, size)
+
+        n, buf = _listxattr_inner(func, path)
+        return [os.fsdecode(name) for name in split_lstring(buf[:n])]
 
     def getxattr(path, name, *, follow_symlinks=True):
-        name = os.fsencode(name)
-        if isinstance(path, str):
-            path = os.fsencode(path)
-        if isinstance(path, int):
-            func = libc.extattr_get_fd
-        elif follow_symlinks:
-            func = libc.extattr_get_file
-        else:
-            func = libc.extattr_get_link
-        n = _check(func(path, EXTATTR_NAMESPACE_USER, name, None, 0))
-        if n == 0:
-            return
-        valuebuf = create_string_buffer(n)
-        n2 = _check(func(path, EXTATTR_NAMESPACE_USER, name, valuebuf, n), path)
-        if n2 != n:
-            raise Exception('getxattr failed')
-        return valuebuf.raw
+        def func(path, name, buf, size):
+            if isinstance(path, int):
+                return libc.extattr_get_fd(path, ns, name, buf, size)
+            else:
+                if follow_symlinks:
+                    return libc.extattr_get_file(path, ns, name, buf, size)
+                else:
+                    return libc.extattr_get_link(path, ns, name, buf, size)
+
+        n, buf = _getxattr_inner(func, path, name)
+        return buf[:n] or None
 
     def setxattr(path, name, value, *, follow_symlinks=True):
-        name = os.fsencode(name)
-        value = value and os.fsencode(value)
-        if isinstance(path, str):
-            path = os.fsencode(path)
-        if isinstance(path, int):
-            func = libc.extattr_set_fd
-        elif follow_symlinks:
-            func = libc.extattr_set_file
-        else:
-            func = libc.extattr_set_link
-        _check(func(path, EXTATTR_NAMESPACE_USER, name, value, len(value) if value else 0), path)
+        def func(path, name, value, size):
+            if isinstance(path, int):
+                return libc.extattr_set_fd(path, ns, name, value, size)
+            else:
+                if follow_symlinks:
+                    return libc.extattr_set_file(path, ns, name, value, size)
+                else:
+                    return libc.extattr_set_link(path, ns, name, value, size)
+
+        _setxattr_inner(func, path, name, value)
 
 else:  # pragma: unknown platform only
     def listxattr(path, *, follow_symlinks=True):