
Merge branch 'master' into move-to-src

Thomas Waldmann 9 years ago
parent
commit
045e5a1203

+ 5 - 0
.travis.yml

@@ -14,6 +14,9 @@ matrix:
         - python: 3.5
           os: linux
           env: TOXENV=py35
+        - python: nightly
+          os: linux
+          env: TOXENV=py36
         - python: 3.5
           os: linux
           env: TOXENV=flake8
@@ -25,6 +28,8 @@ matrix:
           os: osx
           osx_image: xcode6.4
           env: TOXENV=py35
+    allow_failures:
+        - python: nightly
 
 install:
     - ./.travis/install.sh

+ 6 - 1
conftest.py

@@ -5,7 +5,12 @@ setup_logging()
 
 from borg.testsuite import has_lchflags, no_lchlfags_because, has_llfuse
 from borg.testsuite.platform import fakeroot_detected
-from borg import xattr
+from borg import xattr, constants
+
+
+def pytest_configure(config):
+    # no fixture-based monkey-patching since star-imports are used for the constants module
+    constants.PBKDF2_ITERATIONS = 1
 
 
 def pytest_report_header(config, startdir):
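
The comment in this hunk is the crux: any module doing `from .constants import *` copies each name into its own globals at import time, so a fixture that monkey-patches `borg.constants` later would never reach those copies. Setting the value once in pytest_configure(), before collection imports the test modules, is the reliable hook. A self-contained sketch of the star-import copy semantics (module names hypothetical):

    import types

    constants = types.ModuleType('constants')
    constants.PBKDF2_ITERATIONS = 100_000

    consumer = types.ModuleType('consumer')
    # emulate "from constants import *": bindings are copied at import time
    consumer.__dict__.update(
        (k, v) for k, v in vars(constants).items() if not k.startswith('_'))

    constants.PBKDF2_ITERATIONS = 1     # late patch, as a fixture would do it
    print(consumer.PBKDF2_ITERATIONS)   # still 100000 -- the copy is unaffected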

+ 1 - 1
setup.py

@@ -158,7 +158,7 @@ class build_usage(Command):
         print('generating usage docs')
         # allows us to build docs without the C modules fully loaded during help generation
         from borg.archiver import Archiver
-        parser = Archiver().build_parser(prog='borg')
+        parser = Archiver(prog='borg').parser
         choices = {}
         for action in parser._actions:
             if action.choices is not None:

+ 15 - 7
src/borg/_chunker.c

@@ -96,7 +96,7 @@ buzhash(const unsigned char *data, size_t len, const uint32_t *h)
 static uint32_t
 buzhash_update(uint32_t sum, unsigned char remove, unsigned char add, size_t len, const uint32_t *h)
 {
-    uint32_t lenmod = len & 0x1f;
+    uint32_t lenmod = len & 0x1f;  /* Note: replace by constant to get small speedup */
     return BARREL_SHIFT(sum, 1) ^ BARREL_SHIFT(h[remove], lenmod) ^ h[add];
 }
 
@@ -196,10 +196,10 @@ chunker_fill(Chunker *c)
         // We rollback the initial offset back to the start of the page,
         // to avoid it not being truncated as a partial page request.
         if (length > 0) {
-            // Linux kernels prior to 4.7 have a bug where they truncate
-            // last partial page of POSIX_FADV_DONTNEED request, so we need
-            // to page-align it ourselves. We'll need the rest of this page
-            // on the next read (assuming this was not EOF)
+            // All Linux kernels (at least up to and including 4.6(.0)) have a bug where
+            // they truncate last partial page of POSIX_FADV_DONTNEED request, so we need
+            // to page-align it ourselves. We'll need the rest of this page on the next
+            // read (assuming this was not EOF).
             overshoot = (offset + length) & pagemask;
         } else {
             // For length == 0 we set overshoot 0, so the below
@@ -249,11 +249,12 @@ chunker_process(Chunker *c)
             PyErr_SetString(PyExc_Exception, "chunkifier byte count mismatch");
         return NULL;
     }
-    while(c->remaining <= window_size && !c->eof) {
+    while(c->remaining < min_size + window_size + 1 && !c->eof) {  /* see assert in Chunker init */
         if(!chunker_fill(c)) {
             return NULL;
         }
     }
+    /* here we either are at eof ... */
     if(c->eof) {
         c->done = 1;
         if(c->remaining) {
@@ -268,8 +269,15 @@ chunker_process(Chunker *c)
             return NULL;
         }
     }
+    /* ... or we have at least min_size + window_size + 1 bytes remaining.
+     * We do not want to "cut" a chunk smaller than min_size and the hash
+     * window starts at the potential cutting place.
+     */
+    c->position += min_size;
+    c->remaining -= min_size;
+    n += min_size;
     sum = buzhash(c->data + c->position, window_size, c->table);
-    while(c->remaining > c->window_size && ((sum & chunk_mask) || n < min_size)) {
+    while(c->remaining > c->window_size && (sum & chunk_mask)) {
         sum = buzhash_update(sum, c->data[c->position],
                              c->data[c->position + window_size],
                              window_size, c->table);
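
The hunk above changes the cut-point search: since a cut below min_size is never allowed, the chunker now jumps over the first min_size bytes and seeds the hash window at the earliest legal cutting place, instead of rolling through those bytes and re-testing `n < min_size` on every iteration. A rough Python rendering of the new inner loop, with ports of the two hash functions shown above; the exact chunk-boundary bookkeeping of _chunker.c is elided:

    def barrel_shift(v, n):                     # 32-bit rotate left
        n &= 0x1f
        return ((v << n) | (v >> (32 - n))) & 0xFFFFFFFF

    def buzhash(data, table):                   # port of buzhash() above
        s, n = 0, len(data)
        for i in range(n - 1):
            s ^= barrel_shift(table[data[i]], (n - 1 - i) & 0x1f)
        return s ^ table[data[n - 1]]

    def buzhash_update(s, remove, add, window_size, table):
        lenmod = window_size & 0x1f
        return barrel_shift(s, 1) ^ barrel_shift(table[remove], lenmod) ^ table[add]

    table = list(range(256))   # stand-in; borg derives a seeded 256-entry uint32 table

    def find_window_start(data, min_size, window_size, chunk_mask, table):
        # assumes len(data) >= min_size + window_size + 1 (the new fill condition)
        pos = min_size                          # never even look below min_size
        s = buzhash(data[pos:pos + window_size], table)
        while pos + window_size < len(data) and (s & chunk_mask):
            s = buzhash_update(s, data[pos], data[pos + window_size],
                               window_size, table)
            pos += 1
        return pos                              # window start at the cut candidate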

+ 1 - 1
src/borg/archive.py

@@ -371,7 +371,7 @@ Number of files: {0.stats.nfiles}'''.format(
         """
         if dry_run or stdout:
             if b'chunks' in item:
-                for data in self.pipeline.fetch_many([c.id for c in item[b'chunks']], is_preloaded=True):
+                for _, data in self.pipeline.fetch_many([c.id for c in item[b'chunks']], is_preloaded=True):
                     if stdout:
                         sys.stdout.buffer.write(data)
                 if stdout:

+ 5 - 4
src/borg/archiver.py

@@ -96,9 +96,10 @@ def with_archive(method):
 
 class Archiver:
 
-    def __init__(self, lock_wait=None):
+    def __init__(self, lock_wait=None, prog=None):
         self.exit_code = EXIT_SUCCESS
         self.lock_wait = lock_wait
+        self.parser = self.build_parser(prog)
 
     def print_error(self, msg, *args):
         msg = args and msg % args or msg
@@ -317,6 +318,7 @@ class Archiver:
         status = None
         # Ignore if nodump flag is set
         if get_flags(path, st) & stat.UF_NODUMP:
+            self.print_file_status('x', path)
             return
         if stat.S_ISREG(st.st_mode) or read_special and not stat.S_ISDIR(st.st_mode):
             if not dry_run:
@@ -1117,7 +1119,7 @@ class Archiver:
                     self.print_warning(warning)
         return args
 
-    def build_parser(self, args=None, prog=None):
+    def build_parser(self, prog=None):
         common_parser = argparse.ArgumentParser(add_help=False, prog=prog)
 
         common_group = common_parser.add_argument_group('Common options')
@@ -2061,8 +2063,7 @@ class Archiver:
         # We can't use argparse for "serve" since we don't want it to show up in "Available commands"
         if args:
             args = self.preprocess_args(args)
-        parser = self.build_parser(args)
-        args = parser.parse_args(args or ['-h'])
+        args = self.parser.parse_args(args or ['-h'])
         update_excludes(args)
         return args
 

+ 2 - 0
src/borg/chunker.pyx

@@ -23,6 +23,8 @@ cdef class Chunker:
     def __cinit__(self, int seed, int chunk_min_exp, int chunk_max_exp, int hash_mask_bits, int hash_window_size):
         min_size = 1 << chunk_min_exp
         max_size = 1 << chunk_max_exp
+        # see chunker_process, first while loop condition, first term must be able to get True:
+        assert hash_window_size + min_size + 1 <= max_size, "too small max_size"
         hash_mask = (1 << hash_mask_bits) - 1
         self.chunker = chunker_init(hash_window_size, hash_mask, min_size, max_size, seed & 0xffffffff)
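
A quick numeric check of what this assert guards, and why the recreate test further down had to move from chunker params '10,12,11,4095' to '10,13,11,4095': the fill loop needs min_size + window_size + 1 bytes available, so that many bytes must fit into one maximum-size chunk:

    hash_window_size = 4095

    min_size, max_size = 1 << 19, 1 << 23        # defaults-style exponents 19, 23
    assert hash_window_size + min_size + 1 <= max_size  # 528384 <= 8388608: fine

    min_size, max_size = 1 << 10, 1 << 12        # the old test params 10, 12
    assert hash_window_size + min_size + 1 > max_size   # 5120 > 4096: too small

    min_size, max_size = 1 << 10, 1 << 13        # the fixed test params 10, 13
    assert hash_window_size + min_size + 1 <= max_size  # 5120 <= 8192: fine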
 

+ 8 - 1
src/borg/hashindex.pyx

@@ -1,9 +1,12 @@
 # -*- coding: utf-8 -*-
 from collections import namedtuple
+import locale
 import os
 
 cimport cython
 from libc.stdint cimport uint32_t, UINT32_MAX, uint64_t
+from libc.errno cimport errno
+from cpython.exc cimport PyErr_SetFromErrnoWithFilename
 
 API_VERSION = 2
 
@@ -52,6 +55,7 @@ MAX_VALUE = _MAX_VALUE
 
 assert _MAX_VALUE % 2 == 1
 
+
 @cython.internal
 cdef class IndexBase:
     cdef HashIndex *index
@@ -63,7 +67,10 @@ cdef class IndexBase:
             path = os.fsencode(path)
             self.index = hashindex_read(path)
             if not self.index:
-                raise Exception('hashindex_read failed')
+                if errno:
+                    PyErr_SetFromErrnoWithFilename(OSError, path)
+                    return
+                raise RuntimeError('hashindex_read failed')
         else:
             self.index = hashindex_init(capacity, self.key_size, self.value_size)
             if not self.index:
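
For callers this splits one generic Exception into two distinguishable cases: if the C layer set errno, a real OSError (carrying errno and the file name) propagates; if the file was readable but not a valid index, a RuntimeError is raised. The recovery logic in repository.py further down keys on exactly that split; a sketch of the calling pattern (handler names hypothetical, path illustrative):

    from borg.hashindex import NSIndex

    index_path = b'/repo/index.42'      # illustrative
    try:
        index = NSIndex.read(index_path)
    except OSError as e:
        # errno was set (e.g. ENOENT, EACCES): a real I/O problem
        report_os_error(e)              # hypothetical handler
    except RuntimeError:
        # readable but not a valid hashindex: corruption, try to recover
        recover_from_corruption()       # hypothetical handler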

+ 12 - 0
src/borg/helpers.py

@@ -65,6 +65,18 @@ class ErrorWithTraceback(Error):
     traceback = True
 
 
+class InternalOSError(Error):
+    """Error while accessing repository: [Errno {}] {}: {}"""
+
+    def __init__(self, os_error):
+        self.errno = os_error.errno
+        self.strerror = os_error.strerror
+        self.filename = os_error.filename
+
+    def get_message(self):
+        return self.__doc__.format(self.errno, self.strerror, self.filename)
+
+
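
InternalOSError carries the pieces of an OSError into borg's Error machinery, so repository-level I/O failures render as a clean message instead of a traceback. A minimal usage sketch (path illustrative), mirroring the `raise InternalOSError(e) from None` pattern used in repository.py below:

    import os
    from borg.helpers import InternalOSError

    try:
        os.unlink('/repo/index.42')                 # illustrative path
    except OSError as e:
        err = InternalOSError(e)
        print(err.get_message())
        # Error while accessing repository: [Errno 2] No such file or directory: /repo/index.42
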
 class IntegrityError(ErrorWithTraceback):
     """Data integrity error"""
 

+ 10 - 10
src/borg/platform_linux.pyx

@@ -8,13 +8,13 @@ from .platform_base import SyncFile as BaseSyncFile
 from .platform_posix import swidth
 
 from libc cimport errno
+from libc.stdint cimport int64_t
 
 API_VERSION = 3
 
 cdef extern from "sys/types.h":
     int ACL_TYPE_ACCESS
     int ACL_TYPE_DEFAULT
-    ctypedef off64_t
 
 cdef extern from "sys/acl.h":
     ctypedef struct _acl_t:
@@ -31,7 +31,7 @@ cdef extern from "acl/libacl.h":
     int acl_extended_file(const char *path)
 
 cdef extern from "fcntl.h":
-    int sync_file_range(int fd, off64_t offset, off64_t nbytes, unsigned int flags)
+    int sync_file_range(int fd, int64_t offset, int64_t nbytes, unsigned int flags)
     unsigned int SYNC_FILE_RANGE_WRITE
     unsigned int SYNC_FILE_RANGE_WAIT_BEFORE
     unsigned int SYNC_FILE_RANGE_WAIT_AFTER
@@ -47,12 +47,9 @@ cdef extern from "linux/fs.h":
     int FS_APPEND_FL
     int FS_COMPR_FL
 
-cdef extern from "stropts.h":
+cdef extern from "sys/ioctl.h":
     int ioctl(int fildes, int request, ...)
 
-cdef extern from "errno.h":
-    int errno
-
 cdef extern from "string.h":
     char *strerror(int errnum)
 
@@ -79,17 +76,20 @@ def set_flags(path, bsd_flags, fd=None):
         fd = os.open(path, os.O_RDONLY|os.O_NONBLOCK|os.O_NOFOLLOW)
     try:
         if ioctl(fd, FS_IOC_SETFLAGS, &flags) == -1:
-            raise OSError(errno, strerror(errno).decode(), path)
+            error_number = errno.errno
+            if error_number != errno.EOPNOTSUPP:
+                raise OSError(error_number, strerror(error_number).decode(), path)
     finally:
         if open_fd:
             os.close(fd)
 
 
 def get_flags(path, st):
-    if stat.S_ISLNK(st.st_mode):
-        return 0
     cdef int linux_flags
-    fd = os.open(path, os.O_RDONLY|os.O_NONBLOCK|os.O_NOFOLLOW)
+    try:
+        fd = os.open(path, os.O_RDONLY|os.O_NONBLOCK|os.O_NOFOLLOW)
+    except OSError:
+        return 0
     try:
         if ioctl(fd, FS_IOC_GETFLAGS, &linux_flags) == -1:
             return 0

+ 6 - 1
src/borg/platform_posix.pyx

@@ -2,4 +2,9 @@ cdef extern from "wchar.h":
     cdef int wcswidth(const Py_UNICODE *str, size_t n)
  
 def swidth(s):
-    return wcswidth(s, len(s))
+    str_len = len(s)
+    terminal_width = wcswidth(s, str_len)
+    if terminal_width >= 0:
+        return terminal_width
+    else:
+        return str_len
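
wcswidth() returns the number of terminal cells a string occupies, or -1 if the string contains a non-printable character; falling back to len() keeps swidth() total, and an approximate width beats an exception in column layout code. Illustrative behavior, assuming the compiled module and a UTF-8 locale with East Asian wide characters:

    from borg.platform_posix import swidth

    print(swidth('borg'))        # 4: one cell per ASCII character
    print(swidth('バックアップ'))    # 12: six wide characters, two cells each
    print(swidth('a\x1bb'))      # 3: wcswidth() yields -1 for the control char,
                                 #    so the fallback returns len(s)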

+ 23 - 16
src/borg/remote.py

@@ -19,6 +19,8 @@ RPC_PROTOCOL_VERSION = 2
 
 BUFSIZE = 10 * 1024 * 1024
 
+MAX_INFLIGHT = 100
+
 
 class ConnectionClosed(Error):
     """Connection closed by remote host"""
@@ -246,7 +248,6 @@ class RemoteRepository:
 
         calls = list(calls)
         waiting_for = []
-        w_fds = [self.stdin_fd]
         while wait or calls:
             while waiting_for:
                 try:
@@ -275,6 +276,10 @@ class RemoteRepository:
                             return
                 except KeyError:
                     break
+            if self.to_send or ((calls or self.preload_ids) and len(waiting_for) < MAX_INFLIGHT):
+                w_fds = [self.stdin_fd]
+            else:
+                w_fds = []
             r, w, x = select.select(self.r_fds, w_fds, self.x_fds, 1)
             if x:
                 raise Exception('FD exception occurred')
@@ -298,20 +303,9 @@ class RemoteRepository:
                         raise ConnectionClosed()
                     data = data.decode('utf-8')
                     for line in data.splitlines(keepends=True):
-                        if line.startswith('$LOG '):
-                            _, level, msg = line.split(' ', 2)
-                            level = getattr(logging, level, logging.CRITICAL)  # str -> int
-                            if msg.startswith('Remote:'):
-                                # server format: '$LOG <level> Remote: <msg>'
-                                logging.log(level, msg.rstrip())
-                            else:
-                                # server format '$LOG <level> <logname> Remote: <msg>'
-                                logname, msg = msg.split(' ', 1)
-                                logging.getLogger(logname).log(level, msg.rstrip())
-                        else:
-                            sys.stderr.write("Remote: " + line)
+                        handle_remote_line(line)
             if w:
-                while not self.to_send and (calls or self.preload_ids) and len(waiting_for) < 100:
+                while not self.to_send and (calls or self.preload_ids) and len(waiting_for) < MAX_INFLIGHT:
                     if calls:
                         if is_preloaded:
                             if calls[0] in self.cache:
@@ -338,8 +332,6 @@ class RemoteRepository:
                         # that the fd should be writable
                         if e.errno != errno.EAGAIN:
                             raise
-                if not self.to_send and not (calls or self.preload_ids):
-                    w_fds = []
         self.ignore_responses |= set(waiting_for)
 
     def check(self, repair=False, save_space=False):
@@ -394,6 +386,21 @@ class RemoteRepository:
         self.preload_ids += ids
 
 
+def handle_remote_line(line):
+    if line.startswith('$LOG '):
+        _, level, msg = line.split(' ', 2)
+        level = getattr(logging, level, logging.CRITICAL)  # str -> int
+        if msg.startswith('Remote:'):
+            # server format: '$LOG <level> Remote: <msg>'
+            logging.log(level, msg.rstrip())
+        else:
+            # server format '$LOG <level> <logname> Remote: <msg>'
+            logname, msg = msg.split(' ', 1)
+            logging.getLogger(logname).log(level, msg.rstrip())
+    else:
+        sys.stderr.write("Remote: " + line)
+
+
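
Pulling the stderr handling out into a module-level handle_remote_line() makes the protocol unit-testable without spawning a remote process, which is exactly what the rewritten RemoteLoggerTestCase at the bottom of this commit does. The three line shapes it distinguishes (message texts illustrative):

    from borg.remote import handle_remote_line

    # new server format: routed to the named logger at the given level
    handle_remote_line('$LOG INFO borg.repository Remote: compaction started\n')
    # old server format: no logger name, goes to the root logger
    handle_remote_line('$LOG WARNING Remote: disk is getting full\n')
    # anything else: passed through to stderr verbatim, no newline added
    handle_remote_line('some unstructured stderr output\n')
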
 class RepositoryNoCache:
     """A not caching Repository wrapper, passes through to repository.
 

+ 225 - 73
src/borg/repository.py

@@ -9,11 +9,14 @@ logger = logging.getLogger(__name__)
 import os
 import shutil
 import struct
+from collections import defaultdict
+from functools import partial
 from zlib import crc32
 
 import msgpack
 from .constants import *  # NOQA
-from .helpers import Error, ErrorWithTraceback, IntegrityError, Location, ProgressIndicatorPercent, bin_to_hex
+from .helpers import Error, ErrorWithTraceback, IntegrityError, InternalOSError, Location, ProgressIndicatorPercent, \
+    bin_to_hex
 from .hashindex import NSIndex
 from .locking import UpgradableLock, LockError, LockErrorT
 from .lrucache import LRUCache
@@ -26,17 +29,55 @@ TAG_PUT = 0
 TAG_DELETE = 1
 TAG_COMMIT = 2
 
+FreeSpace = partial(defaultdict, int)
+
 
 class Repository:
-    """Filesystem based transactional key value store
+    """
+    Filesystem based transactional key value store
+
+    Transactionality is achieved by using a log (aka journal) to record changes. The log is a series of numbered files
+    called segments. Each segment is a series of log entries. The segment number together with the offset of each
+    entry relative to its segment start establishes an ordering of the log entries. This is the "definition" of
+    time for the purposes of the log.
+
+    Log entries are either PUT, DELETE or COMMIT.
+
+    A COMMIT is always the final log entry in a segment and marks all data from the beginning of the log until the
+    segment ending with the COMMIT as committed and consistent. The segment number of a segment ending with a COMMIT
+    is called the transaction ID of that commit, and a segment ending with a COMMIT is called committed.
+
+    When reading from a repository it is first checked whether the last segment is committed. If it is not, then
+    all segments after the last committed segment are deleted; they contain log entries whose consistency is not
+    established by a COMMIT.
+
+    Note that the COMMIT can't establish consistency by itself, but only manages to do so with proper support from
+    the platform (including the hardware). See platform_base.SyncFile for details.
+
+    A PUT inserts a key-value pair. The value is stored in the log entry, hence the repository implements
+    full data logging, meaning that all data is consistent, not just metadata (which is common in file systems).
+
+    A DELETE marks a key as deleted.
+
+    For a given key only the last entry regarding the key, which is called current (all other entries are called
+    superseded), is relevant: If there is no entry or the last entry is a DELETE then the key does not exist.
+    Otherwise the last PUT defines the value of the key.
+
+    By superseding a PUT (with either another PUT or a DELETE) the log entry becomes obsolete. A segment containing
+    such obsolete entries is called sparse, while a segment containing no such entries is called compact.
+
+    Sparse segments can be compacted and thereby disk space freed. This destroys the transaction for which the
+    superseded entries where current.
 
     On disk layout:
+
     dir/README
     dir/config
     dir/data/<X // SEGMENTS_PER_DIR>/<X>
     dir/index.X
     dir/hints.X
     """
+
     class DoesNotExist(Error):
         """Repository {} does not exist."""
 
@@ -138,7 +179,7 @@ class Repository:
         else:
             return None
 
-    def get_transaction_id(self):
+    def check_transaction(self):
         index_transaction_id = self.get_index_transaction_id()
         segments_transaction_id = self.io.get_segments_transaction_id()
         if index_transaction_id is not None and segments_transaction_id is None:
@@ -151,6 +192,9 @@ class Repository:
             else:
                 replay_from = index_transaction_id
             self.replay_segments(replay_from, segments_transaction_id)
+
+    def get_transaction_id(self):
+        self.check_transaction()
         return self.get_index_transaction_id()
 
     def break_lock(self):
@@ -191,10 +235,27 @@ class Repository:
         self.write_index()
         self.rollback()
 
-    def open_index(self, transaction_id):
+    def open_index(self, transaction_id, auto_recover=True):
         if transaction_id is None:
             return NSIndex()
-        return NSIndex.read((os.path.join(self.path, 'index.%d') % transaction_id).encode('utf-8'))
+        index_path = os.path.join(self.path, 'index.%d' % transaction_id).encode('utf-8')
+        try:
+            return NSIndex.read(index_path)
+        except RuntimeError as error:
+            assert str(error) == 'hashindex_read failed'  # everything else means we're in *deep* trouble
+            logger.warning('Repository index missing or corrupted, trying to recover')
+            try:
+                os.unlink(index_path)
+            except OSError as e:
+                raise InternalOSError(e) from None
+            if not auto_recover:
+                raise
+            self.prepare_txn(self.get_transaction_id())
+            # don't leave an open transaction around
+            self.commit()
+            return self.open_index(self.get_transaction_id())
+        except OSError as e:
+            raise InternalOSError(e) from None
 
     def prepare_txn(self, transaction_id, do_cleanup=True):
         self._active_txn = True
@@ -207,24 +268,51 @@ class Repository:
             self._active_txn = False
             raise
         if not self.index or transaction_id is None:
-            self.index = self.open_index(transaction_id)
+            try:
+                self.index = self.open_index(transaction_id, False)
+            except RuntimeError:
+                self.check_transaction()
+                self.index = self.open_index(transaction_id, False)
         if transaction_id is None:
             self.segments = {}  # XXX bad name: usage_count_of_segment_x = self.segments[x]
-            self.compact = set()  # XXX bad name: segments_needing_compaction = self.compact
+            self.compact = FreeSpace()  # XXX bad name: freeable_space_of_segment_x = self.compact[x]
         else:
             if do_cleanup:
                 self.io.cleanup(transaction_id)
-            with open(os.path.join(self.path, 'hints.%d' % transaction_id), 'rb') as fd:
-                hints = msgpack.unpack(fd)
-            if hints[b'version'] != 1:
-                raise ValueError('Unknown hints file version: %d' % hints['version'])
-            self.segments = hints[b'segments']
-            self.compact = set(hints[b'compact'])
+            hints_path = os.path.join(self.path, 'hints.%d' % transaction_id)
+            index_path = os.path.join(self.path, 'index.%d' % transaction_id)
+            try:
+                with open(hints_path, 'rb') as fd:
+                    hints = msgpack.unpack(fd)
+            except (msgpack.UnpackException, msgpack.ExtraData, FileNotFoundError) as e:
+                logger.warning('Repository hints file missing or corrupted, trying to recover')
+                if not isinstance(e, FileNotFoundError):
+                    os.unlink(hints_path)
+                # index must exist at this point
+                os.unlink(index_path)
+                self.check_transaction()
+                self.prepare_txn(transaction_id)
+                return
+            except OSError as os_error:
+                raise InternalOSError(os_error) from None
+            if hints[b'version'] == 1:
+                logger.debug('Upgrading from v1 hints.%d', transaction_id)
+                self.segments = hints[b'segments']
+                self.compact = FreeSpace()
+                for segment in sorted(hints[b'compact']):
+                    logger.debug('Rebuilding sparse info for segment %d', segment)
+                    self._rebuild_sparse(segment)
+                logger.debug('Upgrade to v2 hints complete')
+            elif hints[b'version'] != 2:
+                raise ValueError('Unknown hints file version: %d' % hints[b'version'])
+            else:
+                self.segments = hints[b'segments']
+                self.compact = FreeSpace(hints[b'compact'])
 
     def write_index(self):
-        hints = {b'version': 1,
+        hints = {b'version': 2,
                  b'segments': self.segments,
-                 b'compact': list(self.compact)}
+                 b'compact': self.compact}
         transaction_id = self.io.get_segments_transaction_id()
         hints_file = os.path.join(self.path, 'hints.%d' % transaction_id)
         with open(hints_file + '.tmp', 'wb') as fd:
@@ -238,10 +326,10 @@ class Repository:
         if self.append_only:
             with open(os.path.join(self.path, 'transactions'), 'a') as log:
                 print('transaction %d, UTC time %s' % (transaction_id, datetime.utcnow().isoformat()), file=log)
-        # Remove old indices
+        # Remove old auxiliary files
         current = '.%d' % transaction_id
         for name in os.listdir(self.path):
-            if not name.startswith('index.') and not name.startswith('hints.'):
+            if not name.startswith(('index.', 'hints.')):
                 continue
             if name.endswith(current):
                 continue
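
FreeSpace is nothing more than `partial(defaultdict, int)`: a map from segment number to freeable bytes where every segment starts at zero. That is also why a v1 hints file, which only stored a set of sparse segment numbers, has to be upgraded segment by segment via _rebuild_sparse(). A sketch:

    from collections import defaultdict
    from functools import partial

    FreeSpace = partial(defaultdict, int)

    compact = FreeSpace()
    compact[5] += 41 + 9      # a superseded 9-byte PUT in segment 5
    compact[7] += 41          # a DELETE entry in segment 7
    print(dict(compact))      # {5: 50, 7: 41}
    # a v1 hints file only knew {5, 7}; the byte counts must be rebuilt
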
@@ -267,32 +355,40 @@ class Repository:
             for segment in unused:
                 assert self.segments.pop(segment) == 0
                 self.io.delete_segment(segment)
+                del self.compact[segment]
             unused = []
 
-        for segment in sorted(self.compact):
-            if self.io.segment_exists(segment):
-                for tag, key, offset, data in self.io.iter_objects(segment, include_data=True):
-                    if tag == TAG_PUT and self.index.get(key, (-1, -1)) == (segment, offset):
+        for segment, freeable_space in sorted(self.compact.items()):
+            if not self.io.segment_exists(segment):
+                del self.compact[segment]
+                continue
+            segment_size = self.io.segment_size(segment)
+            if segment_size > 0.2 * self.max_segment_size and freeable_space < 0.15 * segment_size:
+                logger.debug('not compacting segment %d for later (only %d bytes are sparse)',
+                             segment, freeable_space)
+                continue
+            segments.setdefault(segment, 0)
+            for tag, key, offset, data in self.io.iter_objects(segment, include_data=True):
+                if tag == TAG_PUT and self.index.get(key, (-1, -1)) == (segment, offset):
+                    try:
+                        new_segment, offset = self.io.write_put(key, data, raise_full=save_space)
+                    except LoggedIO.SegmentFull:
+                        complete_xfer()
+                        new_segment, offset = self.io.write_put(key, data)
+                    self.index[key] = new_segment, offset
+                    segments.setdefault(new_segment, 0)
+                    segments[new_segment] += 1
+                    segments[segment] -= 1
+                elif tag == TAG_DELETE:
+                    if index_transaction_id is None or segment > index_transaction_id:
                         try:
-                            new_segment, offset = self.io.write_put(key, data, raise_full=save_space)
+                            self.io.write_delete(key, raise_full=save_space)
                         except LoggedIO.SegmentFull:
                             complete_xfer()
-                            new_segment, offset = self.io.write_put(key, data)
-                        self.index[key] = new_segment, offset
-                        segments.setdefault(new_segment, 0)
-                        segments[new_segment] += 1
-                        segments[segment] -= 1
-                    elif tag == TAG_DELETE:
-                        if index_transaction_id is None or segment > index_transaction_id:
-                            try:
-                                self.io.write_delete(key, raise_full=save_space)
-                            except LoggedIO.SegmentFull:
-                                complete_xfer()
-                                self.io.write_delete(key)
-                assert segments[segment] == 0
-                unused.append(segment)
+                            self.io.write_delete(key)
+            assert segments[segment] == 0
+            unused.append(segment)
         complete_xfer()
-        self.compact = set()
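
The new guard trades a little disk for a lot of I/O: a segment that is big (over 20% of the maximum segment size) yet mostly live (under 15% freeable) is skipped and retried on a later compaction, when more of it has been superseded. Worked through with the thresholds from the hunk (segment sizes illustrative):

    max_segment_size = 5 * 1024 * 1024              # illustrative repo setting

    def worth_compacting(segment_size, freeable_space):
        # mirror of the guard above, inverted into a predicate
        big = segment_size > 0.2 * max_segment_size
        mostly_live = freeable_space < 0.15 * segment_size
        return not (big and mostly_live)

    print(worth_compacting(4 << 20, 100 << 10))     # False: 100 KiB of 4 MiB sparse
    print(worth_compacting(4 << 20, 1 << 20))       # True: 1 MiB is worth reclaiming
    print(worth_compacting(100 << 10, 10 << 10))    # True: small segments always go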
 
     def replay_segments(self, index_transaction_id, segments_transaction_id):
         self.prepare_txn(index_transaction_id, do_cleanup=False)
@@ -315,11 +411,12 @@ class Repository:
     def _update_index(self, segment, objects, report=None):
         """some code shared between replay_segments and check"""
         self.segments[segment] = 0
-        for tag, key, offset in objects:
+        for tag, key, offset, size in objects:
             if tag == TAG_PUT:
                 try:
+                    # If this PUT supersedes an older PUT, mark the old segment for compaction and count the free space
                     s, _ = self.index[key]
-                    self.compact.add(s)
+                    self.compact[s] += size
                     self.segments[s] -= 1
                 except KeyError:
                     pass
@@ -327,12 +424,17 @@ class Repository:
                 self.segments[segment] += 1
             elif tag == TAG_DELETE:
                 try:
-                    s, _ = self.index.pop(key)
-                    self.segments[s] -= 1
-                    self.compact.add(s)
+                    # if the deleted PUT is not in the index, there is nothing to clean up
+                    s, offset = self.index.pop(key)
                 except KeyError:
                     pass
-                self.compact.add(segment)
+                else:
+                    if self.io.segment_exists(s):
+                        # the old index is not necessarily valid for this transaction (e.g. compaction); if the segment
+                        # is already gone, then it was already compacted.
+                        self.segments[s] -= 1
+                        size = self.io.read(s, offset, key, read_data=False)
+                        self.compact[s] += size
             elif tag == TAG_COMMIT:
                 continue
             else:
@@ -342,7 +444,22 @@ class Repository:
                 else:
                     report(msg)
         if self.segments[segment] == 0:
-            self.compact.add(segment)
+            self.compact[segment] += self.io.segment_size(segment)
+
+    def _rebuild_sparse(self, segment):
+        """Rebuild sparse bytes count for a single segment relative to the current index."""
+        self.compact[segment] = 0
+        if self.segments[segment] == 0:
+            self.compact[segment] += self.io.segment_size(segment)
+            return
+        for tag, key, offset, size in self.io.iter_objects(segment, read_data=False):
+            if tag == TAG_PUT:
+                if self.index.get(key, (-1, -1)) != (segment, offset):
+                    # This PUT is superseded later
+                    self.compact[segment] += size
+            elif tag == TAG_DELETE:
+                # The outcome of the DELETE has been recorded in the PUT branch already
+                self.compact[segment] += size
 
     def check(self, repair=False, save_space=False):
         """Check repository consistency
@@ -457,14 +574,16 @@ class Repository:
         if not self._active_txn:
             self.prepare_txn(self.get_transaction_id())
         try:
-            segment, _ = self.index[id]
-            self.segments[segment] -= 1
-            self.compact.add(segment)
-            segment = self.io.write_delete(id)
-            self.segments.setdefault(segment, 0)
-            self.compact.add(segment)
+            segment, offset = self.index[id]
         except KeyError:
             pass
+        else:
+            self.segments[segment] -= 1
+            size = self.io.read(segment, offset, id, read_data=False)
+            self.compact[segment] += size
+            segment, size = self.io.write_delete(id)
+            self.compact[segment] += size
+            self.segments.setdefault(segment, 0)
         segment, offset = self.io.write_put(id, data)
         self.segments.setdefault(segment, 0)
         self.segments[segment] += 1
@@ -478,9 +597,10 @@ class Repository:
         except KeyError:
             raise self.ObjectNotFound(id, self.path) from None
         self.segments[segment] -= 1
-        self.compact.add(segment)
-        segment = self.io.write_delete(id)
-        self.compact.add(segment)
+        size = self.io.read(segment, offset, id, read_data=False)
+        self.compact[segment] += size
+        segment, size = self.io.write_delete(id)
+        self.compact[segment] += size
         self.segments.setdefault(segment, 0)
 
     def preload(self, ids):
@@ -578,7 +698,7 @@ class LoggedIO:
         seen_commit = False
         while True:
             try:
-                tag, key, offset = next(iterator)
+                tag, key, offset, _ = next(iterator)
             except IntegrityError:
                 return False
             except StopIteration:
@@ -635,7 +755,18 @@ class LoggedIO:
     def segment_exists(self, segment):
         return os.path.exists(self.segment_filename(segment))
 
-    def iter_objects(self, segment, include_data=False):
+    def segment_size(self, segment):
+        return os.path.getsize(self.segment_filename(segment))
+
+    def iter_objects(self, segment, include_data=False, read_data=True):
+        """
+        Return object iterator for *segment*.
+
+        If read_data is False then include_data must be False as well.
+        Integrity checks are skipped: all data obtained from the iterator must be considered informational.
+
+        The iterator returns four-tuples of (tag, key, offset, data|size).
+        """
         fd = self.get_fd(segment)
         fd.seek(0)
         if fd.read(MAGIC_LEN) != MAGIC:
@@ -644,11 +775,12 @@ class LoggedIO:
         header = fd.read(self.header_fmt.size)
         while header:
             size, tag, key, data = self._read(fd, self.header_fmt, header, segment, offset,
-                                              (TAG_PUT, TAG_DELETE, TAG_COMMIT))
+                                              (TAG_PUT, TAG_DELETE, TAG_COMMIT),
+                                              read_data=read_data)
             if include_data:
                 yield tag, key, offset, data
             else:
-                yield tag, key, offset
+                yield tag, key, offset, size
             offset += size
             header = fd.read(self.header_fmt.size)
 
@@ -672,19 +804,25 @@ class LoggedIO:
                 fd.write(data[:size])
                 data = data[size:]
 
-    def read(self, segment, offset, id):
+    def read(self, segment, offset, id, read_data=True):
+        """
+        Read entry from *segment* at *offset* with *id*.
+
+        If read_data is False the size of the entry is returned instead and integrity checks are skipped.
+        The return value should thus be considered informational.
+        """
         if segment == self.segment and self._write_fd:
             self._write_fd.sync()
         fd = self.get_fd(segment)
         fd.seek(offset)
         header = fd.read(self.put_header_fmt.size)
-        size, tag, key, data = self._read(fd, self.put_header_fmt, header, segment, offset, (TAG_PUT, ))
+        size, tag, key, data = self._read(fd, self.put_header_fmt, header, segment, offset, (TAG_PUT, ), read_data)
         if id != key:
             raise IntegrityError('Invalid segment entry header, is not for wanted id [segment {}, offset {}]'.format(
                 segment, offset))
-        return data
+        return data if read_data else size
 
-    def _read(self, fd, fmt, header, segment, offset, acceptable_tags):
+    def _read(self, fd, fmt, header, segment, offset, acceptable_tags, read_data=True):
         # some code shared by read() and iter_objects()
         try:
             hdr_tuple = fmt.unpack(header)
@@ -702,18 +840,32 @@ class LoggedIO:
             raise IntegrityError('Invalid segment entry size [segment {}, offset {}]'.format(
                 segment, offset))
         length = size - fmt.size
-        data = fd.read(length)
-        if len(data) != length:
-            raise IntegrityError('Segment entry data short read [segment {}, offset {}]: expected {}, got {} bytes'.format(
-                segment, offset, length, len(data)))
-        if crc32(data, crc32(memoryview(header)[4:])) & 0xffffffff != crc:
-            raise IntegrityError('Segment entry checksum mismatch [segment {}, offset {}]'.format(
-                segment, offset))
+        if read_data:
+            data = fd.read(length)
+            if len(data) != length:
+                raise IntegrityError('Segment entry data short read [segment {}, offset {}]: expected {}, got {} bytes'.format(
+                    segment, offset, length, len(data)))
+            if crc32(data, crc32(memoryview(header)[4:])) & 0xffffffff != crc:
+                raise IntegrityError('Segment entry checksum mismatch [segment {}, offset {}]'.format(
+                    segment, offset))
+            if key is None and tag in (TAG_PUT, TAG_DELETE):
+                key, data = data[:32], data[32:]
+        else:
+            if key is None and tag in (TAG_PUT, TAG_DELETE):
+                key = fd.read(32)
+                length -= 32
+                if len(key) != 32:
+                    raise IntegrityError('Segment entry key short read [segment {}, offset {}]: expected {}, got {} bytes'.format(
+                        segment, offset, 32, len(key)))
+            oldpos = fd.tell()
+            seeked = fd.seek(length, os.SEEK_CUR) - oldpos
+            data = None
+            if seeked != length:
+                raise IntegrityError('Segment entry data short seek [segment {}, offset {}]: expected {}, got {} bytes'.format(
+                        segment, offset, length, seeked))
         if tag not in acceptable_tags:
             raise IntegrityError('Invalid segment entry header, did not get acceptable tag [segment {}, offset {}]'.format(
                 segment, offset))
-        if key is None and tag in (TAG_PUT, TAG_DELETE):
-            key, data = data[:32], data[32:]
         return size, tag, key, data
 
     def write_put(self, id, data, raise_full=False):
@@ -732,11 +884,11 @@ class LoggedIO:
         crc = self.crc_fmt.pack(crc32(id, crc32(header)) & 0xffffffff)
         fd.write(b''.join((crc, header, id)))
         self.offset += self.put_header_fmt.size
-        return self.segment
+        return self.segment, self.put_header_fmt.size
 
     def write_commit(self):
-        fd = self.get_write_fd(no_new=True)
-        fd.sync()
+        self.close_segment()
+        fd = self.get_write_fd()
         header = self.header_no_crc_fmt.pack(self.header_fmt.size, TAG_COMMIT)
         crc = self.crc_fmt.pack(crc32(header) & 0xffffffff)
         fd.write(b''.join((crc, header)))
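
write_commit() now closes the active segment first, so the COMMIT tag always lands in a fresh segment of its own. Each put-then-commit cycle therefore consumes (at least) two segment numbers, which is what moves the append-only test expectations below from 1/1/2 segments to 2/4/6 and the check tests from index.1 to index.3. Roughly:

    segments_per_commit = 2     # one (or more) data segments, then a commit segment
    print([n * segments_per_commit for n in (1, 2, 3)])   # [2, 4, 6]
    # the transaction id is the number of the commit segment: 1, 3, 5, ...
    # hence index.3 after the second commit in the check tests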

+ 27 - 3
src/borg/testsuite/archiver.py

@@ -5,6 +5,7 @@ import inspect
 from io import StringIO
 import logging
 import random
+import socket
 import stat
 import subprocess
 import sys
@@ -357,6 +358,16 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         # the interesting parts of info_output2 and info_output should be same
         self.assert_equal(filter(info_output), filter(info_output2))
 
+    def test_unix_socket(self):
+        self.cmd('init', self.repository_location)
+        sock = socket.socket(socket.AF_UNIX, socket.SOCK_STREAM)
+        sock.bind(os.path.join(self.input_path, 'unix-socket'))
+        self.cmd('create', self.repository_location + '::test', 'input')
+        sock.close()
+        with changedir('output'):
+            self.cmd('extract', self.repository_location + '::test')
+            assert not os.path.exists('input/unix-socket')
+
     def test_symlink_extract(self):
         self.create_test_files()
         self.cmd('init', self.repository_location)
@@ -916,7 +927,7 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.assert_in('borgbackup version', output)  # implied output even without --info given
         self.assert_not_in('Starting repository check', output)  # --info not given for root logger
 
-        name = sorted(os.listdir(os.path.join(self.tmpdir, 'repository', 'data', '0')), reverse=True)[0]
+        name = sorted(os.listdir(os.path.join(self.tmpdir, 'repository', 'data', '0')), reverse=True)[1]
         with open(os.path.join(self.tmpdir, 'repository', 'data', '0', name), 'r+b') as fd:
             fd.seek(100)
             fd.write(b'XXXX')
@@ -986,14 +997,21 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.create_regular_file('file1', size=1024 * 80)
         os.utime('input/file1', (now - 5, now - 5))  # 5 seconds ago
         self.create_regular_file('file2', size=1024 * 80)
+        if has_lchflags:
+            self.create_regular_file('file3', size=1024 * 80)
+            platform.set_flags(os.path.join(self.input_path, 'file3'), stat.UF_NODUMP)
         self.cmd('init', self.repository_location)
         output = self.cmd('create', '--list', self.repository_location + '::test', 'input')
         self.assert_in("A input/file1", output)
         self.assert_in("A input/file2", output)
+        if has_lchflags:
+            self.assert_in("x input/file3", output)
         # should find second file as excluded
         output = self.cmd('create', '--list', self.repository_location + '::test1', 'input', '--exclude', '*/file2')
         self.assert_in("U input/file1", output)
         self.assert_in("x input/file2", output)
+        if has_lchflags:
+            self.assert_in("x input/file3", output)
 
     def test_create_topical(self):
         now = time.time()
@@ -1244,6 +1262,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         try:
             self.cmd('mount', self.repository_location, mountpoint, fork=True)
             self.wait_for_mount(mountpoint)
+            if has_lchflags:
+                # remove the file we did not backup, so input and output become equal
+                os.remove(os.path.join('input', 'flagfile'))
             self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive', 'input'))
             self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'archive2', 'input'))
         finally:
@@ -1265,6 +1286,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         try:
             self.cmd('mount', self.repository_location + '::archive', mountpoint, fork=True)
             self.wait_for_mount(mountpoint)
+            if has_lchflags:
+                # remove the file we did not backup, so input and output become equal
+                os.remove(os.path.join('input', 'flagfile'))
             self.assert_dirs_equal(self.input_path, os.path.join(mountpoint, 'input'))
         finally:
             if sys.platform.startswith('linux'):
@@ -1491,9 +1515,9 @@ class ArchiverTestCase(ArchiverTestCaseBase):
         self.cmd('create', self.repository_location + '::test', 'input')
         archive_before = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}')
         with patch.object(Cache, 'add_chunk', self._test_recreate_chunker_interrupt_patch()):
-            self.cmd('recreate', '-pv', '--chunker-params', '10,12,11,4095', self.repository_location)
+            self.cmd('recreate', '-pv', '--chunker-params', '10,13,11,4095', self.repository_location)
         assert 'test.recreate' in self.cmd('list', self.repository_location)
-        output = self.cmd('recreate', '-svp', '--debug', '--chunker-params', '10,12,11,4095', self.repository_location)
+        output = self.cmd('recreate', '-svp', '--debug', '--chunker-params', '10,13,11,4095', self.repository_location)
         assert 'Found test.recreate, will resume' in output
         assert 'Copied 1 chunks from a partially processed item' in output
         archive_after = self.cmd('list', self.repository_location + '::test', '--format', '{sha512}')

+ 125 - 45
src/borg/testsuite/repository.py

@@ -7,10 +7,10 @@ import tempfile
 from unittest.mock import patch
 
 from ..hashindex import NSIndex
-from ..helpers import Location, IntegrityError
+from ..helpers import Location, IntegrityError, InternalOSError
 from ..locking import UpgradableLock, LockFailed
-from ..remote import RemoteRepository, InvalidRPCMethod, ConnectionClosedWithHint
-from ..repository import Repository, LoggedIO
+from ..remote import RemoteRepository, InvalidRPCMethod, ConnectionClosedWithHint, handle_remote_line
+from ..repository import Repository, LoggedIO, MAGIC
 from . import BaseTestCase
 
 
@@ -125,6 +125,46 @@ class RepositoryTestCase(RepositoryTestCaseBase):
         self.assert_equal(len(self.repository.list(limit=50)), 50)
 
 
+class LocalRepositoryTestCase(RepositoryTestCaseBase):
+    # test case that doesn't work with remote repositories
+
+    def _assert_sparse(self):
+        # The superseded 123456... PUT
+        assert self.repository.compact[0] == 41 + 9
+        # The DELETE issued by the superseding PUT (or issued directly)
+        assert self.repository.compact[2] == 41
+        self.repository._rebuild_sparse(0)
+        assert self.repository.compact[0] == 41 + 9
+
+    def test_sparse1(self):
+        self.repository.put(b'00000000000000000000000000000000', b'foo')
+        self.repository.put(b'00000000000000000000000000000001', b'123456789')
+        self.repository.commit()
+        self.repository.put(b'00000000000000000000000000000001', b'bar')
+        self._assert_sparse()
+
+    def test_sparse2(self):
+        self.repository.put(b'00000000000000000000000000000000', b'foo')
+        self.repository.put(b'00000000000000000000000000000001', b'123456789')
+        self.repository.commit()
+        self.repository.delete(b'00000000000000000000000000000001')
+        self._assert_sparse()
+
+    def test_sparse_delete(self):
+        self.repository.put(b'00000000000000000000000000000000', b'1245')
+        self.repository.delete(b'00000000000000000000000000000000')
+        self.repository.io._write_fd.sync()
+
+        # The on-line tracking works on a per-object basis...
+        assert self.repository.compact[0] == 41 + 41 + 4
+        self.repository._rebuild_sparse(0)
+        # ...while _rebuild_sparse can mark whole segments as completely sparse (which then includes the segment magic)
+        assert self.repository.compact[0] == 41 + 41 + 4 + len(MAGIC)
+
+        self.repository.commit()
+        assert 0 not in [segment for segment, _ in self.repository.io.segment_iterator()]
+
+
 class RepositoryCommitTestCase(RepositoryTestCaseBase):
 
     def add_keys(self):
@@ -216,18 +256,69 @@ class RepositoryAppendOnlyTestCase(RepositoryTestCaseBase):
         self.repository.commit()
 
         self.repository.append_only = False
-        assert segments_in_repository() == 1
+        assert segments_in_repository() == 2
         self.repository.put(b'00000000000000000000000000000000', b'foo')
         self.repository.commit()
         # normal: compact squashes the data together, only one segment
-        assert segments_in_repository() == 1
+        assert segments_in_repository() == 4
 
         self.repository.append_only = True
-        assert segments_in_repository() == 1
+        assert segments_in_repository() == 4
         self.repository.put(b'00000000000000000000000000000000', b'foo')
         self.repository.commit()
         # append only: does not compact, only new segments written
-        assert segments_in_repository() == 2
+        assert segments_in_repository() == 6
+
+
+class RepositoryAuxiliaryCorruptionTestCase(RepositoryTestCaseBase):
+    def setUp(self):
+        super().setUp()
+        self.repository.put(b'00000000000000000000000000000000', b'foo')
+        self.repository.commit()
+        self.repository.close()
+
+    def do_commit(self):
+        with self.repository:
+            self.repository.put(b'00000000000000000000000000000000', b'fox')
+            self.repository.commit()
+
+    def test_corrupted_hints(self):
+        with open(os.path.join(self.repository.path, 'hints.1'), 'ab') as fd:
+            fd.write(b'123456789')
+        self.do_commit()
+
+    def test_deleted_hints(self):
+        os.unlink(os.path.join(self.repository.path, 'hints.1'))
+        self.do_commit()
+
+    def test_deleted_index(self):
+        os.unlink(os.path.join(self.repository.path, 'index.1'))
+        self.do_commit()
+
+    def test_unreadable_hints(self):
+        hints = os.path.join(self.repository.path, 'hints.1')
+        os.unlink(hints)
+        os.mkdir(hints)
+        with self.assert_raises(InternalOSError):
+            self.do_commit()
+
+    def test_index(self):
+        with open(os.path.join(self.repository.path, 'index.1'), 'wb') as fd:
+            fd.write(b'123456789')
+        self.do_commit()
+
+    def test_index_outside_transaction(self):
+        with open(os.path.join(self.repository.path, 'index.1'), 'wb') as fd:
+            fd.write(b'123456789')
+        with self.repository:
+            assert len(self.repository) == 1
+
+    def test_unreadable_index(self):
+        index = os.path.join(self.repository.path, 'index.1')
+        os.unlink(index)
+        os.mkdir(index)
+        with self.assert_raises(InternalOSError):
+            self.do_commit()
 
 
 class RepositoryCheckTestCase(RepositoryTestCaseBase):
@@ -296,20 +387,20 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
         self.add_objects([[1, 2, 3], [4, 5, 6]])
         self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
         self.check(status=True)
-        self.delete_segment(1)
+        self.delete_segment(2)
         self.repository.rollback()
         self.check(repair=True, status=True)
         self.assert_equal(set([1, 2, 3]), self.list_objects())
 
     def test_repair_missing_commit_segment(self):
         self.add_objects([[1, 2, 3], [4, 5, 6]])
-        self.delete_segment(1)
+        self.delete_segment(3)
         self.assert_raises(Repository.ObjectNotFound, lambda: self.get_objects(4))
         self.assert_equal(set([1, 2, 3]), self.list_objects())
 
     def test_repair_corrupted_commit_segment(self):
         self.add_objects([[1, 2, 3], [4, 5, 6]])
-        with open(os.path.join(self.tmppath, 'repository', 'data', '0', '1'), 'r+b') as fd:
+        with open(os.path.join(self.tmppath, 'repository', 'data', '0', '3'), 'r+b') as fd:
             fd.seek(-1, os.SEEK_END)
             fd.write(b'X')
         self.assert_raises(Repository.ObjectNotFound, lambda: self.get_objects(4))
@@ -319,15 +410,15 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
 
     def test_repair_no_commits(self):
         self.add_objects([[1, 2, 3]])
-        with open(os.path.join(self.tmppath, 'repository', 'data', '0', '0'), 'r+b') as fd:
+        with open(os.path.join(self.tmppath, 'repository', 'data', '0', '1'), 'r+b') as fd:
             fd.seek(-1, os.SEEK_END)
             fd.write(b'X')
         self.assert_raises(Repository.CheckNeeded, lambda: self.get_objects(4))
         self.check(status=False)
         self.check(status=False)
-        self.assert_equal(self.list_indices(), ['index.0'])
-        self.check(repair=True, status=True)
         self.assert_equal(self.list_indices(), ['index.1'])
+        self.check(repair=True, status=True)
+        self.assert_equal(self.list_indices(), ['index.3'])
         self.check(status=True)
         self.get_objects(3)
         self.assert_equal(set([1, 2, 3]), self.list_objects())
@@ -341,10 +432,10 @@ class RepositoryCheckTestCase(RepositoryTestCaseBase):
 
     def test_repair_index_too_new(self):
         self.add_objects([[1, 2, 3], [4, 5, 6]])
-        self.assert_equal(self.list_indices(), ['index.1'])
+        self.assert_equal(self.list_indices(), ['index.3'])
         self.rename_index('index.100')
         self.check(status=True)
-        self.assert_equal(self.list_indices(), ['index.1'])
+        self.assert_equal(self.list_indices(), ['index.3'])
         self.get_objects(4)
         self.assert_equal(set([1, 2, 3, 4, 5, 6]), self.list_objects())
 
@@ -400,63 +491,51 @@ class RemoteRepositoryCheckTestCase(RepositoryCheckTestCase):
         pass
 
 
-class RemoteRepositoryLoggingStub(RemoteRepository):
-    """ run a remote command that just prints a logging-formatted message to
-    stderr, and stub out enough of RemoteRepository to avoid the resulting
-    exceptions """
-    def __init__(self, *args, **kw):
-        self.msg = kw.pop('msg')
-        super().__init__(*args, **kw)
-
-    def borg_cmd(self, cmd, testing):
-        return [sys.executable, '-c', 'import sys; print("{}", file=sys.stderr)'.format(self.msg), ]
-
-    def __del__(self):
-        # clean up from exception without triggering assert
-        if self.p:
-            self.close()
-
-
-class RemoteRepositoryLoggerTestCase(RepositoryTestCaseBase):
+class RemoteLoggerTestCase(BaseTestCase):
     def setUp(self):
-        self.location = Location('__testsuite__:/doesntexist/repo')
         self.stream = io.StringIO()
         self.handler = logging.StreamHandler(self.stream)
         logging.getLogger().handlers[:] = [self.handler]
         logging.getLogger('borg.repository').handlers[:] = []
         logging.getLogger('borg.repository.foo').handlers[:] = []
+        # capture stderr
+        sys.stderr.flush()
+        self.old_stderr = sys.stderr
+        self.stderr = sys.stderr = io.StringIO()
 
     def tearDown(self):
-        pass
+        sys.stderr = self.old_stderr
 
-    def create_repository(self, msg):
-        try:
-            RemoteRepositoryLoggingStub(self.location, msg=msg)
-        except ConnectionClosedWithHint:
-            # stub is dumb, so this exception expected
-            pass
+    def test_stderr_messages(self):
+        handle_remote_line("unstructured stderr message")
+        self.assert_equal(self.stream.getvalue(), '')
+        # stderr messages don't get an implicit newline
+        self.assert_equal(self.stderr.getvalue(), 'Remote: unstructured stderr message')
 
     def test_old_format_messages(self):
         self.handler.setLevel(logging.DEBUG)
         logging.getLogger().setLevel(logging.DEBUG)
 
-        self.create_repository("$LOG INFO Remote: old format message")
+        handle_remote_line("$LOG INFO Remote: old format message")
         self.assert_equal(self.stream.getvalue(), 'Remote: old format message\n')
+        self.assert_equal(self.stderr.getvalue(), '')
 
     def test_new_format_messages(self):
         self.handler.setLevel(logging.DEBUG)
         logging.getLogger().setLevel(logging.DEBUG)
 
-        self.create_repository("$LOG INFO borg.repository Remote: new format message")
+        handle_remote_line("$LOG INFO borg.repository Remote: new format message")
         self.assert_equal(self.stream.getvalue(), 'Remote: new format message\n')
+        self.assert_equal(self.stderr.getvalue(), '')
 
     def test_remote_messages_screened(self):
         # default borg config for root logger
         self.handler.setLevel(logging.WARNING)
         logging.getLogger().setLevel(logging.WARNING)
 
-        self.create_repository("$LOG INFO borg.repository Remote: new format info message")
+        handle_remote_line("$LOG INFO borg.repository Remote: new format info message")
         self.assert_equal(self.stream.getvalue(), '')
+        self.assert_equal(self.stderr.getvalue(), '')
 
     def test_info_to_correct_local_child(self):
         logging.getLogger('borg.repository').setLevel(logging.INFO)
@@ -474,7 +553,8 @@ class RemoteRepositoryLoggerTestCase(RepositoryTestCaseBase):
         foo_handler.setLevel(logging.INFO)
         logging.getLogger('borg.repository.foo').handlers[:] = [foo_handler]
 
-        self.create_repository("$LOG INFO borg.repository Remote: new format child message")
+        handle_remote_line("$LOG INFO borg.repository Remote: new format child message")
         self.assert_equal(foo_stream.getvalue(), '')
         self.assert_equal(child_stream.getvalue(), 'Remote: new format child message\n')
         self.assert_equal(self.stream.getvalue(), '')
+        self.assert_equal(self.stderr.getvalue(), '')

+ 1 - 1
tox.ini

@@ -2,7 +2,7 @@
 # fakeroot -u tox --recreate
 
 [tox]
-envlist = py{34,35},flake8
+envlist = py{34,35,36},flake8
 
 [testenv]
 # Change dir to avoid import problem for cython code. The directory does