Browse Source

remote: deal with partial lines, fixes #2637

due to block buffering (in borg, pipes, sshd, ssh) partial lines might
be received. for plain text, this causes cosmetic issues.

the code now makes sure handle_remote_line() only gets called with a
complete line (i.e. one terminated by any universal newline char; a
pure \r is accepted because remote progress displays seem to need it).

it also fixes an as-yet undiscovered decoding issue with partial utf-8
sequences, which might occur for the same reason.

(cherry picked from commit f2b48cd5c8947806306dbb8a7ee7c98d78a4c729)
Thomas Waldmann 7 years ago
parent
commit
a5bfeddffe
1 changed files with 24 additions and 8 deletions
  1. 24 8
      borg/remote.py

+ 24 - 8
borg/remote.py

@@ -204,6 +204,7 @@ class RemoteRepository:
         self.msgid = 0
         self.msgid = 0
         self.to_send = b''
         self.to_send = b''
         self.cache = {}
         self.cache = {}
+        self.stderr_received = b''  # incomplete stderr line bytes received (no \n yet)
         self.ignore_responses = set()
         self.ignore_responses = set()
         self.responses = {}
         self.responses = {}
         self.unpacker = get_limited_unpacker('client')
         self.unpacker = get_limited_unpacker('client')
@@ -392,14 +393,16 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
                     data = os.read(fd, 32768)
                     data = os.read(fd, 32768)
                     if not data:
                     if not data:
                         raise ConnectionClosed()
                         raise ConnectionClosed()
-                    data = data.decode('utf-8')
-                    for line in data.splitlines(keepends=True):
-                        if line.startswith('$LOG '):
-                            _, level, msg = line.split(' ', 2)
-                            level = getattr(logging, level, logging.CRITICAL)  # str -> int
-                            logging.log(level, msg.rstrip())
-                        else:
-                            sys.stderr.write("Remote: " + line)
+                    # deal with incomplete lines (may appear due to block buffering)
+                    if self.stderr_received:
+                        data = self.stderr_received + data
+                        self.stderr_received = b''
+                    lines = data.splitlines(keepends=True)
+                    if lines and not lines[-1].endswith((b'\r', b'\n')):
+                        self.stderr_received = lines.pop()
+                    # now we have complete lines in <lines> and any partial line in self.stderr_received.
+                    for line in lines:
+                        handle_remote_line(line.decode('utf-8'))  # decode late, avoid partial utf-8 sequences
             if w:
             if w:
                 while not self.to_send and (calls or self.preload_ids) and len(waiting_for) < MAX_INFLIGHT:
                 while not self.to_send and (calls or self.preload_ids) and len(waiting_for) < MAX_INFLIGHT:
                     if calls:
                     if calls:
@@ -482,6 +485,19 @@ This problem will go away as soon as the server has been upgraded to 1.0.7+.
         self.preload_ids += ids
         self.preload_ids += ids
 
 
 
 
+def handle_remote_line(line):
+    """Handle a remote log line.
+
+    *line* is an already-decoded str that must end with a carriage return
+    or a newline (i.e. it must be a complete line).
+
+    Lines starting with '$LOG ' are passed to the logging module at the
+    level named in the line; every other line is written to sys.stderr
+    with a 'Remote: ' prefix.
+    """
+    # only complete lines are accepted here; this is checked, not repaired
+    assert line.endswith(('\r', '\n'))
+    if line.startswith('$LOG '):
+        # split into the '$LOG' marker, the level name and the rest of the line
+        # NOTE(review): a '$LOG ' line with fewer than three space-separated
+        # fields makes this unpacking raise ValueError
+        _, level, msg = line.split(' ', 2)
+        # unknown level names fall back to logging.CRITICAL
+        level = getattr(logging, level, logging.CRITICAL)  # str -> int
+        # server format: '$LOG <level> Remote: <msg>'
+        # rstrip() drops trailing whitespace, including the line terminator
+        logging.log(level, msg.rstrip())
+    else:
+        # We circumvent logging to preserve carriage returns (\r) which are generated by remote progress displays.
+        sys.stderr.write('Remote: ' + line)
+
+
 class RepositoryNoCache:
 class RepositoryNoCache:
     """A not caching Repository wrapper, passes through to repository.
     """A not caching Repository wrapper, passes through to repository.