Răsfoiți Sursa

When using the Healthchecks monitoring hook, include borgmatic logs in the payloads for completion and failure pings (#241).

Dan Helfman 5 ani în urmă
părinte
comite
50f62d73b7

+ 7 - 0
NEWS

@@ -1,3 +1,10 @@
+1.4.11
+ * #241: When using the Healthchecks monitoring hook, include borgmatic logs in the payloads for
+   completion and failure pings.
+ * With --verbosity level 1 or 2, show error logs both inline when they occur and in the summary
+   logs at the bottom. With lower verbosity levels, suppress the summary and show error logs when
+   they occur.
+
 1.4.10
  * #246: Fix for "borgmatic restore" showing success and incorrectly extracting archive files, even
    when no databases are configured to restore. As this can overwrite files from the archive and

+ 26 - 22
borgmatic/commands/borgmatic.py

@@ -365,33 +365,36 @@ def load_configurations(config_filenames):
     return (configs, logs)
 
 
+def log_record(**kwargs):
+    '''
+    Create a log record based on the given makeLogRecord() arguments, one of which must be
+    named "levelno". Log the record and return it.
+    '''
+    record = logging.makeLogRecord(kwargs)
+    logger.handle(record)
+
+    return record
+
+
 def make_error_log_records(message, error=None):
     '''
     Given error message text and an optional exception object, yield a series of logging.LogRecord
-    instances with error summary information.
+    instances with error summary information. As a side effect, log each record.
     '''
     if not error:
-        yield logging.makeLogRecord(
-            dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
-        )
+        yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
         return
 
     try:
         raise error
     except CalledProcessError as error:
-        yield logging.makeLogRecord(
-            dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
-        )
+        yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
         if error.output:
-            yield logging.makeLogRecord(
-                dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error.output)
-            )
-        yield logging.makeLogRecord(dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error))
+            yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error.output)
+        yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error)
     except (ValueError, OSError) as error:
-        yield logging.makeLogRecord(
-            dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
-        )
-        yield logging.makeLogRecord(dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error))
+        yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
+        yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error)
     except:  # noqa: E722
         # Raising above only as a means of determining the error type. Swallow the exception here
         # because we don't want the exception to propagate out of this function.
@@ -535,13 +538,14 @@ def main():  # pragma: no cover
 
     summary_logs = list(collect_configuration_run_summary_logs(configs, arguments))
 
-    logger.info('')
-    logger.info('summary:')
-    [
-        logger.handle(log)
-        for log in parse_logs + summary_logs
-        if log.levelno >= logger.getEffectiveLevel()
-    ]
+    if logger.getEffectiveLevel() < logging.WARNING:
+        logger.info('')
+        logger.info('summary:')
+        [
+            logger.handle(log)
+            for log in parse_logs + summary_logs
+            if log.levelno >= logger.getEffectiveLevel()
+        ]
 
     if any(log.levelno == logging.CRITICAL for log in summary_logs):
         exit_with_help_link()

+ 62 - 1
borgmatic/hooks/healthchecks.py

@@ -12,6 +12,58 @@ MONITOR_STATE_TO_HEALTHCHECKS = {
     monitor.State.FAIL: 'fail',
 }
 
+PAYLOAD_TRUNCATION_INDICATOR = '...\n'
+PAYLOAD_LIMIT_BYTES = 10 * 1024 - len(PAYLOAD_TRUNCATION_INDICATOR)
+
+
+class Forgetful_buffering_handler(logging.Handler):
+    '''
+    A buffering log handler that stores log messages in memory, and throws away messages (oldest
+    first) once a particular capacity in bytes is reached.
+    '''
+
+    def __init__(self, byte_capacity):
+        super().__init__()
+
+        self.byte_capacity = byte_capacity
+        self.byte_count = 0
+        self.buffer = []
+        self.forgot = False
+
+    def emit(self, record):
+        message = record.getMessage() + '\n'
+        self.byte_count += len(message)
+        self.buffer.append(message)
+
+        while self.byte_count > self.byte_capacity and self.buffer:
+            self.byte_count -= len(self.buffer[0])
+            self.buffer.pop(0)
+            self.forgot = True
+
+
+def format_buffered_logs_for_payload():
+    '''
+    Get the handler previously added to the root logger, and slurp buffered logs out of it to
+    send to Healthchecks.
+    '''
+    try:
+        buffering_handler = next(
+            handler
+            for handler in logging.getLogger().handlers
+            if isinstance(handler, Forgetful_buffering_handler)
+        )
+    except StopIteration:
+        payload = 'Cannot find the log handler for sending logs to Healthchecks'
+        logger.warning(payload)
+        return payload
+
+    payload = ''.join(message for message in buffering_handler.buffer)
+
+    if buffering_handler.forgot:
+        return PAYLOAD_TRUNCATION_INDICATOR + payload
+
+    return payload
+
 
 def ping_monitor(ping_url_or_uuid, config_filename, state, dry_run):
     '''
@@ -19,6 +71,12 @@ def ping_monitor(ping_url_or_uuid, config_filename, state, dry_run):
     configuration filename in any log entries. If this is a dry run, then don't actually ping
     anything.
     '''
+    if state is monitor.State.START:
+        # Add a handler to the root logger that stores in memory the most recent logs emitted. That
+        # way, we can send them all to Healthchecks upon a finish or failure state.
+        logging.getLogger().addHandler(Forgetful_buffering_handler(PAYLOAD_LIMIT_BYTES))
+        payload = ''
+
     ping_url = (
         ping_url_or_uuid
         if ping_url_or_uuid.startswith('http')
@@ -35,6 +93,9 @@ def ping_monitor(ping_url_or_uuid, config_filename, state, dry_run):
     )
     logger.debug('{}: Using Healthchecks ping URL {}'.format(config_filename, ping_url))
 
+    if state in (monitor.State.FINISH, monitor.State.FAIL):
+        payload = format_buffered_logs_for_payload()
+
     if not dry_run:
         logging.getLogger('urllib3').setLevel(logging.ERROR)
-        requests.get(ping_url)
+        requests.post(ping_url, data=payload)

+ 12 - 2
docs/how-to/monitor-your-backups.md

@@ -116,9 +116,19 @@ With this hook in place, borgmatic pings your Healthchecks project when a
 backup begins, ends, or errors. Specifically, before the <a
 href="https://torsion.org/borgmatic/docs/how-to/add-preparation-and-cleanup-steps-to-backups/">`before_backup`
 hooks</a> run, borgmatic lets Healthchecks know that a backup has started.
+
 Then, if the backup completes successfully, borgmatic notifies Healthchecks of
-the success after the `after_backup` hooks run. And if an error occurs during
-the backup, borgmatic notifies Healthchecks after the `on_error` hooks run.
+the success after the `after_backup` hooks run, and includes borgmatic logs in
+the payload data sent to Healthchecks. This means that borgmatic logs show up
+in the Healthchecks UI, although be aware that Healthchecks currently has a
+10-kilobyte limit for the logs in each ping.
+
+If an error occurs during the backup, borgmatic notifies Healthchecks after
+the `on_error` hooks run, also tacking on logs including the error itself.
+
+Note that borgmatic sends logs to Healthchecks by applying the maximum of any
+other borgmatic verbosity level (`--verbosity`, `--syslog-verbosity`, etc.),
+as there is not currently a dedicated Healthchecks verbosity setting.
 
 You can configure Healthchecks to notify you by a [variety of
 mechanisms](https://healthchecks.io/#welcome-integrations) when backups fail

+ 1 - 1
setup.py

@@ -1,6 +1,6 @@
 from setuptools import find_packages, setup
 
-VERSION = '1.4.10'
+VERSION = '1.4.11'
 
 
 setup(

+ 19 - 4
tests/unit/commands/test_borgmatic.py

@@ -115,36 +115,50 @@ def test_load_configurations_logs_critical_for_parse_error():
     assert {log.levelno for log in logs} == {logging.CRITICAL}
 
 
+def test_log_record_does_not_raise():
+    module.log_record(levelno=1, foo='bar', baz='quux')
+
+
 def test_make_error_log_records_generates_output_logs_for_message_only():
+    flexmock(module).should_receive('log_record').replace_with(dict)
+
     logs = tuple(module.make_error_log_records('Error'))
 
-    assert {log.levelno for log in logs} == {logging.CRITICAL}
+    assert {log['levelno'] for log in logs} == {logging.CRITICAL}
 
 
 def test_make_error_log_records_generates_output_logs_for_called_process_error():
+    flexmock(module).should_receive('log_record').replace_with(dict)
+
     logs = tuple(
         module.make_error_log_records(
             'Error', subprocess.CalledProcessError(1, 'ls', 'error output')
         )
     )
 
-    assert {log.levelno for log in logs} == {logging.CRITICAL}
+    assert {log['levelno'] for log in logs} == {logging.CRITICAL}
     assert any(log for log in logs if 'error output' in str(log))
 
 
 def test_make_error_log_records_generates_logs_for_value_error():
+    flexmock(module).should_receive('log_record').replace_with(dict)
+
     logs = tuple(module.make_error_log_records('Error', ValueError()))
 
-    assert {log.levelno for log in logs} == {logging.CRITICAL}
+    assert {log['levelno'] for log in logs} == {logging.CRITICAL}
 
 
 def test_make_error_log_records_generates_logs_for_os_error():
+    flexmock(module).should_receive('log_record').replace_with(dict)
+
     logs = tuple(module.make_error_log_records('Error', OSError()))
 
-    assert {log.levelno for log in logs} == {logging.CRITICAL}
+    assert {log['levelno'] for log in logs} == {logging.CRITICAL}
 
 
 def test_make_error_log_records_generates_nothing_for_other_error():
+    flexmock(module).should_receive('log_record').replace_with(dict)
+
     logs = tuple(module.make_error_log_records('Error', KeyError()))
 
     assert logs == ()
@@ -268,6 +282,7 @@ def test_collect_configuration_run_summary_logs_run_configuration_error():
     flexmock(module).should_receive('run_configuration').and_return(
         [logging.makeLogRecord(dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg='Error'))]
     )
+    flexmock(module).should_receive('make_error_log_records').and_return([])
     arguments = {}
 
     logs = tuple(

+ 71 - 6
tests/unit/hooks/test_healthchecks.py

@@ -3,38 +3,103 @@ from flexmock import flexmock
 from borgmatic.hooks import healthchecks as module
 
 
+def test_forgetful_buffering_handler_emit_collects_log_records():
+    handler = module.Forgetful_buffering_handler(byte_capacity=100)
+    handler.emit(flexmock(getMessage=lambda: 'foo'))
+    handler.emit(flexmock(getMessage=lambda: 'bar'))
+
+    assert handler.buffer == ['foo\n', 'bar\n']
+    assert not handler.forgot
+
+
+def test_forgetful_buffering_handler_emit_forgets_log_records_when_capacity_reached():
+    handler = module.Forgetful_buffering_handler(byte_capacity=len('foo\nbar\n'))
+    handler.emit(flexmock(getMessage=lambda: 'foo'))
+    assert handler.buffer == ['foo\n']
+    handler.emit(flexmock(getMessage=lambda: 'bar'))
+    assert handler.buffer == ['foo\n', 'bar\n']
+    handler.emit(flexmock(getMessage=lambda: 'baz'))
+    assert handler.buffer == ['bar\n', 'baz\n']
+    handler.emit(flexmock(getMessage=lambda: 'quux'))
+    assert handler.buffer == ['quux\n']
+    assert handler.forgot
+
+
+def test_format_buffered_logs_for_payload_flattens_log_buffer():
+    handler = module.Forgetful_buffering_handler(byte_capacity=100)
+    handler.buffer = ['foo\n', 'bar\n']
+    flexmock(module.logging).should_receive('getLogger').and_return(flexmock(handlers=[handler]))
+
+    payload = module.format_buffered_logs_for_payload()
+
+    assert payload == 'foo\nbar\n'
+
+
+def test_format_buffered_logs_for_payload_inserts_truncation_indicator_when_logs_forgotten():
+    handler = module.Forgetful_buffering_handler(byte_capacity=100)
+    handler.buffer = ['foo\n', 'bar\n']
+    handler.forgot = True
+    flexmock(module.logging).should_receive('getLogger').and_return(flexmock(handlers=[handler]))
+
+    payload = module.format_buffered_logs_for_payload()
+
+    assert payload == '...\nfoo\nbar\n'
+
+
+def test_format_buffered_logs_for_payload_without_handler_produces_payload_anyway():
+    flexmock(module.logging).should_receive('getLogger').and_return(
+        flexmock(handlers=[module.logging.Handler()])
+    )
+
+    payload = module.format_buffered_logs_for_payload()
+
+    assert payload
+
+
 def test_ping_monitor_hits_ping_url_for_start_state():
+    flexmock(module).should_receive('Forgetful_buffering_handler')
     ping_url = 'https://example.com'
-    flexmock(module.requests).should_receive('get').with_args('{}/{}'.format(ping_url, 'start'))
+    flexmock(module.requests).should_receive('post').with_args(
+        '{}/{}'.format(ping_url, 'start'), data=''
+    )
 
     module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.START, dry_run=False)
 
 
 def test_ping_monitor_hits_ping_url_for_finish_state():
     ping_url = 'https://example.com'
-    flexmock(module.requests).should_receive('get').with_args(ping_url)
+    payload = flexmock()
+    flexmock(module).should_receive('format_buffered_logs_for_payload').and_return(payload)
+    flexmock(module.requests).should_receive('post').with_args(ping_url, data=payload)
 
     module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.FINISH, dry_run=False)
 
 
 def test_ping_monitor_hits_ping_url_for_fail_state():
     ping_url = 'https://example.com'
-    flexmock(module.requests).should_receive('get').with_args('{}/{}'.format(ping_url, 'fail'))
+    payload = flexmock()
+    flexmock(module).should_receive('format_buffered_logs_for_payload').and_return(payload)
+    flexmock(module.requests).should_receive('post').with_args(
+        '{}/{}'.format(ping_url, 'fail'), data=payload
+    )
 
     module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.FAIL, dry_run=False)
 
 
 def test_ping_monitor_with_ping_uuid_hits_corresponding_url():
     ping_uuid = 'abcd-efgh-ijkl-mnop'
-    flexmock(module.requests).should_receive('get').with_args(
-        'https://hc-ping.com/{}'.format(ping_uuid)
+    payload = flexmock()
+    flexmock(module).should_receive('format_buffered_logs_for_payload').and_return(payload)
+    flexmock(module.requests).should_receive('post').with_args(
+        'https://hc-ping.com/{}'.format(ping_uuid), data=payload
     )
 
     module.ping_monitor(ping_uuid, 'config.yaml', state=module.monitor.State.FINISH, dry_run=False)
 
 
 def test_ping_monitor_dry_run_does_not_hit_ping_url():
+    flexmock(module).should_receive('Forgetful_buffering_handler')
     ping_url = 'https://example.com'
-    flexmock(module.requests).should_receive('get').never()
+    flexmock(module.requests).should_receive('post').never()
 
     module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.START, dry_run=True)