浏览代码

When using the Healthchecks monitoring hook, include borgmatic logs in the payloads for completion and failure pings (#241).

Dan Helfman 5 年之前
父节点
当前提交
50f62d73b7

+ 7 - 0
NEWS

@@ -1,3 +1,10 @@
+1.4.11
+ * #241: When using the Healthchecks monitoring hook, include borgmatic logs in the payloads for
+   completion and failure pings.
+ * With --verbosity level 1 or 2, show error logs both inline when they occur and in the summary
+   logs at the bottom. With lower verbosity levels, suppress the summary and show error logs when
+   they occur.
+
 1.4.10
 1.4.10
  * #246: Fix for "borgmatic restore" showing success and incorrectly extracting archive files, even
  * #246: Fix for "borgmatic restore" showing success and incorrectly extracting archive files, even
    when no databases are configured to restore. As this can overwrite files from the archive and
    when no databases are configured to restore. As this can overwrite files from the archive and

+ 26 - 22
borgmatic/commands/borgmatic.py

@@ -365,33 +365,36 @@ def load_configurations(config_filenames):
     return (configs, logs)
     return (configs, logs)
 
 
 
 
+def log_record(**kwargs):
+    '''
+    Create a log record based on the given makeLogRecord() arguments, one of which must be
+    named "levelno". Log the record and return it.
+    '''
+    record = logging.makeLogRecord(kwargs)
+    logger.handle(record)
+
+    return record
+
+
 def make_error_log_records(message, error=None):
 def make_error_log_records(message, error=None):
     '''
     '''
     Given error message text and an optional exception object, yield a series of logging.LogRecord
     Given error message text and an optional exception object, yield a series of logging.LogRecord
-    instances with error summary information.
+    instances with error summary information. As a side effect, log each record.
     '''
     '''
     if not error:
     if not error:
-        yield logging.makeLogRecord(
-            dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
-        )
+        yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
         return
         return
 
 
     try:
     try:
         raise error
         raise error
     except CalledProcessError as error:
     except CalledProcessError as error:
-        yield logging.makeLogRecord(
-            dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
-        )
+        yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
         if error.output:
         if error.output:
-            yield logging.makeLogRecord(
-                dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error.output)
-            )
-        yield logging.makeLogRecord(dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error))
+            yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error.output)
+        yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error)
     except (ValueError, OSError) as error:
     except (ValueError, OSError) as error:
-        yield logging.makeLogRecord(
-            dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
-        )
-        yield logging.makeLogRecord(dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error))
+        yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=message)
+        yield log_record(levelno=logging.CRITICAL, levelname='CRITICAL', msg=error)
     except:  # noqa: E722
     except:  # noqa: E722
         # Raising above only as a means of determining the error type. Swallow the exception here
         # Raising above only as a means of determining the error type. Swallow the exception here
         # because we don't want the exception to propagate out of this function.
         # because we don't want the exception to propagate out of this function.
@@ -535,13 +538,14 @@ def main():  # pragma: no cover
 
 
     summary_logs = list(collect_configuration_run_summary_logs(configs, arguments))
     summary_logs = list(collect_configuration_run_summary_logs(configs, arguments))
 
 
-    logger.info('')
-    logger.info('summary:')
-    [
-        logger.handle(log)
-        for log in parse_logs + summary_logs
-        if log.levelno >= logger.getEffectiveLevel()
-    ]
+    if logger.getEffectiveLevel() < logging.WARNING:
+        logger.info('')
+        logger.info('summary:')
+        [
+            logger.handle(log)
+            for log in parse_logs + summary_logs
+            if log.levelno >= logger.getEffectiveLevel()
+        ]
 
 
     if any(log.levelno == logging.CRITICAL for log in summary_logs):
     if any(log.levelno == logging.CRITICAL for log in summary_logs):
         exit_with_help_link()
         exit_with_help_link()

+ 62 - 1
borgmatic/hooks/healthchecks.py

@@ -12,6 +12,58 @@ MONITOR_STATE_TO_HEALTHCHECKS = {
     monitor.State.FAIL: 'fail',
     monitor.State.FAIL: 'fail',
 }
 }
 
 
+PAYLOAD_TRUNCATION_INDICATOR = '...\n'
+PAYLOAD_LIMIT_BYTES = 10 * 1024 - len(PAYLOAD_TRUNCATION_INDICATOR)
+
+
+class Forgetful_buffering_handler(logging.Handler):
+    '''
+    A buffering log handler that stores log messages in memory, and throws away messages (oldest
+    first) once a particular capacity in bytes is reached.
+    '''
+
+    def __init__(self, byte_capacity):
+        super().__init__()
+
+        self.byte_capacity = byte_capacity
+        self.byte_count = 0
+        self.buffer = []
+        self.forgot = False
+
+    def emit(self, record):
+        message = record.getMessage() + '\n'
+        self.byte_count += len(message)
+        self.buffer.append(message)
+
+        while self.byte_count > self.byte_capacity and self.buffer:
+            self.byte_count -= len(self.buffer[0])
+            self.buffer.pop(0)
+            self.forgot = True
+
+
+def format_buffered_logs_for_payload():
+    '''
+    Get the handler previously added to the root logger, and slurp buffered logs out of it to
+    send to Healthchecks.
+    '''
+    try:
+        buffering_handler = next(
+            handler
+            for handler in logging.getLogger().handlers
+            if isinstance(handler, Forgetful_buffering_handler)
+        )
+    except StopIteration:
+        payload = 'Cannot find the log handler for sending logs to Healthchecks'
+        logger.warning(payload)
+        return payload
+
+    payload = ''.join(message for message in buffering_handler.buffer)
+
+    if buffering_handler.forgot:
+        return PAYLOAD_TRUNCATION_INDICATOR + payload
+
+    return payload
+
 
 
 def ping_monitor(ping_url_or_uuid, config_filename, state, dry_run):
 def ping_monitor(ping_url_or_uuid, config_filename, state, dry_run):
     '''
     '''
@@ -19,6 +71,12 @@ def ping_monitor(ping_url_or_uuid, config_filename, state, dry_run):
     configuration filename in any log entries. If this is a dry run, then don't actually ping
     configuration filename in any log entries. If this is a dry run, then don't actually ping
     anything.
     anything.
     '''
     '''
+    if state is monitor.State.START:
+        # Add a handler to the root logger that stores in memory the most recent logs emitted. That
+        # way, we can send them all to Healthchecks upon a finish or failure state.
+        logging.getLogger().addHandler(Forgetful_buffering_handler(PAYLOAD_LIMIT_BYTES))
+        payload = ''
+
     ping_url = (
     ping_url = (
         ping_url_or_uuid
         ping_url_or_uuid
         if ping_url_or_uuid.startswith('http')
         if ping_url_or_uuid.startswith('http')
@@ -35,6 +93,9 @@ def ping_monitor(ping_url_or_uuid, config_filename, state, dry_run):
     )
     )
     logger.debug('{}: Using Healthchecks ping URL {}'.format(config_filename, ping_url))
     logger.debug('{}: Using Healthchecks ping URL {}'.format(config_filename, ping_url))
 
 
+    if state in (monitor.State.FINISH, monitor.State.FAIL):
+        payload = format_buffered_logs_for_payload()
+
     if not dry_run:
     if not dry_run:
         logging.getLogger('urllib3').setLevel(logging.ERROR)
         logging.getLogger('urllib3').setLevel(logging.ERROR)
-        requests.get(ping_url)
+        requests.post(ping_url, data=payload)

+ 12 - 2
docs/how-to/monitor-your-backups.md

@@ -116,9 +116,19 @@ With this hook in place, borgmatic pings your Healthchecks project when a
 backup begins, ends, or errors. Specifically, before the <a
 backup begins, ends, or errors. Specifically, before the <a
 href="https://torsion.org/borgmatic/docs/how-to/add-preparation-and-cleanup-steps-to-backups/">`before_backup`
 href="https://torsion.org/borgmatic/docs/how-to/add-preparation-and-cleanup-steps-to-backups/">`before_backup`
 hooks</a> run, borgmatic lets Healthchecks know that a backup has started.
 hooks</a> run, borgmatic lets Healthchecks know that a backup has started.
+
 Then, if the backup completes successfully, borgmatic notifies Healthchecks of
 Then, if the backup completes successfully, borgmatic notifies Healthchecks of
-the success after the `after_backup` hooks run. And if an error occurs during
-the backup, borgmatic notifies Healthchecks after the `on_error` hooks run.
+the success after the `after_backup` hooks run, and includes borgmatic logs in
+the payload data sent to Healthchecks. This means that borgmatic logs show up
+in the Healthchecks UI, although be aware that Healthchecks currently has a
+10-kilobyte limit for the logs in each ping.
+
+If an error occurs during the backup, borgmatic notifies Healthchecks after
+the `on_error` hooks run, also tacking on logs including the error itself.
+
+Note that borgmatic sends logs to Healthchecks by applying the maximum of any
+other borgmatic verbosity level (`--verbosity`, `--syslog-verbosity`, etc.),
+as there is not currently a dedicated Healthchecks verbosity setting.
 
 
 You can configure Healthchecks to notify you by a [variety of
 You can configure Healthchecks to notify you by a [variety of
 mechanisms](https://healthchecks.io/#welcome-integrations) when backups fail
 mechanisms](https://healthchecks.io/#welcome-integrations) when backups fail

+ 1 - 1
setup.py

@@ -1,6 +1,6 @@
 from setuptools import find_packages, setup
 from setuptools import find_packages, setup
 
 
-VERSION = '1.4.10'
+VERSION = '1.4.11'
 
 
 
 
 setup(
 setup(

+ 19 - 4
tests/unit/commands/test_borgmatic.py

@@ -115,36 +115,50 @@ def test_load_configurations_logs_critical_for_parse_error():
     assert {log.levelno for log in logs} == {logging.CRITICAL}
     assert {log.levelno for log in logs} == {logging.CRITICAL}
 
 
 
 
+def test_log_record_does_not_raise():
+    module.log_record(levelno=1, foo='bar', baz='quux')
+
+
 def test_make_error_log_records_generates_output_logs_for_message_only():
 def test_make_error_log_records_generates_output_logs_for_message_only():
+    flexmock(module).should_receive('log_record').replace_with(dict)
+
     logs = tuple(module.make_error_log_records('Error'))
     logs = tuple(module.make_error_log_records('Error'))
 
 
-    assert {log.levelno for log in logs} == {logging.CRITICAL}
+    assert {log['levelno'] for log in logs} == {logging.CRITICAL}
 
 
 
 
 def test_make_error_log_records_generates_output_logs_for_called_process_error():
 def test_make_error_log_records_generates_output_logs_for_called_process_error():
+    flexmock(module).should_receive('log_record').replace_with(dict)
+
     logs = tuple(
     logs = tuple(
         module.make_error_log_records(
         module.make_error_log_records(
             'Error', subprocess.CalledProcessError(1, 'ls', 'error output')
             'Error', subprocess.CalledProcessError(1, 'ls', 'error output')
         )
         )
     )
     )
 
 
-    assert {log.levelno for log in logs} == {logging.CRITICAL}
+    assert {log['levelno'] for log in logs} == {logging.CRITICAL}
     assert any(log for log in logs if 'error output' in str(log))
     assert any(log for log in logs if 'error output' in str(log))
 
 
 
 
 def test_make_error_log_records_generates_logs_for_value_error():
 def test_make_error_log_records_generates_logs_for_value_error():
+    flexmock(module).should_receive('log_record').replace_with(dict)
+
     logs = tuple(module.make_error_log_records('Error', ValueError()))
     logs = tuple(module.make_error_log_records('Error', ValueError()))
 
 
-    assert {log.levelno for log in logs} == {logging.CRITICAL}
+    assert {log['levelno'] for log in logs} == {logging.CRITICAL}
 
 
 
 
 def test_make_error_log_records_generates_logs_for_os_error():
 def test_make_error_log_records_generates_logs_for_os_error():
+    flexmock(module).should_receive('log_record').replace_with(dict)
+
     logs = tuple(module.make_error_log_records('Error', OSError()))
     logs = tuple(module.make_error_log_records('Error', OSError()))
 
 
-    assert {log.levelno for log in logs} == {logging.CRITICAL}
+    assert {log['levelno'] for log in logs} == {logging.CRITICAL}
 
 
 
 
 def test_make_error_log_records_generates_nothing_for_other_error():
 def test_make_error_log_records_generates_nothing_for_other_error():
+    flexmock(module).should_receive('log_record').replace_with(dict)
+
     logs = tuple(module.make_error_log_records('Error', KeyError()))
     logs = tuple(module.make_error_log_records('Error', KeyError()))
 
 
     assert logs == ()
     assert logs == ()
@@ -268,6 +282,7 @@ def test_collect_configuration_run_summary_logs_run_configuration_error():
     flexmock(module).should_receive('run_configuration').and_return(
     flexmock(module).should_receive('run_configuration').and_return(
         [logging.makeLogRecord(dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg='Error'))]
         [logging.makeLogRecord(dict(levelno=logging.CRITICAL, levelname='CRITICAL', msg='Error'))]
     )
     )
+    flexmock(module).should_receive('make_error_log_records').and_return([])
     arguments = {}
     arguments = {}
 
 
     logs = tuple(
     logs = tuple(

+ 71 - 6
tests/unit/hooks/test_healthchecks.py

@@ -3,38 +3,103 @@ from flexmock import flexmock
 from borgmatic.hooks import healthchecks as module
 from borgmatic.hooks import healthchecks as module
 
 
 
 
+def test_forgetful_buffering_handler_emit_collects_log_records():
+    handler = module.Forgetful_buffering_handler(byte_capacity=100)
+    handler.emit(flexmock(getMessage=lambda: 'foo'))
+    handler.emit(flexmock(getMessage=lambda: 'bar'))
+
+    assert handler.buffer == ['foo\n', 'bar\n']
+    assert not handler.forgot
+
+
+def test_forgetful_buffering_handler_emit_forgets_log_records_when_capacity_reached():
+    handler = module.Forgetful_buffering_handler(byte_capacity=len('foo\nbar\n'))
+    handler.emit(flexmock(getMessage=lambda: 'foo'))
+    assert handler.buffer == ['foo\n']
+    handler.emit(flexmock(getMessage=lambda: 'bar'))
+    assert handler.buffer == ['foo\n', 'bar\n']
+    handler.emit(flexmock(getMessage=lambda: 'baz'))
+    assert handler.buffer == ['bar\n', 'baz\n']
+    handler.emit(flexmock(getMessage=lambda: 'quux'))
+    assert handler.buffer == ['quux\n']
+    assert handler.forgot
+
+
+def test_format_buffered_logs_for_payload_flattens_log_buffer():
+    handler = module.Forgetful_buffering_handler(byte_capacity=100)
+    handler.buffer = ['foo\n', 'bar\n']
+    flexmock(module.logging).should_receive('getLogger').and_return(flexmock(handlers=[handler]))
+
+    payload = module.format_buffered_logs_for_payload()
+
+    assert payload == 'foo\nbar\n'
+
+
+def test_format_buffered_logs_for_payload_inserts_truncation_indicator_when_logs_forgotten():
+    handler = module.Forgetful_buffering_handler(byte_capacity=100)
+    handler.buffer = ['foo\n', 'bar\n']
+    handler.forgot = True
+    flexmock(module.logging).should_receive('getLogger').and_return(flexmock(handlers=[handler]))
+
+    payload = module.format_buffered_logs_for_payload()
+
+    assert payload == '...\nfoo\nbar\n'
+
+
+def test_format_buffered_logs_for_payload_without_handler_produces_payload_anyway():
+    flexmock(module.logging).should_receive('getLogger').and_return(
+        flexmock(handlers=[module.logging.Handler()])
+    )
+
+    payload = module.format_buffered_logs_for_payload()
+
+    assert payload
+
+
 def test_ping_monitor_hits_ping_url_for_start_state():
 def test_ping_monitor_hits_ping_url_for_start_state():
+    flexmock(module).should_receive('Forgetful_buffering_handler')
     ping_url = 'https://example.com'
     ping_url = 'https://example.com'
-    flexmock(module.requests).should_receive('get').with_args('{}/{}'.format(ping_url, 'start'))
+    flexmock(module.requests).should_receive('post').with_args(
+        '{}/{}'.format(ping_url, 'start'), data=''
+    )
 
 
     module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.START, dry_run=False)
     module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.START, dry_run=False)
 
 
 
 
 def test_ping_monitor_hits_ping_url_for_finish_state():
 def test_ping_monitor_hits_ping_url_for_finish_state():
     ping_url = 'https://example.com'
     ping_url = 'https://example.com'
-    flexmock(module.requests).should_receive('get').with_args(ping_url)
+    payload = flexmock()
+    flexmock(module).should_receive('format_buffered_logs_for_payload').and_return(payload)
+    flexmock(module.requests).should_receive('post').with_args(ping_url, data=payload)
 
 
     module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.FINISH, dry_run=False)
     module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.FINISH, dry_run=False)
 
 
 
 
 def test_ping_monitor_hits_ping_url_for_fail_state():
 def test_ping_monitor_hits_ping_url_for_fail_state():
     ping_url = 'https://example.com'
     ping_url = 'https://example.com'
-    flexmock(module.requests).should_receive('get').with_args('{}/{}'.format(ping_url, 'fail'))
+    payload = flexmock()
+    flexmock(module).should_receive('format_buffered_logs_for_payload').and_return(payload)
+    flexmock(module.requests).should_receive('post').with_args(
+        '{}/{}'.format(ping_url, 'fail'), data=payload
+    )
 
 
     module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.FAIL, dry_run=False)
     module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.FAIL, dry_run=False)
 
 
 
 
 def test_ping_monitor_with_ping_uuid_hits_corresponding_url():
 def test_ping_monitor_with_ping_uuid_hits_corresponding_url():
     ping_uuid = 'abcd-efgh-ijkl-mnop'
     ping_uuid = 'abcd-efgh-ijkl-mnop'
-    flexmock(module.requests).should_receive('get').with_args(
-        'https://hc-ping.com/{}'.format(ping_uuid)
+    payload = flexmock()
+    flexmock(module).should_receive('format_buffered_logs_for_payload').and_return(payload)
+    flexmock(module.requests).should_receive('post').with_args(
+        'https://hc-ping.com/{}'.format(ping_uuid), data=payload
     )
     )
 
 
     module.ping_monitor(ping_uuid, 'config.yaml', state=module.monitor.State.FINISH, dry_run=False)
     module.ping_monitor(ping_uuid, 'config.yaml', state=module.monitor.State.FINISH, dry_run=False)
 
 
 
 
 def test_ping_monitor_dry_run_does_not_hit_ping_url():
 def test_ping_monitor_dry_run_does_not_hit_ping_url():
+    flexmock(module).should_receive('Forgetful_buffering_handler')
     ping_url = 'https://example.com'
     ping_url = 'https://example.com'
-    flexmock(module.requests).should_receive('get').never()
+    flexmock(module.requests).should_receive('post').never()
 
 
     module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.START, dry_run=True)
     module.ping_monitor(ping_url, 'config.yaml', state=module.monitor.State.START, dry_run=True)