فهرست منبع

With the PagerDuty monitoring hook, send borgmatic logs to PagerDuty so they show up in the incident UI (#409).

Dan Helfman 3 ماه پیش
والد
کامیت
c0135864c2

+ 3 - 0
NEWS

@@ -1,4 +1,7 @@
 1.9.14.dev0
+ * #409: With the PagerDuty monitoring hook, send borgmatic logs to PagerDuty so they show up in the
+   incident UI. See the documentation for more information:
+   https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#pagerduty-hook
  * #936: Clarify Zabbix monitoring hook documentation about creating items.
 
 1.9.13

+ 6 - 0
borgmatic/config/schema.yaml

@@ -2279,6 +2279,12 @@ properties:
                     PagerDuty integration key used to notify PagerDuty when a
                     backup errors. Supports the "{credential ...}" syntax.
                 example: a177cad45bd374409f78906a810a3074
+            send_logs:
+                type: boolean
+                description: |
+                    Send borgmatic logs to PagerDuty when a backup errors.
+                    Defaults to true.
+                example: false
         description: |
             Configuration for a monitoring integration with PagerDuty. Create an
             account at https://www.pagerduty.com if you'd like to use this

+ 1 - 1
borgmatic/hooks/monitoring/logs.py

@@ -64,7 +64,7 @@ def get_handler(identifier):
 def format_buffered_logs_for_payload(identifier):
     '''
     Get the handler previously added to the root logger, and slurp buffered logs out of it to
-    send to Healthchecks.
+    send to the monitoring service.
     '''
     try:
         buffering_handler = get_handler(identifier)

+ 32 - 11
borgmatic/hooks/monitoring/pagerduty.py

@@ -6,20 +6,36 @@ import platform
 import requests
 
 import borgmatic.hooks.credential.parse
+import borgmatic.hooks.monitoring.logs
 from borgmatic.hooks.monitoring import monitor
 
 logger = logging.getLogger(__name__)
 
 EVENTS_API_URL = 'https://events.pagerduty.com/v2/enqueue'
+DEFAULT_LOGS_PAYLOAD_LIMIT_BYTES = 10000
+HANDLER_IDENTIFIER = 'pagerduty'
 
 
-def initialize_monitor(
-    integration_key, config, config_filename, monitoring_log_level, dry_run
-):  # pragma: no cover
+def initialize_monitor(hook_config, config, config_filename, monitoring_log_level, dry_run):
     '''
-    No initialization is necessary for this monitor.
+    Add a handler to the root logger that stores in memory the most recent logs emitted. That way,
+    we can send them all to PagerDuty upon a failure state. But skip this if the "send_logs" option
+    is false.
     '''
-    pass
+    if hook_config.get('send_logs') is False:
+        return
+
+    ping_body_limit = max(
+        DEFAULT_LOGS_PAYLOAD_LIMIT_BYTES
+        - len(borgmatic.hooks.monitoring.logs.PAYLOAD_TRUNCATION_INDICATOR),
+        0,
+    )
+
+    borgmatic.hooks.monitoring.logs.add_handler(
+        borgmatic.hooks.monitoring.logs.Forgetful_buffering_handler(
+            HANDLER_IDENTIFIER, ping_body_limit, monitoring_log_level
+        )
+    )
 
 
 def ping_monitor(hook_config, config, config_filename, state, monitoring_log_level, dry_run):
@@ -37,9 +53,6 @@ def ping_monitor(hook_config, config, config_filename, state, monitoring_log_lev
     dry_run_label = ' (dry run; not actually sending)' if dry_run else ''
     logger.info(f'Sending failure event to PagerDuty {dry_run_label}')
 
-    if dry_run:
-        return
-
     try:
         integration_key = borgmatic.hooks.credential.parse.resolve_credential(
             hook_config.get('integration_key'), config
@@ -48,6 +61,10 @@ def ping_monitor(hook_config, config, config_filename, state, monitoring_log_lev
         logger.warning(f'PagerDuty credential error: {error}')
         return
 
+    logs_payload = borgmatic.hooks.monitoring.logs.format_buffered_logs_for_payload(
+        HANDLER_IDENTIFIER
+    )
+
     hostname = platform.node()
     local_timestamp = datetime.datetime.now(datetime.timezone.utc).astimezone().isoformat()
     payload = json.dumps(
@@ -66,11 +83,14 @@ def ping_monitor(hook_config, config, config_filename, state, monitoring_log_lev
                     'hostname': hostname,
                     'configuration filename': config_filename,
                     'server time': local_timestamp,
+                    'logs': logs_payload,
                 },
             },
         }
     )
-    logger.debug(f'Using PagerDuty payload: {payload}')
+
+    if dry_run:
+        return
 
     logging.getLogger('urllib3').setLevel(logging.ERROR)
     try:
@@ -83,6 +103,7 @@ def ping_monitor(hook_config, config, config_filename, state, monitoring_log_lev
 
 def destroy_monitor(ping_url_or_uuid, config, monitoring_log_level, dry_run):  # pragma: no cover
     '''
-    No destruction is necessary for this monitor.
+    Remove the monitor handler that was added to the root logger. This prevents the handler from
+    getting reused by other instances of this monitor.
     '''
-    pass
+    borgmatic.hooks.monitoring.logs.remove_handler(HANDLER_IDENTIFIER)

+ 21 - 0
docs/how-to/monitor-your-backups.md

@@ -292,6 +292,27 @@ If you have any issues with the integration, [please contact
 us](https://torsion.org/borgmatic/#support-and-contributing).
 
 
+### Sending logs
+
+<span class="minilink minilink-addedin">New in version 1.9.14</span> borgmatic
+logs are included in the payload data sent to PagerDuty. This means that
+borgmatic logs, including error messages, show up in the PagerDuty incident UI
+and corresponding notification emails. Only the most recent logs (up to roughly
+10,000 bytes) are sent, so longer output is truncated.
+
+Use borgmatic's `--monitoring-verbosity` flag to customize the verbosity of the
+logs that are sent. The `--list` and `--stats` flags may also be of use. See
+`borgmatic create --help` for more information.
+
+If you don't want any logs sent, you can disable this feature by setting
+`send_logs` to `false`:
+
+```yaml
+pagerduty:
+    integration_key: a177cad45bd374409f78906a810a3074
+    send_logs: false
+```
+
+
 ## Pushover hook
 
 <span class="minilink minilink-addedin">New in version 1.9.2</span>

+ 52 - 0
tests/unit/hooks/monitoring/test_pagerduty.py

@@ -3,6 +3,46 @@ from flexmock import flexmock
 from borgmatic.hooks.monitoring import pagerduty as module
 
 
+def mock_logger():
+    logger = flexmock()
+    logger.should_receive('addHandler')
+    logger.should_receive('removeHandler')
+    flexmock(module.logging).should_receive('getLogger').and_return(logger)
+
+
+def test_initialize_monitor_creates_log_handler():
+    monitoring_log_level = 1
+
+    mock_logger()
+    flexmock(module.borgmatic.hooks.monitoring.logs).should_receive(
+        'Forgetful_buffering_handler'
+    ).once()
+
+    module.initialize_monitor({}, {}, 'test.yaml', monitoring_log_level, dry_run=False)
+
+
+def test_initialize_monitor_creates_log_handler_when_send_logs_true():
+    mock_logger()
+    flexmock(module.borgmatic.hooks.monitoring.logs).should_receive(
+        'Forgetful_buffering_handler'
+    ).once()
+
+    module.initialize_monitor(
+        {'send_logs': True}, {}, 'test.yaml', monitoring_log_level=1, dry_run=False
+    )
+
+
+def test_initialize_monitor_bails_when_send_logs_false():
+    mock_logger()
+    flexmock(module.borgmatic.hooks.monitoring.logs).should_receive(
+        'Forgetful_buffering_handler'
+    ).never()
+
+    module.initialize_monitor(
+        {'send_logs': False}, {}, 'test.yaml', monitoring_log_level=1, dry_run=False
+    )
+
+
 def test_ping_monitor_ignores_start_state():
     flexmock(module.borgmatic.hooks.credential.parse).should_receive(
         'resolve_credential'
@@ -39,6 +79,9 @@ def test_ping_monitor_calls_api_for_fail_state():
     flexmock(module.borgmatic.hooks.credential.parse).should_receive(
         'resolve_credential'
     ).replace_with(lambda value, config: value)
+    flexmock(module.borgmatic.hooks.monitoring.logs).should_receive(
+        'format_buffered_logs_for_payload'
+    ).and_return('loggy\nlogs')
     flexmock(module.requests).should_receive('post').and_return(flexmock(ok=True))
 
     module.ping_monitor(
@@ -55,6 +98,9 @@ def test_ping_monitor_dry_run_does_not_call_api():
     flexmock(module.borgmatic.hooks.credential.parse).should_receive(
         'resolve_credential'
     ).replace_with(lambda value, config: value)
+    flexmock(module.borgmatic.hooks.monitoring.logs).should_receive(
+        'format_buffered_logs_for_payload'
+    ).and_return('loggy\nlogs')
     flexmock(module.requests).should_receive('post').never()
 
     module.ping_monitor(
@@ -71,6 +117,9 @@ def test_ping_monitor_with_connection_error_logs_warning():
     flexmock(module.borgmatic.hooks.credential.parse).should_receive(
         'resolve_credential'
     ).replace_with(lambda value, config: value)
+    flexmock(module.borgmatic.hooks.monitoring.logs).should_receive(
+        'format_buffered_logs_for_payload'
+    ).and_return('loggy\nlogs')
     flexmock(module.requests).should_receive('post').and_raise(
         module.requests.exceptions.ConnectionError
     )
@@ -108,6 +157,9 @@ def test_ping_monitor_with_other_error_logs_warning():
     flexmock(module.borgmatic.hooks.credential.parse).should_receive(
         'resolve_credential'
     ).replace_with(lambda value, config: value)
+    flexmock(module.borgmatic.hooks.monitoring.logs).should_receive(
+        'format_buffered_logs_for_payload'
+    ).and_return('loggy\nlogs')
     response.should_receive('raise_for_status').and_raise(
         module.requests.exceptions.RequestException
     )