Browse Source

With the PagerDuty monitoring hook, send borgmatic logs to PagerDuty so they show up in the incident UI (#409).

Dan Helfman 3 months ago
parent
commit
c0135864c2

+ 3 - 0
NEWS

@@ -1,4 +1,7 @@
 1.9.14.dev0
 1.9.14.dev0
+ * #409: With the PagerDuty monitoring hook, send borgmatic logs to PagerDuty so they show up in the
+   incident UI. See the documentation for more information:
+   https://torsion.org/borgmatic/docs/how-to/monitor-your-backups/#pagerduty-hook
  * #936: Clarify Zabbix monitoring hook documentation about creating items.
  * #936: Clarify Zabbix monitoring hook documentation about creating items.
 
 
 1.9.13
 1.9.13

+ 6 - 0
borgmatic/config/schema.yaml

@@ -2279,6 +2279,12 @@ properties:
                     PagerDuty integration key used to notify PagerDuty when a
                     PagerDuty integration key used to notify PagerDuty when a
                     backup errors. Supports the "{credential ...}" syntax.
                     backup errors. Supports the "{credential ...}" syntax.
                 example: a177cad45bd374409f78906a810a3074
                 example: a177cad45bd374409f78906a810a3074
+            send_logs:
+                type: boolean
+                description: |
+                    Send borgmatic logs to PagerDuty when a backup errors.
+                    Defaults to true.
+                example: false
         description: |
         description: |
             Configuration for a monitoring integration with PagerDuty. Create an
             Configuration for a monitoring integration with PagerDuty. Create an
             account at https://www.pagerduty.com if you'd like to use this
             account at https://www.pagerduty.com if you'd like to use this

+ 1 - 1
borgmatic/hooks/monitoring/logs.py

@@ -64,7 +64,7 @@ def get_handler(identifier):
 def format_buffered_logs_for_payload(identifier):
 def format_buffered_logs_for_payload(identifier):
     '''
     '''
     Get the handler previously added to the root logger, and slurp buffered logs out of it to
     Get the handler previously added to the root logger, and slurp buffered logs out of it to
-    send to Healthchecks.
+    send to the monitoring service.
     '''
     '''
     try:
     try:
         buffering_handler = get_handler(identifier)
         buffering_handler = get_handler(identifier)

+ 32 - 11
borgmatic/hooks/monitoring/pagerduty.py

@@ -6,20 +6,36 @@ import platform
 import requests
 import requests
 
 
 import borgmatic.hooks.credential.parse
 import borgmatic.hooks.credential.parse
+import borgmatic.hooks.monitoring.logs
 from borgmatic.hooks.monitoring import monitor
 from borgmatic.hooks.monitoring import monitor
 
 
 logger = logging.getLogger(__name__)
 logger = logging.getLogger(__name__)
 
 
 EVENTS_API_URL = 'https://events.pagerduty.com/v2/enqueue'
 EVENTS_API_URL = 'https://events.pagerduty.com/v2/enqueue'
+DEFAULT_LOGS_PAYLOAD_LIMIT_BYTES = 10000
+HANDLER_IDENTIFIER = 'pagerduty'
 
 
 
 
-def initialize_monitor(
-    integration_key, config, config_filename, monitoring_log_level, dry_run
-):  # pragma: no cover
+def initialize_monitor(hook_config, config, config_filename, monitoring_log_level, dry_run):
     '''
     '''
-    No initialization is necessary for this monitor.
+    Add a handler to the root logger that stores in memory the most recent logs emitted. That way,
+    we can send them all to PagerDuty upon a failure state. But skip this if the "send_logs" option
+    is false.
     '''
     '''
-    pass
+    if hook_config.get('send_logs') is False:
+        return
+
+    ping_body_limit = max(
+        DEFAULT_LOGS_PAYLOAD_LIMIT_BYTES
+        - len(borgmatic.hooks.monitoring.logs.PAYLOAD_TRUNCATION_INDICATOR),
+        0,
+    )
+
+    borgmatic.hooks.monitoring.logs.add_handler(
+        borgmatic.hooks.monitoring.logs.Forgetful_buffering_handler(
+            HANDLER_IDENTIFIER, ping_body_limit, monitoring_log_level
+        )
+    )
 
 
 
 
 def ping_monitor(hook_config, config, config_filename, state, monitoring_log_level, dry_run):
 def ping_monitor(hook_config, config, config_filename, state, monitoring_log_level, dry_run):
@@ -37,9 +53,6 @@ def ping_monitor(hook_config, config, config_filename, state, monitoring_log_lev
     dry_run_label = ' (dry run; not actually sending)' if dry_run else ''
     dry_run_label = ' (dry run; not actually sending)' if dry_run else ''
     logger.info(f'Sending failure event to PagerDuty {dry_run_label}')
     logger.info(f'Sending failure event to PagerDuty {dry_run_label}')
 
 
-    if dry_run:
-        return
-
     try:
     try:
         integration_key = borgmatic.hooks.credential.parse.resolve_credential(
         integration_key = borgmatic.hooks.credential.parse.resolve_credential(
             hook_config.get('integration_key'), config
             hook_config.get('integration_key'), config
@@ -48,6 +61,10 @@ def ping_monitor(hook_config, config, config_filename, state, monitoring_log_lev
         logger.warning(f'PagerDuty credential error: {error}')
         logger.warning(f'PagerDuty credential error: {error}')
         return
         return
 
 
+    logs_payload = borgmatic.hooks.monitoring.logs.format_buffered_logs_for_payload(
+        HANDLER_IDENTIFIER
+    )
+
     hostname = platform.node()
     hostname = platform.node()
     local_timestamp = datetime.datetime.now(datetime.timezone.utc).astimezone().isoformat()
     local_timestamp = datetime.datetime.now(datetime.timezone.utc).astimezone().isoformat()
     payload = json.dumps(
     payload = json.dumps(
@@ -66,11 +83,14 @@ def ping_monitor(hook_config, config, config_filename, state, monitoring_log_lev
                     'hostname': hostname,
                     'hostname': hostname,
                     'configuration filename': config_filename,
                     'configuration filename': config_filename,
                     'server time': local_timestamp,
                     'server time': local_timestamp,
+                    'logs': logs_payload,
                 },
                 },
             },
             },
         }
         }
     )
     )
-    logger.debug(f'Using PagerDuty payload: {payload}')
+
+    if dry_run:
+        return
 
 
     logging.getLogger('urllib3').setLevel(logging.ERROR)
     logging.getLogger('urllib3').setLevel(logging.ERROR)
     try:
     try:
@@ -83,6 +103,7 @@ def ping_monitor(hook_config, config, config_filename, state, monitoring_log_lev
 
 
 def destroy_monitor(ping_url_or_uuid, config, monitoring_log_level, dry_run):  # pragma: no cover
 def destroy_monitor(ping_url_or_uuid, config, monitoring_log_level, dry_run):  # pragma: no cover
     '''
     '''
-    No destruction is necessary for this monitor.
+    Remove the monitor handler that was added to the root logger. This prevents the handler from
+    getting reused by other instances of this monitor.
     '''
     '''
-    pass
+    borgmatic.hooks.monitoring.logs.remove_handler(HANDLER_IDENTIFIER)

+ 21 - 0
docs/how-to/monitor-your-backups.md

@@ -292,6 +292,27 @@ If you have any issues with the integration, [please contact
 us](https://torsion.org/borgmatic/#support-and-contributing).
 us](https://torsion.org/borgmatic/#support-and-contributing).
 
 
 
 
+### Sending logs
+
+<span class="minilink minilink-addedin">New in version 1.9.14</span> borgmatic
+logs are included in the payload data sent to PagerDuty. This means that
+(truncated) borgmatic logs, including error messages, show up in the PagerDuty
+incident UI and corresponding notification emails.
+
+You can customize the verbosity of the logs that are sent by using borgmatic's
+`--monitoring-verbosity` flag. The `--list` and `--stats` flags may also be of
+use. See `borgmatic create --help` for more information.
+
+If you don't want any logs sent, you can disable this feature by setting
+`send_logs` to `false`:
+
+```yaml
+pagerduty:
+    integration_key: a177cad45bd374409f78906a810a3074
+    send_logs: false
+```
+
+
 ## Pushover hook
 ## Pushover hook
 
 
 <span class="minilink minilink-addedin">New in version 1.9.2</span>
 <span class="minilink minilink-addedin">New in version 1.9.2</span>

+ 52 - 0
tests/unit/hooks/monitoring/test_pagerduty.py

@@ -3,6 +3,46 @@ from flexmock import flexmock
 from borgmatic.hooks.monitoring import pagerduty as module
 from borgmatic.hooks.monitoring import pagerduty as module
 
 
 
 
+def mock_logger():
+    logger = flexmock()
+    logger.should_receive('addHandler')
+    logger.should_receive('removeHandler')
+    flexmock(module.logging).should_receive('getLogger').and_return(logger)
+
+
+def test_initialize_monitor_creates_log_handler():
+    monitoring_log_level = 1
+
+    mock_logger()
+    flexmock(module.borgmatic.hooks.monitoring.logs).should_receive(
+        'Forgetful_buffering_handler'
+    ).once()
+
+    module.initialize_monitor({}, {}, 'test.yaml', monitoring_log_level, dry_run=False)
+
+
+def test_initialize_monitor_creates_log_handler_when_send_logs_true():
+    mock_logger()
+    flexmock(module.borgmatic.hooks.monitoring.logs).should_receive(
+        'Forgetful_buffering_handler'
+    ).once()
+
+    module.initialize_monitor(
+        {'send_logs': True}, {}, 'test.yaml', monitoring_log_level=1, dry_run=False
+    )
+
+
+def test_initialize_monitor_bails_when_send_logs_false():
+    mock_logger()
+    flexmock(module.borgmatic.hooks.monitoring.logs).should_receive(
+        'Forgetful_buffering_handler'
+    ).never()
+
+    module.initialize_monitor(
+        {'send_logs': False}, {}, 'test.yaml', monitoring_log_level=1, dry_run=False
+    )
+
+
 def test_ping_monitor_ignores_start_state():
 def test_ping_monitor_ignores_start_state():
     flexmock(module.borgmatic.hooks.credential.parse).should_receive(
     flexmock(module.borgmatic.hooks.credential.parse).should_receive(
         'resolve_credential'
         'resolve_credential'
@@ -39,6 +79,9 @@ def test_ping_monitor_calls_api_for_fail_state():
     flexmock(module.borgmatic.hooks.credential.parse).should_receive(
     flexmock(module.borgmatic.hooks.credential.parse).should_receive(
         'resolve_credential'
         'resolve_credential'
     ).replace_with(lambda value, config: value)
     ).replace_with(lambda value, config: value)
+    flexmock(module.borgmatic.hooks.monitoring.logs).should_receive(
+        'format_buffered_logs_for_payload'
+    ).and_return('loggy\nlogs')
     flexmock(module.requests).should_receive('post').and_return(flexmock(ok=True))
     flexmock(module.requests).should_receive('post').and_return(flexmock(ok=True))
 
 
     module.ping_monitor(
     module.ping_monitor(
@@ -55,6 +98,9 @@ def test_ping_monitor_dry_run_does_not_call_api():
     flexmock(module.borgmatic.hooks.credential.parse).should_receive(
     flexmock(module.borgmatic.hooks.credential.parse).should_receive(
         'resolve_credential'
         'resolve_credential'
     ).replace_with(lambda value, config: value)
     ).replace_with(lambda value, config: value)
+    flexmock(module.borgmatic.hooks.monitoring.logs).should_receive(
+        'format_buffered_logs_for_payload'
+    ).and_return('loggy\nlogs')
     flexmock(module.requests).should_receive('post').never()
     flexmock(module.requests).should_receive('post').never()
 
 
     module.ping_monitor(
     module.ping_monitor(
@@ -71,6 +117,9 @@ def test_ping_monitor_with_connection_error_logs_warning():
     flexmock(module.borgmatic.hooks.credential.parse).should_receive(
     flexmock(module.borgmatic.hooks.credential.parse).should_receive(
         'resolve_credential'
         'resolve_credential'
     ).replace_with(lambda value, config: value)
     ).replace_with(lambda value, config: value)
+    flexmock(module.borgmatic.hooks.monitoring.logs).should_receive(
+        'format_buffered_logs_for_payload'
+    ).and_return('loggy\nlogs')
     flexmock(module.requests).should_receive('post').and_raise(
     flexmock(module.requests).should_receive('post').and_raise(
         module.requests.exceptions.ConnectionError
         module.requests.exceptions.ConnectionError
     )
     )
@@ -108,6 +157,9 @@ def test_ping_monitor_with_other_error_logs_warning():
     flexmock(module.borgmatic.hooks.credential.parse).should_receive(
     flexmock(module.borgmatic.hooks.credential.parse).should_receive(
         'resolve_credential'
         'resolve_credential'
     ).replace_with(lambda value, config: value)
     ).replace_with(lambda value, config: value)
+    flexmock(module.borgmatic.hooks.monitoring.logs).should_receive(
+        'format_buffered_logs_for_payload'
+    ).and_return('loggy\nlogs')
     response.should_receive('raise_for_status').and_raise(
     response.should_receive('raise_for_status').and_raise(
         module.requests.exceptions.RequestException
         module.requests.exceptions.RequestException
     )
     )