Browse Source

Change connection failures for monitoring hooks to be warnings instead of errors (#439).

Dan Helfman 3 years ago
parent
commit
54933ebef5

+ 3 - 0
NEWS

@@ -3,6 +3,9 @@
    logs to send to the Healthchecks server.
  * #402: Remove the error when "archive_name_format" is specified but a retention prefix isn't. 
  * #420: Warn when an unsupported variable is used in a hook command.
+ * #439: Change connection failures for monitoring hooks (Healthchecks, Cronitor, PagerDuty, and
+   Cronhub) to be warnings instead of errors. This way, the monitoring system failing does not block
+   backups.
  * #460: Add Healthchecks monitoring hook "send_logs" option to enable/disable sending borgmatic
    logs to the Healthchecks server.
  * #525: Add Healthchecks monitoring hook "states" option to only enable pinging for particular

+ 1 - 1
borgmatic/hooks/command.py

@@ -19,7 +19,7 @@ def interpolate_context(config_filename, hook_description, command, context):
         command = command.replace('{%s}' % name, str(value))
 
     for unsupported_variable in re.findall(r'{\w+}', command):
-        logger.warn(
+        logger.warning(
             f"{config_filename}: Variable '{unsupported_variable}' is not supported in {hook_description} hook"
         )
 

+ 4 - 1
borgmatic/hooks/cronhub.py

@@ -42,7 +42,10 @@ def ping_monitor(hook_config, config_filename, state, monitoring_log_level, dry_
 
     if not dry_run:
         logging.getLogger('urllib3').setLevel(logging.ERROR)
-        requests.get(ping_url)
+        try:
+            requests.get(ping_url)
+        except requests.exceptions.RequestException as error:
+            logger.warning(f'{config_filename}: Cronhub error: {error}')
 
 
 def destroy_monitor(

+ 4 - 1
borgmatic/hooks/cronitor.py

@@ -37,7 +37,10 @@ def ping_monitor(hook_config, config_filename, state, monitoring_log_level, dry_
 
     if not dry_run:
         logging.getLogger('urllib3').setLevel(logging.ERROR)
-        requests.get(ping_url)
+        try:
+            requests.get(ping_url)
+        except requests.exceptions.RequestException as error:
+            logger.warning(f'{config_filename}: Cronitor error: {error}')
 
 
 def destroy_monitor(

+ 4 - 1
borgmatic/hooks/healthchecks.py

@@ -124,7 +124,10 @@ def ping_monitor(hook_config, config_filename, state, monitoring_log_level, dry_
 
     if not dry_run:
         logging.getLogger('urllib3').setLevel(logging.ERROR)
-        requests.post(ping_url, data=payload.encode('utf-8'))
+        try:
+            requests.post(ping_url, data=payload.encode('utf-8'))
+        except requests.exceptions.RequestException as error:
+            logger.warning(f'{config_filename}: Healthchecks error: {error}')
 
 
 def destroy_monitor(hook_config, config_filename, monitoring_log_level, dry_run):

+ 4 - 1
borgmatic/hooks/pagerduty.py

@@ -68,7 +68,10 @@ def ping_monitor(hook_config, config_filename, state, monitoring_log_level, dry_
     logger.debug('{}: Using PagerDuty payload: {}'.format(config_filename, payload))
 
     logging.getLogger('urllib3').setLevel(logging.ERROR)
-    requests.post(EVENTS_API_URL, data=payload.encode('utf-8'))
+    try:
+        requests.post(EVENTS_API_URL, data=payload.encode('utf-8'))
+    except requests.exceptions.RequestException as error:
+        logger.warning(f'{config_filename}: PagerDuty error: {error}')
 
 
 def destroy_monitor(

+ 4 - 1
docs/how-to/monitor-your-backups.md

@@ -159,7 +159,10 @@ itself. But the logs are only included for errors that occur when a `prune`,
 
 You can customize the verbosity of the logs that are sent to Healthchecks with
 borgmatic's `--monitoring-verbosity` flag. The `--files` and `--stats` flags
-may also be of use. See `borgmatic --help` for more information.
+may also be of use. See `borgmatic --help` for more information. Also see the
+[borgmatic configuration
+file](https://torsion.org/borgmatic/docs/reference/configuration/) for
+additional Healthchecks options.
 
 You can configure Healthchecks to notify you by a [variety of
 mechanisms](https://healthchecks.io/#welcome-integrations) when backups fail

+ 15 - 0
tests/unit/hooks/test_cronhub.py

@@ -58,3 +58,18 @@ def test_ping_monitor_dry_run_does_not_hit_ping_url():
     module.ping_monitor(
         hook_config, 'config.yaml', module.monitor.State.START, monitoring_log_level=1, dry_run=True
     )
+
+
+def test_ping_monitor_with_connection_error_does_not_raise():
+    hook_config = {'ping_url': 'https://example.com/start/abcdef'}
+    flexmock(module.requests).should_receive('get').and_raise(
+        module.requests.exceptions.ConnectionError
+    )
+
+    module.ping_monitor(
+        hook_config,
+        'config.yaml',
+        module.monitor.State.START,
+        monitoring_log_level=1,
+        dry_run=False,
+    )

+ 15 - 0
tests/unit/hooks/test_cronitor.py

@@ -45,3 +45,18 @@ def test_ping_monitor_dry_run_does_not_hit_ping_url():
     module.ping_monitor(
         hook_config, 'config.yaml', module.monitor.State.START, monitoring_log_level=1, dry_run=True
     )
+
+
+def test_ping_monitor_with_connection_error_does_not_raise():
+    hook_config = {'ping_url': 'https://example.com'}
+    flexmock(module.requests).should_receive('get').and_raise(
+        module.requests.exceptions.ConnectionError
+    )
+
+    module.ping_monitor(
+        hook_config,
+        'config.yaml',
+        module.monitor.State.START,
+        monitoring_log_level=1,
+        dry_run=False,
+    )

+ 17 - 0
tests/unit/hooks/test_healthchecks.py

@@ -231,3 +231,20 @@ def test_ping_monitor_hits_ping_url_when_states_matching():
         monitoring_log_level=1,
         dry_run=False,
     )
+
+
+def test_ping_monitor_with_connection_error_does_not_raise():
+    flexmock(module).should_receive('Forgetful_buffering_handler')
+    flexmock(module.logger).should_receive('warning')
+    hook_config = {'ping_url': 'https://example.com'}
+    flexmock(module.requests).should_receive('post').with_args(
+        'https://example.com/start', data=''.encode('utf-8')
+    ).and_raise(module.requests.exceptions.ConnectionError)
+
+    module.ping_monitor(
+        hook_config,
+        'config.yaml',
+        state=module.monitor.State.START,
+        monitoring_log_level=1,
+        dry_run=False,
+    )

+ 15 - 0
tests/unit/hooks/test_pagerduty.py

@@ -49,3 +49,18 @@ def test_ping_monitor_dry_run_does_not_call_api():
         monitoring_log_level=1,
         dry_run=True,
     )
+
+
+def test_ping_monitor_with_connection_error_does_not_raise():
+    flexmock(module.requests).should_receive('post').and_raise(
+        module.requests.exceptions.ConnectionError
+    )
+    flexmock(module.logger).should_receive('warning')
+
+    module.ping_monitor(
+        {'integration_key': 'abc123'},
+        'config.yaml',
+        module.monitor.State.FAIL,
+        monitoring_log_level=1,
+        dry_run=False,
+    )