Selaa lähdekoodia

watchdog: use dig instead of check_dns (#6685)

* watchdog: use dig instead of check_dns

check_dns is slower and uses more system resources;
a small script wrapping dig performs better and uses
fewer system resources.

* added debug mode + compose image bump

---------

Co-authored-by: maxi322 <maxi322@users.noreply.github.com>
Co-authored-by: DerLinkman <niklas.meyer@servercow.de>
maxi322 1 viikko sitten
vanhempi
sitoutus
5e66ffa366

+ 3 - 2
data/Dockerfiles/watchdog/Dockerfile

@@ -16,7 +16,6 @@ RUN apk add --update \
   fcgi \
   openssl \
   nagios-plugins-mysql \
-  nagios-plugins-dns \
   nagios-plugins-disk \
   bind-tools \
   redis \
@@ -32,9 +31,11 @@ RUN apk add --update \
   tzdata \
   whois \
   && curl https://raw.githubusercontent.com/mludvig/smtp-cli/v3.10/smtp-cli -o /smtp-cli \
-  && chmod +x smtp-cli
+  && chmod +x smtp-cli \
+  && mkdir /usr/lib/mailcow
 
 COPY watchdog.sh /watchdog.sh
 COPY check_mysql_slavestatus.sh /usr/lib/nagios/plugins/check_mysql_slavestatus.sh
+COPY check_dns.sh /usr/lib/mailcow/check_dns.sh
 
 CMD ["/watchdog.sh"]

+ 39 - 0
data/Dockerfiles/watchdog/check_dns.sh

@@ -0,0 +1,39 @@
+#!/bin/sh
+# Nagios-style DNS check built on dig. Usage: check_dns.sh -H host -s server
+# Exit codes: 0 = IPv4 answer received, 2 = lookup failed / no IPv4, 3 = usage error
+
+while getopts "H:s:" opt; do
+  case "$opt" in
+    H) HOST="$OPTARG" ;;
+    s) SERVER="$OPTARG" ;;
+    *) echo "Usage: $0 -H host -s server"; exit 3 ;;
+  esac
+done
+
+if [ -z "$SERVER" ]; then
+  echo "No DNS Server provided"
+  exit 3
+fi
+
+if [ -z "$HOST" ]; then
+  echo "No host to test provided"
+  exit 3
+fi
+
+# time only the dig call itself; needs a date that understands %3N (milliseconds)
+START_TIME=$(date +%s%3N)
+dig_output=$(dig +short +timeout=2 +tries=1 "$HOST" @"$SERVER" 2>/dev/null)
+dig_rc=$?
+END_TIME=$(date +%s%3N)
+ELAPSED_TIME=$((END_TIME - START_TIME))
+
+# keep only IPv4-looking lines (drops CNAME targets and dig ';;' notices)
+dig_output_ips=$(printf '%s\n' "$dig_output" | grep -E '^[0-9.]+$' | sort | paste -sd ',' -)
+
+# critical when dig failed or the answer holds no address to report
+if [ "$dig_rc" -ne 0 ] || [ -z "$dig_output_ips" ]; then
+  echo "Domain $HOST was not found by the server"
+  exit 2
+fi
+echo "DNS OK: $ELAPSED_TIME ms response time. $HOST returns $dig_output_ips"
+exit 0

+ 6 - 1
data/Dockerfiles/watchdog/watchdog.sh

@@ -1,5 +1,10 @@
 #!/bin/bash
 
+if [ "${DEV_MODE:-n}" != "n" ]; then  # tracing is opt-in: unset/empty DEV_MODE counts as "n"
+  echo -e "\e[31mEnabled Debug Mode\e[0m"
+  set -x
+fi
+
 trap "exit" INT TERM
 trap "kill 0" EXIT
 
@@ -297,7 +302,7 @@ unbound_checks() {
     touch /tmp/unbound-mailcow; echo "$(tail -50 /tmp/unbound-mailcow)" > /tmp/unbound-mailcow
     host_ip=$(get_container_ip unbound-mailcow)
     err_c_cur=${err_count}
-    /usr/lib/nagios/plugins/check_dns -s ${host_ip} -H stackoverflow.com 2>> /tmp/unbound-mailcow 1>&2; err_count=$(( ${err_count} + $? ))
+    /usr/lib/mailcow/check_dns.sh -s ${host_ip} -H stackoverflow.com 2>> /tmp/unbound-mailcow 1>&2; err_count=$(( ${err_count} + $? ))
     DNSSEC=$(dig com +dnssec | egrep 'flags:.+ad')
     if [[ -z ${DNSSEC} ]]; then
       echo "DNSSEC failure" 2>> /tmp/unbound-mailcow 1>&2

+ 2 - 1
docker-compose.yml

@@ -521,7 +521,7 @@ services:
         - /lib/modules:/lib/modules:ro
 
     watchdog-mailcow:
-      image: ghcr.io/mailcow/watchdog:2.08
+      image: ghcr.io/mailcow/watchdog:2.09
       dns:
         - ${IPV4_NETWORK:-172.22.1}.254
       tmpfs:
@@ -588,6 +588,7 @@ services:
         - OLEFY_THRESHOLD=${OLEFY_THRESHOLD:-5}
         - MAILQ_THRESHOLD=${MAILQ_THRESHOLD:-20}
         - MAILQ_CRIT=${MAILQ_CRIT:-30}
+        - DEV_MODE=${DEV_MODE:-n}
       networks:
         mailcow-network:
           aliases: