ソースを参照

[Watchdog] Watch mail queue (added inexpensive check via "find" instead of adding an API endpoint to dockerapi-mailcow)

andryyy 5 年 前
コミット
063337b58d
2 ファイル変更、49 行追加、1 行削除
  1. 45 0
      data/Dockerfiles/watchdog/watchdog.sh
  2. 4 1
      docker-compose.yml

+ 45 - 0
data/Dockerfiles/watchdog/watchdog.sh

@@ -88,6 +88,7 @@ log_msg() {
 
 function mail_error() {
   [[ -z ${1} ]] && return 1
+  # If exists, body will be the content of "/tmp/${1}", even if ${2} is set
   [[ -z ${2} ]] && BODY="Service was restarted on $(date), please check your mailcow installation." || BODY="$(date) - ${2}"
   WATCHDOG_NOTIFY_EMAIL=$(echo "${WATCHDOG_NOTIFY_EMAIL}" | sed 's/"//;s|"$||')
   # Some exceptions for subject and body formats
@@ -524,6 +525,35 @@ ratelimit_checks() {
   return 1
 }
 
+mailq_checks() {
+  err_count=0
+  diff_c=0
+  THRESHOLD=${MAILQ_THRESHOLD}
+  # Reduce error count by 2 after restarting an unhealthy container
+  trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
+  while [ ${err_count} -lt ${THRESHOLD} ]; do
+    touch /tmp/mail_queue_status; echo "$(tail -50 /tmp/mail_queue_status)" > /tmp/mail_queue_status
+    MAILQ_LOG_STATUS=$(find /var/spool/postfix/deferred -type f | wc -l)
+    echo "Mail queue contains ${MAILQ_LOG_STATUS} items (critical limit is ${MAILQ_CRIT}) at $(date)" >> /tmp/mail_queue_status
+    err_c_cur=${err_count}
+    if [ ${MAILQ_LOG_STATUS} -ge ${MAILQ_CRIT} ]; then
+      err_count=$(( ${err_count} + 1 ))
+      echo "Mail queue contains ${MAILQ_LOG_STATUS} items (critical limit is ${MAILQ_CRIT}) at $(date)" >> /tmp/mail_queue_status
+    fi
+    [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1
+    [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} ))
+    progress "Mail queue" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c}
+    if [[ $? == 10 ]]; then
+      diff_c=0
+      sleep 60
+    else
+      diff_c=0
+      sleep $(( ( RANDOM % 60 ) + 20 ))
+    fi
+  done
+  return 1
+}
+
 fail2ban_checks() {
   err_count=0
   diff_c=0
@@ -825,6 +855,18 @@ PID=$!
 echo "Spawned postfix_checks with PID ${PID}"
 BACKGROUND_TASKS+=(${PID})
 
+( # Background subshell that keeps re-running the mail queue check
+while true; do
+  if ! mailq_checks; then # mailq_checks returned non-zero
+    log_msg "Mail queue hit error limit"
+    echo mail_queue_status > /tmp/com_pipe # the dispatch loop has a matching "mail_queue_status" branch (presumably fed from this pipe)
+  fi
+done
+) &
+PID=$! # PID of the subshell that was just sent to the background
+echo "Spawned mailq_checks with PID ${PID}"
+BACKGROUND_TASKS+=(${PID}) # recorded in the same array as the other spawned checks
+
 (
 while true; do
   if ! dovecot_checks; then
@@ -961,6 +1003,9 @@ while true; do
   if [[ ${com_pipe_answer} == "ratelimit" ]]; then
     log_msg "At least one ratelimit was applied"
     [[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}"
+  elif [[ ${com_pipe_answer} == "mail_queue_status" ]]; then
+    log_msg "Mail queue status is critical"
+    [[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}"
   elif [[ ${com_pipe_answer} == "external_checks" ]]; then
     log_msg "Your mailcow is an open relay!"
     [[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "${com_pipe_answer}" "Please stop mailcow now and check your network configuration!"

+ 4 - 1
docker-compose.yml

@@ -396,7 +396,7 @@ services:
         - /lib/modules:/lib/modules:ro
 
     watchdog-mailcow:
-      image: mailcow/watchdog:1.77
+      image: mailcow/watchdog:1.78
       # Debug
       #command: /watchdog.sh
       dns:
@@ -404,6 +404,7 @@ services:
       volumes:
         - rspamd-vol-1:/var/lib/rspamd
         - mysql-socket-vol-1:/var/run/mysqld/
+        - postfix-vol-1:/var/spool/postfix
         - ./data/assets/ssl:/etc/ssl/mail/:ro
       restart: always
       environment:
@@ -447,6 +448,8 @@ services:
         - IPV6NAT_THRESHOLD=1
         - RSPAMD_THRESHOLD=5
         - OLEFY_THRESHOLD=5
+        - MAILQ_THRESHOLD=3
+        - MAILQ_CRIT=30
       networks:
         mailcow-network:
           aliases: