浏览代码

[Watchdog] Watch olefy, thanks to @ntimo

andryyy 6 年之前
父节点
当前提交
4b9f022890
共有 1 个文件被更改,包括 35 次插入0 次删除
  1. 35 0
      data/Dockerfiles/watchdog/watchdog.sh

+ 35 - 0
data/Dockerfiles/watchdog/watchdog.sh

@@ -499,6 +499,31 @@ Empty
   return 1
 }
 
+olefy_checks() {
+  err_count=0
+  diff_c=0
+  THRESHOLD=20
+  # Reduce error count by 2 after restarting an unhealthy container
+  trap "[ ${err_count} -gt 1 ] && err_count=$(( ${err_count} - 2 ))" USR1
+  while [ ${err_count} -lt ${THRESHOLD} ]; do
+    touch /tmp/olefy-mailcow; echo "$(tail -50 /tmp/olefy-mailcow)" > /tmp/olefy-mailcow
+    host_ip=$(get_container_ip olefy-mailcow)
+    err_c_cur=${err_count}
+    /usr/lib/nagios/plugins/check_tcp -4 -H ${host_ip} -p 10055 2>> /tmp/olefy-mailcow 1>&2; err_count=$(( ${err_count} + $? ))
+    [ ${err_c_cur} -eq ${err_count} ] && [ ! $((${err_count} - 1)) -lt 0 ] && err_count=$((${err_count} - 1)) diff_c=1
+    [ ${err_c_cur} -ne ${err_count} ] && diff_c=$(( ${err_c_cur} - ${err_count} ))
+    progress "Olefy" ${THRESHOLD} $(( ${THRESHOLD} - ${err_count} )) ${diff_c}
+    if [[ $? == 10 ]]; then
+      diff_c=0
+      sleep 1
+    else
+      diff_c=0
+      sleep $(( ( RANDOM % 30 )  + 10 ))
+    fi
+  done
+  return 1
+}
+
 # Notify about start
 [[ ! -z ${WATCHDOG_NOTIFY_EMAIL} ]] && mail_error "watchdog-mailcow" "Watchdog started monitoring mailcow."
 
@@ -617,6 +642,16 @@ done
 ) &
 BACKGROUND_TASKS+=($!)
 
+(
+while true; do
+  if ! olefy_checks; then
+    log_msg "Olefy hit error limit"
+    echo olefy-mailcow > /tmp/com_pipe
+  fi
+done
+) &
+BACKGROUND_TASKS+=($!)
+
 (
 while true; do
   if ! acme_checks; then