|
@@ -499,6 +499,31 @@ Empty
|
|
|
return 1
|
|
|
}
|
|
|
|
|
|
#######################################
# Health-check loop for the olefy-mailcow container.
#
# Repeatedly probes TCP port 10055 on the container's IP with the Nagios
# check_tcp plugin and keeps an error counter:
#   - a failing probe adds the plugin's exit status to the counter,
#   - a passing probe subtracts 1 (never going below 0),
#   - a USR1 signal subtracts 2 (only when the counter is above 1).
# The loop ends once the counter reaches THRESHOLD.
#
# Globals:   err_count, diff_c, THRESHOLD, host_ip, err_c_cur (all written).
#            Intentionally not `local`: the USR1 trap stays installed after
#            this function returns and must still find err_count.
# Outputs:   probe output is appended to /tmp/olefy-mailcow (kept to the
#            last 50 lines); progress is reported through progress().
# Returns:   always 1, once the error threshold has been reached.
#######################################
olefy_checks() {
  err_count=0
  diff_c=0
  THRESHOLD=20
  # Reduce error count by 2 after restarting an unhealthy container.
  # The handler is single-quoted so err_count is read when the signal
  # arrives. With double quotes the expansion happened right here, while
  # err_count was still 0, freezing the handler into a no-op.
  trap '[ "${err_count:-0}" -gt 1 ] && err_count=$(( err_count - 2 ))' USR1
  while [ "${err_count}" -lt "${THRESHOLD}" ]; do
    # Trim the probe log to its last 50 lines before appending to it.
    touch /tmp/olefy-mailcow; echo "$(tail -50 /tmp/olefy-mailcow)" > /tmp/olefy-mailcow
    host_ip=$(get_container_ip olefy-mailcow)
    err_c_cur=${err_count}
    # stdout and stderr of the probe both end up in the log file; the
    # plugin's exit status (0 on success) is added to the error counter.
    /usr/lib/nagios/plugins/check_tcp -4 -H "${host_ip}" -p 10055 2>> /tmp/olefy-mailcow 1>&2; err_count=$(( err_count + $? ))
    # Probe passed (counter unchanged): recover by one, but never below 0.
    if [ "${err_c_cur}" -eq "${err_count}" ] && [ "${err_count}" -ge 1 ]; then
      err_count=$(( err_count - 1 ))
      diff_c=1
    fi
    # Counter moved: report the signed change (negative when errors grew).
    [ "${err_c_cur}" -ne "${err_count}" ] && diff_c=$(( err_c_cur - err_count ))
    progress "Olefy" "${THRESHOLD}" "$(( THRESHOLD - err_count ))" "${diff_c}"
    # progress() exiting with 10 requests a quick re-check after 1 second;
    # any other status waits a random 10-39 seconds.
    if [[ $? == 10 ]]; then
      diff_c=0
      sleep 1
    else
      diff_c=0
      sleep $(( ( RANDOM % 30 ) + 10 ))
    fi
  done
  return 1
}
|
|
|
+
|
|
|
# Announce the watchdog start by mail, but only when a notification
# address has been configured in WATCHDOG_NOTIFY_EMAIL.
if [[ -n ${WATCHDOG_NOTIFY_EMAIL} ]]; then
  mail_error "watchdog-mailcow" "Watchdog started monitoring mailcow."
fi
|
|
|
|
|
@@ -617,6 +642,16 @@ done
|
|
|
) &
|
|
|
BACKGROUND_TASKS+=($!)
|
|
|
|
|
|
# Background monitor for Olefy: run olefy_checks over and over; each time it
# reports failure, log the event and write the container name to
# /tmp/com_pipe. The subshell's PID is recorded in BACKGROUND_TASKS.
(
  while :; do
    # A successful run needs no action; go straight to the next round.
    olefy_checks && continue
    log_msg "Olefy hit error limit"
    echo olefy-mailcow > /tmp/com_pipe
  done
) &
BACKGROUND_TASKS+=("$!")

|
|
|
+
|
|
|
(
|
|
|
while true; do
|
|
|
if ! acme_checks; then
|