check_mysql_slavestatus.sh 13 KB

123456789101112131415161718192021222324252627282930313233343536373839404142434445464748495051525354555657585960616263646566676869707172737475767778798081828384858687888990919293949596979899100101102103104105106107108109110111112113114115116117118119120121122123124125126127128129130131132133134135136137138139140141142143144145146147148149150151152153154155156157158159160161162163164165166167168169170171172173174175176177178179180181182183184185186187188189190191192193194195196197198199200201202203204205206207208209210211212213214215216217218219220221222223
  1. #!/bin/bash
  2. #########################################################################
  3. # Script: check_mysql_slavestatus.sh #
  4. # Author: Claudio Kuenzler www.claudiokuenzler.com #
  5. # Purpose: Monitor MySQL Replication status with Nagios #
  6. # Description: Connects to given MySQL hosts and checks for running #
  7. # SLAVE state and delivers additional info #
  8. # Original: This script is a modified version of #
  9. # check mysql slave sql running written by dhirajt #
  10. # Thanks to: Victor Balada Diaz for his ideas added on 20080930 #
  11. # Soren Klintrup for stuff added on 20081015 #
  12. # Marc Feret for Slave_IO_Running check 20111227 #
  13. # Peter Lecki for his mods added on 20120803 #
  14. # Serge Victor for his mods added on 20131223 #
  15. # Omri Bahumi for his fix added on 20131230 #
  16. # Marc Falzon for his option mods added on 20190822 #
  17. # Andreas Pfeiffer for adding socket option on 20190822 #
  18. # History: #
  19. # 2008041700 Original Script modified #
  20. # 2008041701 Added additional info if status OK #
  21. # 2008041702 Added usage of script with params -H -u -p #
  22. # 2008041703 Added bindir variable for multiple platforms #
  23. # 2008041704 Added help because mankind needs help #
  24. # 2008093000 Using /bin/sh instead of /bin/bash #
  25. # 2008093001 Added port for MySQL server #
  26. # 2008093002 Added mysqldir if mysql binary is elsewhere #
  27. # 2008101501 Changed bindir/mysqldir to use PATH #
  28. # 2008101501 Use $() instead of `` to avoid forks #
  29. # 2008101501 Use ${} for variables to prevent problems #
  30. # 2008101501 Check if required commands exist #
  31. # 2008101501 Check if mysql connection works #
  32. # 2008101501 Exit with unknown status at script end #
  33. # 2008101501 Also display help if no option is given #
  34. # 2008101501 Add warning/critical check to delay #
  35. # 2011062200 Add perfdata #
  36. # 2011122700 Checking Slave_IO_Running #
  37. # 2012080300 Changed to use only one mysql query #
  38. # 2012080301 Added warn and crit delay as optional args #
  39. # 2012080302 Added standard -h option for syntax help #
  40. # 2012080303 Added check for mandatory options passed in #
  41. # 2012080304 Added error output from mysql #
  42. # 2012080305 Changed from 'cut' to 'awk' (eliminate ws) #
  43. # 2012111600 Do not show password in error output #
  44. # 2013042800 Changed PATH to use existing PATH, too #
  45. # 2013050800 Bugfix in PATH export #
  46. # 2013092700 Bugfix in PATH export #
  47. # 2013092701 Bugfix in getopts #
  48. # 2013101600 Rewrite of threshold logic and handling #
  49. # 2013101601 Optical clean up #
  50. # 2013101602 Rewrite help output #
  51. # 2013101700 Handle Slave IO in 'Connecting' state #
  52. # 2013101701 Minor changes in output, handling UNKNOWN situations now #
  53. # 2013101702 Exit CRITICAL when Slave IO in Connecting state #
  54. # 2013123000 Slave_SQL_Running also matched Slave_SQL_Running_State #
  55. # 2015011600 Added 'moving' check to catch possible connection issues #
  56. # 2015011900 Use its own threshold for replication moving check #
  57. # 2019082200 Add support for mysql option file #
  58. # 2019082201 Improve password security (remove from mysql cli) #
  59. # 2019082202 Added socket parameter (-S) #
  60. # 2019082203 Use default port 3306, makes -P optional #
  61. # 2019082204 Fix moving subcheck, improve documentation #
  62. #########################################################################
  63. # Usage: ./check_mysql_slavestatus.sh (-o file|(-H dbhost [-P port]|-S socket) -u dbuser -p dbpass) [-s connection] [-w integer] [-c integer] [-m integer]
  64. #########################################################################
  65. help="\ncheck_mysql_slavestatus.sh (c) 2008-2019 GNU GPLv2 licence
  66. Usage: $0 (-o file|(-H dbhost [-P port]|-S socket) -u username -p password) [-s connection] [-w integer] [-c integer] [-m]\n
  67. Options:\n-o Path to option file containing connection settings (e.g. /home/nagios/.my.cnf). Note: If this option is used, -H, -u, -p parameters will become optional\n-H Hostname or IP of slave server\n-P MySQL Port of slave server (optional, defaults to 3306)\n-u Username of DB-user\n-p Password of DB-user\n-S database socket\n-s Connection name (optional, with multi-source replication)\n-w Replication delay in seconds for Warning status (optional)\n-c Replication delay in seconds for Critical status (optional)\n-m Threshold in seconds since when replication did not move (compares the slaves log position)\n
  68. Attention: The DB-user you type in must have CLIENT REPLICATION rights on the DB-server. Example:\n\tGRANT REPLICATION CLIENT on *.* TO 'nagios'@'%' IDENTIFIED BY 'secret';"
  69. STATE_OK=0 # define the exit code if status is OK
  70. STATE_WARNING=1 # define the exit code if status is Warning (not really used)
  71. STATE_CRITICAL=2 # define the exit code if status is Critical
  72. STATE_UNKNOWN=3 # define the exit code if status is Unknown
  73. export PATH=$PATH:/usr/local/bin:/usr/bin:/bin # Set path
  74. crit="No" # what is the answer of MySQL Slave_SQL_Running for a Critical status?
  75. ok="Yes" # what is the answer of MySQL Slave_SQL_Running for an OK status?
  76. port="-P 3306" # on which tcp port is the target MySQL slave listening?
  77. for cmd in mysql awk grep expr [
  78. do
  79. if ! `which ${cmd} &>/dev/null`
  80. then
  81. echo "UNKNOWN: This script requires the command '${cmd}' but it does not exist; please check if command exists and PATH is correct"
  82. exit ${STATE_UNKNOWN}
  83. fi
  84. done
  85. # Check for people who need help
  86. #########################################################################
  87. if [ "${1}" = "--help" -o "${#}" = "0" ];
  88. then
  89. echo -e "${help}";
  90. exit 1;
  91. fi
  92. # Important given variables for the DB-Connect
  93. #########################################################################
  94. while getopts "H:P:u:p:S:s:w:c:o:m:h" Input;
  95. do
  96. case ${Input} in
  97. H) host="-h ${OPTARG}";slavetarget=${OPTARG};;
  98. P) port="-P ${OPTARG}";;
  99. u) user="-u ${OPTARG}";;
  100. p) password="${OPTARG}"; export MYSQL_PWD="${OPTARG}";;
  101. S) socket="-S ${OPTARG}";;
  102. s) connection=\"${OPTARG}\";;
  103. w) warn_delay=${OPTARG};;
  104. c) crit_delay=${OPTARG};;
  105. o) optfile="--defaults-extra-file=${OPTARG}";;
  106. m) moving=${OPTARG};;
  107. h) echo -e "${help}"; exit 1;;
  108. \?) echo "Wrong option given. Check help (-h, --help) for usage."
  109. exit 1
  110. ;;
  111. esac
  112. done
  113. # Check if we can write to tmp
  114. #########################################################################
  115. test -w /tmp && tmpfile="/tmp/mysql_slave_${slavetarget}_pos.txt"
  116. # Connect to the DB server and check for informations
  117. #########################################################################
  118. # Check whether all required arguments were passed in (either option file or full connection settings)
  119. if [[ -z "${optfile}" && -z "${host}" && -z "${socket}" ]]; then
  120. echo -e "Missing required parameter(s)"; exit ${STATE_UNKNOWN}
  121. elif [[ -n "${host}" && (-z "${user}" || -z "${password}") ]]; then
  122. echo -e "Missing required parameter(s)"; exit ${STATE_UNKNOWN}
  123. elif [[ -n "${socket}" && (-z "${user}" || -z "${password}") ]]; then
  124. echo -e "Missing required parameter(s)"; exit ${STATE_UNKNOWN}
  125. fi
  126. # Connect to the DB server and store output in vars
  127. if [[ -n $socket ]]; then
  128. ConnectionResult=$(mysql ${optfile} ${socket} ${user} -e "show slave ${connection} status\G" 2>&1)
  129. else
  130. ConnectionResult=$(mysql ${optfile} ${host} ${port} ${user} -e "show slave ${connection} status\G" 2>&1)
  131. fi
  132. if [ -z "`echo "${ConnectionResult}" |grep Slave_IO_State`" ]; then
  133. echo -e "CRITICAL: Unable to connect to server"
  134. exit ${STATE_CRITICAL}
  135. fi
  136. check=`echo "${ConnectionResult}" |grep Slave_SQL_Running: | awk '{print $2}'`
  137. checkio=`echo "${ConnectionResult}" |grep Slave_IO_Running: | awk '{print $2}'`
  138. masterinfo=`echo "${ConnectionResult}" |grep Master_Host: | awk '{print $2}'`
  139. delayinfo=`echo "${ConnectionResult}" |grep Seconds_Behind_Master: | awk '{print $2}'`
  140. readpos=`echo "${ConnectionResult}" |grep Read_Master_Log_Pos: | awk '{print $2}'`
  141. execpos=`echo "${ConnectionResult}" |grep Exec_Master_Log_Pos: | awk '{print $2}'`
  142. # Output of different exit states
  143. #########################################################################
  144. if [ ${check} = "NULL" ]; then
  145. echo "CRITICAL: Slave_SQL_Running is answering NULL"; exit ${STATE_CRITICAL};
  146. fi
  147. if [ ${check} = ${crit} ]; then
  148. echo "CRITICAL: ${host}:${port} Slave_SQL_Running: ${check}"; exit ${STATE_CRITICAL};
  149. fi
  150. if [ ${checkio} = ${crit} ]; then
  151. echo "CRITICAL: ${host} Slave_IO_Running: ${checkio}"; exit ${STATE_CRITICAL};
  152. fi
  153. if [ ${checkio} = "Connecting" ]; then
  154. echo "CRITICAL: ${host} Slave_IO_Running: ${checkio}"; exit ${STATE_CRITICAL};
  155. fi
  156. if [ ${check} = ${ok} ] && [ ${checkio} = ${ok} ]; then
  157. # Delay thresholds are set
  158. if [[ -n ${warn_delay} ]] && [[ -n ${crit_delay} ]]; then
  159. if ! [[ ${warn_delay} -gt 0 ]]; then echo "Warning threshold must be a valid integer greater than 0"; exit $STATE_UNKNOWN; fi
  160. if ! [[ ${crit_delay} -gt 0 ]]; then echo "Warning threshold must be a valid integer greater than 0"; exit $STATE_UNKNOWN; fi
  161. if [[ -z ${warn_delay} ]] || [[ -z ${crit_delay} ]]; then echo "Both warning and critical thresholds must be set"; exit $STATE_UNKNOWN; fi
  162. if [[ ${warn_delay} -gt ${crit_delay} ]]; then echo "Warning threshold cannot be greater than critical"; exit $STATE_UNKNOWN; fi
  163. if [[ ${delayinfo} -ge ${crit_delay} ]]
  164. then echo "CRITICAL: Slave is ${delayinfo} seconds behind Master | delay=${delayinfo}s"; exit ${STATE_CRITICAL}
  165. elif [[ ${delayinfo} -ge ${warn_delay} ]]
  166. then echo "WARNING: Slave is ${delayinfo} seconds behind Master | delay=${delayinfo}s"; exit ${STATE_WARNING}
  167. else
  168. # Everything looks OK here but now let us check if the replication is moving
  169. if [[ -n ${moving} ]] && [[ -n ${tmpfile} ]] && [[ $readpos -eq $execpos ]]
  170. then
  171. #echo "Debug: Read pos is $readpos - Exec pos is $execpos"
  172. # Check if tmp file exists
  173. curtime=`date +%s`
  174. if [[ -w $tmpfile ]]
  175. then
  176. tmpfiletime=`date +%s -r $tmpfile`
  177. if [[ `expr $curtime - $tmpfiletime` -gt ${moving} ]]
  178. then
  179. exectmp=`cat $tmpfile`
  180. #echo "Debug: Exec pos in tmpfile is $exectmp"
  181. if [[ $exectmp -eq $execpos ]]
  182. then
  183. # The value read from the tmp file and from db are the same. Replication hasnt moved!
  184. echo "WARNING: Slave replication has not moved in ${moving} seconds. Manual check required."; exit ${STATE_WARNING}
  185. else
  186. # Replication has moved since the tmp file was written. Delete tmp file and output OK.
  187. rm $tmpfile
  188. echo "OK: Slave SQL running: ${check} Slave IO running: ${checkio} / master: ${masterinfo} / slave is ${delayinfo} seconds behind master | delay=${delayinfo}s"; exit ${STATE_OK};
  189. fi
  190. else
  191. echo "OK: Slave SQL running: ${check} Slave IO running: ${checkio} / master: ${masterinfo} / slave is ${delayinfo} seconds behind master | delay=${delayinfo}s"; exit ${STATE_OK};
  192. fi
  193. else
  194. echo "$execpos" > $tmpfile
  195. echo "OK: Slave SQL running: ${check} Slave IO running: ${checkio} / master: ${masterinfo} / slave is ${delayinfo} seconds behind master | delay=${delayinfo}s"; exit ${STATE_OK};
  196. fi
  197. else # Everything OK (no additional moving check)
  198. echo "OK: Slave SQL running: ${check} Slave IO running: ${checkio} / master: ${masterinfo} / slave is ${delayinfo} seconds behind master | delay=${delayinfo}s"; exit ${STATE_OK};
  199. fi
  200. fi
  201. else
  202. # Without delay thresholds
  203. echo "OK: Slave SQL running: ${check} Slave IO running: ${checkio} / master: ${masterinfo} / slave is ${delayinfo} seconds behind master | delay=${delayinfo}s"
  204. exit ${STATE_OK};
  205. fi
  206. fi
  207. echo "UNKNOWN: should never reach this part (Slave_SQL_Running is ${check}, Slave_IO_Running is ${checkio})"
  208. exit ${STATE_UNKNOWN}