[Nagiosplug-help] mysqllag plugin

Tim Dunphy bluethundr at jokefire.com
Mon May 2 03:20:54 CEST 2011


Hello list!! I am attempting to implement a plugin that alerts when it detects lag in mysql repliction. It seems really useful and I would like to get this going, but I am running into an issue in doing so. 

I am receiving this error in the nagios ineterface:


## nagios script error

MySQL Lag
	
Notifications for this service have been disabled
	CRITICAL 	05-01-2011 16:19:58 	0d 4h 55m 58s 	3/3 	(Return code of 127 is out of bounds - plugin may be missing)

## here is the client machine info

[root at virtcent10:/usr/local/nagios/libexec] #cat /etc/redhat-release 
CentOS release 5.5 (Final)


[root at virtcent10:/usr/local/nagios/libexec] #uname -a
Linux virtcent10 2.6.18-238.5.1.1.el5xen #1 SMP Wed Mar 30 14:19:27 NOVST 2011 x86_64 x86_64 x86_64 GNU/Linux


## here is the nagios server info

[root at VIRTCENT11 ~]# cat /etc/redhat-release 
CentOS release 5.6 (Final)

[root at VIRTCENT11 ~]# uname -a
Linux VIRTCENT11 2.6.18-194.el5xen #1 SMP Fri Apr 2 15:34:40 EDT 2010 x86_64 x86_64 x86_64 GNU/Linux

Nagios Core 3.2.3
Copyright (c) 2009-2010 Nagios Core Development Team and Community Contributors
Copyright (c) 1999-2009 Ethan Galstad
Last Modified: 10-03-2010
License: GPL
io


## locally mysqllag check script works! :)

[root at virtcent10:/usr/local/nagios/libexec] #./check_mysqllag
+ STATE_OK=0
+ STATE_WARNING=1
+ STATE_CRITICAL=2
+ STATE_UNKNOWN=3
+ STATE_DEPENDENT=4
+ mysqlpath=/usr/bin/
+ user=root
+ pass=secret
+ warn=30
+ crit=60
+ null=NULL
+ usage1='Usage: ./check_mysqllag -uroot -psecret [-w <warn>] [-c <crit>]'
+ usage2='<warn> is lag time, in seconds, to warn at.  Default is 30.'
+ usage3='<crit> is lag time, in seconds, to be critical at.  Default is 60.'
+ exitstatus=1
+ test -n ''
++ /usr/bin//mysql -uroot -psecret -e 'show slave status\G'
++ /bin/grep Seconds_Behind_Master
++ /bin/cut -f2 -d:
+ seconds=' 0'
+ '[' 0 = NULL ']'
+ '[' 0 -lt 60 ']'
+ '[' 0 -ge 30 ']'
+ '[' 0 -ge 60 ']'
+ '[' 0 -lt 30 ']'
+ echo OK - Slave is 0 seconds behind
OK - Slave is 0 seconds behind
+ exit 0

## mysql on client

[root at virtcent10:/usr/local/nagios/libexec] #which mysql

/usr/bin/mysql



## ls -l of script on nagios client


[root at virtcent10:/usr/local/nagios/libexec] #ls -l check_mysqllag*
lrwxrwxrwx 1 nagios nagios   17 May  1 11:12 check_mysqllag -> check_mysqllag.sh
-rwx------ 1 nagios nagios 1908 May  1 13:20 check_mysqllag.sh


## this is the mysqllag service definition

define service{
        use                             generic-service         ; Name of service template to use
        host_name                       db2
        service_description             MySQL Lag
        check_command                   check_mysqllag
        notifications_enabled           0
        }


## this is the mysqllag command definition 

define command{
        command_name    check_mysqllag
        command_line    $USER1$/check_mysqllag -H $HOSTADDRESS$ -p 3306 -v $ARG1$ $ARG2$
        }




## this is the mysqllag script


#! /bin/sh -x

STATE_OK=0
STATE_WARNING=1
STATE_CRITICAL=2
STATE_UNKNOWN=3
STATE_DEPENDENT=4
mysqlpath='/usr/bin/'
user='root'
pass='secret'
warn=30
crit=60
null="NULL"
usage1="Usage: $0 -u$user -p$pass [-w <warn>] [-c <crit>]"
usage2="<warn> is lag time, in seconds, to warn at.  Default is 30."
usage3="<crit> is lag time, in seconds, to be critical at.  Default is 60."

exitstatus=$STATE_WARNING #default
while test -n "$1"; do
    case "$1" in
        -c)
            crit=$2
            shift
            ;;
        -w)
            warn=$2
            shift
            ;;
        -u)
            user=$2
            shift
            ;;
        -p)
            pass=$2
            shift
            ;;
        -h)
            echo $usage1;
	    echo 
            echo $usage2;
            echo $usage3;
            exit $STATE_UNKNOWN
	    ;;
	-H)
            host=$2
            shift
            ;;
        *)
            echo "Unknown argument: $1"
            echo $usage1;
	    echo 
            echo $usage2;
            echo $usage3;
            exit $STATE_UNKNOWN
            ;;
    esac
    shift
done

seconds=`$mysqlpath/mysql -u$user -p$pass -e 'show slave status\G' | /bin/grep Seconds_Behind_Master | /bin/cut -f2 -d:`

# on the number line, we need to test 6 cases:
# 0-----w-----c----->
# 0, 0<lag<w, w, w<lag<c, c, c<lag
# which we simplify to 
# lag>=c, w<=lag<c, 0<=lag<warn

# if null, critical
if [ $seconds = $null ]; then
echo CRITICAL - Slave is $seconds seconds behind 
exit $STATE_CRITICAL;
fi

#w<=lag<c
if [ $seconds -lt $crit ]; then 
if [ $seconds -ge $warn ]; then
echo WARNING - Slave is $seconds seconds behind 
exit $STATE_WARNING;
fi
fi

if [ $seconds -ge $crit ]; then
echo CRITICAL - Slave is $seconds seconds behind 
exit $STATE_CRITICAL;
fi

# 0<=lag<warn
if [ $seconds -lt $warn ]; then
echo OK - Slave is $seconds seconds behind 
exit $STATE_OK;
fi


thank you in advance for any suggestions you might have to get this working!! 

best!!

tim









More information about the Help mailing list