diff options
| -rw-r--r-- | contrib/check_procl.sh | 400 |
1 files changed, 400 insertions, 0 deletions
diff --git a/contrib/check_procl.sh b/contrib/check_procl.sh new file mode 100644 index 00000000..b1793ad5 --- /dev/null +++ b/contrib/check_procl.sh | |||
| @@ -0,0 +1,400 @@ | |||
| 1 | #!/bin/bash | ||
| 2 | |||
| 3 | # | ||
| 4 | # Check_procl.sh | ||
| 5 | # | ||
| 6 | # Program: Process load check plugin for Nagios | ||
| 7 | # License : GPL | ||
| 8 | # Copyright (c) 2002 Jerome Tytgat (j.tytgat@sioban.net) | ||
| 9 | # | ||
| 10 | # check_procl.sh,v 1.1 2002/07/04 09:35 | ||
| 11 | # | ||
| 12 | # Description : | ||
| 13 | # | ||
| 14 | # This plugin checks the %cpu, %mem or cputime of one or more processes | ||
| 15 | # | ||
| 16 | # Usage : | ||
| 17 | # | ||
| 18 | # check_procl.sh -p process1,process2,... -w a.b -c c.d --cpu | ||
| 19 | # check_procl.sh -p process1,process2,... -w a.b -c c.d --mem | ||
| 20 | # check_procl.sh -p process1,process2,... -w a:b:c -c d:e:f --cputime | ||
| 21 | # | ||
| 22 | # check_procl.sh -p %all% -e process1,process2,... -w <a.b | a:b:c> -c <c.d | d:e:f> <--cpu | --mem | --cputime> | ||
| 23 | # check_procl.sh -p %max% -e process1,process2,... -w <a.b | a:b:c> -c <c.d | d:e:f> <--cpu | --mem | --cputime> | ||
| 24 | # | ||
| 25 | # Example : | ||
| 26 | # | ||
| 27 | # To know the memory eaten by HTTPD processes, be warned when it reaches 50% and be critical when it reaches 75% | ||
| 28 | # check_procl.sh -p httpd -w 50.0 -c 75.0 --mem | ||
| 29 | # > OK - total %MEM for process httpd : 46.1 | ||
| 30 | # | ||
| 31 | # To know which process eats the most cpu time, while excluding the kapmd processes (we are under linux and are using kapm), we do : | ||
| 32 | # check_procl.sh -p %max% -e kapmd-idle,kapmd -w 0:1:0 -c 0:2:0 --cputime | ||
| 33 | # > CRITICAL - total CPUTIME for process named : 02:32:10 | ||
| 34 | # | ||
| 35 | # Tested on solaris 7/8, Linux Redhat 7.3 and Linux Suse 7.1 | ||
| 36 | # | ||
| 37 | # BUGS : problems with handling time on solaris... | ||
| 38 | |||
| 39 | |||
# Print the command-line synopsis of the plugin on stdout.
# The program name shown is $0, i.e. however the script was invoked.
help_usage() {
  printf '%s\n' \
    "Usage:" \
    " $0 -p <process_name1,process_name2,... | %all% | %max%>" \
    " [-e <process_name1,process_name2,...>] -w warning -c critical < --cpu | --mem | --cputime>" \
    " $0 (-v | --version)" \
    " $0 (-h | --help)"
}
| 47 | |||
# Print the version banner, licence notice and credits on stdout.
help_version() {
  local banner_line
  for banner_line in \
    "check_procl.sh (nagios-plugins) 1.1" \
    "The nagios plugins come with ABSOLUTELY NO WARRANTY. You may redistribute" \
    "copies of the plugins under the terms of the GNU General Public License." \
    "For more information about these matters, see the file named COPYING." \
    "Copyright (c) 2002 Jerome Tytgat - j.tytgat@sioban.net" \
    "Greetings goes to Websurg which kindly let me took time to develop this" \
    " Manu Feig and Jacques Kern who were my beta testers, thanks to them !"; do
    printf '%s\n' "$banner_line"
  done
}
| 57 | |||
# Check that every external tool / builtin the plugin relies on is available.
#
# Arguments: optional list of command names to check; when none are given the
#            plugin's own dependency list is used.
# Outputs:   an explanatory message on stdout for the first missing command.
# Exits:     3 (Nagios UNKNOWN) as soon as one command cannot be found;
#            returns normally when all of them are found.
verify_dep() {
  local tool

  if [ "$#" -eq 0 ]; then
    set -- bash cut egrep expr grep let ps sed sort tail test tr wc
  fi

  for tool in "$@"; do
    # Only the command name is passed to `type`: any extra operand (such as a
    # stray /dev/null) would itself be looked up and make the check fail.
    # Every non-zero status counts as "missing", not just status 1.
    if ! type "$tool" > /dev/null 2>&1; then
      echo "I am missing an important component : $tool"
      echo "Cannot continue, sorry, try to find the missing one..."
      exit 3
    fi
  done
}
| 71 | |||
# Remember how the script was invoked (used later to filter the ps listing).
myself=$0

# Abort early (exit 3) if a required tool is missing.
verify_dep

# --help / --version are only recognised as the first argument.
# Both exit with status 3, as the original plugin did.
case "$1" in
  -h | --help)
    help_version
    printf '%s\n' \
      "" \
      "This plugin will check either the cumulative %cpu, %mem or cputime" \
      "of a process." \
      ""
    help_usage
    printf '%s\n' \
      "" \
      "Required Arguments:" \
      " -p, --process STRING1,STRING2,..." \
      " names of the processes we want to monitor," \
      " you can add as much as process as you want, separated by comma," \
      " they will be cumulated" \
      " -p, --process %all%" \
      " The special keyword %all% will check the cumulative cpu/mem/time of all process" \
      " WARNING : Can be very slow on heavy loaded servers, watch your timeout !" \
      " -p, --process %max%" \
      " The special keyword %max% will check the process which eat the most" \
      " WARNING : only select the process which eat the more, not the cumulative," \
      " but return the cumulative" \
      " -w, --warning INTEGER.INTEGER or INTEGER:INTEGER:INTEGER" \
      " generate warning state if the cumulated value reaches this threshold" \
      " -c, --critical INTEGER.INTEGER or INTEGER:INTEGER:INTEGER" \
      " generate critical state if the cumulated value reaches this threshold" \
      " --cpu" \
      " return the current cpu usage for the given process" \
      " --mem" \
      " return the current memory usage for the given process" \
      " --cputime" \
      " return the total cputime usage for the given process" \
      "" \
      "Optional Argument:" \
      " -e, --exclude-process STRING1,STRING2,..." \
      " names of the processes we don't want to monitor" \
      " only useful when associated with %all% or %max% keywords, else ignored" \
      " ex : kapm-idled on linux is a process which eat memory / cputime but not really... ;-)" \
      ""
    exit 3
    ;;
  -v | --version)
    help_version
    exit 3
    ;;
esac

# A valid call needs at least 7 words (-p X -w Y -c Z --mode); '=' is counted
# as a separator, as before. printf with quoted "$@" is used instead of
# `echo $@` so arguments are neither glob-expanded nor taken as echo options.
if [ "$(printf '%s ' "$@" | tr '=' ' ' | wc -w)" -lt 7 ]; then
  echo "Bad arguments number (need at least 7)!"
  help_usage
  exit 3
fi
| 129 | |||
# Print an argument error followed by the usage text, then exit 3 (UNKNOWN).
_arg_error() {
  echo "$1"
  help_usage
  exit 3
}

# Record the measurement mode; only one of --cpu/--mem/--cputime is allowed.
# Arguments: $1 mode code (1=cpu, 2=mem, 3=cputime), $2 display label,
#            $3 ps field name, $4 delimiter used in values and thresholds.
_select_mode() {
  if [ "$tt" -ne 0 ]; then
    _arg_error "Only one of the arguments --cpu/--mem/--cputime can be used at a time !"
  fi
  tt=$1
  type_arg_aff=$2
  type_arg=$3
  delim=$4
}

# Test of the command lines arguments
#
# Globals written: tt, process_name, exclude_process_name, wt, ct,
#                  type_arg_aff, type_arg, delim.
# Process lists are stored with ',' turned into '|' so they can be used
# directly as an extended-regex alternation.
# Exits 3 on a duplicated, unknown or value-less argument.
parse_args() {
  tt=0
  process_name=""
  exclude_process_name=""
  wt=""
  ct=""

  while [ "$#" -gt 0 ]; do
    case "$1" in
      -p | --process)
        [ -z "$process_name" ] ||
          _arg_error "Only one --process argument is useful..."
        [ "$#" -ge 2 ] || _arg_error "Missing value for argument $1"
        shift
        # Parameter expansion instead of `echo $1 | tr`: the value is never
        # subject to word splitting or pathname expansion.
        process_name=${1//,/|}
        ;;
      -e | --exclude-process)
        [ -z "$exclude_process_name" ] ||
          _arg_error "Only one --exclude-process argument is useful..."
        [ "$#" -ge 2 ] || _arg_error "Missing value for argument $1"
        shift
        exclude_process_name=${1//,/|}
        ;;
      -w | --warning)
        [ -z "$wt" ] ||
          _arg_error "Only one --warning argument needed... Trying to test bad things ? :-)"
        [ "$#" -ge 2 ] || _arg_error "Missing value for argument $1"
        shift
        wt=$1
        ;;
      -c | --critical)
        [ -z "$ct" ] ||
          _arg_error "Only one --critical argument needed... Trying to test bad things ? :-)"
        [ "$#" -ge 2 ] || _arg_error "Missing value for argument $1"
        shift
        ct=$1
        ;;
      --cpu)
        _select_mode 1 "%CPU" "pcpu" "."
        ;;
      --mem)
        _select_mode 2 "%MEM" "pmem" "."
        ;;
      --cputime)
        _select_mode 3 "TIME" "time" ":"
        ;;
      *)
        _arg_error "Unknown argument $1"
        ;;
    esac
    shift
  done
}

parse_args "$@"
| 228 | |||
# Is the process running ?
# Skipped for the %all% / %max% keywords, which do not name a process.
# $process_name is an extended-regex alternation (name1|name2|...). It is
# matched against the command-name column only, so digits in the PID or TIME
# columns can no longer produce a false "running" result. The name is also
# used as given, without a trailing '?' that made its last character optional.
if [ "$process_name" != "%all%" ] && [ "$process_name" != "%max%" ]; then
  if [ -z "$process_name" ] ||
    ! ps -e -o comm= | grep -E -q -e "$process_name"; then
    # NOTE(review): the text says WARNING but the exit status is 3 (UNKNOWN
    # for Nagios); both are kept as they were -- confirm which is intended.
    echo "WARNING: process $process_name not running !"
    exit 3
  fi
fi
| 235 | |||
# Split one threshold on $delim and validate its fields.
#
# Arguments: $1 raw threshold text, $2 variable prefix ("wt" or "ct").
# Globals:   reads tt and delim; writes <prefix>_value1..3 on success.
# Returns:   0 when the threshold is well formed, 1 otherwise.
#
# --cputime (tt=3) needs exactly three numeric fields (h:m:s).
# --cpu / --mem need "a.b"; a bare integer "a" is read as "a.0" (it used to be
# mis-split into a.a because cut returns the whole string when the delimiter
# is absent). The third field is always 0 for these two modes.
_parse_threshold() {
  local raw=$1 prefix=$2
  local first second third extra part

  IFS=$delim read -r first second third extra <<< "$raw"
  [ -z "$extra" ] || return 1

  if [ "$tt" -eq 3 ]; then
    : # all three fields must be present; checked by the digit test below
  else
    [ -z "$third" ] || return 1
    third=0
    case "$raw" in
      *"$delim"*) ;;      # "a.b" (an empty b is rejected below)
      *) second=0 ;;      # bare integer
    esac
  fi

  for part in "$first" "$second" "$third"; do
    case "$part" in
      '' | *[!0-9]*) return 1 ;;
    esac
  done

  printf -v "${prefix}_value1" '%s' "$first"
  printf -v "${prefix}_value2" '%s' "$second"
  printf -v "${prefix}_value3" '%s' "$third"
}

# Cut of warning and critical values
# Without a measurement mode there is no delimiter to split on.
if [ -z "$delim" ]; then
  echo "One of the arguments --cpu, --mem or --cputime is required !"
  help_usage
  exit 3
fi

# Integrity check of warning and critical values
if ! _parse_threshold "$wt" wt; then
  echo "Bad expression in the WARNING field : $wt"
  help_usage
  exit 3
fi

if ! _parse_threshold "$ct" ct; then
  echo "Bad expression in the CRITICAL field : $ct"
  help_usage
  exit 3
fi
| 280 | |||
# Filter "value command" rows read from stdin.
#
# Arguments: $1 mode, $2 pattern, $3 command name to skip (except mode only).
#   regex  - keep rows whose command matches the extended regex $2
#   exact  - keep rows whose command is exactly $2
#   except - drop rows whose command is $3 or matches the regex $2
#            (an empty $2 / $3 disables that filter)
# Outputs:  kept rows, value right-aligned on 16 columns so that a byte-wise
#           sort orders them by value.
_filter_rows() {
  local mode=$1 pattern=$2 skip=$3
  local value comm

  while read -r value comm; do
    [ -n "$value" ] || continue
    case "$mode" in
      regex)
        [[ $comm =~ $pattern ]] || continue
        ;;
      exact)
        [ "$comm" = "$pattern" ] || continue
        ;;
      except)
        if [ -n "$skip" ] && [ "$comm" = "$skip" ]; then
          continue
        fi
        if [ -n "$pattern" ] && [[ $comm =~ $pattern ]]; then
          continue
        fi
        ;;
    esac
    printf '%16s %s\n' "$value" "$comm"
  done
}

# Keep only the first column (the measured value) of each row on stdin.
_values_only() {
  local value rest

  while read -r value rest; do
    [ -n "$value" ] && printf '%s\n' "$value"
  done
}

# ps line construction set...
# Result: $psline holds one measured value per selected process.
# Patterns are applied to the command name only, never to the numeric column.
# The plugin excludes itself by the base name of $0 (a path never matches a
# command name).
self_name=${myself##*/}

# "name=" suppresses the header line; one -o per field is the portable form.
ps_rows=$(ps -e -o "${type_arg}=" -o comm=)

case "$process_name" in
  %all%)
    psline=$(_filter_rows except "$exclude_process_name" "$self_name" \
      <<< "$ps_rows" | _values_only)
    ;;
  %max%)
    # Find the command with the single largest value, then cumulate every
    # process that carries exactly that command name.
    top_row=$(_filter_rows except "$exclude_process_name" "$self_name" \
      <<< "$ps_rows" | LC_ALL=C sort | tail -n 1)
    read -r top_value process_name <<< "$top_row"
    psline=$(_filter_rows exact "$process_name" "" <<< "$ps_rows" | _values_only)
    ;;
  *)
    psline=$(_filter_rows regex "$process_name" "" <<< "$ps_rows" | _values_only)
    ;;
esac
| 305 | |||
# fetching the values
#
# Sum every sample in $psline into total1/total2/total3.
#
# Globals read:    psline (whitespace-separated samples), tt (3 = cputime).
# Globals written: total1, total2, total3
#   cputime:  hours, minutes (0-59), seconds (0-59)
#   cpu/mem:  integer part, first decimal digit (0-9), 0
#
# Samples are summed in a single unit (seconds or tenths) and split once at
# the end, so carries are always exact. Accepted time formats are
# [D-]HH:MM:SS, MM:SS (the short Solaris form) and SS. A cpu/mem sample
# without a fractional part counts as ".0"; digits after the first decimal
# are dropped. Samples that are not numeric are skipped.
accumulate_totals() {
  local sample clock days hh mm ss whole frac part
  local seconds=0 tenths=0

  # $psline is deliberately unquoted: one sample per word.
  for sample in $psline; do
    if [ "${tt:-0}" -eq 3 ]; then
      days=0
      clock=$sample
      case "$sample" in
        *-*)
          days=${sample%%-*}
          clock=${sample#*-}
          ;;
      esac
      IFS=: read -r hh mm ss <<< "$clock"
      if [ -z "$mm" ]; then
        # seconds only
        ss=$hh
        mm=0
        hh=0
      elif [ -z "$ss" ]; then
        # MM:SS
        ss=$mm
        mm=$hh
        hh=0
      fi
      for part in "$days" "$hh" "$mm" "$ss"; do
        case "$part" in
          '' | *[!0-9]*) continue 2 ;;
        esac
      done
      # 10# forces base 10 so fields such as "08" are not read as octal.
      seconds=$((seconds + ((10#$days * 24 + 10#$hh) * 60 + 10#$mm) * 60 + 10#$ss))
    else
      whole=${sample%%.*}
      case "$sample" in
        *.*)
          frac=${sample#*.}
          frac=${frac:0:1}
          ;;
        *)
          frac=0
          ;;
      esac
      whole=${whole:-0}
      frac=${frac:-0}
      for part in "$whole" "$frac"; do
        case "$part" in
          *[!0-9]*) continue 2 ;;
        esac
      done
      tenths=$((tenths + 10#$whole * 10 + 10#$frac))
    fi
  done

  if [ "${tt:-0}" -eq 3 ]; then
    total1=$((seconds / 3600))
    total2=$((seconds % 3600 / 60))
    total3=$((seconds % 60))
  else
    total1=$((tenths / 10))
    total2=$((tenths % 10))
    total3=0
  fi
}

accumulate_totals
| 346 | |||
# Convert "whole.frac" to an integer number of thousandths.
# The fraction is right-padded / truncated to three digits, so "25" means
# .250 and "05" means .050 (not 25 or 5 tenths).
# Arguments: $1 integer part, $2 fractional digits. Output: integer on stdout.
_to_thousandths() {
  local whole=${1:-0} frac=${2:-0}

  frac="${frac}000"
  frac=${frac:0:3}
  printf '%s\n' "$((10#$whole * 1000 + 10#$frac))"
}

# evaluation of the cumulative values vs warning and critical values
#
# Globals read:    tt, total1..3, wt_value1..3, ct_value1..3
# Globals written: warn, crit (0 or 1), return_total (text shown to Nagios)
#
# A threshold is reached when the measured value is greater than or equal to
# it. Both sides are converted to one integer unit (thousandths for
# --cpu/--mem, seconds for --cputime) before comparing, so a threshold such as
# 50.25 is reached by a measured 50.3. 10# keeps zero-padded fields decimal.
evaluate_thresholds() {
  local measured warn_limit crit_limit

  warn=0
  crit=0

  case "$tt" in
    1 | 2)
      return_total="$total1.$total2"
      # total2 is the single tenths digit of the measured value.
      measured=$((10#${total1:-0} * 1000 + 10#${total2:-0} * 100))
      warn_limit=$(_to_thousandths "$wt_value1" "$wt_value2")
      crit_limit=$(_to_thousandths "$ct_value1" "$ct_value2")
      ;;
    3)
      printf -v return_total '%02d:%02d:%02d' \
        "$((10#${total1:-0}))" "$((10#${total2:-0}))" "$((10#${total3:-0}))"
      measured=$(((10#${total1:-0} * 60 + 10#${total2:-0}) * 60 + 10#${total3:-0}))
      warn_limit=$(((10#${wt_value1:-0} * 60 + 10#${wt_value2:-0}) * 60 + 10#${wt_value3:-0}))
      crit_limit=$(((10#${ct_value1:-0} * 60 + 10#${ct_value2:-0}) * 60 + 10#${ct_value3:-0}))
      ;;
    *)
      return 0
      ;;
  esac

  if [ "$measured" -ge "$crit_limit" ]; then
    crit=1
  fi
  if [ "$measured" -ge "$warn_limit" ]; then
    warn=1
  fi
  return 0
}

evaluate_thresholds
| 376 | |||
# last check ...
# Reaching the critical threshold without reaching the warning one is taken
# to mean the thresholds were given in the wrong order.
if [ $crit -eq 1 ] && [ $warn -eq 0 ]; then
  echo "Critical value must be greater than warning value !"
  help_usage
  exit 3
fi

# Finally Inform Nagios of what we found...
# Pick the status word and the matching exit code (2 = CRITICAL, 1 = WARNING,
# 0 = OK), then print the single status line.
status_label="OK"
status_code=0
if [ $crit -eq 1 ]; then
  status_label="CRITICAL"
  status_code=2
elif [ $warn -eq 1 ]; then
  status_label="WARNING"
  status_code=1
fi

# The process list is shown comma-separated again ('|' back to ',').
# $process_name is left unquoted on purpose, exactly as before.
shown_names=$(echo $process_name | tr "|" ",")

echo "$status_label - total $type_arg_aff for process $shown_names : $return_total"
exit $status_code

# Hey what are we doing here ???
exit 3
