diff options
Diffstat (limited to 'contrib/check_pcpmetric.py')
| -rw-r--r-- | contrib/check_pcpmetric.py | 106 |
1 files changed, 106 insertions, 0 deletions
diff --git a/contrib/check_pcpmetric.py b/contrib/check_pcpmetric.py new file mode 100644 index 00000000..71d816d4 --- /dev/null +++ b/contrib/check_pcpmetric.py | |||
| @@ -0,0 +1,106 @@ | |||
| 1 | #! /usr/bin/env python | ||
| 2 | # | ||
| 3 | # Nagios client for checking Performance Co-Pilot metrics | ||
| 4 | # | ||
| 5 | # | ||
| 6 | |||
| 7 | from sys import argv,exit | ||
| 8 | import popen2, getopt, string, types | ||
| 9 | |||
# Set to a true value to echo the pmval command line before it is run.
DEBUG = 0

# Version number reported by the -V option.
nagios_pcpclient_version = 0.01

# Location of the pmval binary and the base command line ("-s 1" is always
# passed to pmval; further options are appended while parsing arguments).
PMVAL = '/usr/bin/pmval'
COMMANDLINE = "%s -s 1" % PMVAL

# Placeholder meaning "no metric was given with -m".
METRIC = 'undefined'

# Default alert thresholds, used when -c / -w are not given.
CRITICAL = 0
WARNING = 0
| 18 | |||
def usage():
    """Print the command-line help text to standard output.

    The program name shown in the usage line and in the examples is taken
    from ``argv[0]``.  The text is emitted with a single ``print`` call of
    one pre-joined string so that it behaves the same on Python 2 and 3.
    """
    program = argv[0]
    lines = [
        "Usage: %s [options]" % program,
        "Options:",
        "\t[-H host]\tHostname to contact",
        "\t[-m metric]\tPCP metric to check",
        "\t[-i instance]\tPCP metric instance",
        "\t[-w warn]\tIssue warning alert if value is larger than this",
        "\t[-c critical]\tIssue critical alert if value is larger than this",
        "\t[-V]\t\tProgram version",
        "\t[-h]\t\tThis helptext",
        "",
        "F.ex. to check 5 minute loadaverage, warn if the load is above 2,",
        "and give critical warning if it's above 10:",
        # A leading "\n" inside an entry yields the blank line that
        # precedes each example command.
        "\n\t%% %s -i 5 -m kernel.all.load -w 2 -c 10" % program,
        "",
        "A list of all PCP metrics can be found with the command 'pminfo'.",
        "A list of all instances within a metric can be found with 'pminfo -f metric'.",
        "F.ex. to see all available instances of 'filesys.full' execute:",
        "\n\t% pminfo -f filesys.full",
        "\tfilesys.full",
        '\t\tinst [0 or "/dev/root"] value 45.35514044640914',
        '\t\tinst [1 or "/dev/sda1"] value 46.74285959344712',
        '\t\tinst [2 or "/dev/sdb1"] value 0.807766570678168',
        "",
        "And the command to have nagios monitor the /dev/sda1 filesystem would be:",
        "\n\t%s -i /dev/sda1 -m filesys.full -w 70 -c 90" % program,
    ]
    print("\n".join(lines))
| 45 | |||
| 46 | |||
def _parse_threshold(option, text):
    """Return the numeric value of a ``-w`` / ``-c`` argument.

    ``option`` is only used to build the error message.  Raises ValueError
    when ``text`` is not a number.  The conversion matters: comparing the
    measured float against the raw option string is not a numeric comparison.
    """
    try:
        return float(text)
    except ValueError:
        raise ValueError("Option %s expects a number, got '%s'" % (option, text))


def _last_field(text):
    """Return the first whitespace-separated token of the last non-blank line.

    Returns None when ``text`` contains no non-blank line at all.
    """
    for line in reversed(text.splitlines()):
        fields = line.split()
        if fields:
            return fields[0]
    return None


def _status_for(value, warning, critical):
    """Map a measured value to the plugin exit status.

    Returns 0 when ``value`` does not exceed ``warning``, 2 when it is larger
    than ``critical``, 1 when it is larger than ``warning`` only, and 3
    (the UNKNOWN code) when none of the comparisons hold, which can only
    happen for a NaN value.
    """
    if value <= warning:
        return 0
    if value > critical:
        return 2
    if value > warning:
        return 1
    return 3


def main(arguments=None):
    """Run pmval for the requested metric and exit with a Nagios status.

    ``arguments`` is the option list to parse; it defaults to ``argv[1:]``.
    The function never returns normally: it prints either the measured value
    or a diagnostic and terminates the process through ``exit``.
    """
    # Imported locally: the file-level imports only provide popen2.
    import subprocess

    if arguments is None:
        arguments = argv[1:]

    try:
        opts, _ = getopt.getopt(arguments, 'hH:c:w:m:i:V')
    except getopt.GetoptError as err:
        # An uncaught exception would end the process with status 1, which
        # is the WARNING code; a usage error must be reported as UNKNOWN.
        print(err)
        usage()
        exit(3)

    # The command is kept as an argument list and run without a shell, so
    # host, instance and metric names are handed to pmval verbatim.
    command = COMMANDLINE.split()
    metric = METRIC
    warning = float(WARNING)
    critical = float(CRITICAL)

    for key, value in opts:
        if key == '-H':
            command.extend(['-h', value])
        elif key == '-m':
            metric = value
        elif key == '-i':
            command.extend(['-i', value])
        elif key in ('-w', '-c'):
            try:
                threshold = _parse_threshold(key, value)
            except ValueError as err:
                print(err)
                exit(3)
            if key == '-w':
                warning = threshold
            else:
                critical = threshold
        elif key == '-h':
            usage()
            exit(0)
        elif key == '-V':
            print("Nagios Performance CoPilot client v%.2f" % nagios_pcpclient_version)
            print("Written by Jan-Frode Myklebust <janfrode@parallab.uib.no>")
            exit(0)

    if metric == 'undefined':
        usage()
        exit(3)

    command.append(metric)
    if DEBUG:
        print(" ".join(command))

    try:
        child = subprocess.Popen(command,
                                 stdout=subprocess.PIPE,
                                 stderr=subprocess.STDOUT,
                                 universal_newlines=True)
    except OSError as err:
        print("Cannot execute %s: %s" % (command[0], err))
        exit(3)

    # communicate() reads the pipe while waiting for the child; waiting
    # first and reading afterwards can block forever once the pipe fills.
    output = child.communicate()[0]

    # The value is expected at the start of the last line printed by pmval.
    returndata = _last_field(output)
    if returndata is None:
        print("No output from %s" % command[0])
        exit(3)

    # Confirm that we have gotten a float, and not some error message in
    # the returndata. If not, print the error and give the UNKNOWN exit code.
    try:
        retval = float(returndata)
    except ValueError as err:
        print(err)
        exit(3)

    print(retval)
    exit(_status_for(retval, warning, critical))


if __name__ == "__main__":
    main()
