diff options
| -rw-r--r-- | contrib/check_snmp_disk_monitor.pl | 185 | ||||
| -rw-r--r-- | contrib/check_snmp_process_monitor.pl | 227 | 
2 files changed, 412 insertions, 0 deletions
| diff --git a/contrib/check_snmp_disk_monitor.pl b/contrib/check_snmp_disk_monitor.pl new file mode 100644 index 00000000..7a70f71a --- /dev/null +++ b/contrib/check_snmp_disk_monitor.pl | |||
| @@ -0,0 +1,185 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # author: Al Tobey <albert.tobey@priority-health.com> | ||
| 3 | # what: monitor diskspace using the host-resources mib | ||
| 4 | # license: GPL - http://www.fsf.org/licenses/gpl.txt | ||
| 5 | # | ||
| 6 | # Todo: | ||
| 7 | |||
| 8 | use strict; | ||
| 9 | require 5.6.0; | ||
| 10 | use lib qw( /opt/nagios/libexec ); | ||
| 11 | use utils qw(%ERRORS $TIMEOUT &print_revision &support &usage); | ||
| 12 | use SNMP 5.0; | ||
| 13 | use Getopt::Long; | ||
| 14 | use vars qw( $exit $message $opt_version $opt_timeout $opt_help $opt_command $opt_host $opt_community $opt_verbose $opt_warning $opt_critical $opt_port $opt_mountpoint $opt_stats $snmp_session $PROGNAME $TIMEOUT %mounts ); | ||
| 15 | |||
# Plugin name and option defaults. process_options() fills in whatever the
# command line supplies on top of these.
$PROGNAME      = "snmp_disk_monitor.pl";
$opt_community = 'public';
$opt_port      = 161;
$opt_warning   = 99;      # percent used
$opt_critical  = 100;     # percent used
$exit          = 'OK';    # key into %ERRORS for the final exit status
%mounts        = ();

# everything else starts out undefined
( $opt_verbose, $opt_host, $opt_command, $opt_stats, $message ) = ();
| 28 | |||
# Parse the command line into the $opt_* globals and validate it.
#
# -V/--version prints the revision and exits OK. -h/--help, or a missing
# -H/--hostname or -m/--mountpoint, prints the help text and exits UNKNOWN.
# On return $opt_mountpoint is a reference to the list of requested
# mountpoints, and $opt_warning/$opt_critical hold the -w/-c values when those
# options were given (their defaults are left alone otherwise).
sub process_options {
    my( $opt_crit, $opt_warn );
    Getopt::Long::Configure( 'bundling' );
    GetOptions(
        'V'     => \$opt_version,    'version'      => \$opt_version,
        'v'     => \$opt_verbose,    'verbose'      => \$opt_verbose,
        'h'     => \$opt_help,       'help'         => \$opt_help,
        's'     => \$opt_stats,      'statistics'   => \$opt_stats,
        'H:s'   => \$opt_host,       'hostname:s'   => \$opt_host,
        'p:i'   => \$opt_port,       'port:i'       => \$opt_port,
        'C:s'   => \$opt_community,  'community:s'  => \$opt_community,
        'c:i'   => \$opt_crit,       'critical:i'   => \$opt_crit,
        'w:i'   => \$opt_warn,       'warning:i'    => \$opt_warn,
        't:i'   => \$TIMEOUT,        'timeout:i'    => \$TIMEOUT,
        'm:s'   => \$opt_mountpoint, 'mountpoint:s' => \$opt_mountpoint
    );
    if ( defined($opt_version) ) {
        # a version request is not an error, so do not fall through to help
        local_print_revision();
        exit $ERRORS{OK};
    }
    if ( defined($opt_verbose) ) { $SNMP::debugging = 1; }
    if ( !defined($opt_host) || defined($opt_help) || !defined($opt_mountpoint) ) {
        print_help();
        exit $ERRORS{UNKNOWN};
    }

    # -w/-c are parsed into locals so that the defaults survive when the
    # options are omitted; copy them over only when they were supplied
    $opt_warning  = $opt_warn if ( defined($opt_warn) );
    $opt_critical = $opt_crit if ( defined($opt_crit) );

    $opt_mountpoint = [ split(/,/, $opt_mountpoint) ];
}
| 53 | |||
# Hand this plugin's name and revision keyword to the imported
# print_revision() helper.
sub local_print_revision {
    my $revision_keyword = '$Revision$ ';
    return print_revision( $PROGNAME, $revision_keyword );
}
| 57 | |||
# Print the one-line command synopsis.
# -w and -c each take a single integer percentage (see the 'w:i'/'c:i'
# option specs), not a <low>,<high> range.
sub print_usage {
    print "Usage: $PROGNAME -H <host> -C <snmp_community> [-s] [-w <percent>] [-c <percent>] [-t <timeout>] -m <mountpoint>\n";
}
| 61 | |||
# Print the full help screen: revision, copyright, usage line and one entry
# for every option accepted by process_options().
sub print_help {
    local_print_revision();
    print "Copyright (c) 2002 Al Tobey <albert.tobey\@priority-health.com>\n\n",
          "SNMP Disk Monitor plugin for Nagios\n\n";
    print_usage();
    print <<EOT;
 -V, --version
    print the plugin revision
 -v, --verbose
    print extra debugging information
 -h, --help
    print this help message
 -H, --hostname=HOST
    name or IP address of host to check
 -p, --port=INTEGER
    port of the host's SNMP agent (Default: 161)
 -C, --community=COMMUNITY NAME
    community name for the host's SNMP agent
 -m, --mountpoint=MOUNTPOINT
    a mountpoint, or a comma delimited list of mountpoints
 -w, --warning=INTEGER
    percent of disk used to generate WARNING state (Default: 99)
 -c, --critical=INTEGER
    percent of disk used to generate CRITICAL state (Default: 100)
 -t, --timeout=INTEGER
    seconds to wait for the SNMP data before the check is aborted
 -s, --statistics
    output statistics in Nagios format
EOT
}
| 86 | |||
# Print the given list, but only when verbose mode was requested
# ($opt_verbose is defined); otherwise print nothing.
sub verbose (@) {
    my @text = @_;
    defined($opt_verbose) or return;
    print @text;
}
| 91 | |||
# Abort the check when the last operation on $snmp_session left an error
# number behind: report the session's error string and exit UNKNOWN.
sub check_for_errors {
    my $error_number = $snmp_session->{ErrorNum};
    if ( $error_number ) {
        my $error_text = $snmp_session->{ErrorStr};
        print "UNKNOWN - error retrieving SNMP data: $error_text\n";
        exit $ERRORS{UNKNOWN};
    }
}
| 98 | |||
# =========================================================================== #
# =====> MAIN
# =========================================================================== #

# Percentage of $size occupied by $used, rounded to the nearest whole number.
# Returns 0 when the size is unknown or not positive, so an empty or
# unreported storage entry can never cause a division by zero.
sub calc_percent_used {
    my( $used, $size ) = @_;
    return 0 if ( !defined($size) || $size <= 0 );
    return int( ( ( $used || 0 ) / $size ) * 100 + 0.5 );
}

process_options();

# without a handler SIGALRM would kill the plugin before it printed anything
$SIG{ALRM} = sub {
    print "UNKNOWN - timed out after $TIMEOUT seconds waiting for $opt_host\n";
    exit $ERRORS{UNKNOWN};
};
alarm( $TIMEOUT ); # make sure we don't hang Nagios

$snmp_session = SNMP::Session->new(
    DestHost   => $opt_host,
    Community  => $opt_community,
    RemotePort => $opt_port,
    Version    => '2c'
);
if ( !defined($snmp_session) ) {
    print "UNKNOWN - could not create an SNMP session to $opt_host\n";
    exit $ERRORS{UNKNOWN};
}

# retrieve the data from the remote host
my( $mps, $alloc, $size, $used ) = $snmp_session->bulkwalk( 0, 4, [['hrStorageDescr'],['hrStorageAllocationUnits'],['hrStorageSize'],['hrStorageUsed']] );
check_for_errors();

alarm( 0 ); # all done with the network connection

# move all the data into a nice, convenient hash for processing, keyed by the
# instance id shared by the four walked columns
foreach my $mp ( @$mps )   { $mounts{$mp->iid}->{mountpoint}  = $mp->val; }
foreach my $au ( @$alloc ) { $mounts{$au->iid}->{alloc_units} = $au->val; }

# size and used are reported in allocation units; convert to bytes whenever
# the unit size is known, otherwise keep the raw value
foreach my $column ( [ size => $size ], [ used => $used ] ) {
    my( $field, $rows ) = @$column;
    foreach my $row ( @$rows ) {
        my $entry = $mounts{$row->iid} ||= {};
        $entry->{$field} = exists($entry->{alloc_units})
                         ? $row->val * $entry->{alloc_units}
                         : $row->val;
    }
}

# now find the mountpoint or mountpoints that were actually requested and push onto an array for output
my @matches = ();
foreach my $mp ( @$opt_mountpoint ) {
    my $found = scalar(@matches); # count all matches so far
    # sorted so the output order does not depend on hash ordering
    foreach my $key ( sort { $a <=> $b } keys(%mounts) ) {
        my $mount = $mounts{$key};
        next if ( !defined($mount->{mountpoint}) || $mount->{mountpoint} ne $mp );

        # find the percentage, rounded to the nearest whole percent
        $mount->{percent_used} = calc_percent_used( $mount->{used}, $mount->{size} );

        verbose "mountpoint $mp has ", $mount->{percent_used}, "% used, ",
            $mount->{size}, " bytes and ", $mount->{used}, " used\n";

        push( @matches, $mount );
    }
    if ( scalar(@matches) == $found ) {
        print "UNKNOWN - could not locate mountpoint $mp on host\n";
        exit $ERRORS{UNKNOWN};
    }
}
if ( !@matches ) {
    # e.g. an empty -m argument: there is nothing to report on
    print "UNKNOWN - no mountpoints were requested\n";
    exit $ERRORS{UNKNOWN};
}

# now run through and check the thresholds
my @summaries = ();
foreach my $mp ( @matches ) {
    if ( $mp->{percent_used} >= $opt_warning ) {
        if ( $mp->{percent_used} >= $opt_critical ) {
            $exit = 'CRITICAL';
        }
        elsif ( $exit ne 'CRITICAL' ) {
            # never let a later mountpoint downgrade an earlier CRITICAL
            $exit = 'WARNING';
        }
    }
    push( @summaries, $mp->{percent_used}.'% used on '.$mp->{mountpoint} );
}
$message = join( ', ', @summaries );

# append statistics if requested
if ( defined($opt_stats) ) {
    my @tmp = ();
    foreach my $mp ( @matches ) {
        push( @tmp, join(',',$mp->{mountpoint},$mp->{size},$mp->{used}) );
    }
    $message .= '|'.join( ':', @tmp );
}

print "Disk $exit - $message\n";
exit $ERRORS{$exit};
| 184 | |||
| 185 | |||
| diff --git a/contrib/check_snmp_process_monitor.pl b/contrib/check_snmp_process_monitor.pl new file mode 100644 index 00000000..263255b5 --- /dev/null +++ b/contrib/check_snmp_process_monitor.pl | |||
| @@ -0,0 +1,227 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # author: Al Tobey <albert.tobey@priority-health.com> | ||
| 3 | # what: monitor a process using the host-resources mib | ||
| 4 | # license: GPL - http://www.fsf.org/licenses/gpl.txt | ||
| 5 | # | ||
| 6 | # Todo: | ||
| 7 | # * implement memory and cpu utilization checks | ||
| 8 | # * maybe cache pids in DBM files if snmp agents get overworked | ||
| 9 | ############################################################################### | ||
| 10 | # to get a list of processes over snmp try this command: | ||
| 11 | # snmptable -v2c -c public hostname hrSWRunTable | ||
| 12 | # for just a list of valid arguments for the '-e' option: | ||
| 13 | # snmpwalk -v2c -c public hostname hrSWRunName |perl -pe 's:.*/::' | ||
| 14 | ############################################################################### | ||
| 15 | |||
| 16 | use strict; | ||
| 17 | require 5.6.0; | ||
| 18 | use lib qw( /opt/nagios/libexec /usr/local/libexec ); | ||
| 19 | use utils qw(%ERRORS $TIMEOUT &print_revision &support &usage); | ||
| 20 | use SNMP 5.0; | ||
| 21 | use Getopt::Long; | ||
| 22 | use vars qw( $exit $opt_version $opt_timeout $opt_help $opt_command $opt_host $opt_community $opt_verbose $opt_warning $opt_critical $opt_memory $opt_cpu $opt_port $opt_regex $opt_stats %processes $snmp_session $PROGNAME $TIMEOUT ); | ||
| 23 | |||
# Plugin name and option defaults. process_options() fills in whatever the
# command line supplies on top of these.
$PROGNAME      = "snmp_process_monitor.pl";
$opt_community = 'public';
$opt_port      = 161;
$opt_warning   = [ 1, -1 ];   # [ minimum, maximum ] process count
$opt_critical  = [ 1, -1 ];   # [ minimum, maximum ] process count
%processes     = ();
$exit          = 'OK';        # key into %ERRORS for the final exit status

# everything else starts out undefined
( $opt_verbose, $opt_host, $opt_command, $opt_memory, $opt_cpu ) = ();
| 36 | |||
# Parse the command line into the $opt_* globals and validate it.
#
# -V/--version prints the revision and exits OK. -h/--help, a missing
# -H/--hostname, or the absence of both -e/--command and -r/--regex prints
# the help text and exits UNKNOWN.
#
# -w and -c accept "MIN" or "MIN,MAX"; when given they replace $opt_warning /
# $opt_critical with a [ MIN, MAX ] pair. A missing or empty MAX becomes -1
# and a missing or empty MIN becomes 0 (the threshold checks only act on
# values greater than zero).
sub process_options {
    my( $opt_crit, $opt_warn );
    Getopt::Long::Configure( 'bundling' );
    GetOptions(
        'V'     => \$opt_version,   'version'     => \$opt_version,
        'v'     => \$opt_verbose,   'verbose'     => \$opt_verbose,
        'h'     => \$opt_help,      'help'        => \$opt_help,
        's'     => \$opt_stats,     'statistics'  => \$opt_stats,
        'H:s'   => \$opt_host,      'hostname:s'  => \$opt_host,
        'p:i'   => \$opt_port,      'port:i'      => \$opt_port,
        'C:s'   => \$opt_community, 'community:s' => \$opt_community,
        'c:s'   => \$opt_crit,      'critical:s'  => \$opt_crit,
        'w:s'   => \$opt_warn,      'warning:s'   => \$opt_warn,
        't:i'   => \$TIMEOUT,       'timeout:i'   => \$TIMEOUT,
        'e:s'   => \$opt_command,   'command:s'   => \$opt_command,
        'r:s'   => \$opt_regex,     'regex:s'     => \$opt_regex,
        'cpu:i' => \$opt_cpu,       'memory:i'    => \$opt_memory,
    );
    if ( defined($opt_version) ) {
        # a version request is not an error, so do not fall through to help
        local_print_revision();
        exit $ERRORS{OK};
    }
    if ( defined($opt_verbose) ) { $SNMP::debugging = 1; }
    if ( !defined($opt_host) || defined($opt_help) || (!defined($opt_command) && !defined($opt_regex)) ) {
        print_help();
        exit $ERRORS{UNKNOWN};
    }

    # turn "MIN" or "MIN,MAX" into a two element [ MIN, MAX ] pair
    my $parse_range = sub {
        my( $min, $max ) = split( /,/, $_[0] );
        $min = 0  if ( !defined($min) || $min eq '' );
        $max = -1 if ( !defined($max) || $max eq '' );
        return [ $min, $max ];
    };

    $opt_critical = $parse_range->( $opt_crit ) if ( defined($opt_crit) );
    $opt_warning  = $parse_range->( $opt_warn ) if ( defined($opt_warn) );
}
| 79 | |||
# Hand this plugin's name and revision keyword to the imported
# print_revision() helper.
sub local_print_revision {
    my $revision_keyword = '$Revision$ ';
    return print_revision( $PROGNAME, $revision_keyword );
}
| 83 | |||
# Print the one-line command synopsis, prefixed with the program name.
sub print_usage {
    my $synopsis = '-H <host> -C <snmp_community> -e <command> [-w <low>,<high>] [-c <low>,<high>] [-t <timeout>]';
    print "Usage: $PROGNAME $synopsis\n";
}
| 87 | |||
# Print the full help screen: revision, copyright, usage line and one entry
# for each implemented option accepted by process_options().
sub print_help {
    local_print_revision();
    print "Copyright (c) 2002 Al Tobey <albert.tobey\@priority-health.com>\n\n",
          "SNMP Process Monitor plugin for Nagios\n\n";
    print_usage();
    print <<EOT;
 -V, --version
    print the plugin revision
 -v, --verbose
    print extra debugging information
 -h, --help
    print this help message
 -H, --hostname=HOST
    name or IP address of host to check
 -p, --port=INTEGER
    port of the host's SNMP agent (Default: 161)
 -C, --community=COMMUNITY NAME
    community name for the host's SNMP agent
 -e, --command=COMMAND NAME (ps -e style)
    what command should be monitored? (one of -e or -r is required)
 -r, --regex=Perl RE
    use a perl regular expression to find your process
 -w, --warning=INTEGER[,INTEGER]
    minimum and maximum number of processes before a warning is issued (Default 1,-1)
 -c, --critical=INTEGER[,INTEGER]
    minimum and maximum number of processes before a critical is issued (Default 1,-1)
 -t, --timeout=INTEGER
    seconds to wait for the SNMP data before the check is aborted
 -s, --statistics
    append the process count to the output as statistics
EOT
}
| 112 | |||
# Debug output helper: prints its arguments when $opt_verbose is defined and
# stays silent otherwise.
sub verbose (@) {
    my @text = @_;
    if ( !defined($opt_verbose) ) {
        return;
    }
    print @text;
}
| 117 | |||
# Stop the check with an UNKNOWN result when $snmp_session carries a
# non-zero error number, echoing the session's error string.
sub check_for_errors {
    my $error_number = $snmp_session->{ErrorNum};
    if ( $error_number ) {
        print "UNKNOWN - error retrieving SNMP data: ", $snmp_session->{ErrorStr}, "\n";
        exit $ERRORS{UNKNOWN};
    }
}
| 124 | |||
# =========================================================================== #
# =====> MAIN
# =========================================================================== #
process_options();

# compile the user supplied pattern once, and fail with a proper plugin
# result instead of a Perl error when it is not a valid regular expression
my $regex;
if ( defined($opt_regex) ) {
    $regex = eval { qr/$opt_regex/ };
    if ( !defined($regex) ) {
        print "UNKNOWN - invalid regular expression '$opt_regex'\n";
        exit $ERRORS{UNKNOWN};
    }
}

# processes whose hrSWRunName ends in one of these are treated as script
# interpreters; the real command is then taken from their parameters
my $interpreter_re = qr/(perl|\/usr\/bin\/sh|\/bin\/bash|\/bin\/sh)$/;

# without a handler SIGALRM would kill the plugin before it printed anything
$SIG{ALRM} = sub {
    print "UNKNOWN - timed out after $TIMEOUT seconds waiting for $opt_host\n";
    exit $ERRORS{UNKNOWN};
};
alarm( $TIMEOUT ); # make sure we don't hang Nagios

$snmp_session = SNMP::Session->new(
    DestHost   => $opt_host,
    Community  => $opt_community,
    RemotePort => $opt_port,
    Version    => '2c'
);
if ( !defined($snmp_session) ) {
    print "UNKNOWN - could not create an SNMP session to $opt_host\n";
    exit $ERRORS{UNKNOWN};
}

my $process_count = SNMP::Varbind->new( ['hrSystemProcesses', 0] );
$snmp_session->get( $process_count );
check_for_errors();

# the process count is used as the bulkwalk max-repetitions value; fall back
# to a small constant when the agent did not return a usable number
my $max_repetitions = $process_count->val;
if ( !defined($max_repetitions) || $max_repetitions !~ /^\d+$/ || $max_repetitions < 1 ) {
    $max_repetitions = 10;
}

# retrieve the data from the remote host
my( $names, $index ) = $snmp_session->bulkwalk( 0, $max_repetitions, [['hrSWRunName'], ['hrSWRunIndex']] );
check_for_errors();

foreach my $row ( @$names ) {
    my $process = $processes{$row->iid} ||= {};
    $process->{name} = $row->val;
    $process->{name} =~ s#.*/##; # strip path

    my( $interpreter ) = $row->val =~ $interpreter_re;
    next unless ( defined($opt_regex)
               || (defined($interpreter) && $opt_command !~ $interpreter_re) );

    # fetch the runtime parameters of the process
    my $parm_var = SNMP::Varbind->new( ['hrSWRunParameters', $row->iid] );
    $snmp_session->get( $parm_var );
    check_for_errors();

    # only strip if we're looking for a specific command
    if ( defined($opt_command) ) {
        if ( defined($interpreter) ) {
            verbose "process ",$row->iid," uses $interpreter as an interpreter - getting parameters\n";
        }
        $process->{name} = $parm_var->val;
        # strip path name off the front
        $process->{name} =~ s#.*/##;
        # strip everything from the first space to the end
        $process->{name} =~ s/\s+.*$//;
    }
    else {
        # get the longer full-path style listing
        my $path_var = SNMP::Varbind->new( ['hrSWRunPath', $row->iid] );
        $snmp_session->get( $path_var );
        check_for_errors();

        # use the full 'ps -efl' style listing for regular expression matching
        $process->{name} = $path_var->val.' '.$parm_var->val;
    }
}

# only now are we done with the network: the loop above still issues SNMP
# GET requests, so the timeout has to stay armed until it has finished
alarm( 0 );

foreach my $row ( @$index ) {
    $processes{$row->iid}->{pid} = $row->val;
}

my @pids = ();
my @matches = ();
# sorted so the reported pid list does not depend on hash ordering
foreach my $key ( sort { $a <=> $b } keys(%processes) ) {
    my $process = $processes{$key};
    next unless ( (defined($opt_command) && $process->{name} eq $opt_command)
               || (defined($regex) && $process->{name} =~ $regex) );

    push( @matches, $process );
    push( @pids, $process->{pid} );
    verbose "process '$process->{name}' has pid ",
        "$process->{pid} and index $key\n";
}
my $count = @matches;

# warning, critical: element 0 is the minimum and element 1 the maximum
# process count; either bound is ignored unless it is greater than zero
if ( ($opt_warning->[0] > 0 && $opt_warning->[0] > $count)
  || ($opt_warning->[1] > 0 && $opt_warning->[1] <= $count) ) {
    $exit = 'WARNING';
}
if ( ($opt_critical->[0] > 0 && $opt_critical->[0] > $count)
  || ($opt_critical->[1] > 0 && $opt_critical->[1] <= $count) ) {
    $exit = 'CRITICAL';
}

print "$exit - $count processes with pid(s) ",join(',',@pids);

# print the number of processes if statistics are requested
if ( defined($opt_stats) ) {
    print "|count:$count\n";
}
else {
    print "\n";
}

exit $ERRORS{$exit};
| 226 | |||
| 227 | |||
