diff options
| author | Holger Weiss <holger@zedat.fu-berlin.de> | 2013-09-02 13:16:24 +0200 |
|---|---|---|
| committer | Holger Weiss <holger@zedat.fu-berlin.de> | 2013-09-02 13:16:24 +0200 |
| commit | b15adb7762b6caaecaa83637abfcf5fdb4802092 (patch) | |
| tree | 64eddbe2aa1a7f98a140be0f7973f05d7a781ae0 /contrib/check_snmp_process_monitor.pl | |
| parent | c4d5882b9e1d07c7b61091062b7d085fa5f00284 (diff) | |
| download | monitoring-plugins-b15adb7762b6caaecaa83637abfcf5fdb4802092.tar.gz | |
Remove "contrib" plugins
These days, sites such as "Nagios Exchange" are a much better place for
publishing plugins not maintained by the Plugins Development Team.
Diffstat (limited to 'contrib/check_snmp_process_monitor.pl')
| -rw-r--r-- | contrib/check_snmp_process_monitor.pl | 331 |
1 files changed, 0 insertions, 331 deletions
diff --git a/contrib/check_snmp_process_monitor.pl b/contrib/check_snmp_process_monitor.pl deleted file mode 100644 index 0f445970..00000000 --- a/contrib/check_snmp_process_monitor.pl +++ /dev/null | |||
| @@ -1,331 +0,0 @@ | |||
| 1 | #!/usr/local/bin/perl | ||
| 2 | # author: Al Tobey <albert.tobey@priority-health.com> | ||
| 3 | # what: monitor a process using the host-resources mib | ||
| 4 | # license: GPL - http://www.fsf.org/licenses/gpl.txt | ||
| 5 | # | ||
| 6 | # Todo: | ||
| 7 | # * implement memory and cpu utilization checks | ||
| 8 | # * maybe cache pids in DBM files if snmp agents get overworked | ||
| 9 | ############################################################################### | ||
| 10 | # to get a list of processes over snmp try this command: | ||
| 11 | # snmptable -v2c -c public hostname hrSWRunTable | ||
| 12 | # for just a list of valid arguments for the '-e' option: | ||
| 13 | # snmpwalk -v2c -c public hostname hrSWRunName |perl -pe 's:.*/::' | ||
| 14 | ############################################################################### | ||
| 15 | |||
| 16 | use strict; | ||
| 17 | require 5.6.0; | ||
| 18 | use lib qw( /opt/nagios/libexec /usr/local/libexec ); | ||
| 19 | use utils qw(%ERRORS $TIMEOUT &print_revision &support &usage); | ||
| 20 | use SNMP 5.0; | ||
| 21 | use Getopt::Long; | ||
| 22 | use Storable; | ||
| 23 | use vars qw( $exit $opt_version $opt_timeout $opt_help $opt_command $opt_host $opt_community $opt_verbose $opt_warning $opt_critical $opt_memory $opt_cpu $opt_port $opt_regex $opt_stats $opt_cache $opt_nocache $cache_exp $interpreters $snmp_session $PROGNAME $TIMEOUT ); | ||
| 24 | |||
# ---------------------------------------------------------------------------
# Built-in defaults.  process_options() overrides most of them from @ARGV.
# ---------------------------------------------------------------------------
$PROGNAME = "snmp_process_monitor.pl";

# options that stay undefined until the command line says otherwise
( $opt_verbose, $opt_host, $opt_command, $opt_memory, $opt_cpu, $opt_nocache ) = ();

# SNMP agent to talk to
$opt_community = 'public';
$opt_port      = 161;

# thresholds are [ minimum, maximum ] pairs; a bound that is not > 0 is ignored
$opt_warning  = [ 1, -1 ];
$opt_critical = [ 1, -1 ];

# process-table cache: enabled by default, entries expire after 600 seconds
$opt_cache = 1;
$cache_exp = 600;
our $cachefile = '/var/opt/nagios/tmp/';    # completed later
our %processes = ();

# plugin exit status, and the interpreters whose script name we look up
$exit         = $ERRORS{OK};
$interpreters = '(perl|/bin/sh|/usr/bin/sh|/bin/bash|/bin/ksh|python)';
| 42 | |||
# Turn a -w/-c argument into a [ minimum, maximum ] array reference.
# "LOW,HIGH" is split on the comma; a single value becomes [ value, -1 ].
sub _parse_threshold {
    my ($spec) = @_;
    return [ split( ',', $spec ) ] if $spec =~ /,/;
    return [ $spec, -1 ];
}

# Parse @ARGV into the $opt_* globals and finish building $cachefile.
#
# Takes no arguments; the return value is not meaningful.  Side effects:
#   * -V/--version prints the plugin revision;
#   * -v/--verbose additionally switches on $SNMP::debugging;
#   * the help text is printed and the plugin exits UNKNOWN when no host was
#     given, help was requested, or neither -e nor -r was supplied;
#   * -w/-c are stored in $opt_warning/$opt_critical as [ min, max ] pairs;
#   * --memory/--cpu are reset to 0 so they can serve as running totals;
#   * --nocache clears $opt_cache;
#   * "<host>.proc" is appended to $cachefile.
sub process_options {
    my ( $opt_crit, $opt_warn );

    Getopt::Long::Configure('bundling');
    GetOptions(
        'V'     => \$opt_version,   'version'     => \$opt_version,
        'v'     => \$opt_verbose,   'verbose'     => \$opt_verbose,
        'h'     => \$opt_help,      'help'        => \$opt_help,
        's'     => \$opt_stats,     'statistics'  => \$opt_stats,
        'nocache' => \$opt_nocache,
        'H:s'   => \$opt_host,      'hostname:s'  => \$opt_host,
        'p:i'   => \$opt_port,      'port:i'      => \$opt_port,
        'C:s'   => \$opt_community, 'community:s' => \$opt_community,
        'c:s'   => \$opt_crit,      'critical:s'  => \$opt_crit,
        'w:s'   => \$opt_warn,      'warning:s'   => \$opt_warn,
        't:i'   => \$TIMEOUT,       'timeout:i'   => \$TIMEOUT,
        'e:s'   => \$opt_command,   'command:s'   => \$opt_command,
        'r:s'   => \$opt_regex,     'regex:s'     => \$opt_regex,
        'cpu:i' => \$opt_cpu,       'memory:i'    => \$opt_memory,
    );

    local_print_revision() if defined $opt_version;
    $SNMP::debugging = 1   if defined $opt_verbose;

    # a host and something to look for (-e or -r) are mandatory
    if (   !defined($opt_host)
        || defined($opt_help)
        || ( !defined($opt_command) && !defined($opt_regex) ) )
    {
        print_help();
        exit $ERRORS{UNKNOWN};
    }

    # Each threshold is parsed from its OWN option.  The single-value warning
    # case used to be built from the critical argument, so "-w 3" was ignored.
    $opt_critical = _parse_threshold($opt_crit) if defined $opt_crit;
    $opt_warning  = _parse_threshold($opt_warn) if defined $opt_warn;

    # from here on these two are accumulators filled by check_for_matches()
    $opt_memory = 0 if defined $opt_memory;
    $opt_cpu    = 0 if defined $opt_cpu;
    $opt_cache  = 0 if defined $opt_nocache;

    # complete the cachefile's name
    $cachefile .= $opt_host . '.proc';
}
| 92 | |||
# Print this plugin's name and revision string through print_revision(),
# which is imported from the utils module.
sub local_print_revision {
    my $revision = '$Revision: 84 $ ';
    print_revision( $PROGNAME, $revision );
}
| 96 | |||
# Print the one-line usage summary to STDOUT.
# The summary lists every way of selecting a process that process_options()
# accepts: -e (exact command name) or -r (regular expression), plus the
# optional SNMP port.
sub print_usage {
    print "Usage: $PROGNAME -H <host> [-p <port>] -C <snmp_community> "
        . "{-e <command> | -r <regex>} [-w <low>,<high>] [-c <low>,<high>] "
        . "[-t <timeout>] [-s|--statistics] [--memory] [--cpu] [--nocache]\n";
}
| 100 | |||
# Print the full help screen to STDOUT: revision, copyright, the usage line
# and a description of every option that process_options() accepts.
sub print_help {
    local_print_revision();
    print "Copyright (c) 2002 Al Tobey <albert.tobey\@priority-health.com>\n\n",
          "SNMP Process Monitor plugin for Nagios\n\n";
    print_usage();
    print <<EOT;
-V, --version
print the plugin revision
-v, --verbose
print extra debugging information
-h, --help
print this help message
-H, --hostname=HOST
name or IP address of host to check
-p, --port=INTEGER
port of the host's SNMP agent (Default 161)
-C, --community=COMMUNITY NAME
community name for the host's SNMP agent
-t, --timeout=INTEGER
seconds before the plugin times out
-e, --command=COMMAND NAME (ps -e style)
what command should be monitored?
-r, --regex=Perl RE
use a perl regular expression to find your process
-w, --warning=INTEGER[,INTEGER]
minimum and maximum number of processes before a warning is issued (Default 1,-1)
-c, --critical=INTEGER[,INTEGER]
minimum and maximum number of processes before a critical is issued (Default 1,-1)
-s, --statistics
append performance data (the process count) to the output
--memory
combined with '-s', will print the number of bytes of real memory used by process
--cpu
combined with '-s', will print the number of seconds of cpu time consumed by process
--nocache
do not use the cached process list
EOT
}
| 129 | |||
# Print the given list to STDOUT, but only when -v/--verbose was given
# (i.e. $opt_verbose is defined); otherwise do nothing.
sub verbose (@) {
    if ( defined $opt_verbose ) {
        return print @_;
    }
    return;
}
| 134 | |||
# Abort the plugin if the last request on $snmp_session failed.
# When the session's ErrorNum is set, the process table is emptied, an
# UNKNOWN line carrying the session's ErrorStr is printed, and the plugin
# exits with the UNKNOWN status.  Otherwise nothing happens.
sub check_for_errors {
    return unless $snmp_session->{ErrorNum};

    my $reason = $snmp_session->{ErrorStr};
    %processes = ();
    print "UNKNOWN - error retrieving SNMP data: $reason\n";
    exit $ERRORS{UNKNOWN};
}
| 142 | |||
# Populate %processes from the on-disk cache, or leave it empty.
#
# %processes ends up empty when caching is disabled (--nocache), when
# $cachefile is not readable, or when the file cannot be loaded as a hash.
# Otherwise it holds a copy of the stored process table.
sub init_cache {
    %processes = ();

    # $opt_cache is 1 by default and set to 0 by --nocache; it is never
    # undefined, so test its truth.  The former defined() test never fired
    # and a stale cache file was loaded even with --nocache.
    return if !$opt_cache;
    return if !-r $cachefile;

    verbose("loading cache from $cachefile\n");
    my $cached = eval { retrieve($cachefile) };
    if ( $@ || ref($cached) ne 'HASH' ) {
        my $why = $@ || "unexpected content\n";
        verbose("cache loading failed - using blank cache: $why\n");
        return;
    }
    %processes = %$cached;
    return;
}
| 162 | |||
# Fetch a single value over the global $snmp_session.
# The argument is handed unchanged to SNMP::Varbind->new; callers in this
# file pass an [ object name, instance ] array reference.  Returns the
# varbind's value.  check_for_errors() ends the plugin if the request failed.
sub snmpget {
    my ($oid_spec) = @_;

    my $varbind = SNMP::Varbind->new($oid_spec);
    $snmp_session->get($varbind);
    check_for_errors();

    return $varbind->val;
}
| 169 | |||
# Make sure %processes holds a usable process table.
#
# Returns 1 when the cached table is recent enough and was left untouched,
# 0 when a fresh listing was retrieved from the agent.
#
# A fresh listing stores one record per process, keyed by the row's instance
# id, with two fields:
#   abs_name - hrSWRunName exactly as the agent reported it
#   name     - the string check_for_matches() compares against
# $processes{__last_update} records when the listing was taken.
#
# Exits UNKNOWN (directly or through check_for_errors) on SNMP errors or
# when the walk returned noticeably fewer rows than hrSystemProcesses.
sub update_cache {
    # expire the cache after $cache_exp seconds
    if (   $opt_cache != 0
        && exists $processes{__last_update}
        && $processes{__last_update} >= time - $cache_exp )
    {
        verbose("cache file is recent enough - using it\n");
        return 1;
    }

    verbose("retrieving full listing of processes from $opt_host\n");
    my $process_count = snmpget( [ 'hrSystemProcesses', 0 ] );

    # retrieve the data from the remote host
    my ($names) = $snmp_session->bulkwalk( 0, $process_count + 1, [ ['hrSWRunName'] ] );
    check_for_errors();

    # make sure the number of processes from the bulkwalk is close to hrSystemProcesses
    if ( scalar(@$names) + 10 < $process_count ) {
        print "UNKNOWN - only ", scalar(@$names), " of ", $process_count, " processes returned\n";
        exit $ERRORS{UNKNOWN};
    }

    # sort through the process names and create a nice hash of processes
    foreach my $row (@$names) {
        my $abs_name = $row->val;

        # Start from a plain key/value list.  Assigning {} to a hash would
        # add a bogus stringified-reference key to every record.
        my %hash = ( name => $abs_name, abs_name => $abs_name );
        $hash{name} =~ s#.*/##;    # strip path

        # Capture the interpreter explicitly instead of relying on $1, which
        # is stale when the regex branch short-circuits the match.
        my ($interpreter) = $abs_name =~ m#$interpreters$#;

        if ( defined($opt_regex)
            || ( defined($interpreter) && $opt_command !~ m#$interpreters$# ) )
        {
            # fetch the runtime parameters of the process
            my $parameters = snmpget( [ 'hrSWRunParameters', $row->iid ] );

            # only strip if we're looking for a specific command
            if ( defined($opt_command) ) {
                verbose( "process ", $row->iid,
                    " uses $interpreter as an interpreter - getting parameters\n" )
                    if defined $interpreter;
                $hash{name} = $parameters;
                $hash{name} =~ s#.*/##;       # strip path name off the front
                $hash{name} =~ s/\s+.*$//;    # strip everything from the first space to the end
            }
            else {
                # use the full 'ps -efl' style listing for regular expression matching
                my $path = snmpget( [ 'hrSWRunPath', $row->iid ] );
                $hash{name} = "$path $parameters";
            }
        }

        # store in the global hash
        $processes{ $row->iid } = \%hash;
    }

    # update the timestamp so the cache can expire
    $processes{__last_update} = time;
    return 0;
}
| 226 | |||
# Count the entries of %processes that match the command (-e) or regex (-r).
#
# Returns the number of matching processes.  Side effects:
#   * when caching is enabled, every candidate is re-checked with a live
#     hrSWRunName query and dropped if the agent no longer reports the same
#     name for that index;
#   * when --memory / --cpu were requested, the hrSWRunPerfMem / hrSWRunPerfCPU
#     values of the matches are added to $opt_memory / $opt_cpu.
sub check_for_matches {
    my $ret_match = 0;

    foreach my $key ( keys %processes ) {
        next if $key eq '__last_update';    # bookkeeping entry, not a process
        my $entry = $processes{$key};
        my $match = 0;

        # static matches are letter-for-letter (-e)
        if ( defined($opt_command) && $entry->{name} eq $opt_command ) {
            $match = 1;
        }
        # use /o to make sure the user-supplied regex (-r) is only compiled once
        elsif ( defined($opt_regex) && $entry->{name} =~ /$opt_regex/o ) {
            $match = 1;
        }

        # verify the cache's entry by doing an snmpget
        if ( $match && $opt_cache != 0 ) {
            my $proc = snmpget( [ 'hrSWRunName', $key ] );
            $match = 0 if !$proc || $proc ne $entry->{abs_name};
        }
        next unless $match;

        # get the process memory usage if requested
        $opt_memory += snmpget( [ 'hrSWRunPerfMem', $key ] ) if defined $opt_memory;

        # get the process cpu usage if requested
        $opt_cpu += snmpget( [ 'hrSWRunPerfCPU', $key ] ) if defined $opt_cpu;

        # Only name and index are reported: the records never carry a 'pid'
        # field, so the old message always printed an empty pid.
        verbose("process '$entry->{name}' matched at index $key\n");

        $ret_match++;
    }

    return $ret_match;
}
# =========================================================================== #
# =====> MAIN
# =========================================================================== #
process_options();

# Make sure we don't hang Nagios.  Install a handler so that a timeout is
# reported as an UNKNOWN status line instead of the process simply being
# terminated by the signal.
$SIG{ALRM} = sub {
    print "UNKNOWN - plugin timed out after $TIMEOUT seconds\n";
    exit $ERRORS{UNKNOWN};
};
alarm($TIMEOUT);

# initialize the cache, if it's enabled
init_cache();

# create a session for conversing with the remote SNMP agent
$snmp_session = SNMP::Session->new(
    DestHost   => $opt_host,
    Community  => $opt_community,
    RemotePort => $opt_port,
    Version    => '2c',
);
if ( !defined $snmp_session ) {
    # without this check the first request would die on an undefined object
    print "UNKNOWN - could not create an SNMP session for $opt_host\n";
    exit $ERRORS{UNKNOWN};
}

# update_cache() returns 1 when it reused the cached table, 0 when it refreshed
my $used_cache = update_cache();
my $count      = check_for_matches();

# always try twice if caching is enabled - once with cache and once without
if ( $used_cache != 0 && $opt_cache != 0 && $count <= 0 ) {
    verbose("did not find process in cache - trying a refresh\n");
    %processes = ();
    update_cache();
    $count = check_for_matches();
}

# What we were asked to look for, and how many we found.  The thresholds also
# fire when there are too MANY processes, so the message must not claim that
# none were found unless that is actually the case.
my $target = $opt_command || $opt_regex;
my $found  = $count > 0 ? "$count process(es)" : 'no processes';

# the default, OK message
my $message = "OK - $count process(es) found resembling '" . $target;

# warning, critical: below the minimum, or at/above the maximum
# (a bound that is not > 0 is disabled)
if (   ( $opt_warning->[0] > 0 && $opt_warning->[0] > $count )
    || ( $opt_warning->[1] > 0 && $opt_warning->[1] <= $count ) )
{
    $message = "WARNING - $found found resembling '" . $target;
    $exit    = $ERRORS{WARNING};
}
if (   ( $opt_critical->[0] > 0 && $opt_critical->[0] > $count )
    || ( $opt_critical->[1] > 0 && $opt_critical->[1] <= $count ) )
{
    $message = "CRITICAL - $found found resembling '" . $target;
    $exit    = $ERRORS{CRITICAL};
}

# output the status message
print $message, "'";

# print the number of processes if statistics are requested
# NOTE(review): the values are joined with ':'; confirm that whatever consumes
# this performance data expects that separator before changing it.
if ( defined($opt_stats) ) {
    print "|count=$count";
    if ( defined($opt_memory) ) {
        print ":memory=", $opt_memory;
    }
    if ( defined($opt_cpu) ) {
        # presumably hrSWRunPerfCPU counts centi-seconds - TODO confirm
        $opt_cpu = $opt_cpu / 100;
        printf ":cpu=%.2f", $opt_cpu;
    }
}

# finish the status line before anything else can be printed
print "\n";

# store a copy of the %processes hash if we're using caching
if ( $exit == $ERRORS{OK} && $opt_cache != 0 ) {
    # store() signals failure either by dying or by returning a false value,
    # so both are folded into $stored.  A failed write only costs us the cache.
    my $stored = eval {
        unlink($cachefile) if -e $cachefile;
        store( \%processes, $cachefile );
    };
    verbose("could not write cache file $cachefile\n") if !$stored;
}

exit $exit;
| 330 | |||
| 331 | |||
