summaryrefslogtreecommitdiffstats
path: root/contrib/check_snmp_process_monitor.pl
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/check_snmp_process_monitor.pl')
-rw-r--r--contrib/check_snmp_process_monitor.pl331
1 files changed, 0 insertions, 331 deletions
diff --git a/contrib/check_snmp_process_monitor.pl b/contrib/check_snmp_process_monitor.pl
deleted file mode 100644
index 0f44597..0000000
--- a/contrib/check_snmp_process_monitor.pl
+++ /dev/null
@@ -1,331 +0,0 @@
1#!/usr/local/bin/perl
2# author: Al Tobey <albert.tobey@priority-health.com>
3# what: monitor a process using the host-resources mib
4# license: GPL - http://www.fsf.org/licenses/gpl.txt
5#
6# Todo:
7# * implement memory and cpu utilization checks
8# * maybe cache pids in DBM files if snmp agents get overworked
9###############################################################################
10# to get a list of processes over snmp try this command:
11# snmptable -v2c -c public hostname hrSWRunTable
12# for just a list of valid arguments for the '-e' option:
13# snmpwalk -v2c -c public hostname hrSWRunName |perl -pe 's:.*/::'
14###############################################################################
15
16use strict;
17require 5.6.0;
18use lib qw( /opt/nagios/libexec /usr/local/libexec );
19use utils qw(%ERRORS $TIMEOUT &print_revision &support &usage);
20use SNMP 5.0;
21use Getopt::Long;
22use Storable;
23use vars qw( $exit $opt_version $opt_timeout $opt_help $opt_command $opt_host $opt_community $opt_verbose $opt_warning $opt_critical $opt_memory $opt_cpu $opt_port $opt_regex $opt_stats $opt_cache $opt_nocache $cache_exp $interpreters $snmp_session $PROGNAME $TIMEOUT );
24
# ---- plugin identity and exit state ----------------------------------------
$PROGNAME = "snmp_process_monitor.pl";
$exit     = $ERRORS{OK};    # stays OK unless a threshold check changes it

# ---- options with no value until the command line supplies one -------------
( $opt_verbose, $opt_host, $opt_command, $opt_memory, $opt_cpu, $opt_nocache ) = ();

# ---- SNMP connection defaults -----------------------------------------------
$opt_community = 'public';
$opt_port      = 161;

# ---- thresholds, stored as [ minimum, maximum ] ------------------------------
$opt_warning  = [ 1, -1 ];
$opt_critical = [ 1, -1 ];

# ---- process cache -----------------------------------------------------------
$opt_cache = 1;      # caching is on unless --nocache is given
$cache_exp = 600;    # the cache expires after this many seconds
our $cachefile = '/var/opt/nagios/tmp/'; # completed later
our %processes = ();

# processes whose name ends in one of these are looked up by their parameters
$interpreters = '(perl|/bin/sh|/usr/bin/sh|/bin/bash|/bin/ksh|python)';
42
# Parse the command line into the $opt_* package globals.
#
# Takes no arguments; the return value is not meaningful.  Side effects:
#   * fills the $opt_* variables and $TIMEOUT from @ARGV via Getopt::Long
#   * switches on $SNMP::debugging when -v is given
#   * turns the -w / -c strings into [ minimum, maximum ] array refs
#     (a single value means "minimum only", the maximum becomes -1)
#   * resets $opt_memory / $opt_cpu to 0 when they were requested
#   * appends "<host>.proc" to $cachefile
# Prints the help text and exits UNKNOWN when -h is given, when no host is
# given, or when neither -e nor -r is given.
sub process_options {
    my ( $opt_crit, $opt_warn );

    Getopt::Long::Configure( 'bundling' );
    GetOptions(
        'V'     => \$opt_version,   'version'     => \$opt_version,
        'v'     => \$opt_verbose,   'verbose'     => \$opt_verbose,
        'h'     => \$opt_help,      'help'        => \$opt_help,
        's'     => \$opt_stats,     'statistics'  => \$opt_stats,
        'nocache' => \$opt_nocache,
        'H:s'   => \$opt_host,      'hostname:s'  => \$opt_host,
        'p:i'   => \$opt_port,      'port:i'      => \$opt_port,
        'C:s'   => \$opt_community, 'community:s' => \$opt_community,
        'c:s'   => \$opt_crit,      'critical:s'  => \$opt_crit,
        'w:s'   => \$opt_warn,      'warning:s'   => \$opt_warn,
        't:i'   => \$TIMEOUT,       'timeout:i'   => \$TIMEOUT,
        'e:s'   => \$opt_command,   'command:s'   => \$opt_command,
        'r:s'   => \$opt_regex,     'regex:s'     => \$opt_regex,
        'cpu:i' => \$opt_cpu,       'memory:i'    => \$opt_memory,
    );

    if ( defined($opt_version) ) { local_print_revision(); }
    if ( defined($opt_verbose) ) { $SNMP::debugging = 1; }
    if ( !defined($opt_host) || defined($opt_help)
         || ( !defined($opt_command) && !defined($opt_regex) ) ) {
        print_help();
        exit $ERRORS{UNKNOWN};
    }

    # "-c MIN,MAX" or just "-c MIN"
    if ( defined($opt_crit) ) {
        $opt_critical = ( $opt_crit =~ /,/ )
            ? [ split( ',', $opt_crit ) ]
            : [ $opt_crit, -1 ];
    }
    # "-w MIN,MAX" or just "-w MIN"; the single-value form must use the
    # warning string itself, not the critical one
    if ( defined($opt_warn) ) {
        $opt_warning = ( $opt_warn =~ /,/ )
            ? [ split( ',', $opt_warn ) ]
            : [ $opt_warn, -1 ];
    }

    # --memory / --cpu only act as switches: start their totals at zero
    if ( defined($opt_memory) )  { $opt_memory = 0 }
    if ( defined($opt_cpu) )     { $opt_cpu = 0 }
    if ( defined($opt_nocache) ) { $opt_cache = 0 }

    # complete the cachefile's name
    $cachefile .= $opt_host . '.proc';
}
92
# Print this plugin's name and revision string through print_revision().
sub local_print_revision {
    my $revision = '$Revision: 84 $ ';
    return print_revision( $PROGNAME, $revision );
}
96
# Print the one-line usage synopsis for this plugin.
sub print_usage {
    my $synopsis = "Usage: $PROGNAME -H <host> -C <snmp_community> -e <command>"
                 . " [-w <low>,<high>] [-c <low>,<high>] [-t <timeout>]"
                 . " [-s|--statistics] [--memory] [--cpu] [--nocache]\n";
    return print( $synopsis );
}
100
# Print the full help screen: revision, copyright, usage line and a
# description of every command line option accepted by process_options().
sub print_help {
    local_print_revision();
    print "Copyright (c) 2002 Al Tobey <albert.tobey\@priority-health.com>\n\n",
          "SNMP Process Monitor plugin for Nagios\n\n";
    print_usage();
    print <<EOT;
-v, --verbose
 print extra debugging information
-h, --help
 print this help message
-H, --hostname=HOST
 name or IP address of host to check
-p, --port=INTEGER
 UDP port of the host's SNMP agent (Default 161)
-C, --community=COMMUNITY NAME
 community name for the host's SNMP agent
-t, --timeout=INTEGER
 seconds to wait before the plugin gives up
-e, --command=COMMAND NAME (ps -e style)
 what command should be monitored?
-r, --regex=Perl RE
 use a perl regular expression to find your process
-w, --warning=INTEGER[,INTEGER]
 minimum and maximum number of processes before a warning is issued (Default 1,-1)
-c, --critical=INTEGER[,INTEGER]
 minimum and maximum number of processes before a critical is issued (Default 1,-1)
-s, --statistics
 append the number of matching processes to the output
--memory
 combined with '-s', will print the amount of real memory (hrSWRunPerfMem, in KBytes) used by process
--cpu
 combined with '-s', will print the number of seconds of cpu time consumed by process
--nocache
 do not use the on-disk cache of the process table
EOT
}
129
# Print the given list, but only when verbose mode (-v) was requested.
sub verbose (@) {
    unless ( defined $opt_verbose ) {
        return;
    }
    return print @_;
}
134
# Abort the plugin when the last operation on $snmp_session failed.
# On error the in-memory process table is emptied, the session's error
# string is reported and the plugin exits UNKNOWN.
sub check_for_errors {
    my $error_number = $snmp_session->{ErrorNum};
    if ( $error_number ) {
        %processes = ();
        print "UNKNOWN - error retrieving SNMP data: ", $snmp_session->{ErrorStr}, "\n";
        exit $ERRORS{UNKNOWN};
    }
}
142
# Fill %processes from the on-disk cache, if caching is enabled.
#
# Takes no arguments and returns nothing.  %processes is always emptied
# first; it is then only repopulated when caching is on, $cachefile is
# readable and its contents deserialize to a hash reference.
sub init_cache {
    %processes = ();

    # $opt_cache is 1 by default and 0 after --nocache (never undef), so the
    # test has to be on truth, not on definedness
    return if ( !$opt_cache );
    return if ( !-r $cachefile );

    verbose( "loading cache from $cachefile\n" );

    # NOTE(review): Storable::retrieve is only safe on trusted files - make
    # sure the cache directory is not writable by untrusted users
    my $cached = eval { retrieve( $cachefile ) };
    if ( $@ || ref($cached) ne 'HASH' ) {
        verbose( "cache loading failed - using blank cache: $@\n" );
        return;
    }

    %processes = %$cached;
    return;
}
162
# Fetch a single value over $snmp_session.
# The argument is passed straight to SNMP::Varbind->new; after the get,
# check_for_errors() is run and the varbind's value is returned.
sub snmpget {
    my ( $oid_spec ) = @_;

    my $varbind = SNMP::Varbind->new( $oid_spec );
    $snmp_session->get( $varbind );
    check_for_errors();

    return $varbind->val;
}
169
# Make sure %processes holds a usable process table.
#
# Returns 1 when the cached table is recent enough and was left untouched,
# 0 when the table was rebuilt from the remote agent.  A rebuild replaces
# the whole table, so entries of processes that have gone away do not
# linger in the cache.  Exits UNKNOWN (directly or via check_for_errors)
# when the agent cannot be walked.
#
# Each entry is keyed by the hrSWRunName instance id and holds:
#   name     - the name used for matching (see below)
#   abs_name - hrSWRunName exactly as returned by the agent
#   pid      - the instance id (presumably the native pid on most agents -
#              TODO confirm for the agents in use)
sub update_cache {
    # expire the cache after $cache_exp seconds
    if ( $opt_cache != 0 && exists($processes{__last_update})
         && $processes{__last_update} >= time - $cache_exp ) {
        verbose( "cache file is recent enough - using it\n" );
        return 1;
    }

    verbose( "retrieving full listing of processes from $opt_host\n" );
    my $process_count = snmpget( ['hrSystemProcesses', 0] );

    # retrieve the data from the remote host
    my ($names) = $snmp_session->bulkwalk( 0, $process_count + 1, [['hrSWRunName']] );
    check_for_errors();

    # a failed walk may hand back nothing at all - never dereference that
    if ( !ref($names) ) {
        print "UNKNOWN - no process table returned by $opt_host\n";
        exit $ERRORS{UNKNOWN};
    }

    # make sure the number of processes from the bulkwalk is close to hrSystemProcesses
    if ( scalar(@$names) + 10 < $process_count ) {
        print "UNKNOWN - only ", scalar(@$names), " of ",$process_count, " processes returned\n";
        exit $ERRORS{UNKNOWN};
    }

    # drop whatever an expired cache left behind before refilling the table
    %processes = ();

    # sort through the process names and create a nice hash of processes
    foreach my $row ( @$names ) {
        my $abs_name = $row->val;
        my %hash = (
            name     => $abs_name,
            abs_name => $abs_name,
            pid      => $row->iid,
        );
        $hash{name} =~ s#.*/##; # strip path

        # remember which interpreter (if any) runs this process; capture it
        # here so that a stale $1 can never end up in the message below
        my $interpreter;
        if ( $abs_name =~ m#$interpreters$# ) {
            $interpreter = $1;
        }

        if ( defined($opt_regex)
             || ( defined($interpreter) && $opt_command !~ m#$interpreters$# ) ) {

            # fetch the runtime parameters of the process
            my $parameters = snmpget( ['hrSWRunParameters', $row->iid] );

            # only strip if we're looking for a specific command
            if ( defined($opt_command) ) {
                verbose( "process ", $row->iid, " uses $interpreter as an interpreter - getting parameters\n" )
                    if ( defined($interpreter) );
                $hash{name} = $parameters;
                $hash{name} =~ s#.*/##;     # strip path name off the front
                $hash{name} =~ s/\s+.*$//;  # strip everything from the first space to the end
            }
            else {
                # use the full 'ps -efl' style listing for regular expression matching
                my $path = snmpget( ['hrSWRunPath', $row->iid] );
                $hash{name} = "$path $parameters";
            }
        }

        # store in the global hash
        $processes{$row->iid} = \%hash;
    }

    # update the timestamp so the cache can expire
    $processes{__last_update} = time;
    return 0;
}
226
# Count the entries of %processes that match the requested command (-e) or
# regular expression (-r).
#
# Returns the number of matching processes.  When caching is enabled every
# candidate is confirmed with a fresh hrSWRunName lookup, so a cached entry
# whose slot now belongs to a different (or no) process is not counted.
# Side effect: when --memory / --cpu were requested, the hrSWRunPerfMem /
# hrSWRunPerfCPU values of all matches are added to $opt_memory / $opt_cpu.
sub check_for_matches {
    my $ret_match = 0;

    foreach my $key ( keys(%processes) ) {
        next if ( $key eq '__last_update' );    # bookkeeping slot, not a process

        my $entry = $processes{$key};
        my $match = 0;

        # static matches are letter-for-letter (-e)
        if ( defined($opt_command) && $entry->{name} eq $opt_command ) { $match++; }
        # use /o to make sure the user-supplied regex (-r) is only compiled once
        elsif ( defined($opt_regex) && $entry->{name} =~ /$opt_regex/o ) { $match++; }

        # verify the cache's entry by doing an snmpget
        if ( $match > 0 && $opt_cache != 0 ) {
            my $proc = snmpget( ['hrSWRunName', $key] );
            --$match if ( !$proc || $proc ne $entry->{abs_name} );
        }
        # get the process memory usage if requested
        if ( $match > 0 && defined($opt_memory) ) {
            $opt_memory += snmpget( ['hrSWRunPerfMem', $key] );
        }
        # get the process cpu usage if requested
        if ( $match > 0 && defined($opt_cpu) ) {
            $opt_cpu += snmpget( ['hrSWRunPerfCPU', $key] );
        }

        if ( $match > 0 ) {
            # entries may carry no pid of their own (e.g. from an older cache
            # file); the table index is the best identifier available then
            my $pid = defined( $entry->{pid} ) ? $entry->{pid} : $key;
            verbose( "process '$entry->{name}' has pid $pid and index $key\n" );
        }

        $ret_match += $match;
    }

    return $ret_match;
}
260# =========================================================================== #
261# =====> MAIN
262# =========================================================================== #
process_options();

# make sure we don't hang Nagios; without a handler SIGALRM would kill the
# plugin without any status line, so report the timeout as UNKNOWN instead
$SIG{ALRM} = sub {
    print "UNKNOWN - plugin timed out after $TIMEOUT seconds\n";
    exit $ERRORS{UNKNOWN};
};
alarm( $TIMEOUT );

# initialize the cache, if it's enabled
init_cache();

# create a session for conversing with the remote SNMP agent
$snmp_session = SNMP::Session->new(
    DestHost   => $opt_host,
    Community  => $opt_community,
    RemotePort => $opt_port,
    Version    => '2c'
);
if ( !defined($snmp_session) ) {
    print "UNKNOWN - could not create an SNMP session to $opt_host\n";
    exit $ERRORS{UNKNOWN};
}

# update_cache() returns 1 when the cached table was used, 0 after a refresh
my $usage = update_cache();
my $count = check_for_matches();

# always try twice if caching is enabled - once with cache and once without
if ( $usage != 0 && $opt_cache != 0 && $count <= 0 ) {
    verbose "did not find process in cache - trying a refresh\n";
    %processes = ();
    update_cache();
    $count = check_for_matches();
}

# what we were looking for, as shown in the status line
my $target = $opt_command || $opt_regex;

# the default, OK message
my $message = "OK - $count process(es) found resembling '". $target;

# warning, critical: thresholds are [ minimum, maximum ]; a value <= 0
# disables that bound.  Report the real count, since the state may just as
# well be caused by too many processes as by none.
if ( ($opt_warning->[0] > 0 && $opt_warning->[0] > $count)
     || ($opt_warning->[1] > 0 && $opt_warning->[1] <= $count) ) {
    $message = "WARNING - $count process(es) found resembling '". $target;
    $exit = $ERRORS{WARNING};
}
if ( ($opt_critical->[0] > 0 && $opt_critical->[0] > $count)
     || ($opt_critical->[1] > 0 && $opt_critical->[1] <= $count) ) {
    $message = "CRITICAL - $count process(es) found resembling '". $target;
    $exit = $ERRORS{CRITICAL};
}

# output the status message
print $message, "'";

# print the number of processes if statistics are requested
if ( defined($opt_stats) ) {
    print "|count=$count";
    if ( defined($opt_memory) ) {
        print ":memory=", $opt_memory;
    }
    if ( defined($opt_cpu) ) {
        # the summed hrSWRunPerfCPU values are divided by 100 to get seconds
        $opt_cpu = $opt_cpu / 100;
        printf ":cpu=%.2f", $opt_cpu;
    }
}

# finish the status line before anything else can write to the output
print "\n";

# store a copy of the %processes hash if we're using caching; this is
# best-effort, a failure only costs the next run a full refresh
if ( $exit == $ERRORS{OK} && $opt_cache != 0 ) {
    eval {
        unlink( $cachefile ) if ( -e $cachefile );
        store( \%processes, $cachefile ) or die "store failed\n";
    };
    verbose "could not write cache file $cachefile: $@\n" if ( $@ );
}

exit $exit;
330
331