summaryrefslogtreecommitdiffstats
path: root/contrib/check_remote_nagios_status.pl
diff options
context:
space:
mode:
Diffstat (limited to 'contrib/check_remote_nagios_status.pl')
-rw-r--r--contrib/check_remote_nagios_status.pl666
1 files changed, 0 insertions, 666 deletions
diff --git a/contrib/check_remote_nagios_status.pl b/contrib/check_remote_nagios_status.pl
deleted file mode 100644
index dc99705..0000000
--- a/contrib/check_remote_nagios_status.pl
+++ /dev/null
@@ -1,666 +0,0 @@
1#!/usr/bin/perl -w
2
3# check_status.pl Nagios Plugin - Version 1.3
4# Last Updated: 1/9/2003
5#
6# Report any bugs/questions to Russell Scibetti at russell@quadrix.com
7#
8# check_status Change Log:
9#
10# To do for 1.4
11# - Better help and documentation (separate doc?)
12# - Take argument (patterns to match) from a separate spec file
13#
14# New Addition to 1.3
15# - Added ChangeLog information and updated --help output
16# - hostdown (hd) argument for how a service check should respond
17# when its host is Down/Unreachable
18# (--hostdown="ok|warning|critical|unknown")
19# - Changed name from check_state to check_status
20# - Set hostdown to default to OK when the argument isn't specified
21# - Number of Hosts checked is now output in OK result
22#
23# Version 1.2 additions:
24#
25# - Added ability to handle ack'd and downtimed services differently
26# depending on argument provided
27# (--ack="ok|warning|critical|unknown|down|unreachable"
28# --dt="ok|warning|critical|unknown|down|unreachable")
29#
30# Version 1.1 additions:
31#
32# - Added --host=<regex>, --servhost=<regex> to allow for specific field
33# matching (host for matching hostname in host checks, servhost for
34# matching the hostname in service checks, service for matching the
35# service name in service checks)
36# - Output the number of OK services for an OK output
37#
38# Version 1.0 features:
39#
40# - Freshness check of status.log (timestamp)
41# - Match service or host checks
42# - Can ignore acknowledged or downtimed services/hosts (--ack, --dt)
43# - Can output different levels of detail dependent on # of problems
44# - Can check for number of critical, warning, or unknowns
45#
46#############################################################
47
48use Getopt::Long;
49use File::stat;
50
# With 'bundling' enabled single-letter options may be combined behind one
# dash, so the multi-letter options (--dt, --hd, --servhost, ...) have to be
# written with a double dash.
Getopt::Long::Configure('bundling');

# Parse the command line into the package globals read by the rest of the
# script.  "=s" options require a value; ":s" options take an optional value
# and end up defined-but-empty when given bare, which the later checks use
# to tell "given without a value" from "not given at all".
# -r is accepted as an alias of -S/--service because the --help text has
# documented the service option under that letter.
GetOptions
    ("V"          => \$version,   "version"     => \$version,
     "h"          => \$help,      "help"        => \$help,
     "v"          => \$verbose,   "verbose"     => \$verbose,
     "w=s"        => \$warning,   "warning=s"   => \$warning,
     "c=s"        => \$critical,  "critical=s"  => \$critical,
     "u=s"        => \$unknown,   "unknown=s"   => \$unknown,
     "p=s"        => \$pattern,   "pattern=s"   => \$pattern,
     "S:s"        => \$service,   "service:s"   => \$service,
     "r:s"        => \$service,
     "s=s"        => \$status,    "status=s"    => \$status,
     "d=s"        => \$dir,       "dir=s"       => \$dir,
     "D=s"        => \$details,   "details=s"   => \$details,
     "H:s"        => \$host,      "host:s"      => \$host,
     "f=s"        => \$freshness, "freshness=s" => \$freshness,
     "servhost=s" => \$servhost,
     "a:s"        => \$ack,       "ack:s"       => \$ack,
     "dt:s"       => \$dt,        "downtime:s"  => \$dt,
     "hd:s"       => \$hdown,     "hostdown:s"  => \$hdown,
     "ok"         => \$ok)
    or do {
        # GetOptions has already reported the offending option on STDERR.
        # Stop here instead of silently running with a different
        # configuration than the one the caller asked for.
        print "Usage: $0 -s <status file> | -d <Nagios log dir>\n";
        print "Use the --help option for full list of arguments\n";
        # 3 is the UNKNOWN exit code; --ok still forces 0 as it does
        # everywhere else in the script.
        exit($ok ? 0 : 3);
    };
72
# Exit codes handed to exitcheck().
my ($OK, $WARNING, $CRITICAL, $UNKNOWN) = (0, 1, 2, 3);

# State names; they double as the keys of the per-state counters and
# message buffers.
my ($crit, $warn, $unk) = ("CRITICAL", "WARNING", "UNKNOWN");
my ($down, $unreach)    = ("DOWN", "UNREACHABLE");
84
# --help: version banner followed by the full usage text.
if ($help) {
    printVersion();
    printHelp();
    exitcheck($UNKNOWN);
}

# --version: version banner only.
if ($version) {
    printVersion();
    exitcheck($UNKNOWN);
}

# The status log is either named directly (-s) or looked up as "status.log"
# inside the log directory (-d).  With neither there is nothing to parse.
unless ($status) {
    unless ($dir) {
        print "Usage: $0 -s <status file> | -d <Nagios log dir>\n";
        print "Use the --help option for full list of arguments\n";
        exitcheck($UNKNOWN);
    }

    # No separator is added when the directory already ends in a non-slash
    # character followed by a slash.
    my $separator = ($dir =~ m#[^/]/$#) ? "" : "/";
    $status = $dir . $separator . "status.log";
}
112
# --host given without a value: match every host name.
if (defined $host) {
    if (!$host) {
        $host="[^\\s]*";
    }
}

# Service mode (no --host): any matcher that was not supplied matches
# everything.
if (!$host && !$servhost) {
    $servhost="[^\\s]*";
}

if (!$host && !$service) {
    $service="[^\\s]*";
}

# --ack, --dt and --hostdown share the same rules: given without a value
# they mean "ok", otherwise the value has to be exactly one of the state
# names.  The match is anchored so that a value which merely contains a
# state name (e.g. "notok") is rejected instead of being turned into an
# unknown state key later on; the value is lower-cased first so that
# upper-case spellings are accepted as well.
for my $option ([\$ack, "ack"], [\$dt, "dt"], [\$hdown, "hostdown"]) {
    my ($value, $label) = @$option;

    next unless defined $$value;

    if (!$$value) {
        $$value = "ok";
        next;
    }

    $$value = lc $$value;
    if ($$value !~ /\A(?:ok|critical|warning|unknown|down|unreachable)\z/) {
        print "Invalid value for $label\n";
        exitcheck($UNKNOWN);
    }
}
156
# Problem count up to which the full-detail output is used (first -D value).
my $much_details = 0;

# Alternations matched against the state field of a status line.
my $ServiceNotOK = join "|", qw(CRITICAL WARNING UNKNOWN);
my $HostNotOK    = join "|", qw(DOWN UNREACHABLE);

# Number of problems counted per state.
my %numprob = map { $_ => 0 } qw(WARNING CRITICAL UNKNOWN DOWN UNREACHABLE);

# Single-value thresholds (-c N, -w N, -u N); 0 while the multi-value form
# is in use.
my ($CritOnly, $WarnOnly, $UnkOnly) = (0, 0, 0);

# Multi-value thresholds (-w/-c/-u given as #,#[,#]): the raw split values
# and the per-state levels derived from them.
my (@wlev, @clev, @ulev);
my %warnlevel = (WARNING => 0, CRITICAL => 0, UNKNOWN => 0);
my %critlevel = (WARNING => 0, CRITICAL => 0, UNKNOWN => 0);
my %unklevel  = (WARNING => 0, CRITICAL => 0, UNKNOWN => 0);
my %hostlevel = (DOWN => 0, UNREACHABLE => 0);

# Hosts in scheduled downtime, and how many of them there are.
my @hostdowntime;
my $numdowntime = 0;

# Hosts in a Down/Unreachable state, and how many of them there are.
my @hostdown;
my $numdown = 0;

# State-change-to-OK time per host name.
my %hostoktimes;

# Number of status lines that matched the selection criteria.
my $nummatch = 0;
189
# Each of -w, -c and -u is handled the same way:
#   not given        -> single-value threshold of 1
#   one number       -> single-value threshold
#   comma separated  -> per-state levels in the order WARNING, CRITICAL and
#                       (only when present and non-zero) UNKNOWN

if (!$warning) {
    $WarnOnly = 1;
}
elsif ($warning !~ /,/) {
    $WarnOnly = $warning;
}
else {
    @wlev = split /,/, $warning;
    @warnlevel{"WARNING", "CRITICAL"} = @wlev[0, 1];
    $warnlevel{"UNKNOWN"} = $wlev[2] if $wlev[2];
}

if (!$critical) {
    $CritOnly = 1;
}
elsif ($critical !~ /,/) {
    $CritOnly = $critical;
}
else {
    @clev = split /,/, $critical;
    @critlevel{"WARNING", "CRITICAL"} = @clev[0, 1];
    $critlevel{"UNKNOWN"} = $clev[2] if $clev[2];
}

if (!$unknown) {
    $UnkOnly = 1;
}
elsif ($unknown !~ /,/) {
    $UnkOnly = $unknown;
}
else {
    @ulev = split /,/, $unknown;
    @unklevel{"WARNING", "CRITICAL"} = @ulev[0, 1];
    $unklevel{"UNKNOWN"} = $ulev[2] if $ulev[2];
}
240
241
# -f is given in minutes (30 when absent); the comparison against the age
# of the status file is done in seconds.
$freshness = ($freshness ? $freshness : 30) * 60;

# Per-state bookkeeping for the detail output: how many entries have been
# appended so far, and the text itself.  The "much_" variants belong to the
# full-detail level.
my %ct          = map { $_ => 0 }  qw(CRITICAL WARNING UNKNOWN DOWN UNREACHABLE);
my %much_ct     = map { $_ => 0 }  qw(CRITICAL WARNING UNKNOWN DOWN UNREACHABLE);

my %output      = map { $_ => "" } qw(CRITICAL WARNING UNKNOWN DOWN UNREACHABLE);
my %much_output = map { $_ => "" } qw(CRITICAL WARNING UNKNOWN DOWN UNREACHABLE);

# -D a,b: a is the limit for full details, b the limit for some details.
# A single value is kept as-is in $details.
if ($details && $details =~ /,/) {
    my @limits = split /,/, $details;
    ($much_details, $details) = @limits[0, 1];
}
262
# Open the status log read-only.  The three-argument form keeps a file name
# that starts or ends with mode characters ('>', '|', ...) or whitespace
# from being interpreted as anything but a name.
open(my $sta, '<', $status)
    or die "Cannot open status file $status: $!";

# Freshness check: a status file that has not been modified within the
# allowed window is reported as CRITICAL before anything is parsed.
my $curr_time = time;
my $file_stat = stat($status)
    or die "Cannot stat status file $status: $!";
my $file_time = $file_stat->mtime;

if ($curr_time - $file_time > $freshness) {
    printf "State CRITICAL - Status file is stale!!!\n";
    exitcheck($CRITICAL);
}

# Each line is a ';'-separated record whose first field ends in HOST or
# SERVICE.  Field positions used below are the ones this script has always
# relied on; their meaning is inferred from how they are used.
while (my $line = <$sta>) {
    chomp $line;

    if ($line =~ /^[^\s]+[\s]+HOST;/) {
        # Fields used: [1] host name, [2] state, [4] time recorded as the
        # host's state-change-to-OK time, [17] non-"0" when the host is in
        # downtime.
        my @hdata = split /;/, $line;

        # If you care about matching hosts (not services):
        if ($host && $hdata[1] =~ /$host/) {
            $nummatch++;
            if ( $hdata[2] =~ /$HostNotOK/ ) {
                addproblem($line, $hdata[2]);
            }
        }

        # If you are matching services, gather host information:
        else {
            if ( $hdata[2] =~ /$HostNotOK/ ) {
                push @hostdown, $hdata[1];
                $numdown++;
            }
            else {
                $hostoktimes{$hdata[1]} = $hdata[4];
            }
            if ( $hdata[17] ne "0" ) {
                push @hostdowntime, $hdata[1];
                $numdowntime++;
            }
        }
    }
    elsif (!$host && $line =~ /^[^\s]+[\s]+SERVICE;/) {
        # Fields used: [1] host name, [2] service name, [3] state,
        # [5] state type (only "HARD" states count as problems).
        my @servdata = split /;/, $line;

        # A line is selected either by --pattern applied to the whole line
        # or by the host-name and service-name matchers together.
        if ( ( $pattern && ($line =~ /$pattern/)) ||
             (($servdata[1] =~ /$servhost/) && ($servdata[2] =~ /$service/)) ) {
            $nummatch++;
            if (($servdata[5] eq "HARD") && ($servdata[3] =~ /$ServiceNotOK/)) {
                addproblem($line, $servdata[3]);
            }
        }
    }
}

close($sta);
314
# Without a single matching status line there is nothing to report on.
if (!$nummatch) {
    print "Nothing Matches your criteria!\n";
    exitcheck($UNKNOWN);
}

# Total number of problems (for reference): host states in host mode,
# service states otherwise.
$total = $host
    ? $numprob{"DOWN"} + $numprob{"UNREACHABLE"}
    : $numprob{"WARNING"} + $numprob{"CRITICAL"} + $numprob{"UNKNOWN"};

# Every matched line that was not counted as a problem is OK.
my $numok = $nummatch - $total;
329
# Tells whether any problem count has reached its level in a multi-value
# (#,#[,#]) threshold table.  A level of 0 (or a missing one) means "no
# threshold for this state": comparing with ">= 0" would be true for every
# count, so e.g. "-c 5,2", which leaves the UNKNOWN level at 0, would
# report CRITICAL unconditionally.
my $levelReached = sub {
    my ($levels) = @_;
    for my $state ("WARNING", "CRITICAL", "UNKNOWN") {
        my $level = $levels->{$state} || 0;
        return 1 if $level > 0 && $numprob{$state} >= $level;
    }
    return 0;
};

# If this is a host state check:
if ($host) {
    if ($numprob{"DOWN"}>0 || $numprob{"UNREACHABLE"}>0 ) {
        # List the affected hosts only while the list stays within the -D limit.
        if ($details && ($total <= $details)) {
            print "State CRITICAL - $total Host Problems: $output{$down} $output{$unreach}\n";
            exitcheck($CRITICAL);
        }
        else {
            print "State CRITICAL - $numprob{$down} Hosts Down, $numprob{$unreach} Hosts Unreachable\n";
            exitcheck($CRITICAL);
        }
    }
    else {
        print "State OK - $numok Hosts Up, $total Problems\n";
        exitcheck($OK);
    }
}

# Service checks: the first condition that holds decides the result, in the
# order CRITICAL, WARNING, UNKNOWN.

# If you only defined a Critical level in terms of # of criticals...
elsif ($CritOnly && ($numprob{"CRITICAL"} >= $CritOnly)) {
    countAndPrint($crit,$numprob{$crit},0);
    exitcheck($CRITICAL);
}

# Critical in terms of # criticals and # warnings...
elsif (!$CritOnly && $levelReached->(\%critlevel)) {
    countAndPrint($crit,$total,1);
    exitcheck($CRITICAL);
}

# Warning in terms of # warnings only...
elsif ($WarnOnly && ($numprob{"WARNING"} >= $WarnOnly)) {
    countAndPrint($warn,$numprob{$warn},0);
    exitcheck($WARNING);
}

# Warning in terms of # warnings and # criticals...
elsif (!$WarnOnly && $levelReached->(\%warnlevel)) {
    countAndPrint($warn,$total,1);
    exitcheck($WARNING);
}

# Unknown in terms of # unknown only...
elsif ( $UnkOnly && ($numprob{"UNKNOWN"}>=$UnkOnly) ) {
    countAndPrint($unk,$numprob{$unk},0);
    exitcheck($UNKNOWN);
}

# Unknown in terms of # warning, critical, and unknown...
elsif (!$UnkOnly && $levelReached->(\%unklevel)) {
    countAndPrint($unk,$total,1);
    exitcheck($UNKNOWN);
}

# Everything is OK!
else {
    print "State OK - $numok OK, $total problems\n";
    exitcheck($OK);
}
395
396
397
398############################
399# Subroutines
400############################
401
# Terminate the plugin.  The argument is the exit code to use; when --ok was
# given on the command line ($ok is set) the exit code is 0 no matter what
# was asked for.
sub exitcheck {
    my ($code) = @_;

    exit 0 if $ok;
    exit $code;
}
411
# Print the one-line result for a service check.
#   $_[0]  state name used in the "State ..." prefix
#   $_[1]  number of problems being reported
#   $_[2]  true when the line covers critical, warning and unknown problems
#          together, false when it covers only the given state
# The amount of detail depends on -D: up to $much_details problems the full
# detail text is printed, up to $details problems the short text, beyond
# that only numbers.  Without -D the line is just the problem count.
sub countAndPrint {
    my ($state, $count, $alltypes) = @_;
    my $summary;

    if (!$details) {
        $summary = "$count problems";
    }
    elsif ($count <= $much_details) {
        $summary = $alltypes
            ? "$count problems: $much_output{$crit} $much_output{$warn} $much_output{$unk}"
            : "$count \L$state\E: $much_output{$state}";
    }
    elsif ($count <= $details) {
        $summary = $alltypes
            ? "$count problems: $output{$crit} $output{$warn} $output{$unk}"
            : "$count \L$state\E: $output{$state}";
    }
    else {
        $summary = $alltypes
            ? "$numprob{$crit} critical, $numprob{$warn} warning, $numprob{$unk} unknown"
            : "$count \L$state\E";
    }

    print "State $state - $summary\n";
}
451
452
# Add-in the problem found in the status log.
#   $_[0]  the complete status line (';'-separated fields)
#   $_[1]  the state found on that line
# Decides whether the line counts as a problem and under which state, then
# updates %numprob and, when -D is in effect, the detail buffers.
# Field positions are the ones this script has always used; their meaning is
# inferred from how they are tested (NOTE(review): confirm against the
# status.log format of the Nagios version in use).
sub addproblem {
    my ($line, $state) = @_;

    my $test    = 1;         # cleared when the line is not to be counted
    my $type    = $state;    # state the problem is finally counted under
    my $diffout = "";        # replacement detail text ("<host> is down")

    my @values = split /;/, $line;

    if (!$host) {
        # Service line.  Host names are compared for equality: a pattern
        # match would also hit hosts whose name merely contains this one
        # ("web1" vs. "web10") and would treat '.' as a wildcard.
        my $namehold = $values[1];

        # Acknowledged service ([13] is "1").
        if ($ack && ($values[13] eq "1")) {
            if ($ack =~ "ok") {
                $test = 0;
            }
            else {
                $type = "\U$ack";
            }
        }
        # Service on a host that is Down/Unreachable.
        elsif ($hdown && (grep { $_ eq $namehold } @hostdown)) {
            if ($hdown =~ "ok") {
                $test = 0;
            }
            else {
                $type = "\U$hdown";
                $diffout = "$values[1] is down";
            }
        }
        # Service in downtime ([27] is not "0") or on a host in downtime.
        elsif ($dt && (($values[27] ne "0") || (grep { $_ eq $namehold } @hostdowntime))) {
            if ($dt =~ "ok") {
                $test = 0;
            }
            else {
                $type = "\U$dt";
            }
        }
        elsif (exists $hostoktimes{$namehold}) {
            # If the state change time of the host is more recent than the last
            # service check, must wait until the next service check runs!
            if ($hostoktimes{$namehold} > $values[6]) {
                $test = 0;
            }
        }
    }
    else {
        # Host line: acknowledged ([5] is true) or in downtime ([17] is
        # not "0").
        if ($ack && $values[5]) {
            if ($ack =~ "ok") {
                $test = 0;
            }
            else {
                $type = "\U$ack";
            }
        }
        elsif ($dt && ($values[17] ne "0")) {
            if ($dt =~ "ok") {
                $test = 0;
            }
            else {
                $type = "\U$dt";
            }
        }
    }

    if ($details && $test) {
        if (!$host) {
            if ($diffout) {
                $much_output{$type} .= " $diffout;";
                $output{$type} .= "$diffout;";
                $much_ct{$type}++;
                $ct{$type}++;
            }
            else {
                # Stop appending once the respective -D limit is reached.
                if ($much_details && $much_ct{$type}<$much_details) {
                    $much_output{$type} .= " $values[2] on $values[1] $values[31];";
                    $much_ct{$type}++;
                }
                if ($ct{$type} < $details) {
                    $output{$type} .= " $values[2] on $values[1];";
                    $ct{$type}++;
                }
            }
        }
        else {
            # The text shows the state found on the line, while the entry is
            # filed under the (possibly overridden) $type.
            $much_output{$type} .= " $values[1] $state $values[20],";
            $much_ct{$type}++;
            $output{$type} .= " $values[1] HOST $state,";
            $ct{$type}++;
        }
    }

    if ($test) {
        $numprob{$type}++;
    }
}
546
547################################
548#
549# Version and Help Information
550#
551################################
552
# Print the plugin name and version followed by the warranty/licence notice.
sub printVersion {
    # print rather than printf: the text interpolates $0, and a '%' in the
    # script path would otherwise be interpreted as a format directive.
    print <<EndVersion;
$0 (nagios-plugins) 1.3
The nagios plugins come with ABSOLUTELY NO WARRANTY. You may redistribute
copies of the plugins under the terms of the GNU General Public License.
For more information about these matters, see the file named COPYING.
EndVersion
}
561
# Print the full usage text: synopsis, option list and notes on how service
# and host checks are evaluated.
sub printHelp {
    # print rather than printf: the text interpolates $0, and a '%' in the
    # script path would otherwise be interpreted as a format directive.
    # The service option is listed as -S, which is the short option the
    # command-line parser registers for --service.
    print <<EOF;

This plugin parses through the Nagios status log and will return a
Critical, Warning, or Unknown state depending on the number of
Critical, Warning, and/or Unknown services found in the log
(or Down/Unreachable hosts when matching against hosts)

Usage: $0 -s <Status File> | -d <Nagios Log Directory>
         [-w #[,#][,#]] [-c #[,#][,#]] [-u #[,#][,#]]
         [--service=<RegEx> | --servhost=<RegEx> | --pattern=<RegEx> |
          --host | --host=<RegEx>]
         [--ack[=string]] [--dt[=string]] [--hostdown[=string]]
         [-D #[,#]] [--ok] [-f <Log freshness in # minutes>]
       $0 --help
       $0 --version
NOTE: One of -s and -d must be specified

Options:
 -s, --status=FILE_NAME
    Location and name of status log (e.g. /usr/local/nagios/var/status.log)
 -d, --dir=DIRECTORY_NAME
    Directory that contains the nagios logs (e.g. /usr/local/nagios/var/)
 -w, --warning=INTEGER[,INTEGER][,INTEGER]
    #: Number of warnings to result in a WARNING state
    OR
    #,#: Warning,Criticals to result in a WARNING state
    OR
    #,#,#: Warning,Critical,Unknown to result in a WARNING state
    Default: -w=1
 -c, --critical=INTEGER[,INTEGER][,INTEGER]
    #: Number of criticals to result in a CRITICAL state
    OR
    #,#: Warning,Criticals to result in a CRITICAL state
    OR
    #,#,#: Warning,Critical,Unknown to result in a CRITICAL state
    Default: -c=1
 -u, --unknown=INTEGER[,INTEGER][,INTEGER]
    #: Number of unknowns to result in a UNKNOWN state
    OR
    #,#: Warning,Criticals to result in a UNKNOWN state
    OR
    #,#,#: Warning,Critical,Unknown to result in a UNKNOWN state
    Default: -u=1
 -S, --service[=REGEX]
    Only match services [that match the RegEx]
    (--service is default setting if no other matching arguments provided)
 --servhost=REGEX
    Only match services whose host match the RegEx
 -p, --pattern=REGEX
    Only parse for this regular expression (services only, not hosts)
 --host[=REGEX]
    Report on the state of hosts (whose name matches the RegEx if provided)
 -a, --ack[=ok|warning|critical|unknown|down|unreachable]
    Handle Acknowledged problems [--ack defaults to ok]
 --dt, --downtime[=ok|warning|critical|unknown|down|unreachable]
    Handle problems in scheduled downtime [--dt defaults to ok]
 --hd, --hostdown[=ok|warning|critical|unknown|down|unreachable]
    Handle services whose Host is down [--hd defaults to ok]
 -D, --details=INTEGER[,INTEGER]
    Amount of verbosity to output
    If # problems:
      <= 1st integer, return full details (each plugin's output)
      <= 2nd integer, return some details (list each service host pair)
      > 2nd integer, return the # of problems
 -f, --freshness=INTEGER
    Number of minutes old the log can be to make sure Nagios is running
    (Default = 30 minutes)
 --ok
    Return an OK exit code, regardless of number of problems found
 -h, --help
    Print detailed help screen
 -V, --version
    Print version information

For service checking (use --service and/or --servhost):
1. The values of warning, critical, and unknown default to 1, i.e.
$0 will return CRITICAL if there is at least 1 critical service,
WARNING if there is at least 1 warning service, and UNKNOWN if there is
at least one unknown service.

2. If a service's host is DOWN or UNREACHABLE, $0 will use the
value of --hostdown to determine how to treat the service. Without that
argument, $0 will count the service as OK.

3. If a service's host is OK, but the last host-state change occurred more
recently than the last service check, $0 will ignore that service
(want to wait until the service has been checked after a host has recovered
or you may get service alert for services that still need to be checked)

4. If the --dt, --ack, or --hd tags are used, $0 will use the value
of the arguments to determine how to handle services in downtime, acknowledged,
or with down hosts (default=OK). For service checks, --dt will also check
if the service's host is in a downtime.

For host checking (use --host):
1. Using the --host argument, $0 will look for DOWN and UNREACHABLE
hosts. If any are found, $0 will return a CRITICAL. You can provide
an REGEX for --host to only check hosts with matching host names.

2. If the --dt or --ack tags are used, $0 will use the value of the
--dt/--ack arguments to determine the state of the host (default is OK)

EOF
}