diff options
| author | Holger Weiss <holger@zedat.fu-berlin.de> | 2013-09-02 13:16:24 +0200 |
|---|---|---|
| committer | Holger Weiss <holger@zedat.fu-berlin.de> | 2013-09-02 13:16:24 +0200 |
| commit | b15adb7762b6caaecaa83637abfcf5fdb4802092 (patch) | |
| tree | 64eddbe2aa1a7f98a140be0f7973f05d7a781ae0 /contrib/check_remote_nagios_status.pl | |
| parent | c4d5882b9e1d07c7b61091062b7d085fa5f00284 (diff) | |
| download | monitoring-plugins-b15adb7762b6caaecaa83637abfcf5fdb4802092.tar.gz | |
Remove "contrib" plugins
These days, sites such as "Nagios Exchange" are a much better place for
publishing plugins not maintained by the Plugins Development Team.
Diffstat (limited to 'contrib/check_remote_nagios_status.pl')
| -rw-r--r-- | contrib/check_remote_nagios_status.pl | 666 |
1 files changed, 0 insertions, 666 deletions
diff --git a/contrib/check_remote_nagios_status.pl b/contrib/check_remote_nagios_status.pl deleted file mode 100644 index dc99705e..00000000 --- a/contrib/check_remote_nagios_status.pl +++ /dev/null | |||
| @@ -1,666 +0,0 @@ | |||
| 1 | #!/usr/bin/perl -w | ||
| 2 | |||
| 3 | # check_status.pl Nagios Plugin - Version 1.3 | ||
| 4 | # Last Updated: 1/9/2003 | ||
| 5 | # | ||
| 6 | # Report any bugs/questions to Russell Scibetti at russell@quadrix.com | ||
| 7 | # | ||
| 8 | # check_status Change Log: | ||
| 9 | # | ||
| 10 | # To do for 1.4 | ||
| 11 | # - Better help and documentation (separate doc?) | ||
| 12 | # - Take argument (patterns to match) from a separate spec file | ||
| 13 | # | ||
| 14 | # New Addition to 1.3 | ||
| 15 | # - Added ChangeLog information and updated --help output | ||
| 16 | # - hostdown (hd) argument for how a service check should respond | ||
| 17 | # when its host is Down/Unreachable | ||
| 18 | # (--hostdown="ok|warning|critical|unknown") | ||
| 19 | # - Changed name from check_state to check_status | ||
| 20 | # - Set hostdown to default to OK when the argument isn't specified | ||
| 21 | # - Number of Hosts checked is now output in OK result | ||
| 22 | # | ||
| 23 | # Version 1.2 additions: | ||
| 24 | # | ||
| 25 | # - Added ability to handle ack'd and downtimed services differently | ||
| 26 | # depending on argument provided | ||
| 27 | # (--ack="ok|warning|critical|unknown|down|unreachable" | ||
| 28 | # --dt="ok|warning|critical|unknown|down|unreachable") | ||
| 29 | # | ||
| 30 | # Version 1.1 additions: | ||
| 31 | # | ||
| 32 | # - Added --host=<regex>, --servhost=<regex> to allow for specific field | ||
| 33 | # matching (host for matching hostname in host checks, servhost for | ||
| 34 | # matching the hostname in service checks, service for matching the | ||
| 35 | # service name in service checks) | ||
| 36 | # - Output the number of OK services for an OK output | ||
| 37 | # | ||
| 38 | # Version 1.0 features: | ||
| 39 | # | ||
| 40 | # - Freshness check of status.log (timestamp) | ||
| 41 | # - Match service or host checks | ||
| 42 | # - Can ignore acknowledged or downtimes services/hosts (--ack, --dt) | ||
| 43 | # - Can output different levels of detail dependent on # of problems | ||
| 44 | # - Can check for number of critical, warning, or unknowns | ||
| 45 | # | ||
| 46 | ############################################################# | ||
| 47 | |||
| 48 | use Getopt::Long; | ||
| 49 | use File::stat; | ||
| 50 | |||
# Permit clustering of single-letter switches (e.g. -Vv).
Getopt::Long::Configure('bundling');

# Bind every command-line switch to its package-global option variable.
# Each entry lists the short and the long spelling of one option as aliases.
#   "=s" : a string value is mandatory
#   ":s" : the string value is optional
GetOptions(
    "V|version"     => \$version,
    "h|help"        => \$help,
    "v|verbose"     => \$verbose,
    "w|warning=s"   => \$warning,
    "c|critical=s"  => \$critical,
    "u|unknown=s"   => \$unknown,
    "p|pattern=s"   => \$pattern,
    "S|service:s"   => \$service,
    "s|status=s"    => \$status,
    "d|dir=s"       => \$dir,
    "D|details=s"   => \$details,
    "H|host:s"      => \$host,
    "f|freshness=s" => \$freshness,
    "servhost=s"    => \$servhost,
    "a|ack:s"       => \$ack,
    "dt|downtime:s" => \$dt,
    "hd|hostdown:s" => \$hdown,
    "ok"            => \$ok,
);
| 72 | |||
# Exit codes handed to exitcheck().
my ($OK, $WARNING, $CRITICAL, $UNKNOWN) = (0, 1, 2, 3);

# State names, used as keys into the problem-count and output hashes.
my ($crit, $warn, $unk) = ("CRITICAL", "WARNING", "UNKNOWN");
my ($down, $unreach)    = ("DOWN", "UNREACHABLE");
| 84 | |||
# --help: show the version banner and the full usage text, then stop.
if ($help) {
    printVersion();
    printHelp();
    exitcheck($UNKNOWN);
}

# --version: show only the version banner, then stop.
if ($version) {
    printVersion();
    exitcheck($UNKNOWN);
}

# Work out which status file to read.  An explicit -s wins; otherwise the
# file is "status.log" inside the -d directory.  With neither option there
# is nothing to parse, so print a short usage hint and stop.
unless ($status) {
    unless ($dir) {
        print "Usage: $0 -s <status file> | -d <Nagios log dir>\n";
        print "Use the --help option for full list of arguments\n";
        exitcheck($UNKNOWN);
    }

    # Only add a separating slash when $dir does not already end in one
    # (preceded by a non-slash character).
    my $joiner = ($dir =~ m#[^/]/$#) ? "" : "/";
    $status = $dir . $joiner . "status.log";
}
| 112 | |||
# --host given without a value (or with an empty/false one): match any host.
if (defined $host && !$host) {
    $host = "[^\\s]*";
}

# When no host check was requested this is a service check; selectors that
# were not supplied default to "match anything".
if (!$host && !$servhost) {
    $servhost = "[^\\s]*";
}

if (!$host && !$service) {
    $service = "[^\\s]*";
}

# --ack, --dt and --hostdown share the same rules: when the option is present
# without a value it defaults to "ok"; otherwise the value must be exactly
# one of the known state names.  The pattern is anchored on both ends so that
# strings merely containing a state name (e.g. "notok") are rejected.
my $valid_state_name = qr/\A(?:ok|critical|warning|unknown|down|unreachable)\z/;

for my $state_option ([\$ack, "ack"], [\$dt, "dt"], [\$hdown, "hostdown"]) {
    my ($value_ref, $label) = @$state_option;

    next unless defined $$value_ref;

    if (!$$value_ref) {
        $$value_ref = "ok";
    }
    elsif ($$value_ref !~ $valid_state_name) {
        print "Invalid value for $label\n";
        exitcheck($UNKNOWN);
    }
}
| 156 | |||
# Upper problem count for the most verbose output level (set from -D).
my $much_details = 0;

# Alternations used as regexes to recognise non-OK service / host states.
my $ServiceNotOK = "CRITICAL|WARNING|UNKNOWN";
my $HostNotOK    = "DOWN|UNREACHABLE";

# Number of problems counted per state.
my %numprob = (
    WARNING     => 0,
    CRITICAL    => 0,
    UNKNOWN     => 0,
    DOWN        => 0,
    UNREACHABLE => 0,
);

# Single-number thresholds (used when -c / -w / -u carry one value).
my ($CritOnly, $WarnOnly, $UnkOnly) = (0, 0, 0);

# Scratch arrays and per-state thresholds for the comma-separated form of
# -w / -c / -u.
my (@wlev, @clev, @ulev);
my %warnlevel = (WARNING => 0, CRITICAL => 0, UNKNOWN => 0);
my %critlevel = (WARNING => 0, CRITICAL => 0, UNKNOWN => 0);
my %unklevel  = (WARNING => 0, CRITICAL => 0, UNKNOWN => 0);
my %hostlevel = (DOWN => 0, UNREACHABLE => 0);

# Hosts found in scheduled downtime, and how many of them.
my @hostdowntime;
my $numdowntime = 0;

# Hosts found in a Down/Unreachable state, and how many of them.
my @hostdown;
my $numdown = 0;

# Per host: time of its state change to OK.
my %hostoktimes;

# Number of status lines that matched the selection criteria.
my $nummatch = 0;
| 189 | |||
# Decode -w, -c and -u.  All three follow the same scheme:
#   option absent (or false) -> single threshold of 1
#   "N"                      -> single threshold N
#   "W,C[,U]"                -> per-state thresholds stored in the level hash
for my $threshold (
    [ $warning,  \@wlev, \%warnlevel, \$WarnOnly ],
    [ $critical, \@clev, \%critlevel, \$CritOnly ],
    [ $unknown,  \@ulev, \%unklevel,  \$UnkOnly  ],
) {
    my ($argument, $parts, $levels, $single) = @$threshold;

    if (!$argument) {
        $$single = 1;
        next;
    }

    if ($argument !~ /,/) {
        $$single = $argument;
        next;
    }

    @$parts = split /,/, $argument;
    $levels->{"WARNING"}  = $parts->[0];
    $levels->{"CRITICAL"} = $parts->[1];
    # The third (unknown) level is optional.
    $levels->{"UNKNOWN"}  = $parts->[2] if $parts->[2];
}
| 240 | |||
| 241 | |||
# Maximum accepted age of the status file, in seconds.  -f is given in
# minutes and falls back to 30 when absent.
$freshness = ($freshness ? $freshness : 30) * 60;

# How many problems have been written to the short / verbose output so far.
my %ct = (
    CRITICAL => 0, WARNING => 0, UNKNOWN => 0, DOWN => 0, UNREACHABLE => 0,
);
my %much_ct = (
    CRITICAL => 0, WARNING => 0, UNKNOWN => 0, DOWN => 0, UNREACHABLE => 0,
);

# Accumulated short / verbose problem descriptions per state.
my %output = (
    CRITICAL => "", WARNING => "", UNKNOWN => "", DOWN => "", UNREACHABLE => "",
);
my %much_output = (
    CRITICAL => "", WARNING => "", UNKNOWN => "", DOWN => "", UNREACHABLE => "",
);

# -D "A,B": A becomes the limit for full details, B the limit for some
# details.  A single number is left in $details untouched.
if ($details && $details =~ /,/) {
    ($much_details, $details) = (split /,/, $details)[0, 1];
}
| 262 | |||
# Open the status file read-only.  The three-argument form keeps characters
# such as ">" or "|" in a user-supplied path from being taken as an open
# mode or a command; the system error is included in the failure message.
open(my $status_fh, "<", $status)
    or die "Cannot open status file $status: $!";

# Freshness check: a status file that has not been modified within the
# allowed window is reported as CRITICAL.
my $file_stat = stat($status_fh)
    or die "Cannot stat status file $status: $!";
my $curr_time = time;
my $file_time = $file_stat->mtime;

if ($curr_time - $file_time > $freshness) {
    print "State CRITICAL - Status file is stale!!!\n";
    exitcheck($CRITICAL);
}

while (my $line = <$status_fh>) {
    chomp $line;

    if ($line =~ /^[^\s]+[\s]+HOST;/) {
        my @hdata = split /;/, $line;

        # Host check requested and this host matches: count it and record
        # it as a problem when its state is Down/Unreachable.
        if ($host && $hdata[1] =~ /$host/) {
            $nummatch++;
            if ($hdata[2] =~ /$HostNotOK/) {
                addproblem($line, $hdata[2]);
            }
        }

        # Otherwise only remember host information that addproblem() later
        # consults when judging services.
        else {
            if ($hdata[2] =~ /$HostNotOK/) {
                push @hostdown, $hdata[1];
                $numdown++;
            }
            else {
                $hostoktimes{$hdata[1]} = $hdata[4];
            }
            # Field 17 different from "0": the host is remembered as being
            # in downtime.
            if ($hdata[17] ne "0") {
                push @hostdowntime, $hdata[1];
                $numdowntime++;
            }
        }
    }
    elsif (!$host && $line =~ /^[^\s]+[\s]+SERVICE;/) {
        my @servdata = split /;/, $line;

        # A service is selected either by --pattern against the whole line
        # or by the host-name and service-name selectors together.
        if (($pattern && ($line =~ /$pattern/)) ||
            (($servdata[1] =~ /$servhost/) && ($servdata[2] =~ /$service/))) {
            $nummatch++;
            # Only HARD states count as problems.
            if (($servdata[5] eq "HARD") && ($servdata[3] =~ /$ServiceNotOK/)) {
                addproblem($line, $servdata[3]);
            }
        }
    }
}

close($status_fh);
| 314 | |||
# Nothing matched the selection criteria: the result is UNKNOWN.
if ($nummatch == 0) {
    print "Nothing Matches your criteria!\n";
    exitcheck($UNKNOWN);
}

# Total number of problems for the kind of check being run.
my $total = $host
    ? $numprob{"DOWN"} + $numprob{"UNREACHABLE"}
    : $numprob{"WARNING"} + $numprob{"CRITICAL"} + $numprob{"UNKNOWN"};

my $numok = $nummatch - $total;

# True when at least one per-state limit in the given level hash is reached.
# A limit that is missing or not greater than zero is treated as "not
# configured" and skipped; comparing against it would always succeed
# (count >= 0) and e.g. "-c 5,2" would then report CRITICAL unconditionally.
my $level_reached = sub {
    my ($levels) = @_;
    for my $state ("WARNING", "CRITICAL", "UNKNOWN") {
        my $limit = $levels->{$state};
        next unless defined $limit && $limit > 0;
        return 1 if $numprob{$state} >= $limit;
    }
    return 0;
};

# Host state check: any Down/Unreachable host makes the result CRITICAL.
if ($host) {
    if ($numprob{"DOWN"} > 0 || $numprob{"UNREACHABLE"} > 0) {
        if ($details && ($total <= $details)) {
            print "State CRITICAL - $total Host Problems: $output{$down} $output{$unreach}\n";
            exitcheck($CRITICAL);
        }
        else {
            print "State CRITICAL - $numprob{$down} Hosts Down, $numprob{$unreach} Hosts Unreachable\n";
            exitcheck($CRITICAL);
        }
    }
    else {
        print "State OK - $numok Hosts Up, $total Problems\n";
        exitcheck($OK);
    }
}

# Critical defined only in terms of the number of criticals...
elsif ($CritOnly && ($numprob{"CRITICAL"} >= $CritOnly)) {
    countAndPrint($crit, $numprob{$crit}, 0);
    exitcheck($CRITICAL);
}

# Critical in terms of per-state limits...
elsif (!$CritOnly && $level_reached->(\%critlevel)) {
    countAndPrint($crit, $total, 1);
    exitcheck($CRITICAL);
}

# Warning in terms of the number of warnings only...
elsif ($WarnOnly && ($numprob{"WARNING"} >= $WarnOnly)) {
    countAndPrint($warn, $numprob{$warn}, 0);
    exitcheck($WARNING);
}

# Warning in terms of per-state limits...
elsif (!$WarnOnly && $level_reached->(\%warnlevel)) {
    countAndPrint($warn, $total, 1);
    exitcheck($WARNING);
}

# Unknown in terms of the number of unknowns only...
elsif ($UnkOnly && ($numprob{"UNKNOWN"} >= $UnkOnly)) {
    countAndPrint($unk, $numprob{$unk}, 0);
    exitcheck($UNKNOWN);
}

# Unknown in terms of per-state limits...
elsif (!$UnkOnly && $level_reached->(\%unklevel)) {
    countAndPrint($unk, $total, 1);
    exitcheck($UNKNOWN);
}

# Everything is OK!
else {
    print "State OK - $numok OK, $total problems\n";
    exitcheck($OK);
}
| 395 | |||
| 396 | |||
| 397 | |||
| 398 | ############################ | ||
| 399 | # Subroutines | ||
| 400 | ############################ | ||
| 401 | |||
# Terminate the plugin.  The given code becomes the exit status unless the
# --ok switch was supplied, in which case the exit status is always 0.
sub exitcheck {
    my ($code) = @_;
    exit($ok ? 0 : $code);
}
| 411 | |||
# Print the one-line result for a service check.
#
# Arguments:
#   state    - state name placed in the "State ..." prefix
#   count    - number of problems the message talks about
#   alltypes - true: report critical, warning and unknown problems together;
#              false: report only problems of the given state
#
# The amount of detail depends on --details: up to $much_details problems
# the verbose descriptions are printed, up to $details the short ones, and
# beyond that (or without --details) only numbers.
sub countAndPrint {
    my ($state, $count, $alltypes) = @_;
    my $message;

    if (!$details) {
        $message = "$count problems";
    }
    elsif ($count <= $much_details) {
        $message = $alltypes
            ? "$count problems: $much_output{$crit} $much_output{$warn} $much_output{$unk}"
            : "$count \L$state\E: $much_output{$state}";
    }
    elsif ($count <= $details) {
        $message = $alltypes
            ? "$count problems: $output{$crit} $output{$warn} $output{$unk}"
            : "$count \L$state\E: $output{$state}";
    }
    else {
        $message = $alltypes
            ? "$numprob{$crit} critical, $numprob{$warn} warning, $numprob{$unk} unknown"
            : "$count \L$state\E";
    }

    print "State $state - $message\n";
}
| 451 | |||
| 452 | |||
# Record one problem line found in the status log.
#
# Arguments:
#   $_[0] - the raw, semicolon-separated status log line
#   $_[1] - the state reported on that line (e.g. CRITICAL or DOWN)
#
# Depending on the --ack, --hostdown and --dt options the problem is either
# dropped or counted under a different state.  Problems that survive are
# counted in %numprob and, when --details is active, described in
# %output / %much_output.
sub addproblem {
    my ($line, $state) = @_;

    my $test    = 1;         # cleared when the problem is to be ignored
    my $type    = $state;    # state the problem is finally counted under
    my $diffout = "";        # replacement description when the host is down

    my @values = split /;/, $line;

    if (!$host) {
        # Service problem.
        my $namehold = $values[1];

        # Field 13 equal to "1" triggers --ack handling (presumably the
        # "acknowledged" flag of the service line -- confirm against the
        # status log format).
        if ($ack && ($values[13] eq "1")) {
            if ($ack =~ "ok") {
                $test = 0;
            }
            else {
                $type = "\U$ack";
            }
        }
        # The service's host is in the Down/Unreachable list.  Names are
        # compared for equality: a regex match would treat "web1" as down
        # whenever "web10" is, and would misread names containing regex
        # metacharacters.
        elsif ($hdown && scalar(grep { $_ eq $namehold } @hostdown)) {
            if ($hdown =~ "ok") {
                $test = 0;
            }
            else {
                $type = "\U$hdown";
                $diffout = "$values[1] is down";
            }
        }
        # The service itself (field 27 not "0") or its host is in downtime.
        elsif ($dt && (($values[27] ne "0") ||
                       scalar(grep { $_ eq $namehold } @hostdowntime))) {
            if ($dt =~ "ok") {
                $test = 0;
            }
            else {
                $type = "\U$dt";
            }
        }
        elsif (exists $hostoktimes{$namehold}) {
            # If the state change time of the host is more recent than the
            # last service check, wait until the next service check runs.
            if ($hostoktimes{$namehold} > $values[6]) {
                $test = 0;
            }
        }
    }
    else {
        # Host problem: a true field 5 triggers --ack handling, field 17
        # different from "0" triggers --dt handling.
        if ($ack && $values[5]) {
            if ($ack =~ "ok") {
                $test = 0;
            }
            else {
                $type = "\U$ack";
            }
        }
        elsif ($dt && ($values[17] ne "0")) {
            if ($dt =~ "ok") {
                $test = 0;
            }
            else {
                $type = "\U$dt";
            }
        }
    }

    if ($details && $test) {
        if (!$host) {
            if ($diffout) {
                $much_output{$type} .= " $diffout;";
                $output{$type} .= "$diffout;";
                $much_ct{$type}++;
                $ct{$type}++;
            }
            else {
                # Stop adding descriptions once the respective limit is hit.
                if ($much_details && $much_ct{$type} < $much_details) {
                    $much_output{$type} .= " $values[2] on $values[1] $values[31];";
                    $much_ct{$type}++;
                }
                if ($ct{$type} < $details) {
                    $output{$type} .= " $values[2] on $values[1];";
                    $ct{$type}++;
                }
            }
        }
        else {
            $much_output{$type} .= " $values[1] $state $values[20],";
            # Keyed by the state; the literal key "type" used previously
            # counted every host under one bogus entry.
            $much_ct{$type}++;
            $output{$type} .= " $values[1] HOST $state,";
            $ct{$type}++;
        }
    }

    $numprob{$type}++ if $test;
    return;
}
| 546 | |||
| 547 | ################################ | ||
| 548 | # | ||
| 549 | # Version and Help Information | ||
| 550 | # | ||
| 551 | ################################ | ||
| 552 | |||
# Print the version banner and license notice to standard output.
# The text interpolates $0, so it is emitted with print rather than printf:
# a "%" in the script path must not be interpreted as a format directive.
sub printVersion {
    print <<EndVersion;
$0 (nagios-plugins) 1.3
The nagios plugins come with ABSOLUTELY NO WARRANTY. You may redistribute
copies of the plugins under the terms of the GNU General Public License.
For more information about these matters, see the file named COPYING.
EndVersion
}
| 561 | |||
# Print the full usage and option description to standard output.
# The text interpolates $0, so it is emitted with print rather than printf
# (a "%" in the script path must stay literal).  The short switches shown
# for --service (-S) and --host (-H) are the ones the option table defines.
sub printHelp {
    print <<EOF;

This plugin parses through the Nagios status log and will return a
Critical, Warning, or Unknown state depending on the number of
Critical, Warning, and/or Unknown services found in the log
(or Down/Unreachable hosts when matching against hosts)

Usage: $0 -s <Status File> | -d <Nagios Log Directory>
[-w #[,#][,#]] [-c #[,#][,#]] [-u #[,#][,#]]
[--service=<RegEx> | --servhost=<RegEx> | --pattern=<RegEx> |
--host | --host=<RegEx>]
[--ack[=string]] [--dt[=string]] [--hostdown[=string]]
[-D #[,#]] [--ok] [-f <Log freshness in # minutes>]
$0 --help
$0 --version
NOTE: One of -s and -d must be specified

Options:
-s, --status=FILE_NAME
Location and name of status log (e.g. /usr/local/nagios/var/status.log)
-d, --dir=DIRECTORY_NAME
Directory that contains the nagios logs (e.g. /usr/local/nagios/var/)
-w, --warning=INTEGER[,INTEGER][,INTEGER]
#: Number of warnings to result in a WARNING state
OR
#,#: Warning,Criticals to result in a WARNING state
OR
#,#,#: Warning,Critical,Unknown to result in a WARNING state
Default: -w=1
-c, --critical=INTEGER[,INTEGER][,INTEGER]
#: Number of criticals to result in a CRITICAL state
OR
#,#: Warning,Criticals to result in a CRITICAL state
OR
#,#,#: Warning,Critical,Unknown to result in a CRITICAL state
Default: -c=1
-u, --unknown=INTEGER[,INTEGER][,INTEGER]
#: Number of unknowns to result in a UNKNOWN state
OR
#,#: Warning,Criticals to result in a UNKNOWN state
OR
#,#,#: Warning,Critical,Unknown to result in a UNKNOWN state
Default: -u=1
-S, --service[=REGEX]
Only match services [that match the RegEx]
(--service is default setting if no other matching arguments provided)
--servhost=REGEX
Only match services whose host match the RegEx
-p, --pattern=REGEX
Only parse for this regular expression (services only, not hosts)
-H, --host[=REGEX]
Report on the state of hosts (whose name matches the RegEx if provided)
-a, --ack[=ok|warning|critical|unknown|down|unreachable]
Handle Acknowledged problems [--ack defaults to ok]
--dt, --downtime[=ok|warning|critical|unknown|down|unreachable]
Handle problems in scheduled downtime [--dt defaults to ok]
--hd, --hostdown[=ok|warning|critical|unknown|down|unreachable]
Handle services whose Host is down [--hd defaults to ok]
-D, --details=INTEGER[,INTEGER]
Amount of verbosity to output
If # problems:
<= 1st integer, return full details (each plugin's output)
<= 2nd integer, return some details (list each service host pair)
> 2nd integer, return the # of problems
-f, --freshness=INTEGER
Number of minutes old the log can be to make sure Nagios is running
(Default = 30 minutes)
--ok
Return an OK exit code, regardless of number of problems found
-h, --help
Print detailed help screen
-V, --version
Print version information

For service checking (use --service and/or --servhost):
1. The values of warning, critical, and unknown default to 1, i.e.
$0 will return CRITICAL if there is at least 1 critical service,
WARNING if there is at least 1 warning service, and UNKNOWN if there is
at least one unknown service.

2. If a service's host is DOWN or UNREACHABLE, $0 will use the
value of --hostdown to determine how to treat the service. Without that
argument, $0 will count the service as OK.

3. If a service's host is OK, but the last host-state change occurred more
recently than the last service check, $0 will ignore that service
(want to wait until the service has been checked after a host has recovered
or you may get service alert for services that still need to be checked)

4. If the --dt, --ack, or --hd tags are used, $0 will use the value
of the arguments to determine how to handle services in downtime, acknowledged,
or with down hosts (default=OK). For service checks, --dt will also check
if the service's host is in a downtime.

For host checking (use --host):
1. Using the --host argument, $0 will look for DOWN and UNREACHABLE
hosts. If any are found, $0 will return a CRITICAL. You can provide
an REGEX for --host to only check hosts with matching host names.

2. If the --dt or --ack tags are used, $0 will use the value of the
--dt/--ack arguments to determine the state of the host (default is OK)

EOF
}
