#!/usr/bin/perl -w
#
# check_softraid.pl - Nagios plugin to check software RAID status on Linux
#
#
# Copyright (C) 2003 Kenny Root
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA.
#
#
# Report bugs to: kenny@the-b.org, nagiosplug-help@lists.sf.net
#
# 30-05-2003 Version 0.2
#
#

use strict;
use Getopt::Long;
use lib "nagios/plugins";
use utils qw($TIMEOUT %ERRORS &print_revision &support);
use vars qw($PROGNAME $MDSTAT $VERSION $opt_h $opt_V $exitstatus $exitstring);

Getopt::Long::Configure('bundling');

$PROGNAME="check_softraid";
$VERSION="0.2";
$MDSTAT="/proc/mdstat";

# Paranoia: clear out our environment variables.
$ENV{'PATH'} = '';
$ENV{'BASH_ENV'} = '';
$ENV{'ENV'} = '';

# Set the exit status to 0 (OK) until proven otherwise.
$exitstatus = $ERRORS{'OK'};
$exitstring = '';

# Create a timeout in case we can't read the mdstat file.
$SIG{'ALRM'} = sub {
    print ("ERROR: Cannot read $MDSTAT\n");
    exit $ERRORS{'UNKNOWN'};
};
alarm($TIMEOUT);

GetOptions("V"       => \$opt_V,
           "version" => \$opt_V,
           "h"       => \$opt_h,
           "help"    => \$opt_h);

# Print out the program version if requested.
if ($opt_V) {
    print_revision($PROGNAME,$VERSION);
    exit $ERRORS{'OK'};
}

# Print usage information if requested.
if ($opt_h) {
    print_help();
    exit $ERRORS{'OK'};
}

# Don't be tricked into opening a symlink.  The -f test follows symbolic
# links, so the link itself has to be rejected explicitly with -l.
if (-l $MDSTAT or ! -f $MDSTAT) {
    print "$MDSTAT is not a regular file!\n";
    exit $ERRORS{'UNKNOWN'};
}

# Three-argument open on a lexical handle; a failure here is reported as
# UNKNOWN instead of being silently ignored.
my $mdstat_fh;
if (! open($mdstat_fh, '<', $MDSTAT)) {
    print "ERROR: Cannot open $MDSTAT: $!\n";
    exit $ERRORS{'UNKNOWN'};
}

# Initialize the hash that will hold all the information about the status
# of each RAID device.  Per device it records: status, personality, disks
# (all member names), diskbynumber (member name keyed by its bracketed
# number), faulted, totaldisks, operationaldisks, capacity and desync.
my %raid = ();

while (defined(my $line = readline($mdstat_fh))) {
    chomp $line;

    # Parse each line in the mdstat.  Each RAID device should have three
    # lines.  One for the personality and disk status, the second for
    # the RAID personality's status.  The third line is for things like
    # resync.
    #   md1 : active raid1 hda2[1] hdb2[2]
    #         82348723 blocks [2/2] [UU]
    #
    next unless $line =~ /^(md[0-9]*) : ([^ ]*) ([^ ]*)( \(read-only\))? ([^\[]*\[.*)/;

    my $device = $1;
    $raid{$device}{status} = $2;
    $raid{$device}{personality} = $3;
    my $diskstring = $5;

    # Split the disk status string into each disk and record them for
    # possible use while parsing the RAID status line.  Also record if
    # they're flagged as faulty disks.
    for my $eachdisk (split(/ /, $diskstring)) {
        # Each disk status would look like "hda2[1]", or "hda2[1](F)" for
        # a faulty disk.  Any run of single-letter flags is accepted so
        # that members carrying other markers, such as "(S)", are not
        # silently dropped.
        next unless $eachdisk =~ /^([^\[]*)\[([0-9]+)\]((?:\([A-Z]\))*)$/;
        my ($diskname, $disknumber, $flags) = ($1, $2, $3);

        push @{$raid{$device}{disks}}, $diskname;

        # Remember which disk carries which number.  Spares, "(S)", are
        # left out because they presumably hold no position in the
        # status string.
        if ($flags !~ /\(S\)/) {
            $raid{$device}{diskbynumber}{$disknumber} = $diskname;
        }

        # Record if this device is marked as faulted.
        if ($flags =~ /\(F\)/) {
            push @{$raid{$device}{faulted}}, $diskname;
        }
    }

    # If the RAID device is active, the RAID personality should
    # have a status message as the second line.  Let's parse that.
    next unless $raid{$device}{status} eq "active";

    my $syncstatusline = readline($mdstat_fh);
    last unless defined $syncstatusline;    # file ended after the mdN line
    chomp $syncstatusline;

    # RAID1 and RAID5 personalities have a status line that
    # tells you whether you're out of sync or not.  Parse that.
    next unless $syncstatusline =~ / \[([0-9]+)\/([0-9]+)\] \[([^\]]*)\]/;
    my ($totaldisks, $operationaldisks, $diskstatusstring) = ($1, $2, $3);

    # Record the number of total disks, number of disks operational,
    # and compute the current operational capacity.  The guard avoids
    # a division by zero on a "[0/..]" line.
    $raid{$device}{totaldisks} = $totaldisks;
    $raid{$device}{operationaldisks} = $operationaldisks;
    $raid{$device}{capacity} =
        $totaldisks > 0 ? ($operationaldisks * 100) / $totaldisks : 0;

    # Each operational disk is listed as "U" while each out-of-sync
    # disk is listed as "_".  A position is matched to the disk whose
    # bracketed number equals it, not to the order in which disks were
    # listed.  This is a best-effort mapping: when no listed disk
    # carries that number (for example, the disk was removed), the
    # position itself is reported.
    my @diskstatuses = split //, $diskstatusstring;
    for my $slot (0 .. $#diskstatuses) {
        next unless $diskstatuses[$slot] eq "_";
        my $desyncname = $raid{$device}{diskbynumber}{$slot};
        $desyncname = "slot $slot" unless defined $desyncname;
        push @{$raid{$device}{desync}}, $desyncname;
    }
}

close($mdstat_fh);

# Build the status message.  Faulted disks make the result CRITICAL;
# desynced disks make it WARNING unless something is already CRITICAL.
foreach my $device (sort keys %raid) {
    if ($raid{$device}{status} eq "active") {
        if (exists $raid{$device}{faulted}) {
            $exitstring = $exitstring . "$device has faulted disks " .
                join(" ", @{$raid{$device}{faulted}}) . ". ";
            $exitstatus = $ERRORS{'CRITICAL'};
        } elsif (exists $raid{$device}{desync}) {
            $exitstring = $exitstring . "$device has desynced disks " .
                join(" ", @{$raid{$device}{desync}}) . ". ";
            $exitstatus = $ERRORS{'WARNING'}
                if ($exitstatus != $ERRORS{'CRITICAL'});
        }
    }
}

if ($exitstring eq "") {
    $exitstring = "All RAID disks OK.";
}

print $exitstring . "\n";
exit $exitstatus;

# Print the usage text followed by the plugin revision.
# print is used instead of printf so that the interpolated text is never
# treated as a format string.
sub print_help {
    print "$PROGNAME plugin for Nagios monitors Linux's software RAID \n";
    print "status for the local machine.\n";
    print "\nUsage:\n";
    print "\t-V (--version)\tPlugin version\n";
    print "\t-h (--help)\tThis usage help.\n\n";
    print_revision($PROGNAME, $VERSION);
}