Add megaraid script and conf
This commit is contained in:
parent
bf617f451e
commit
bcc92a6193
|
@ -0,0 +1,12 @@
|
|||
# Description: MegaRaid Status
|
||||
# Type: Agent or Agent (active)
|
||||
# Key: raid.mega.status
|
||||
# Type of Information: Character
|
||||
# Show Value: As is
|
||||
|
||||
# The value reported is like:
|
||||
# State: OK: 0:0:RAID-1:2 drives:68GB:Optimal 0:1:RAID-5:4 drives:837GB:Optimal Drives:7
|
||||
|
||||
# You can add a simple trigger on this check like:
|
||||
# { hostname:raid.mega.status.str( OK ) }=0
|
||||
UserParameter=raid.mega.status,/usr/bin/sudo /var/lib/zabbix/bin/check_raid_megaraid_sudo
|
|
@ -0,0 +1,227 @@
|
|||
#!/usr/bin/perl -w
|
||||
|
||||
# check_megaraid_sas Nagios plugin
|
||||
# Copyright (C) 2007 Jonathan Delgado, delgado@molbio.mgh.harvard.edu
|
||||
#
|
||||
# This program is free software; you can redistribute it and/or
|
||||
# modify it under the terms of the GNU General Public License
|
||||
# as published by the Free Software Foundation; either version 2
|
||||
# of the License, or (at your option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful,
|
||||
# but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
# GNU General Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License
|
||||
# along with this program; if not, write to the Free Software
|
||||
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
#
|
||||
#
|
||||
# Nagios plugin to monitor the status of volumes attached to a LSI Megaraid SAS
|
||||
# controller, such as the Dell PERC5/i and PERC5/e. If you have any hotspares
|
||||
# attached to the controller, you can specify the number you should expect to
|
||||
# find with the '-s' flag.
|
||||
#
|
||||
# The paths for the Nagios plugins lib and MegaCli may need to be changed.
|
||||
#
|
||||
# $Author: delgado $
|
||||
# $Revision: #3 $ $Date: 2007/06/07 $
|
||||
|
||||
# Slightly modified by Daniel B. for zabbix
|
||||
# 23 Apr 2009
|
||||
|
||||
use strict;
|
||||
use Getopt::Std;
|
||||
|
||||
# Command-line switches populated by getopts(): -h is a flag, while
# -s, -o, -p and -m each take a value.
our($opt_h, $opt_s, $opt_o, $opt_m, $opt_p);

getopts('hs:o:p:m:');

# -h: print the usage summary and leave without running any check.
if ( $opt_h ) {
    # The synopsis lists every value-taking switch, including -p.
    print "Usage: $0 [-s number] [-m number] [-p number] [-o number]\n";
    print " -s is how many hotspares are attached to the controller\n";
    print " -m is the number of media errors to ignore\n";
    print " -p is the predictive error count to ignore\n";
    print " -o is the number of other disk errors to ignore\n";
    exit;
}
|
||||
|
||||
|
||||
# Path of the MegaCli binary: the 64-bit build is used when it is present
# and executable, otherwise the plain MegaCli path is used.
my $megacli = '/opt/MegaRAID/MegaCli/MegaCli';
if ( -x '/opt/MegaRAID/MegaCli/MegaCli64' ) {
    $megacli = '/opt/MegaRAID/MegaCli/MegaCli64';
}

## Return codes for Nagios
my %ERRORS = (
    'OK'        => 0,
    'WARNING'   => 1,
    'CRITICAL'  => 2,
    'UNKNOWN'   => 3,
    'DEPENDENT' => 4,
);

# Number of controllers; filled in once MegaCli has been queried.
my $adapters;

# Hotspares: expected (from -s) and actually found.
my ($hotspares, $hotsparecount) = (0, 0);

# Physical disks: how many are in a bad state, and how many were seen.
my ($pdbad, $pdcount) = (0, 0);

# Error counters with their matching "ignore up to" thresholds.
my ($mediaerrors, $mediaallow) = (0, 0);
my ($prederrors,  $predallow)  = (0, 0);
my ($othererrors, $otherallow) = (0, 0);

# Text reported after the status word, and the overall status itself.
my $result = '';
my $status = 'OK';
|
||||
|
||||
# max_state($current, $compare)
#
# Combine the status accumulated so far with a newly observed one and
# return the status to keep.
#
#   $current - status held so far   ('OK', 'WARNING', 'CRITICAL', 'UNKNOWN', ...)
#   $compare - status just observed (same vocabulary)
#
# Rules, in order:
#   * 'CRITICAL' on either side always yields 'CRITICAL';
#   * a 'WARNING' observation replaces anything that is not 'CRITICAL';
#   * an 'UNKNOWN' observation only replaces 'OK';
#   * any other observation (including 'OK') leaves $current untouched.
#
# Declared without a prototype: prototypes change how calls are parsed
# rather than validating arguments, and every call passes two scalars.
sub max_state {
    my ($current, $compare) = @_;

    return 'CRITICAL' if $compare eq 'CRITICAL' || $current eq 'CRITICAL';
    return 'WARNING'  if $compare eq 'WARNING';
    return 'UNKNOWN'  if $compare eq 'UNKNOWN' && $current eq 'OK';

    return $current;
}
|
||||
|
||||
|
||||
# Copy the thresholds given on the command line over the built-in defaults.
# A switch that is absent (or whose value is false, e.g. 0) leaves the
# corresponding default in place.
$hotspares  = $opt_s if $opt_s;    # -s: expected number of hotspares
$mediaallow = $opt_m if $opt_m;    # -m: media errors to ignore
$predallow  = $opt_p if $opt_p;    # -p: predictive failures to ignore
$otherallow = $opt_o if $opt_o;    # -o: other disk errors to ignore
|
||||
|
||||
# Get the number of RAID controllers we have.
# The list form of the pipe open runs MegaCli directly, without a shell.
open(my $adpcount_fh, '-|', $megacli, '-adpCount', '-NoLog')
    or die "error: Could not execute MegaCli -adpCount";

while ( my $line = <$adpcount_fh> ) {
    if ( $line =~ m/Controller Count:\s*(\d+)/ ) {
        $adapters = $1;
        last;
    }
}
close $adpcount_fh;

# Without a controller count nothing below can be inspected, and the script
# would otherwise fall through and print "OK" with zero drives.  Report
# UNKNOWN instead so the check never claims health it did not verify.
if ( !defined $adapters ) {
    print STDOUT "UNKNOWN: could not read the controller count from $megacli -adpCount\n";
    exit $ERRORS{'UNKNOWN'};
}
|
||||
|
||||
# Walk every adapter.  For each one:
#   1. ask how many logical drives it has,
#   2. append a "<adp>:<ld>:RAID-<level>:<n> drives:<size><unit>:<state> "
#      summary per logical drive to $result,
#   3. scan the physical-disk list to update $pdcount, $pdbad,
#      $hotsparecount and the media / predictive / other error totals.
# $status is raised to CRITICAL for a non-Optimal logical drive or a bad
# physical disk, and to UNKNOWN (via max_state) when MegaCli output that
# is needed for the verdict could not be parsed.
ADAPTER: for ( my $adp = 0; $adp < $adapters; $adp++ ) {
    # Get the number of logical drives on this adapter
    open(my $ldgetnum_fh, '-|', $megacli, '-LdGetNum', "-a$adp", '-NoLog')
        or die "error: Could not execute $megacli -LdGetNum -a$adp";

    my ($ldnum);
    while ( my $line = <$ldgetnum_fh> ) {
        # \d+ (not \d) so adapters numbered 10 and above are matched too
        if ( $line =~ m/Number of Virtual drives configured on adapter \d+:\s*(\d+)/i ) {
            $ldnum = $1;
            last;
        }
    }
    close $ldgetnum_fh;

    # No count parsed: there is nothing to iterate over, but do not let the
    # adapter pass silently as healthy.
    if ( !defined $ldnum ) {
        $ldnum   = 0;
        $status  = max_state($status, 'UNKNOWN');
        $result .= "$adp:LD count unavailable ";
    }

    LDISK: for ( my $ld = 0; $ld < $ldnum; $ld++ ) {
        # Get info on this particular logical drive
        open(my $ldinfo_fh, '-|', $megacli, '-LdInfo', "-L$ld", "-a$adp", '-NoLog')
            or die "error: Could not execute $megacli -LdInfo -L$ld -a$adp -NoLog";

        my ($size, $unit, $raidlevel, $ldpdcount, $spandepth, $state);
        while ( my $line = <$ldinfo_fh> ) {
            if ( $line =~ m/^Size\s*:\s*(\d+(?:\.\d+)?)\s*(MB|GB|TB)/ ) {
                ($size, $unit) = ($1, $2);
                # Adjust MB to GB if that's what we got
                if ( $unit eq 'MB' ) {
                    $size = sprintf( "%.0f", ($size / 1024) );
                    $unit = 'GB';
                }
            } elsif ( $line =~ m/^State\s*:\s*(\w+)/ ) {
                $state = $1;
                if ( $state ne 'Optimal' ) {
                    $status = 'CRITICAL';
                }
            } elsif ( $line =~ m/^Number Of Drives(?: per span)?\s*:\s*(\d+)/ ) {
                $ldpdcount = $1;
            } elsif ( $line =~ m/^Span Depth\s*:\s*(\d+)/ ) {
                $spandepth = $1;
                # Total drives = drives per span * spans; only meaningful
                # once the per-span count has been seen.
                $ldpdcount = $ldpdcount * $spandepth if defined $ldpdcount;
            } elsif ( $line =~ m/^RAID Level\s*:\s*Primary-(\d)/ ) {
                $raidlevel = $1;
            }
        }
        close $ldinfo_fh;

        # A logical drive whose state line never appeared was not verified.
        if ( !defined $state ) {
            $state  = 'Unknown';
            $status = max_state($status, 'UNKNOWN');
        }

        # Placeholders keep the summary readable (and free of
        # uninitialized-value warnings) when a field was missing.
        $raidlevel = '?' unless defined $raidlevel;
        $ldpdcount = '?' unless defined $ldpdcount;
        $size      = '?' unless defined $size;
        $unit      = ''  unless defined $unit;

        $result .= "$adp:$ld:RAID-$raidlevel:$ldpdcount drives:$size$unit:$state ";

    } #LDISK

    # Get info on physical disks for this adapter
    open(my $pdlist_fh, '-|', $megacli, '-PdList', "-a$adp", '-NoLog')
        or die "error: Could not execute $megacli -PdList -a$adp -NoLog";

    my ($slotnumber, $fwstate);
    # True while the lines being read belong to slot 255 (the backplane).
    my $is_backplane = 0;
    PDISKS: while ( my $line = <$pdlist_fh> ) {
        if ( $line =~ m/Slot Number:\s*(\d+)/ ) {
            $slotnumber   = $1;
            $is_backplane = ( $slotnumber == 255 );
            # The backplane is not a drive: do not count it
            next PDISKS if $is_backplane;
            $pdcount++;
        } elsif ( $line =~ m/(\w+) Error Count:\s*(\d+)/ ) {
            # Don't care about backplane error counts
            next PDISKS if $is_backplane;
            if ( $1 eq 'Media' ) {
                $mediaerrors += $2;
            } else {
                $othererrors += $2;
            }
        } elsif ( $line =~ m/Predictive Failure Count:\s*(\d+)/ ) {
            next PDISKS if $is_backplane;
            $prederrors += $1;
        } elsif ( $line =~ m/Firmware state:\s*(\w+)/ ) {
            $fwstate = $1;
            if ( $fwstate =~ m/Hotspare/ ) {
                $hotsparecount++;
            } elsif ( $fwstate =~ m/^Online/ ) {
                # Do nothing
            } elsif ( !$is_backplane ) {
                # Any other state on a real drive slot counts as a bad disk
                $pdbad++;
                $status = 'CRITICAL';
            }
        }
    } #PDISKS
    close $pdlist_fh;
}
|
||||
|
||||
# Finish the report line, print it, and exit with the matching Nagios code.

# Total number of physical drives counted across all adapters
$result .= "Drives:$pdcount ";

# Mention bad disks only when at least one was found
$result .= "$pdbad Bad Drives " if $pdbad;

# Every error is shown in the total, but the status is only raised when
# one of the three counters exceeds its own allowance.
my $errorcount = $mediaerrors + $prederrors + $othererrors;
if ( $errorcount ) {
    $result .= "($errorcount Errors) ";

    my $over_allowance = ( $mediaerrors > $mediaallow )
                      || ( $prederrors  > $predallow )
                      || ( $othererrors > $otherallow );
    $status = max_state($status, 'WARNING') if $over_allowance;
}

# Hotspares are only reported when an expected count was given
if ( $hotspares ) {
    $result .= "Hotspare(s):$hotsparecount";
    if ( $hotsparecount < $hotspares ) {
        # Fewer spares than expected: warn and show the expected number
        $status = max_state($status, 'WARNING');
        $result .= " (of $hotspares)";
    }
}

print STDOUT "$status: $result\n";
exit $ERRORS{$status};
|
Loading…
Reference in New Issue