#!/usr/bin/env perl

# Get status of Linux software RAID for SNMP / Nagios
# Author: Michal Ludvig
# http://www.logix.cz/michal/devel/nagios
# Slightly modified by Daniel B. for integration on SME Server / Zabbix
#
# Introduced as scripts/check_raid_mdadm by commit
# f922de9d81a4e42c9f87082003b7f60d46573935 (Daniel Berteaud,
# Thu Apr 11 21:42:21 2013 +0200, "Add mdadm RAID script and conf").
#
# Companion Zabbix agent configuration from the same commit
# (conf/raid_mdadm.conf):
#
#   # Description: Software Raid Status
#   # Type: Agent or Agent (active)
#   # Key: raid.sw.status
#   # Type of Information: Character
#   # Show Value: As is
#
#   # The value reported is like:
#   # OK: md3:raid1:2 drives:931GB:Optimal md2:raid1:2 drives:931GB:Optimal md1:raid1:2 drives:101MB:Optimal
#
#   # You can add a simple trigger on this check like:
#   # { hostname:raid.sw.status.str( OK ) }=0
#   UserParameter=raid.sw.status,/var/lib/zabbix/bin/check_raid_mdadm
#
# Simple parser for /proc/mdstat that outputs status of all
# or some RAID devices. Possible results are
# - OK: all arrays are optimal
# - WARNING: Array rebuilding
# - CRITICAL: Array degraded

use strict;
use warnings;
use Getopt::Long;

# Sample /proc/mdstat output:
#
# Personalities : [raid1] [raid5]
# md0 : active (read-only) raid1 sdc1[1]
#       2096384 blocks [2/1] [_U]
#
# md1 : active raid5 sdb3[2] sdb4[3] sdb2[4](F) sdb1[0] sdb5[5](S)
#       995712 blocks level 5, 64k chunk, algorithm 2 [3/2] [U_U]
#       [=================>...]  recovery = 86.0% (429796/497856) finish=0.0min speed=23877K/sec
#
# unused devices: <none>

## Return codes for Nagios
my %ERRORS = ('OK'=>0,'WARNING'=>1,'CRITICAL'=>2,'UNKNOWN'=>3,'DEPENDENT'=>4);

# Run as a program only when executed directly; when the file is loaded
# with require/do the subs are defined but nothing is executed.
exit main() unless caller;

# =====
# main()
#   Parses the command line, reads the mdstat file, prints the one-line
#   summary on STDOUT and returns the Nagios return code to exit with.
#   Dies if the file cannot be opened.
sub main {
    my $file   = "/proc/mdstat";
    my $device = "all";

    # Get command line options. An invalid option prints the usage text
    # (and exits) instead of silently running with the defaults.
    GetOptions(
        'file=s'   => \$file,
        'device=s' => \$device,
        'help'     => sub { usage() },
    ) or usage();

    ## Strip leading "/dev/" from --device in case it has been given
    $device =~ s/^\/dev\///;

    open my $fh, '<', $file or die "Can't open $file : $!";
    my ($result, $retval, $summary) = parse_mdstat($fh, $device);
    close $fh;

    print "$summary\n";
    return $retval;
}

# =====
# parse_mdstat($fh, $device)
#   $fh     - readable filehandle positioned at the start of mdstat data
#   $device - "all", or the name of a single array (e.g. "md0"); undef
#             is treated as "all"
#
#   Returns a three element list:
#     result  - 'OK', 'WARNING' or 'CRITICAL' (the worst state seen)
#     retval  - the matching value from %ERRORS
#     summary - "<result>: " followed by one
#               "<dev>:<level>:<n> drives:<size>:<status> " entry per array
sub parse_mdstat {
    my ($fh, $device) = @_;
    $device = 'all' unless defined $device;

    my (%devs_total, %level, %size, %status);
    my @raids;
    my $result = 'OK';
    my $retval = $ERRORS{'OK'};

    while (my $line = <$fh>) {
        next unless $line =~ /^(md\d+)\s*:/;
        my $dev = $1;
        next if $device ne 'all' and $device ne $dev;
        push @raids, $dev;

        # An inactive array lists its members as spares, so it has to be
        # recognised from the state keyword rather than from member flags.
        my $inactive = ($line =~ /^md\d+\s*:\s*inactive\b/) ? 1 : 0;

        # First line: RAID level and member devices with optional flags.
        $level{$dev}      = '';
        $devs_total{$dev} = 0;
        my $failed = 0;
        for my $token (split / /, $line) {
            $level{$dev} = $1 if $token =~ /^(raid\d+)$/;
            next unless $token =~ /\w+\[\d+\]((?:\(.\))*)/;
            my $flags = $1;
            $devs_total{$dev}++;
            # Only failed members make the array degraded. Spares (S) are
            # counted in the drive total but are not an error.
            $failed++ if $flags =~ /\(F\)/;
        }

        # Second line: size in blocks and, when present, the [UU_] map.
        my $blocks_line = <$fh>;
        $blocks_line = '' unless defined $blocks_line;
        my ($blocks) = $blocks_line =~ /(\d+) blocks/;
        $blocks = 0 unless defined $blocks;
        my ($slots) = $blocks_line =~ /(\[[^\[\]]*\])\s*$/;
        my $missing = (defined $slots and $slots =~ /_/) ? 1 : 0;

        my $megabytes = int($blocks / 1024);
        $size{$dev} = $megabytes > 1024
                    ? int($megabytes / 1024) . "GB"
                    : $megabytes . "MB";

        # Third line: only inspected for a recovery progress indicator.
        my $progress_line = <$fh>;
        $progress_line = '' unless defined $progress_line;
        my $recovering = ($progress_line =~ /recovery/) ? 1 : 0;

        if ($inactive or $failed > 0 or ($missing and not $recovering)) {
            $status{$dev} = "Degraded";
            $result = "CRITICAL";
            $retval = $ERRORS{"CRITICAL"};
        }
        else {
            $status{$dev} = "Optimal";
        }
        if ($recovering) {
            $status{$dev} = "Rebuilding";
            # Never downgrade a CRITICAL raised by this or another array.
            if ($result eq "OK") {
                $result = "WARNING";
                $retval = $ERRORS{"WARNING"};
            }
        }
    }

    my $summary = "$result: ";
    foreach my $raid (@raids) {
        $summary .= "$raid:$level{$raid}:$devs_total{$raid} drives:$size{$raid}:$status{$raid} ";
    }

    return ($result, $retval, $summary);
}

# =====
# usage()
#   Prints the help text on STDOUT and exits with status 1.
sub usage {
    # print, not printf: the text contains $0, which must not be
    # interpreted as a format string.
    print "
Check status of Linux SW RAID

Author: Michal Ludvig (c) 2006
        http://www.logix.cz/michal/devel/nagios
Modified by Daniel B. :

Usage: $0 [options]

  --file=<file>      Name of file to parse. Default is /proc/mdstat
  --device=<device>  Name of MD device, e.g. md0. Default is \"all\"

";
    exit(1);
}