Big cleanup, start relying on zabbix-agent-addons

Daniel Berteaud 2013-04-16 12:08:18 +02:00
parent 8b456bb08d
commit fac2e4cce0
27 changed files with 11 additions and 748 deletions

View File

@@ -3,8 +3,13 @@
use esmith::Build::CreateLinks qw(:all);
# Templates to expand
templates2events("/etc/zabbix/zabbix_agent.conf", qw(zabbix-agent-update bootstrap-console-save));
templates2events("/etc/zabbix/zabbix_agentd.conf", qw(zabbix-agent-update bootstrap-console-save));
foreach my $conf (qw!zabbix_agent.conf zabbix_agentd.conf zabbix_agentd.conf.d/asterisk.conf
                     zabbix_agentd.conf.d/deprecated_cpunum.conf zabbix_agentd.conf.d/deprecated_memory.conf
                     zabbix_agentd.conf.d/deprecated_netstat.conf zabbix_agentd.conf.d/deprecated_swap.conf
                     zabbix_agentd.conf.d/extip.conf zabbix_agentd.conf.d/mail.conf zabbix_agentd.conf.d/mysql.conf
                     zabbix_agentd.conf.d/yumupdates.conf!){
    templates2events("/etc/zabbix/$conf", qw(zabbix-agent-update bootstrap-console-save));
}
templates2events("/etc/sudoers", "zabbix-agent-update");
templates2events("/etc/rc.d/init.d/masq", "zabbix-agent-update");
templates2events("/etc/crontab", "zabbix-agent-update");

View File

@@ -1,11 +1,2 @@
{
    my $runasroot = '/usr/bin/mysqladmin status, /sbin/e-smith/db yum_updates show, /var/lib/zabbix/bin/sensors *, /var/lib/zabbix/bin/check_lvm *, /usr/sbin/smartctl -A /dev/*';
    if ( -x '/opt/MegaRAID/MegaCli/MegaCli' ){
        $runasroot .= ', /var/lib/zabbix/bin/megaraid-parser.pl';
    }
    $runasroot .= "\n";
    $OUT .= 'Cmnd_Alias ZABBIX_AGENT_ROOT = '.$runasroot;
}
Cmnd_Alias ZABBIX_AGENT_MYSQL = /usr/bin/du -s /var/lib/mysql
Cmnd_Alias ZABBIX_AGENT = /var/lib/zabbix/bin/*_sudo

View File

@@ -1,2 +1,2 @@
zabbix ALL=(root) NOPASSWD: ZABBIX_AGENT_ROOT
zabbix ALL=(mysql) NOPASSWD: ZABBIX_AGENT_MYSQL
zabbix ALL=(root) NOPASSWD: ZABBIX_AGENT
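The two old aliases collapse into a single ZABBIX_AGENT alias covering the *_sudo wrapper scripts, presumably shipped by zabbix-agent-addons. A quick way to check the rendered rule (run as root; the output shown is only an expectation):

# List the commands the zabbix user may run through sudo
sudo -l -U zabbix
# Should include something like:
#   (root) NOPASSWD: /var/lib/zabbix/bin/*_sudo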

View File

@@ -1,14 +0,0 @@
####### USER-DEFINED MONITORED PARAMETERS #######
# Format: UserParameter=<key>,<shell command>
# Note that shell command must not return empty string or EOL only
#UserParameter=system.test,who|wc -l
### Set of parameter for monitoring MySQL server (v3.23.42 and later)
### Change -u<username> and add -p<password> if required
#UserParameter=mysql.ping,mysqladmin -uroot ping|grep alive|wc -l
#UserParameter=mysql.uptime,mysqladmin -uroot status|cut -f2 -d":"|cut -f1 -d"T"
#UserParameter=mysql.threads,mysqladmin -uroot status|cut -f3 -d":"|cut -f1 -d"Q"
#UserParameter=mysql.questions,mysqladmin -uroot status|cut -f4 -d":"|cut -f1 -d"S"
#UserParameter=mysql.slowqueries,mysqladmin -uroot status|cut -f5 -d":"|cut -f1 -d"O"
#UserParameter=mysql.qps,mysqladmin -uroot status|cut -f9 -d":"
#UserParameter=mysql.version,mysql -V

View File

@@ -0,0 +1 @@
Include=/etc/zabbix/zabbix_agentd.conf.d/
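The agent configuration now just pulls in drop-in files from /etc/zabbix/zabbix_agentd.conf.d/, presumably where the zabbix-agent-addons package drops its UserParameter definitions. A hypothetical spot check that a key is picked up (the key name is only an example):

# List the drop-in fragments the agent will load
ls /etc/zabbix/zabbix_agentd.conf.d/
# Ask the agent to evaluate one item key locally
zabbix_agentd -t 'vfs.fs.size[/,free]'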

View File

@@ -1,42 +0,0 @@
# Disk I/O Monitoring
# Description: Read operations on hdX
# X can be from 1 to 8
# you'll have to create a custom template if
# you want to support more than 8 drives
# You can also monitor raid devices (/dev/md1 for example)
# Type: Agent or Agent (active)
# Key: vfs.dev.read.hdX vfs.dev.write.hdX
# Type of information: Numeric (Float or integer 64bit)
# Units: bytes/sec
# Use multiplier: 512
# Update interval: 60 (for example)
# Store Value: Delta (speed/sec)
# Show Value: As is
# For these UserParameters to work, you need to configure the drives you want to monitor
# in the DB:
# db configuration setprop zabbix-agent HardDrives /dev/sda,/dev/sdb,/dev/sdc,/dev/sdd
# signal-event zabbix-agent-update
{
    my @hd = split( /[,;]/,( ${'zabbix-agent'}{'HardDrives'} || '' ));
    my $cnt = 1;
    foreach my $drive (@hd){
        if ( -e $drive ){
            $drive =~ s|/dev/||;
            $OUT .= "Alias=vfs.dev.read.hd" . $cnt . ":vfs.dev.read[$drive,sectors]\n";
            $OUT .= "Alias=vfs.dev.write.hd" . $cnt . ":vfs.dev.write[$drive,sectors]\n";
            $cnt++;
        }
    }
    for (; $cnt < 9; $cnt++){
        $OUT .= "UserParameter=vfs.dev.read.hd" . $cnt . ",echo '0'\n";
        $OUT .= "UserParameter=vfs.dev.write.hd" . $cnt . ",echo '0'\n";
    }
}
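For reference, this removed fragment built its per-drive aliases from the HardDrives DB property; a sketch of what it used to expand to for two example drives:

# Hypothetical configuration (drive list is an example)
db configuration setprop zabbix-agent HardDrives /dev/sda,/dev/sdb
signal-event zabbix-agent-update
# The Perl block above would then have emitted:
#   Alias=vfs.dev.read.hd1:vfs.dev.read[sda,sectors]
#   Alias=vfs.dev.write.hd1:vfs.dev.write[sda,sectors]
#   Alias=vfs.dev.read.hd2:vfs.dev.read[sdb,sectors]
#   Alias=vfs.dev.write.hd2:vfs.dev.write[sdb,sectors]
#   UserParameter=vfs.dev.read.hd3,echo '0'   (... and so on up to hd8)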

View File

@@ -1,10 +0,0 @@
# Type: Agent or Agent (active)
# Key: lvm[key] where key can be snapshot_max_alloc, snapshots, lv or vg
# Type of information: Numeric (integer 64bit) or characters (for version)
# Units: depends on the key (snapshot_max_alloc is in %)
# Custom multiplier: Do not use
# Store Value: As is
UserParameter=lvm[*],/usr/bin/sudo /var/lib/zabbix/bin/check_lvm $1
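This fragment is removed along with the check_lvm helper further down; under the old setup a manual test looked roughly like:

# Run the script the same way the agent did
sudo /var/lib/zabbix/bin/check_lvm vg
# Or let the agent evaluate the key itself
zabbix_agentd -t 'lvm[snapshots]'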

View File

@@ -1,37 +0,0 @@
{
    my $options = '';
    my $spares = ${'zabbix-agent'}{'MegaRaidSpares'} || '';
    $options .= "-s $spares " if ($spares =~ /\d+/);
    my $media_error = ${'zabbix-agent'}{'MegaRaidMediaError'} || '';
    $options .= "-m $media_error " if ($media_error =~ /\d+/);
    my $other_error = ${'zabbix-agent'}{'MegaRaidOtherError'} || '';
    $options .= "-o $other_error " if ($other_error =~ /\d+/);
    my $predictive_error = ${'zabbix-agent'}{'MegaRaidPredictiveError'} || '';
    $options .= "-p $predictive_error " if ($predictive_error =~ /\d+/);
    # As this check requires the MegaCli utility, first check if it's present:
    if ( -x '/opt/MegaRAID/MegaCli/MegaCli' ){
        $OUT .=<<"HERE";
# Report status of every RAID array using the MegaRAID controller (requires the MegaCli utility)
# This controller is used for example on PERC 5/6(i) RAID cards
# Description: MegaRaid Status
# Type: Agent or Agent (active)
# Key: raid.mega.status
# Type of Information: Character
# Show Value: As is
# The value reported is like:
# State: OK: 0:0:RAID-1:2 drives:68GB:Optimal 0:1:RAID-5:4 drives:837GB:Optimal Drives:7
#
# Tips: You can add a simple trigger on this check like:
# \{ hostname:raid.mega.status.str( OK ) \}=0
UserParameter=raid.mega.status,/usr/bin/sudo /var/lib/zabbix/bin/megaraid-parser.pl $options
HERE
    }
}

View File

@@ -1,11 +0,0 @@
# Description: Temperature
# Type: Agent or Agent (active)
# Key: sensors[mb] (for example)
# Type of information: Numeric (float)
# Units: °C
# Custom multiplier: Do not use
# Store Value: As is
UserParameter=sensors[*],/usr/bin/sudo /var/lib/zabbix/bin/sensors $1
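This fragment and its backing script (removed further down) had to be adapted to the local IPMI or lm_sensors hardware; under the old setup a manual test looked roughly like:

# Run the helper directly for one sensor key
sudo /var/lib/zabbix/bin/sensors cpu0
# Or evaluate the item through the agent
zabbix_agentd -t 'sensors[cpu0]'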

View File

@@ -1,43 +0,0 @@
# Smart Monitoring
# Description: Smart Value <key>
# Key can be one of: Raw_Read_Error_Rate, Spin_Up_Time, Start_Stop_Count
# Reallocated_Sector_Ct, Seek_Error_Rate, Power_On_Hours, Spin_Retry_Count,
# Power_Cycle_Count, Temperature_Celsius, Hardware_ECC_Recovered,
# Current_Pending_Sector, Offline_Uncorrectable, UDMA_CRC_Error_Count,
# Multi_Zone_Error_Rate, TA_Increase_Count
# Type: Agent or Agent (active)
# Key: system.smartd.hdX[<key>] (for example system.smartd.hd1[Reallocated_Sector_Ct])
# Type of information: Numeric (integer 64bit)
# Units: (none)
# Use multiplier: No
# Update interval: 120 (for example)
# Store Value: As is
# Show Value: As is
# For Seek_Error_Rate, Raw_Read_Error_Rate, Hardware_ECC_Recovered you can store value as Delta
# in order to graph the error rate in a readable format
# For these UserParameters to work, you need to configure the drives you want to monitor
# in the DB:
# db configuration setprop zabbix-agent SmartDrives /dev/sda,/dev/sdb,/dev/sdc,/dev/sdd
# signal-event zabbix-agent-update
{
    my @hd = split( /[,;]/,( ${'zabbix-agent'}{'SmartDrives'} || '' ));
    my $cnt = 1;
    foreach my $drive (@hd){
        if ( -e $drive ){
            $OUT .= "UserParameter=system.smartd.hd" . $cnt . "[*],/usr/bin/sudo /usr/sbin/smartctl -A $drive| grep \$1| tail -1| cut -c 88-|cut -f1 -d' '\n";
            $cnt++;
        }
    }
    for (; $cnt < 9; $cnt++){
        $OUT .= "UserParameter=system.smartd.hd" . $cnt . "[*],echo '0'\n";
    }
}
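Like the disk I/O fragment, the generated keys came from a DB property (SmartDrives); a sketch of the old expansion for a single example drive:

# Hypothetical configuration (drive is an example)
db configuration setprop zabbix-agent SmartDrives /dev/sda
signal-event zabbix-agent-update
# The Perl block above would then have emitted roughly:
#   UserParameter=system.smartd.hd1[*],/usr/bin/sudo /usr/sbin/smartctl -A /dev/sda| grep $1| tail -1| cut -c 88-|cut -f1 -d' '
#   UserParameter=system.smartd.hd2[*],echo '0'   (... and so on up to hd8)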

View File

@@ -1,53 +0,0 @@
# Squid
# Description: Squid Request Hit Ratio
# Type: Agent or Agent (active)
# Key: squid.request_hit_ratio
# Type of information: Numeric (float)
# Units: %
# Custom multiplier: Do not use
# Store Value: As is
UserParameter=squid.request_hit_ratio,squidclient mgr:info|grep 'Request Hit Ratios:'|cut -d':' -f3|cut -d',' -f1|tr -d ' %'
# Description: Squid Byte Hit Ratio
# Type: Agent or Agent (active)
# Key: squid.byte_hit_ratio
# Type of information: Numeric (float)
# Units: %
# Custom multiplier: Do not use
# Store Value: As is
UserParameter=squid.byte_hit_ratio,squidclient mgr:info|grep 'Byte Hit Ratios:'|cut -d':' -f3|cut -d',' -f1|tr -d ' %'
# Description: Squid Average HTTP request per minute
# Type: Agent or Agent (active)
# Key: squid.avg_http_req_per_min
# Type of information: Numeric (float)
# Units: Req/min
# Custom multiplier: Do not use
# Store Value: As is
UserParameter=squid.avg_http_req_per_min,squidclient mgr:info|grep 'Average HTTP requests per minute since start:'|cut -d':' -f2| tr -d ' \t'
# Description: Squid Disk Cache Size
# Type: Agent or Agent (active)
# Key: squid.cache_size_disk
# Type of information: Numeric (integer 64bits)
# Units: Bytes
# Custom multiplier: 1024
# Store Value: As is
UserParameter=squid.cache_size_disk,squidclient mgr:info|grep 'Storage Swap size:' | awk '\{print $4\}'
# Description: Squid Memory Cache Size
# Type: Agent or Agent (active)
# Key: squid.cache_size_mem
# Type of information: Numeric (integer 64bits)
# Units: Bytes
# Custom multiplier: 1024
# Store Value: As is
UserParameter=squid.cache_size_mem,squidclient mgr:info|grep 'Storage Mem size:' | awk '\{print $4\}'
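These keys scraped the output of squidclient mgr:info on the agent host. A hedged example of querying one of them from the Zabbix server side under the old setup (zabbix_get ships with Zabbix; host and port are examples):

# Ask the agent on the proxy host for the current request hit ratio
zabbix_get -s 192.168.1.10 -p 10050 -k squid.request_hit_ratio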

View File

@@ -1,16 +0,0 @@
# Report status of every RAID array (parsing /proc/mdstat)
# Description: Software Raid Status
# Type: Agent or Agent (active)
# Key: raid.sw.status
# Type of Information: Character
# Show Value: As is
# The value reported is like:
# OK: md3:raid1:2 drives:931GB:Optimal md2:raid1:2 drives:931GB:Optimal md1:raid1:2 drives:101MB:Optimal
# Tips: You can add a simple trigger on this check like:
# \{ hostname:raid.sw.status.str( OK ) \}=0
UserParameter=raid.sw.status,/var/lib/zabbix/bin/mdstat-parser.pl

View File

@@ -1,34 +0,0 @@
# Monitor UPS status
# Description: Nut UPS load
# Type: Agent or Agent (active)
# Key: ups.load
# Type of information: Numeric (float)
# Units: %
# Multiplier: Do not use
# Store Value: As is
UserParameter=ups.load[*],upsc $1@localhost ups.load
# Description: Nut UPS Battery Charge
# Type: Agent or Agent (active)
# Key: ups.battery.charge
# Type of information: Numeric (float)
# Units: %
# Multiplier: Do not use
# Store Value: As is
UserParameter=ups.battery.charge[*],upsc $1@localhost battery.charge
# Description: Nut UPS Status
# Type: Agent or Agent (active)
# Key: ups.status
# Type of information: Character
# Show Value: As is (you can also define a dictionary, e.g. OL => On Line)
UserParameter=ups.status[*],upsc $1@localhost ups.status
# Description: Nut UPS Model
# Type: Agent or Agent (active)
# Key: ups.model
# Type of information: Text
UserParameter=ups.model[*],upsc $1@localhost ups.model
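All four items wrap upsc, so the NUT UPS name is passed as the key parameter. The underlying commands can still be checked directly (the UPS name is an example):

# Query NUT for the same variables the agent items read
upsc myups@localhost ups.status
upsc myups@localhost battery.charge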

View File

@@ -1,36 +0,0 @@
#!/bin/bash
PATH=$PATH:/usr/sbin:/usr/local/sbin
snapshot_max_alloc(){
    MAX_PERCENT=0
    for PERCENT in $(lvdisplay | grep % | sed -e 's/ Allocated to snapshot //g' -e 's/%//g'); do
        # Numeric comparison: the values are floats, so a [[ ... > ... ]] string
        # comparison would mis-order values like 9.5 and 10.2
        if awk -v cur="$PERCENT" -v max="$MAX_PERCENT" 'BEGIN{exit !(cur > max)}'; then
            MAX_PERCENT=$PERCENT
        fi
    done
    echo "$MAX_PERCENT"
}
snapshots(){
    lvdisplay | grep % | wc -l
}
lv(){
    lvdisplay | grep 'LV Name' | wc -l
}
vg(){
    vgdisplay | grep 'VG Name' | wc -l
}
case $1 in
    snapshot_max_alloc|snapshots|lv|vg)
        $1
        ;;
    *)
        echo 'ZBX_NOTSUPPORTED'
        ;;
esac

View File

@@ -1,155 +0,0 @@
#!/usr/bin/env perl
# Get status of Linux software RAID for SNMP / Nagios
# Author: Michal Ludvig <michal@logix.cz>
# http://www.logix.cz/michal/devel/nagios
# Slightly modified by Daniel B. for integration on SME Server / Zabbix
# 24 Apr 2009
# - One-line report
# - Support RAID 0 arrays
# - Report Warning if an array is rebuilding
#
# Simple parser for /proc/mdstat that outputs status of all
# or some RAID devices. Possible results are OK and CRITICAL.
# It could eventually be extended to output WARNING result in
# case the array is being rebuilt or if there are still some
# spares remaining, but for now leave it as it is.
#
# To run the script remotely via SNMP daemon (net-snmp) add the
# following line to /etc/snmpd.conf:
#
# extend raid-md0 /root/parse-mdstat.pl --device=md0
#
# The script result will be available e.g. with command:
#
# snmpwalk -v2c -c public localhost .1.3.6.1.4.1.8072.1.3.2
use strict;
use Getopt::Long;
# Sample /proc/mdstat output:
#
# Personalities : [raid1] [raid5]
# md0 : active (read-only) raid1 sdc1[1]
# 2096384 blocks [2/1] [_U]
#
# md1 : active raid5 sdb3[2] sdb4[3] sdb2[4](F) sdb1[0] sdb5[5](S)
# 995712 blocks level 5, 64k chunk, algorithm 2 [3/2] [U_U]
# [=================>...] recovery = 86.0% (429796/497856) finish=0.0min speed=23877K/sec
#
# unused devices: <none>
my $file = "/proc/mdstat";
my $device = "all";
# Get command line options.
GetOptions ('file=s' => \$file,
            'device=s' => \$device,
            'help' => sub { &usage() } );
## Strip leading "/dev/" from --device in case it has been given
$device =~ s/^\/dev\///;
## Return codes for Nagios
my %ERRORS=('OK'=>0,'WARNING'=>1,'CRITICAL'=>2,'UNKNOWN'=>3,'DEPENDENT'=>4);
## This is a global return value - set to the worst result we get overall
my $retval = 0;
my (%active_devs, %failed_devs, %spare_devs, %devs_total, %level, %size, %status);
my @raids;
my $result = 'OK';
open FILE, "< $file" or die "Can't open $file : $!";
while (<FILE>) {
    next if ! /^(md\d+)+\s*:/;
    next if $device ne "all" and $device ne $1;
    my $dev = $1;
    push @raids, $dev;
    my @array = split(/ /);
    $devs_total{$dev} = 0;
    my $devs_up = 0;
    my $missing = 0;
    for $_ (@array) {
        $level{$dev} = $1 if /^(raid\d+)$/;
        next if ! /(\w+)\[\d+\](\(.\))*/;
        $devs_total{$dev}++;
        if ($2 eq "(F)") {
            $failed_devs{$dev} .= "$1,";
        }
        elsif ($2 eq "(S)") {
            $spare_devs{$dev} .= "$1,";
        }
        else {
            $active_devs{$dev} .= "$1,";
            $devs_up++;
        }
    }
    if (! defined($active_devs{$dev})) { $active_devs{$dev} = "none"; }
    else { $active_devs{$dev} =~ s/,$//; }
    if (! defined($spare_devs{$dev})) { $spare_devs{$dev} = "none"; }
    else { $spare_devs{$dev} =~ s/,$//; }
    if (! defined($failed_devs{$dev})) { $failed_devs{$dev} = "none"; }
    else { $failed_devs{$dev} =~ s/,$//; }
    $_ = <FILE>;
    /(\d+)\ blocks\ (.*)(\[.*\])\s?$/;
    $size{$dev} = int($1/1024);
    #print "$3\n";
    $missing = 1 if ($3 =~ m/_/);
    if ($size{$dev} > 1024){
        $size{$dev} = int($size{$dev}/1024)."GB";
    }
    else{
        $size{$dev} .= "MB";
    }
    $_ = <FILE>;
    if (($devs_total{$dev} > $devs_up) || ($failed_devs{$dev} ne "none") || (($missing) && (!/recovery/))) {
        $status{$dev} = "Degraded";
        $result = "CRITICAL";
        $retval = $ERRORS{"CRITICAL"};
    }
    else {
        $status{$dev} = "Optimal";
    }
    if (/recovery/){
        $status{$dev} = "Rebuilding";
        if ($result eq "OK"){
            $result = "WARNING";
            $retval = $ERRORS{"WARNING"};
        }
    }
}
print "$result: ";
foreach my $raid (@raids){
    print "$raid:$level{$raid}:$devs_total{$raid} drives:$size{$raid}:$status{$raid} ";
}
print "\n";
close FILE;
exit $retval;
# =====
sub usage()
{
printf("
Check status of Linux SW RAID
Author: Michal Ludvig <michal\@logix.cz> (c) 2006
http://www.logix.cz/michal/devel/nagios
Modified by Daniel B. <daniel\@firewall-services.com>:
Usage: mdstat-parser.pl [options]
--file=<filename> Name of file to parse. Default is /proc/mdstat
--device=<device> Name of MD device, e.g. md0. Default is \"all\"
");
exit(1);
}
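Per its own usage text, the parser can be pointed at any mdstat-formatted file and limited to one device, which made local testing easy; a hypothetical run:

# Check a single array (omit --device to report on all of them)
/var/lib/zabbix/bin/mdstat-parser.pl --device=md0
# Example one-line output, in the format documented above:
#   OK: md0:raid1:2 drives:931GB:Optimal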

View File

@@ -1,226 +0,0 @@
#!/usr/bin/perl -w
# check_megaraid_sas Nagios plugin
# Copyright (C) 2007 Jonathan Delgado, delgado@molbio.mgh.harvard.edu
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
#
# Nagios plugin to monitor the status of volumes attached to a LSI Megaraid SAS
# controller, such as the Dell PERC5/i and PERC5/e. If you have any hotspares
# attached to the controller, you can specify the number you should expect to
# find with the '-s' flag.
#
# The paths for the Nagios plugins lib and MegaCli may need to be changed.
#
# $Author: delgado $
# $Revision: #3 $ $Date: 2007/06/07 $
# Slightly modified by Daniel B. for SME Server integration with zabbix
# 23 Apr 2009
use strict;
use Getopt::Std;
our($opt_h, $opt_s, $opt_o, $opt_m, $opt_p);
getopts('hs:o:p:m:');
if ( $opt_h ) {
    print "Usage: $0 [-s number] [-m number] [-p number] [-o number]\n";
    print "       -s is how many hotspares are attached to the controller\n";
    print "       -m is the number of media errors to ignore\n";
    print "       -p is the predictive error count to ignore\n";
    print "       -o is the number of other disk errors to ignore\n";
    exit;
}
my $megacli = '/opt/MegaRAID/MegaCli/MegaCli';
## Return codes for Nagios
my %ERRORS=('OK'=>0,'WARNING'=>1,'CRITICAL'=>2,'UNKNOWN'=>3,'DEPENDENT'=>4);
my ($adapters);
my $hotspares = 0;
my $hotsparecount = 0;
my $pdbad = 0;
my $pdcount = 0;
my $mediaerrors = 0;
my $mediaallow = 0;
my $prederrors = 0;
my $predallow = 0;
my $othererrors = 0;
my $otherallow = 0;
my $result = '';
my $status = 'OK';
sub max_state ($$) {
    my ($current, $compare) = @_;
    if (($compare eq 'CRITICAL') || ($current eq 'CRITICAL')) {
        return 'CRITICAL';
    } elsif ($compare eq 'OK') {
        return $current;
    } elsif ($compare eq 'WARNING') {
        return 'WARNING';
    } elsif (($compare eq 'UNKNOWN') && ($current eq 'OK')) {
        return 'UNKNOWN';
    } else {
        return $current;
    }
}
if ( $opt_s ) {
    $hotspares = $opt_s;
}
if ( $opt_m ) {
    $mediaallow = $opt_m;
}
if ( $opt_p ) {
    $predallow = $opt_p;
}
if ( $opt_o ) {
    $otherallow = $opt_o;
}
# Get the number of RAID controllers we have
open (ADPCOUNT, "$megacli -adpCount -NoLog |")
    || die "error: Could not execute MegaCli -adpCount";
while (<ADPCOUNT>) {
    if ( m/Controller Count:\s*(\d+)/ ) {
        $adapters = $1;
        last;
    }
}
close ADPCOUNT;
ADAPTER: for ( my $adp = 0; $adp < $adapters; $adp++ ) {
    # Get the number of logical drives on this adapter
    open (LDGETNUM, "$megacli -LdGetNum -a$adp -NoLog |")
        || die "error: Could not execute $megacli -LdGetNum -a$adp";
    my ($ldnum);
    while (<LDGETNUM>) {
        if ( m/Number of Virtual drives configured on adapter \d:\s*(\d+)/i ) {
            $ldnum = $1;
            last;
        }
    }
    close LDGETNUM;
    LDISK: for ( my $ld = 0; $ld < $ldnum; $ld++ ) {
        # Get info on this particular logical drive
        open (LDINFO, "$megacli -LdInfo -L$ld -a$adp -NoLog |")
            || die "error: Could not execute $megacli -LdInfo -L$ld -a$adp -NoLog";
        my ($size, $unit, $raidlevel, $ldpdcount, $spandepth, $state);
        while (<LDINFO>) {
            if ( m/Size:\s*((\d+)(MB|GB|TB))/ ) {
                $size = $2;
                $unit = $3;
                # Adjust MB to GB if that's what we got
                if ( $unit eq 'MB' ) {
                    $size = sprintf( "%.0f", ($size / 1024) );
                    $unit= 'GB';
                }
            } elsif ( m/State:\s*(\w+)/ ) {
                $state = $1;
                if ( $state ne 'Optimal' ) {
                    $status = 'CRITICAL';
                }
            } elsif ( m/Number Of Drives( per span)?:\s*(\d+)/ ) {
                $ldpdcount = $2;
            } elsif ( m/Span Depth:\s*(\d+)/ ) {
                $spandepth = $1;
                $ldpdcount = $ldpdcount * $spandepth;
            } elsif ( m/RAID Level: Primary-(\d)/ ) {
                $raidlevel = $1;
            }
        }
        close LDINFO;
        $result .= "$adp:$ld:RAID-$raidlevel:$ldpdcount drives:$size$unit:$state ";
    } #LDISK
    close LDINFO;
    # Get info on physical disks for this adapter
    open (PDLIST, "$megacli -PdList -a$adp -NoLog |")
        || die "error: Could not execute $megacli -PdList -a$adp -NoLog";
    my ($slotnumber,$fwstate);
    PDISKS: while (<PDLIST>) {
        if ( m/Slot Number:\s*(\d+)/ ) {
            $slotnumber = $1;
            # Don't care about backplane error counts
            next if ( $slotnumber == 255 );
            $pdcount++;
        } elsif ( m/(\w+) Error Count:\s*(\d+)/ ) {
            if ( $1 eq 'Media') {
                $mediaerrors += $2;
            } else {
                $othererrors += $2;
            }
        } elsif ( m/Predictive Failure Count:\s*(\d+)/ ) {
            $prederrors += $1;
        } elsif ( m/Firmware state:\s*(\w+)/ ) {
            $fwstate = $1;
            if ( $fwstate eq 'Hotspare' ) {
                $hotsparecount++;
            } elsif ( $fwstate eq 'Online' ) {
                # Do nothing
            } elsif ( $slotnumber != 255 ) {
                $pdbad++;
                $status = 'CRITICAL';
            }
        }
    } #PDISKS
    close PDLIST;
}
$result .= "Drives:$pdcount ";
# Any bad disks?
if ( $pdbad ) {
    $result .= "$pdbad Bad Drives ";
}
my $errorcount = $mediaerrors + $prederrors + $othererrors;
# Were there any errors?
if ( $errorcount ) {
    $result .= "($errorcount Errors) ";
    if ( ( $mediaerrors > $mediaallow ) ||
         ( $prederrors > $predallow ) ||
         ( $othererrors > $otherallow ) ) {
        $status = max_state($status, 'WARNING');
    }
}
# Do we have as many hotspares as expected (if any)
if ( $hotspares ) {
    if ( $hotsparecount < $hotspares ) {
        $status = max_state($status, 'WARNING');
        $result .= "Hotspare(s):$hotsparecount (of $hotspares)";
    } else {
        $result .= "Hotspare(s):$hotsparecount";
    }
}
print STDOUT "$status: $result\n";
exit $ERRORS{$status};
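The -s/-m/-o/-p thresholds correspond to the MegaRaid* DB properties used by the template fragment earlier in this commit; a hypothetical manual run:

# Expect one hotspare and tolerate up to 5 media errors before warning
sudo /var/lib/zabbix/bin/megaraid-parser.pl -s 1 -m 5
# Example output, in the format documented in the template fragment:
#   OK: 0:0:RAID-1:2 drives:68GB:Optimal Drives:3 Hotspare(s):1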

View File

@@ -1,57 +0,0 @@
#!/bin/bash
KEY=$1
case $KEY in
    cpu0)
        # Here are some examples on how to retrieve temperatures
        # of your system:
        #
        # If your motherboard supports IPMI and you have the ipmitool package
        # you can use this (you'll have to adapt the command, as each
        # controller may report different sensor names):
        # /usr/bin/ipmitool sdr | grep 'P1 Therm Margin' | cut -d'|' -f 2 | awk '{print $1}'
        # Else, if your motherboard supports lm_sensors, you can use something
        # like this:
        # /usr/bin/sensors | grep temp1 | cut -d':' -f 2 | awk '{print $1}' | sed -e "s/+//g" -e "s/.C//g"
        # You can also try to get your CPU temperature with acpi:
        # cat /proc/acpi/thermal_zone/THRM/temperature | awk '{print $2}'
        # It's important that your commands return only numerical values
        # The default for now is to use IPMI
        /usr/bin/ipmitool sdr type Temperature | grep 'P1 Therm Margin' | cut -d'|' -f 2 | awk '{print $1}'
        ;;
    cpu1)
        # This will be the same as the above, but for the second CPU
        /usr/bin/ipmitool sdr type Temperature | grep 'P2 Therm Margin' | cut -d'|' -f 2 | awk '{print $1}'
        ;;
    mb)
        # AFAIK, motherboard temperature can be retrieved only with lm_sensors or IPMI
        /usr/bin/ipmitool sdr type Temperature | grep 'Baseboard' | cut -d'|' -f 2 | awk '{print $1}'
        ;;
    ambiant)
        # Some IPMI controllers also report the ambient temperature
        /usr/bin/ipmitool sdr type Temperature | grep Ambient | cut -d'|' -f 2 | awk '{print $1}'
        ;;
    hd*|sd*)
        # Here we want a hard drive temperature, so we'll use smartctl
        # We could also use hddtemp but it doesn't seem to work for a lot of drives, where smartctl does
        /usr/sbin/smartctl -a /dev/$KEY | grep Temperature_Celsius | awk '{print $10}'
        ;;
    *)
        # Else, tell the server the item is not supported
        echo 'ZBX_NOTSUPPORTED'
        ;;
esac