Big cleanup, start relying on zabbix-agent-addons
This commit is contained in:
parent
8b456bb08d
commit
fac2e4cce0
|
@ -3,8 +3,13 @@
|
|||
use esmith::Build::CreateLinks qw(:all);

# Expand every zabbix-agent template whenever the zabbix-agent-update or
# bootstrap-console-save event fires.  zabbix_agent.conf and
# zabbix_agentd.conf are part of the list, so they no longer need their
# own separate templates2events() calls (the originals duplicated them).
# NOTE: the original list entry for yumupdates.conf carried a stray
# leading slash, producing the doubled path
# /etc/zabbix//zabbix_agentd.conf.d/yumupdates.conf; fixed here.
foreach my $conf (qw!
    zabbix_agent.conf
    zabbix_agentd.conf
    zabbix_agentd.conf.d/asterisk.conf
    zabbix_agentd.conf.d/deprecated_cpunum.conf
    zabbix_agentd.conf.d/deprecated_memory.conf
    zabbix_agentd.conf.d/deprecated_netstat.conf
    zabbix_agentd.conf.d/deprecated_swap.conf
    zabbix_agentd.conf.d/extip.conf
    zabbix_agentd.conf.d/mail.conf
    zabbix_agentd.conf.d/mysql.conf
    zabbix_agentd.conf.d/yumupdates.conf
!){
    templates2events("/etc/zabbix/$conf", qw(zabbix-agent-update bootstrap-console-save));
}

# These system files are only re-expanded on zabbix-agent-update.
templates2events("/etc/sudoers", "zabbix-agent-update");
templates2events("/etc/rc.d/init.d/masq", "zabbix-agent-update");
templates2events("/etc/crontab", "zabbix-agent-update");
|
||||
|
|
|
@ -1,11 +1,2 @@
|
|||
{
# Build the Cmnd_Alias listing every command the zabbix user may run as
# root via sudo.  Paths are helper scripts shipped in /var/lib/zabbix/bin
# plus a few system utilities the agent checks rely on.
my $runasroot = '/usr/bin/mysqladmin status, /sbin/e-smith/db yum_updates show, /var/lib/zabbix/bin/sensors *, /var/lib/zabbix/bin/check_lvm *, /usr/sbin/smartctl -A /dev/*';
# Only whitelist the MegaRAID parser when the MegaCli utility is installed.
if ( -x '/opt/MegaRAID/MegaCli/MegaCli' ){
$runasroot .= ', /var/lib/zabbix/bin/megaraid-parser.pl';
}
# Terminate the generated sudoers line.
$runasroot .= "\n";
$OUT .= 'Cmnd_Alias ZABBIX_AGENT_ROOT = '.$runasroot;

}
|
||||
Cmnd_Alias ZABBIX_AGENT_MYSQL = /usr/bin/du -s /var/lib/mysql
|
||||
Cmnd_Alias ZABBIX_AGENT = /var/lib/zabbix/bin/*_sudo
|
||||
|
||||
|
|
|
@ -1,2 +1,2 @@
|
|||
# Grant the zabbix user passwordless access to the whitelisted command aliases.
zabbix ALL=(root) NOPASSWD: ZABBIX_AGENT_ROOT
zabbix ALL=(mysql) NOPASSWD: ZABBIX_AGENT_MYSQL
zabbix ALL=(root) NOPASSWD: ZABBIX_AGENT
|
||||
|
||||
|
|
|
@ -1,14 +0,0 @@
|
|||
####### USER-DEFINED MONITORED PARAMETERS #######
|
||||
# Format: UserParameter=<key>,<shell command>
|
||||
# Note that shell command must not return empty string or EOL only
|
||||
#UserParameter=system.test,who|wc -l
|
||||
### Set of parameters for monitoring a MySQL server (v3.23.42 and later)
|
||||
### Change -u<username> and add -p<password> if required
|
||||
#UserParameter=mysql.ping,mysqladmin -uroot ping|grep alive|wc -l
|
||||
#UserParameter=mysql.uptime,mysqladmin -uroot status|cut -f2 -d":"|cut -f1 -d"T"
|
||||
#UserParameter=mysql.threads,mysqladmin -uroot status|cut -f3 -d":"|cut -f1 -d"Q"
|
||||
#UserParameter=mysql.questions,mysqladmin -uroot status|cut -f4 -d":"|cut -f1 -d"S"
|
||||
#UserParameter=mysql.slowqueries,mysqladmin -uroot status|cut -f5 -d":"|cut -f1 -d"O"
|
||||
#UserParameter=mysql.qps,mysqladmin -uroot status|cut -f9 -d":"
|
||||
#UserParameter=mysql.version,mysql -V
|
||||
|
|
@ -0,0 +1 @@
|
|||
Include=/etc/zabbix/zabbix_agentd.conf.d/
|
|
@ -1,42 +0,0 @@
|
|||
# Disk I/O Monitoring
|
||||
|
||||
# Description: Read operations on hdX
|
||||
# X can be from 1 to 8
|
||||
# you'll have to create a custom template if
|
||||
# you want to support more than 8 drives
|
||||
# You can also monitor raid devices (/dev/md1 for example)
|
||||
|
||||
# Type: Agent or Agent (active)
|
||||
# Key: vfs.dev.read.hdX vfs.dev.write.hdX
|
||||
# Type of information: Numeric (Float or integer 64bit)
|
||||
# Units: bytes/sec
|
||||
# Use multiplier: 512
|
||||
# Update interval: 60 (for example)
|
||||
# Store Value: Delta (speed/sec)
|
||||
# Show Value: As is
|
||||
|
||||
# For these UserParameter to work, you need to configure the drives you want to monitor
|
||||
# in the DB:
|
||||
# db configuration setprop zabbix-agent HardDrives /dev/sda,/dev/sdb,/dev/sdc,/dev/sdd
|
||||
# signal-event zabbix-agent-update
|
||||
|
||||
{

# HardDrives is a comma/semicolon separated list of device paths, read
# from the zabbix-agent DB record (see the header comments above).
my @hd = split( /[,;]/,( ${'zabbix-agent'}{'HardDrives'} || '' ));

my $cnt = 1;
foreach my $drive (@hd){
# Only map drives that actually exist on this host.
if ( -e $drive){
# Strip the /dev/ prefix: the zabbix key takes the bare device name.
$drive =~ s|/dev/||;
$OUT .= "Alias=vfs.dev.read.hd" . $cnt . ":vfs.dev.read[$drive,sectors]\n";
$OUT .= "Alias=vfs.dev.write.hd" . $cnt . ":vfs.dev.write[$drive,sectors]\n";
$cnt++;
}
}
# Pad the remaining slots up to hd8 with dummy parameters returning 0
# (presumably so server-side template items always have data — TODO confirm).
for (;$cnt < 9; $cnt++){
$OUT .= "UserParameter=vfs.dev.read.hd" . $cnt . ",echo '0'\n";
$OUT .= "UserParameter=vfs.dev.write.hd" . $cnt . ",echo '0'\n";
}

}
|
||||
|
|
@ -1,10 +0,0 @@
|
|||
|
||||
# Type: Agent or Agent (active)
|
||||
# Key: lvm[key] where key can be snapshot_max_allow, snapshots, lv or vg
|
||||
# Type of information: Numeric (integer 64bit) or characters (for version)
|
||||
# Units: depends on the key (snapshot_max_alloc is in %)
|
||||
# Custom multiplier: Do not use
|
||||
# Store Value: As is
|
||||
|
||||
UserParameter=lvm[*],/usr/bin/sudo /var/lib/zabbix/bin/check_lvm $1
|
||||
|
|
@ -1,37 +0,0 @@
|
|||
|
||||
{

# Build the option string for megaraid-parser.pl from zabbix-agent DB
# properties.  Each threshold flag is added only when the corresponding
# property contains a number.
my $options = '';
my $spares = ${'zabbix-agent'}{'MegaRaidSpares'} || '';
$options .= "-s $spares " if ($spares =~ /\d+/);
my $media_error = ${'zabbix-agent'}{'MegaRaidMediaError'} || '';
$options .= "-m $media_error " if ($media_error =~ /\d+/);
my $other_error = ${'zabbix-agent'}{'MegaRaidOtherError'} || '';
$options .= "-o $other_error " if ($other_error =~ /\d+/);
my $predictive_error = ${'zabbix-agent'}{'MegaRaidPredictiveError'} || '';
$options .= "-p $predictive_error " if ($predictive_error =~ /\d+/);

# As this check requires the MegaCli utility, first check if it's present:
if ( -x '/opt/MegaRAID/MegaCli/MegaCli' ){
# The heredoc below is emitted verbatim into the agent configuration;
# its text (including the escaped braces) must stay as-is.
$OUT .=<<"HERE";

# Report status of every Raid Array using the MegaRaid controler (Requires the MegaCli utility)
# This controler is used for example on perc5/6(i) Raid card

# Description: MegaRaid Status
# Type: Agent or Agent (active)
# Key: raid.mega.status
# Type of Information: Character
# Show Value: As is

# The value reported is like:
# State: OK: 0:0:RAID-1:2 drives:68GB:Optimal 0:1:RAID-5:4 drives:837GB:Optimal Drives:7
#

# Tips: You can add a simple trigger on this check like:
# \{ hostname:raid.mega.status.str( OK ) \}=0
UserParameter=raid.mega.status,/usr/bin/sudo /var/lib/zabbix/bin/megaraid-parser.pl $options

HERE
}
}
|
|
@ -1,11 +0,0 @@
|
|||
|
||||
# Description: Temperature
|
||||
# Type: Agent or Agent (active)
|
||||
# Key: sensors[mb] (for example)
|
||||
# Type of information: Numeric (float)
|
||||
# Units: °C
|
||||
# Custom multiplier: Do not use
|
||||
# Store Value: As is
|
||||
|
||||
UserParameter=sensors[*],/usr/bin/sudo /var/lib/zabbix/bin/sensors $1
|
||||
|
|
@ -1,43 +0,0 @@
|
|||
# Smart Monitoring
|
||||
|
||||
# Description: Smart Value <key>
|
||||
# Key can be one of: Raw_Read_Error_Rate, Spin_Up_Time, Start_Stop_Count
|
||||
# Reallocated_Sector_Ct, Seek_Error_Rate, Power_On_Hours, Spin_Retry_Count,
|
||||
# Power_Cycle_Count, Temperature_Celsius, Hardware_ECC_Recovered,
|
||||
# Current_Pending_Sector, Offline_Uncorrectable, UDMA_CRC_Error_Count,
|
||||
# Multi_Zone_Error_Rate, TA_Increase_Count
|
||||
|
||||
# Type: Agent or Agent (active)
|
||||
# Key: system.smartd.hdX[<key>] (for example system.smartd.hd1[Reallocated_Sector_Ct])
|
||||
# Type of information: Numeric (integer 64bit)
|
||||
# Units: (none)
|
||||
# Use multiplier: No
|
||||
# Update interval: 120 (for example)
|
||||
# Store Value: As is
|
||||
# Show Value: As is
|
||||
|
||||
# For Seek_Error_Rate, Raw_Read_Error_Rate, Hardware_ECC_Recovered you can store value as Delta
|
||||
# in order to graph the error rate in a readable format
|
||||
|
||||
# For these UserParameter to work, you need to configure the drives you want to monitor
|
||||
# in the DB:
|
||||
# db configuration setprop zabbix-agent SmartDrives /dev/sda,/dev/sdb,/dev/sdc,/dev/sdd
|
||||
# signal-event zabbix-agent-update
|
||||
|
||||
{

# SmartDrives is a comma/semicolon separated list of device paths, read
# from the zabbix-agent DB record (see the header comments above).
my @hd = split( /[,;]/,( ${'zabbix-agent'}{'SmartDrives'} || '' ));

my $cnt = 1;
foreach my $drive (@hd){
# Only emit a UserParameter for drives that actually exist.
if ( -e $drive){
# NOTE(review): the fixed-column "cut -c 88-" extraction of the SMART raw
# value is fragile across smartctl versions — verify on the target release.
$OUT .= "UserParameter=system.smartd.hd" . $cnt. "[*],/usr/bin/sudo /usr/sbin/smartctl -A $drive| grep \$1| tail -1| cut -c 88-|cut -f1 -d' '\n";
$cnt++;
}
}
# Pad the remaining slots up to hd8 with dummy parameters returning 0.
for (;$cnt < 9; $cnt++){
$OUT .= "UserParameter=system.smartd.hd" . $cnt. "[*],echo '0'\n";
}

}
|
||||
|
|
@ -1,53 +0,0 @@
|
|||
# Squid
|
||||
|
||||
# Description: Squid Request Hit Ratio
|
||||
# Type: Agent or Agent (active)
|
||||
# Key: squid.request_hit_ratio
|
||||
# Type of information: Numeric (float)
|
||||
# Units: %
|
||||
# Custom multiplier: Do not use
|
||||
# Store Value: As is
|
||||
|
||||
UserParameter=squid.request_hit_ratio,squidclient mgr:info|grep 'Request Hit Ratios:'|cut -d':' -f3|cut -d',' -f1|tr -d ' %'
|
||||
|
||||
# Description: Squid Byte Hit Ratio
|
||||
# Type: Agent or Agent (active)
|
||||
# Key: squid.byte_hit_ratio
|
||||
# Type of information: Numeric (float)
|
||||
# Units: %
|
||||
# Custom multiplier: Do not use
|
||||
# Store Value: As is
|
||||
|
||||
UserParameter=squid.byte_hit_ratio,squidclient mgr:info|grep 'Byte Hit Ratios:'|cut -d':' -f3|cut -d',' -f1|tr -d ' %'
|
||||
|
||||
# Description: Squid Average HTTP request per minute
|
||||
# Type: Agent or Agent (active)
|
||||
# Key: squid.avg_http_req_per_min
|
||||
# Type of information: Numeric (float)
|
||||
# Units: Req/min
|
||||
# Custom multiplier: Do not use
|
||||
# Store Value: As is
|
||||
|
||||
UserParameter=squid.avg_http_req_per_min,squidclient mgr:info|grep 'Average HTTP requests per minute since start:'|cut -d':' -f2| tr -d ' \t'
|
||||
|
||||
# Description: Squid Disk Cache Size
|
||||
# Type: Agent or Agent (active)
|
||||
# Key: squid.cache_size_disk
|
||||
# Type of information: Numeric (integer 64bits)
|
||||
# Units: Bytes
|
||||
# Custom multiplier: 1024
|
||||
# Store Value: As is
|
||||
|
||||
UserParameter=squid.cache_size_disk,squidclient mgr:info|grep 'Storage Swap size:' | awk '\{print $4\}'
|
||||
|
||||
# Description: Squid Memory Cache Size
|
||||
# Type: Agent or Agent (active)
|
||||
# Key: squid.cache_size_mem
|
||||
# Type of information: Numeric (integer 64bits)
|
||||
# Units: Bytes
|
||||
# Custom multiplier: 1024
|
||||
# Store Value: As is
|
||||
|
||||
UserParameter=squid.cache_size_mem,squidclient mgr:info|grep 'Storage Mem size:' | awk '\{print $4\}'
|
||||
|
||||
|
|
@ -1,16 +0,0 @@
|
|||
|
||||
# Report status of every Raid Array (parsing /proc/mdstat)
|
||||
|
||||
# Description: Software Raid Status
|
||||
# Type: Agent or Agent (active)
|
||||
# Key: raid.sw.status
|
||||
# Type of Information: Character
|
||||
# Show Value: As is
|
||||
|
||||
# The value reported is like:
|
||||
# OK: md3:raid1:2 drives:931GB:Optimal md2:raid1:2 drives:931GB:Optimal md1:raid1:2 drives:101MB:Optimal
|
||||
|
||||
# Tips: You can add a simple trigger on this check like:
|
||||
# \{ hostname:raid.sw.status.str( OK ) \}=0
|
||||
UserParameter=raid.sw.status,/var/lib/zabbix/bin/mdstat-parser.pl
|
||||
|
|
@ -1,34 +0,0 @@
|
|||
# Monitor UPS status
|
||||
|
||||
# Description: Nut UPS load
|
||||
# Type: Agent or Agent (active)
|
||||
# Key: ups.load
|
||||
# Type of information: Numeric (float)
|
||||
# Units: %
|
||||
# Multiplier: Do not use
|
||||
# Store Value: As is
|
||||
UserParameter=ups.load[*],upsc $1@localhost ups.load
|
||||
|
||||
# Description: Nut UPS Battery Charge
|
||||
# Type: Agent or Agent (active)
|
||||
# Key: ups.battery.charge
|
||||
# Type of information: Numeric (float)
|
||||
# Units: %
|
||||
# Multiplier: Do not use
|
||||
# Store Value: As is
|
||||
UserParameter=ups.battery.charge[*],upsc $1@localhost battery.charge
|
||||
|
||||
# Description: Nut UPS Status
|
||||
# Type: Agent or Agent (active)
|
||||
# Key: ups.status
|
||||
# Type of information: Character
|
||||
# Show Value: As is (you can also define a dictionary OL=>On Line etc...)
|
||||
UserParameter=ups.status[*],upsc $1@localhost ups.status
|
||||
|
||||
# Description: Nut UPS Model
|
||||
# Type: Agent or Agent (active)
|
||||
# Key: ups.model
|
||||
# Type of information: Text
|
||||
UserParameter=ups.model[*],upsc $1@localhost ups.model
|
||||
|
||||
|
|
@ -1,36 +0,0 @@
|
|||
#!/bin/bash

# LVM statistics helper for the zabbix agent (invoked via sudo).
# Usage: check_lvm <snapshot_max_alloc|snapshots|lv|vg>
# Prints a single numeric value on stdout, or ZBX_NOTSUPPORTED for any
# unknown key so the zabbix server marks the item unsupported.

PATH=$PATH:/usr/sbin:/usr/local/sbin

# Highest "Allocated to snapshot" percentage across all snapshots (0 if none).
snapshot_max_alloc(){
    # awk performs a numeric comparison here.  The original
    # [[ "$PERCENT" > "$MAX_PERCENT" ]] compared the values as strings,
    # so e.g. "9.80" ranked above "10.00" and the wrong maximum was
    # reported once any snapshot passed 10%.
    lvdisplay | grep % | sed -e 's/ Allocated to snapshot //g' -e 's/%//g' | \
        awk 'BEGIN { max = 0 } { if ($1 + 0 > max) max = $1 + 0 } END { print max }'
}

# Number of active snapshots (lines with a '%' in lvdisplay output).
snapshots(){
    lvdisplay | grep % | wc -l
}

# Number of logical volumes.
lv(){
    lvdisplay | grep 'LV Name' | wc -l
}

# Number of volume groups.
vg(){
    vgdisplay | grep 'VG Name' | wc -l
}

case $1 in
    snapshot_max_alloc|snapshots|lv|vg)
        # $1 is validated against the whitelist above, so calling it
        # directly as a function is safe.
        $1
        ;;
    *)
        echo 'ZBX_NOTSUPPORTED'
        ;;
esac
|
||||
|
|
@ -1,155 +0,0 @@
|
|||
#!/usr/bin/env perl

# Get status of Linux software RAID for SNMP / Nagios
# Author: Michal Ludvig <michal@logix.cz>
# http://www.logix.cz/michal/devel/nagios

# Slightly modified by Daniel B. for integration on SME Server / Zabbix
# 24 Apr 2009
# - One line Report
# - Support RAID 0 Array
# - Report Warning if an array is rebuilding


#
# Simple parser for /proc/mdstat that outputs status of all
# or some RAID devices. Possible results are OK and CRITICAL.
# It could eventually be extended to output WARNING result in
# case the array is being rebuilt or if there are still some
# spares remaining, but for now leave it as it is.
#
# To run the script remotely via SNMP daemon (net-snmp) add the
# following line to /etc/snmpd.conf:
#
# extend raid-md0 /root/parse-mdstat.pl --device=md0
#
# The script result will be available e.g. with command:
#
# snmpwalk -v2c -c public localhost .1.3.6.1.4.1.8072.1.3.2

use strict;
use Getopt::Long;

# Sample /proc/mdstat output:
#
# Personalities : [raid1] [raid5]
# md0 : active (read-only) raid1 sdc1[1]
# 2096384 blocks [2/1] [_U]
#
# md1 : active raid5 sdb3[2] sdb4[3] sdb2[4](F) sdb1[0] sdb5[5](S)
# 995712 blocks level 5, 64k chunk, algorithm 2 [3/2] [U_U]
# [=================>...] recovery = 86.0% (429796/497856) finish=0.0min speed=23877K/sec
#
# unused devices: <none>

my $file = "/proc/mdstat";
my $device = "all";

# Get command line options.
GetOptions ('file=s' => \$file,
'device=s' => \$device,
'help' => sub { &usage() } );

## Strip leading "/dev/" from --device in case it has been given
$device =~ s/^\/dev\///;

## Return codes for Nagios
my %ERRORS=('OK'=>0,'WARNING'=>1,'CRITICAL'=>2,'UNKNOWN'=>3,'DEPENDENT'=>4);

## This is a global return value - set to the worst result we get overall
my $retval = 0;

# Per-device accumulators, keyed by md device name (e.g. "md0").
my (%active_devs, %failed_devs, %spare_devs, %devs_total, %level, %size, %status);
my @raids;
my $result = 'OK';

open FILE, "< $file" or die "Can't open $file : $!";
while (<FILE>) {
# Only process the "mdN : ..." header line of each array stanza.
next if ! /^(md\d+)+\s*:/;
next if $device ne "all" and $device ne $1;
my $dev = $1;
push @raids, $dev;

# Classify each member device on the header line: (F)ailed, (S)pare,
# or active; count totals and active members.
my @array = split(/ /);
$devs_total{$dev} = 0;
my $devs_up = 0;
my $missing = 0;
for $_ (@array) {
$level{$dev} = $1 if /^(raid\d+)$/;
next if ! /(\w+)\[\d+\](\(.\))*/;
$devs_total{$dev}++;
if ($2 eq "(F)") {
$failed_devs{$dev} .= "$1,";
}
elsif ($2 eq "(S)") {
$spare_devs{$dev} .= "$1,";
}
else {
$active_devs{$dev} .= "$1,";
$devs_up++;
}
}
# Normalise the comma-joined lists: "none" when empty, else strip the
# trailing comma.
if (! defined($active_devs{$dev})) { $active_devs{$dev} = "none"; }
else { $active_devs{$dev} =~ s/,$//; }
if (! defined($spare_devs{$dev})) { $spare_devs{$dev} = "none"; }
else { $spare_devs{$dev} =~ s/,$//; }
if (! defined($failed_devs{$dev})) { $failed_devs{$dev} = "none"; }
else { $failed_devs{$dev} =~ s/,$//; }

# Second line of the stanza: "NNN blocks ... [U_U]" — size and the
# up/missing bitmap (an underscore marks a missing member).
$_ = <FILE>;
/(\d+)\ blocks\ (.*)(\[.*\])\s?$/;
$size{$dev} = int($1/1024);
#print "$3\n";
$missing = 1 if ($3 =~ m/_/);
if ($size{$dev} > 1024){
$size{$dev} = int($size{$dev}/1024)."GB";
}
else{
$size{$dev} .= "MB";
}
# Third line (if any) may show a recovery progress bar.
$_ = <FILE>;
if (($devs_total{$dev} > $devs_up) || ($failed_devs{$dev} ne "none") || (($missing) && (!/recovery/))) {
$status{$dev} = "Degraded";
$result = "CRITICAL";
$retval = $ERRORS{"CRITICAL"};
}
else {
$status{$dev} = "Optimal";
}
# A rebuilding array downgrades the overall result to WARNING only if
# nothing worse was already recorded.
if (/recovery/){
$status{$dev} = "Rebuilding";
if ($result eq "OK"){
$result = "WARNING";
$retval = $ERRORS{"WARNING"};
}
}


}
# One-line report: "RESULT: mdN:level:N drives:size:status ..."
print "$result: ";
foreach my $raid (@raids){
print "$raid:$level{$raid}:$devs_total{$raid} drives:$size{$raid}:$status{$raid} ";
}
print "\n";
close FILE;
exit $retval;

# =====
# Print usage text and exit with status 1.
sub usage()
{
printf("
Check status of Linux SW RAID

Author: Michal Ludvig <michal\@logix.cz> (c) 2006
http://www.logix.cz/michal/devel/nagios
Modified by Daniel B. <daniel\@firewall-services.com>:

Usage: mdstat-parser.pl [options]

--file=<filename> Name of file to parse. Default is /proc/mdstat
--device=<device> Name of MD device, e.g. md0. Default is \"all\"

");
exit(1);
}
|
||||
|
|
@ -1,226 +0,0 @@
|
|||
#!/usr/bin/perl -w

# check_megaraid_sas Nagios plugin
# Copyright (C) 2007 Jonathan Delgado, delgado@molbio.mgh.harvard.edu
#
# This program is free software; you can redistribute it and/or
# modify it under the terms of the GNU General Public License
# as published by the Free Software Foundation; either version 2
# of the License, or (at your option) any later version.
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License
# along with this program; if not, write to the Free Software
# Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
#
#
# Nagios plugin to monitor the status of volumes attached to a LSI Megaraid SAS
# controller, such as the Dell PERC5/i and PERC5/e. If you have any hotspares
# attached to the controller, you can specify the number you should expect to
# find with the '-s' flag.
#
# The paths for the Nagios plugins lib and MegaCli may need to be changed.
#
# $Author: delgado $
# $Revision: #3 $ $Date: 2007/06/07 $

# Slightly modified by Daniel B. for SME Server integration with zabbix
# 23 Apr 2009

use strict;
use Getopt::Std;

# Option globals populated by getopts below.
our($opt_h, $opt_s, $opt_o, $opt_m, $opt_p);


getopts('hs:o:p:m:');

if ( $opt_h ) {
print "Usage: $0 [-s number] [-m number] [-o number]\n";
print " -s is how many hotspares are attached to the controller\n";
print " -m is the number of media errors to ignore\n";
print " -p is the predictive error count to ignore\n";
print " -o is the number of other disk errors to ignore\n";
exit;
}


my $megacli = '/opt/MegaRAID/MegaCli/MegaCli';

## Return codes for Nagios
my %ERRORS=('OK'=>0,'WARNING'=>1,'CRITICAL'=>2,'UNKNOWN'=>3,'DEPENDENT'=>4);

# Counters accumulated across all adapters/disks; thresholds (the
# *allow variables) come from the command line options.
my ($adapters);
my $hotspares = 0;
my $hotsparecount = 0;
my $pdbad = 0;
my $pdcount = 0;
my $mediaerrors = 0;
my $mediaallow = 0;
my $prederrors = 0;
my $predallow = 0;
my $othererrors = 0;
my $otherallow = 0;
my $result = '';
my $status = 'OK';

# Return the more severe of two Nagios states (CRITICAL > WARNING >
# UNKNOWN > OK); used so a later check never downgrades the status.
sub max_state ($$) {
my ($current, $compare) = @_;

if (($compare eq 'CRITICAL') || ($current eq 'CRITICAL')) {
return 'CRITICAL';
} elsif ($compare eq 'OK') {
return $current;
} elsif ($compare eq 'WARNING') {
return 'WARNING';
} elsif (($compare eq 'UNKNOWN') && ($current eq 'OK')) {
return 'UNKNOWN';
} else {
return $current;
}
}


if ( $opt_s ) {
$hotspares = $opt_s;
}
if ( $opt_m ) {
$mediaallow = $opt_m;
}
if ( $opt_p ) {
$predallow = $opt_p;
}
if ( $opt_o ) {
$otherallow = $opt_o;
}

# Get the number of RAID controllers we have
open (ADPCOUNT, "$megacli -adpCount -NoLog |")
|| die "error: Could not execute MegaCli -adpCount";

while (<ADPCOUNT>) {
if ( m/Controller Count:\s*(\d+)/ ) {
$adapters = $1;
last;
}
}
close ADPCOUNT;

ADAPTER: for ( my $adp = 0; $adp < $adapters; $adp++ ) {
# Get the number of logical drives on this adapter
open (LDGETNUM, "$megacli -LdGetNum -a$adp -NoLog |")
|| die "error: Could not execute $megacli -LdGetNum -a$adp";

my ($ldnum);
while (<LDGETNUM>) {
if ( m/Number of Virtual drives configured on adapter \d:\s*(\d+)/i ) {
$ldnum = $1;
last;
}
}
close LDGETNUM;

LDISK: for ( my $ld = 0; $ld < $ldnum; $ld++ ) {
# Get info on this particular logical drive
open (LDINFO, "$megacli -LdInfo -L$ld -a$adp -NoLog |")
|| die "error: Could not execute $megacli -LdInfo -L$ld -a$adp -NoLog";

my ($size, $unit, $raidlevel, $ldpdcount, $spandepth, $state);
while (<LDINFO>) {
if ( m/Size:\s*((\d+)(MB|GB|TB))/ ) {
$size = $2;
$unit = $3;
# Adjust MB to GB if that's what we got
if ( $unit eq 'MB' ) {
$size = sprintf( "%.0f", ($size / 1024) );
$unit= 'GB';
}
} elsif ( m/State:\s*(\w+)/ ) {
$state = $1;
# Any non-Optimal volume makes the overall status CRITICAAL
# directly (max_state not needed: CRITICAL is the worst state).
if ( $state ne 'Optimal' ) {
$status = 'CRITICAL';
}
} elsif ( m/Number Of Drives( per span)?:\s*(\d+)/ ) {
$ldpdcount = $2;
} elsif ( m/Span Depth:\s*(\d+)/ ) {
$spandepth = $1;
# Multi-span arrays report drives per span; scale to the total.
$ldpdcount = $ldpdcount * $spandepth;
} elsif ( m/RAID Level: Primary-(\d)/ ) {
$raidlevel = $1;
}
}
close LDINFO;

$result .= "$adp:$ld:RAID-$raidlevel:$ldpdcount drives:$size$unit:$state ";

} #LDISK
close LDINFO;

# Get info on physical disks for this adapter
open (PDLIST, "$megacli -PdList -a$adp -NoLog |")
|| die "error: Could not execute $megacli -PdList -a$adp -NoLog";

my ($slotnumber,$fwstate);
PDISKS: while (<PDLIST>) {
if ( m/Slot Number:\s*(\d+)/ ) {
$slotnumber = $1;
# Don't care about backplane error counts
next if ( $slotnumber == 255 );
$pdcount++;
} elsif ( m/(\w+) Error Count:\s*(\d+)/ ) {
if ( $1 eq 'Media') {
$mediaerrors += $2;
} else {
$othererrors += $2;
}
} elsif ( m/Predictive Failure Count:\s*(\d+)/ ) {
$prederrors += $1;
} elsif ( m/Firmware state:\s*(\w+)/ ) {
$fwstate = $1;
if ( $fwstate eq 'Hotspare' ) {
$hotsparecount++;
} elsif ( $fwstate eq 'Online' ) {
# Do nothing
} elsif ( $slotnumber != 255 ) {
# Any other firmware state on a real slot counts as a bad drive.
$pdbad++;
$status = 'CRITICAL';
}
}
} #PDISKS
close PDLIST;
}

$result .= "Drives:$pdcount ";

# Any bad disks?
if ( $pdbad ) {
$result .= "$pdbad Bad Drives ";
}

my $errorcount = $mediaerrors + $prederrors + $othererrors;
# Were there any errors?
if ( $errorcount ) {
$result .= "($errorcount Errors) ";
# Only warn when a counter exceeds its command-line threshold.
if ( ( $mediaerrors > $mediaallow ) ||
( $prederrors > $predallow ) ||
( $othererrors > $otherallow ) ) {
$status = max_state($status, 'WARNING');
}
}

# Do we have as many hotspares as expected (if any)
if ( $hotspares ) {
if ( $hotsparecount < $hotspares ) {
$status = max_state($status, 'WARNING');
$result .= "Hotspare(s):$hotsparecount (of $hotspares)";
} else {
$result .= "Hotspare(s):$hotsparecount";
}
}

print STDOUT "$status: $result\n";
exit $ERRORS{$status};
|
|
@ -1,57 +0,0 @@
|
|||
#!/bin/bash

# Temperature helper for the zabbix agent (called via sudo).
# Usage: sensors <cpu0|cpu1|mb|ambiant|hdX|sdX>
# Prints a single numeric temperature, or ZBX_NOTSUPPORTED for unknown keys.
# NOTE(review): the awk quoting below ('{print $1'}) is unusual but expands
# to the same word as '{print $1}' — kept byte-identical.

KEY=$1

case $KEY in
cpu0)
# Here are some examples on how to retrieve temperatures
# of your system:
#
# If your motherboard supports IPMI and you have the ipmitool package
# you can use this (adapt the command: each controller may report
# different sensor names):

# /usr/bin/ipmitool sdr | grep 'P1 Therm Margin' | cut -d'|' -f 2 | awk '{print $1'}

# Else, if your motherboard supports lm_sensors, you can use something
# like this:
# /usr/bin/sensors | grep temp1 | cut -d':' -f 2 | awk '{print $1'} | sed -e "s/+//g" -e "s/.C//g"

# You can also try to get your CPU temperature with acpi:
# cat /proc/acpi/thermal_zone/THRM/temperature | awk '{print $2}'

# It's important that your commands return only numerical values

# The default for now is to use IPMI
/usr/bin/ipmitool sdr type Temperature | grep 'P1 Therm Margin' | cut -d'|' -f 2 | awk '{print $1'}

;;
cpu1)
# This will be the same as the above, but for the second CPU

/usr/bin/ipmitool sdr type Temperature | grep 'P2 Therm Margin' | cut -d'|' -f 2 | awk '{print $1'}

;;
mb)
# AFAIK, motherboard temperature can be retrieved only with lm_sensors or IPMI

/usr/bin/ipmitool sdr type Temperature | grep 'Baseboard' | cut -d'|' -f 2 | awk '{print $1'}

;;
ambiant)
# Some IPMI controllers also report the ambient temperature
/usr/bin/ipmitool sdr type Temperature | grep Ambient | cut -d'|' -f 2 | awk '{print $1'}

;;
hd*|sd*)
# Here, we want a hard drive temperature, so we'll use smartctl.
# We could also use hddtemp but it doesn't seem to work for a lot of
# drives, where smartctl does.
/usr/sbin/smartctl -a /dev/$KEY | grep Temperature_Celsius | awk '{print $10}'

;;
*)
# Else, we tell the server the item is not supported
echo 'ZBX_NOTSUPPORTED'
;;
esac
|
||||
|
Loading…
Reference in New Issue