zabbix-agent-addons/zabbix_scripts/check_zfs

183 lines
5.5 KiB
Perl

#!/usr/bin/perl -w
use strict;
use warnings;
use JSON;
use File::Which;
use Getopt::Long;
my $json = {};
my $pool = undef;
my $dataset = undef;
my $sanoidmon = undef;
my $stats = undef;
my $pretty = 0;
GetOptions(
"zpool|pool=s" => \$pool,
"dataset=s" => \$dataset,
"sanoid=s" => \$sanoidmon,
"stats=s" => \$stats,
"pretty" => \$pretty
);
my $zpool = which('zpool');
my $zfs = which('zfs');
my $sanoid = which('sanoid');
if (not $zpool or not $zfs){
print 'ZBX_NOTSUPPOTED';
exit 0;
}
if (defined $sanoidmon and not $sanoid){
die 'ZBX_NOTSUPPOTED';
}
if (defined $sanoidmon and not grep { $_ eq $sanoidmon } qw(snapshot capacity health)){
die 'ZBX_NOTSUPPOTED';
}
if (not $pool and not $dataset and not $sanoidmon and not $stats){
print <<_EOF;
Usage:
$0 [--zpool=<name>|--dataset=<fs zvol or snap>|--sanoid=<snapshot|capacity|health>]
_EOF
exit 1;
}
# Value map. For Zabbix, we want 0 instead of none
# We also prefer on/off represented as 1/0 as it's more efficient
my $map = {
18446744073709551615 => 0, # See https://github.com/zfsonlinux/zfs/issues/9306
none => 0,
on => 1,
off => 0
};
if ($pool){
foreach (qx($zpool get all $pool -p -H)){
chomp;
my @parse = split /\t+/, $_;
$json->{$parse[1]} = (defined $map->{$parse[2]}) ? $map->{$parse[2]} : $parse[2];
$json->{errors} = get_zpool_errors($pool);
$json->{stats} = get_zpool_stats($pool);
}
} elsif ($dataset){
# Convert %40 back to @ (we send them as %40 in the discovery because @ is not allowed in item keys
$dataset =~ s/%40/\@/g;
foreach (qx($zfs get all $dataset -p -H)){
chomp;
my @parse = split /\t+/, $_;
$json->{$parse[1]} = (defined $map->{$parse[2]}) ? $map->{$parse[2]} : $parse[2];
if ($parse[1] =~ m/compressratio$/){
# Remove trailing x for compressratio and refcompressratio as before 0.8.0 it can be like 1.23x
$json->{$parse[1]} =~ s/x$//;
}
}
} elsif ($sanoidmon){
print qx($sanoid --monitor-$sanoidmon);
exit $?;
} elsif ($stats){
if (not -e '/proc/spl/kstat/zfs/' . $stats){
print 'ZBX_NOTSUPPORTED';
exit 0;
}
open STATS, '</proc/spl/kstat/zfs/' . $stats;
while (<STATS>){
next unless (m/^(\w+)\s+4\s+(\d+)$/);
$json->{$1} = $2;
}
}
print to_json($json, { pretty => $pretty }) . "\n";
exit 0;
sub get_zpool_errors {
my $pool = shift;
my $errors = {
read_errors => 0,
write_errors => 0,
cksum_errors => 0
};
my $i = 0;
my $index = {};
foreach my $line (qx($zpool status $pool 2>/dev/null)){
# Output looks like
# pool: rpool
# state: ONLINE
# status: One or more devices has experienced an unrecoverable error. An
# attempt was made to correct the error. Applications are unaffected.
# action: Determine if the device needs to be replaced, and clear the errors
# using 'zpool clear' or replace the device with 'zpool replace'.
# see: http://zfsonlinux.org/msg/ZFS-8000-9P
# scan: scrub repaired 0B in 0h5m with 0 errors on Tue May 29 10:04:31 2018
# config:
#
# NAME STATE READ WRITE CKSUM
# rpool ONLINE 0 0 0
# mirror-0 ONLINE 0 0 0
# sda2 ONLINE 0 0 0
# sdb2 ONLINE 0 0 474
#
# errors: No known data errors
# We want to save status, action, scan and errors
if ($line =~ m/^\s*(scan|action|status|errors):\s+(\w+.*)/){
$errors->{$1} = $2;
$index->{$i} = $1;
} elsif ($line !~ /:/ and defined $index->{$i-1}){
# Here, we reconstitute multiline values (like status and action)
chomp($line);
$line =~ s/\s+/ /g;
$errors->{$index->{$i-1}} .= $line;
} elsif ($line =~ m/\s+[a-zA-Z0-9_\-]+\s+[A-Z]+\s+(?<read>\d+(\.\d+)?)(?<read_suffix>[KMT])?\s+(?<write>\d+(\.\d+)?)(?<write_suffix>[KMT])?\s+(?<cksum>\d+(\.\d+)?)(?<cksum_suffix>[KMT])?/){
# And here, we count the number of read, write and checksum errors
# Note that on ZoL 0.8.0 we could use zpool status -p to get rid of the suffixes
# But -p is not supported on 0.7 and earlier, so, we just convert them manually
$errors->{read_errors} += convert_suffix($+{'read'},$+{'read_suffix'});
$errors->{write_errors} += convert_suffix($+{'write'},$+{'write_suffix'});
$errors->{cksum_errors} += convert_suffix($+{'write'},$+{'write_suffix'});
}
$i++;
}
# Ensure evey item returns something
$errors->{$_} ||= '' foreach (qw(scan action status errors));
return $errors;
}
# Error counter can be suffixed. Apply this suffix to get raw error numbers
sub convert_suffix {
my $val = shift;
my $suf = shift;
if (!$suf){
return $val;
} elsif ($suf eq 'K'){
$val *= 1000;
} elsif ($suf eq 'M') {
$val *= 1000000;
} elsif ($suf eq 'T') {
$val *= 1000000000;
}
return $val;
}
sub get_zpool_stats {
my $pool = shift;
my $stats = {};
open UPTIME, "</proc/uptime";
$_ = <UPTIME>;
chomp;
my ($uptime , undef) = split;
$uptime = int $uptime;
close UPTIME;
foreach my $line (qx($zpool iostat $pool -pH)){
if ($line =~ m/^$pool\s+\d+\s+\d+\s+(?<reads>\d+)\s+(?<writes>\d+)\s+(?<nread>\d+)\s+(?<nwritten>\d+)/){
# zpool iostat shows average IO since boot, so just multiply it
# by the uptime in seconds to get cumulated IO since boot
# Zabbix server will then be able to calculate the delta between two values
$stats->{$_} = $+{$_} * $uptime foreach (keys %+);
last;
}
}
return $stats;
}