Better debug info and more robust execution for GlusterFS scripts

This commit is contained in:
Daniel Berteaud 2014-07-16 10:36:30 +02:00
parent 329eb1557a
commit 5e8df466e0
2 changed files with 52 additions and 37 deletions

View File

@ -35,6 +35,23 @@ If --what=peer you need to pass --peer=<host>
EOF
}
# Run a gluster command line and return its output.
#
# Argument: $cmd - the complete command line, as a single string.
# Returns:  the lines written by the command on its standard output
#           (list, line endings preserved).
# Dies:     if the command cannot be started, or if it still exits with a
#           non-zero status after the last attempt.
#
# The command is retried up to 10 times, one second apart, because a gluster
# command can fail if two run at the same time.
sub gluster($){
    my $cmd = shift;
    my $max_tries = 10;
    my $code = 256;
    my @result = ();
    for my $try (1 .. $max_tries){
        # Lexical handle + explicit pipe mode: no global bareword handle, and
        # the mode can never be influenced by the content of $cmd
        open(my $res, '-|', $cmd)
            or die "error: Could not execute $cmd: $!";
        @result = <$res>;
        # Closing a pipe waits for the child and stores its wait status in $?
        close($res);
        $code = $?;
        last if ($code == 0);
        # Only wait when another attempt is going to follow
        sleep(1) if ($try < $max_tries);
    }
    die "error: Could not execute $cmd (wait status $code after $max_tries attempts)"
        unless ($code == 0);
    return @result;
}
if (($what eq 'volume' && !$volume) ||
($what eq 'peer' && !$peer) ||
($what ne 'volume' && $what ne 'peer')){
@ -42,67 +59,56 @@ if (($what eq 'volume' && !$volume) ||
}
# Main dispatch on $what: 'volume' prints a one-line health status for $volume,
# 'peer' prints the state reported for $peer.
# NOTE(review): this listing looks like a diff rendered without +/- markers, so
# several statements appear twice: the filehandle based form (open / foreach
# over the handle / close) directly followed by its gluster() based
# replacement. Confirm against the real file before editing any of it.
if ($what eq 'volume'){
open (VOLUMEINFO, "$gluster vol status $volume |")
|| die "error: Could not execute gluster vol status $volume";
my @volinfo = gluster("$gluster vol status $volume");
# $bricksfound counts the "Brick ..." lines matched below.
# $status starts as 'OK' and is overwritten by every failing check, so the
# last failing check is the one that ends up printed.
my $bricksfound = 0;
my $status = 'OK';
foreach my $line (<VOLUMEINFO>){
foreach my $line (@volinfo){
# Check that all bricks are online
# $1 is the brick (host:/path), $2 the flag following the numeric column;
# anything other than 'Y' is reported as critical.
if ($line =~ m/^Brick\ ([\w\.]+:\/[\w\.\/]+)\s+\d+\s+(Y|N)/){
if ($line =~ m/^Brick\ ([\w\.]+:\/[\w\.\/]+)\s+\d+\s+([A-Z])/){
$bricksfound++;
$status = "CRITICAL: brick status ($1)" if ($2 ne 'Y');
$status = "CRITICAL: brick status (reported $2 on $1)" if ($2 ne 'Y');
}
# Check the Self-Heal daemons are up and running
# NOTE(review): "\\s+" in the two patterns below matches a literal backslash
# followed by one or more 's' characters, not whitespace. "\s+" was presumably
# intended; as written this branch can only match a line containing "N/A\s".
elsif ($line =~ m/^Self-heal\ Daemon\ on\ ([\w\.]+)\s+N\/A\\s+(Y|N)/){
$status = "CRITICAL: self-heal daemon ($1)" if ($2 ne 'Y');
elsif ($line =~ m/^Self-heal\ Daemon\ on\ ([\w\.]+)\s+N\/A\\s+([A-Z])/){
$status = "CRITICAL: self-heal daemon (reported $2 on $1)" if ($2 ne 'Y');
}
}
# Check the number of bricks is the one we expect
# The comparison is skipped when $bricks is false (unset or 0).
if ($bricks && $bricks != $bricksfound){
$status = 'CRITICAL: bricks count mismatch';
$status = "CRITICAL: bricks count mismatch (found $bricksfound while expecting $bricks)";
}
close VOLUMEINFO;
open (VOLUMEINFO, "$gluster vol heal $volume info heal-failed |")
|| die "error: Could not execute gluster vol heal $volume info heal-failed";
foreach my $line (<VOLUMEINFO>){
@volinfo = gluster("$gluster vol heal $volume info heal-failed");
foreach my $line (@volinfo){
# Now, check we don't have any file which the Self-Heal daemon couldn't sync
# NOTE(review): "gt" is a string comparison; ">" is presumably what is meant
# for the captured entry count.
if ($line =~ m/^Number\ of\ entries:\s+(\d+)$/){
$status = "CRITICAL: self-heal error ($1)" if ($1 gt 0);
}
}
close VOLUMEINFO;
open (VOLUMEINFO, "$gluster vol heal $volume info split-brain |")
|| die "error: Could not execute gluster vol heal $volume info split-brain";
foreach my $line (<VOLUMEINFO>){
@volinfo = gluster("$gluster vol heal $volume info split-brain");
foreach my $line (@volinfo){
# Now, check we don't have any file in a split-brain situation
# NOTE(review): same string comparison ("gt") as in the heal-failed check, and
# the message below reads "split-bran", presumably a typo for "split-brain".
if ($line =~ m/^Number\ of\ entries:\s+(\d+)$/){
$status = "CRITICAL: split-bran ($1)" if ($1 gt 0);
}
}
close VOLUMEINFO;
open (VOLUMEINFO, "$gluster vol info $volume |")
|| die "error: Could not execute gluster vol info $volume";
foreach my $line (<VOLUMEINFO>){
@volinfo = gluster("$gluster vol info $volume");
foreach my $line (@volinfo){
# Check the volume is started
if ($line =~ m/^Status:\s+(\w+)$/){
$status = 'CRITICAL: The volume is not started' unless ($1 eq 'Started');
}
}
close VOLUMEINFO;
# Emit the final status string (no trailing newline is added).
print $status;
}
elsif ($what eq 'peer'){
open (PEERLIST, "$gluster pool list |")
|| die "error: Could not execute gluster pool list";
my @peers = gluster("$gluster pool list");
# Stays 'unknown' when no line of the pool list matches $peer.
my $status = 'unknown';
foreach my $line (<PEERLIST>){
foreach my $line (@peers){
# A line is selected when it starts with $peer, or with a UUID followed by
# $peer; the third whitespace-separated field of that line becomes the status.
# NOTE(review): $peer is interpolated into the patterns unescaped, so regex
# metacharacters in it (e.g. the dots of a host name) are not matched
# literally; \Q$peer\E would be - confirm whether that matters here.
if (($line =~ m/^$peer\s+/) ||
($line =~ m/^[0-9a-f]{8}-([0-9a-f]{4}-){3}[0-9a-f]{12}\s+$peer\s+/)){
(undef,undef,$status) = split(/\s+/, $line);
}
}
close PEERLIST;
print $status;
}

View File

@ -29,11 +29,25 @@ Usage: $0 --what=[volumes|peers]
EOF
}
if ($what eq 'volumes'){
open (VOLUMES, "$gluster vol info all |")
|| die "error: Could not execute gluster vol info all";
# Run a gluster command line and return its output.
#
# Argument: $cmd - the complete command line, as a single string.
# Returns:  the lines written by the command on its standard output
#           (list, line endings preserved).
# Dies:     if the command cannot be started, or if it still exits with a
#           non-zero status after the last attempt.
#
# The command is retried up to 10 times, one second apart, because a gluster
# command can fail if two run at the same time.
sub gluster($){
    my $cmd = shift;
    my $max_tries = 10;
    my $code = 256;
    my @result = ();
    for my $try (1 .. $max_tries){
        # Lexical handle + explicit pipe mode: no global bareword handle, and
        # the mode can never be influenced by the content of $cmd
        open(my $res, '-|', $cmd)
            or die "error: Could not execute $cmd: $!";
        @result = <$res>;
        # Closing a pipe waits for the child and stores its wait status in $?
        close($res);
        $code = $?;
        last if ($code == 0);
        # Only wait when another attempt is going to follow
        sleep(1) if ($try < $max_tries);
    }
    die "error: Could not execute $cmd (wait status $code after $max_tries attempts)"
        unless ($code == 0);
    return @result;
}
foreach my $line (<VOLUMES>){
if ($what eq 'volumes'){
foreach my $line (gluster("$gluster vol info all")){
if ($line =~ m/^Volume\ Name:\ (\w+)$/){
my $vol = $1;
my ($type,$bricks,$uuid,$status,$transport) = ('unknown');
@ -56,7 +70,6 @@ if ($what eq 'volumes'){
$bricks = $1;
}
}
close VOLUMEINFO;
push @{$json->{data}}, {
"{#GLUSTER_VOL_NAME}" => $vol,
"{#GLUSTER_VOL_TYPE}" => $type,
@ -67,15 +80,12 @@ if ($what eq 'volumes'){
};
}
}
close VOLUMES;
}
elsif ($what eq 'peers'){
open (PEERS, "$gluster peer status |")
|| die "error: Could not execute gluster peer status";
elsif ($what eq 'peers'){
my $peerno = 0;
my ($host,$uuid,$status) = ('unknown');
foreach my $line (<PEERS>){
foreach my $line (gluster("$gluster peer status")){
if ($line =~ m/^Number of Peers:\ (\d+)$/){
$peerno = $1;
}
@ -94,7 +104,6 @@ elsif ($what eq 'peers'){
};
}
}
close PEERS;
}
else{
usage();