Wednesday, December 31, 2014

x86 Hardware RAID Traps with BS_RAID_CHK


The following script is designed to run on Solaris x86 or Red Hat systems with LSI and Adaptec hardware RAID controllers. It checks those controllers for degraded states using mpt-status, arcconf, or raidctl, depending on the hardware vendor and OS. If a degraded state is found, it sends an SNMP trap to your snmptrap collector.

#!/usr/bin/perl
#################################################################
# Script checks status of hardware raid on X86 hardware        #
# Supported OS's: Solaris x86, Redhat                          #
# Supported Controller's: LSI & Adaptec                        #
# Note: Requires mpt-status for LSI                            #
# Note: Requires arcconf for Adaptec                           #
# Note: Requires raidctl for Solaris x86                       #
# Sends trap if degraded state                                 #
#################################################################
#
# Flow:
#   1. Pick the checks to run from the kernel name (uname -s).
#   2. Every check returns a list of findings, each one being
#      [ STATE, STATUS_TEXT ] with STATE in CRITICAL / WARNING / OK.
#   3. Only the single worst finding is reported.  Reporting per line
#      would announce "OK" for a healthy line that happens to precede
#      a degraded one, and would let a WARNING hide a later CRITICAL.
#   4. Reporting replaces the marker file /tmp/$prefix.STATE and sends
#      one SNMP v2c trap.  CRITICAL and WARNING are re-sent on every
#      run; OK is sent only when the OK marker does not exist yet.
#
use strict;
use warnings;

### Site configuration ###
my $prefix = "bs_raid_chk";       # basename of the /tmp state marker files
my $servicechk = "unix_traps";    # first string varbind of every trap
my $community = "asdpublic";      # SNMP v2c community string
my $manager = "10.66.65.23";      # host receiving the traps
my $raidctl = '/usr/sbin/raidctl';
my $mptstatus = '/usr/sbin/mpt-status';
my $arcconf = '/usr/StorMan/arcconf';

### Trap layout ###
my $oid_base = '.1.3.6.1.4.1.11.2.17';
# Trap OID sent is "$oid_base.1.0.<number>".
my %trap_number_for = ( CRITICAL => 1005, WARNING => 1006, OK => 1007 );
# Higher rank wins when several findings are present.
my %rank_of = ( OK => 0, WARNING => 1, CRITICAL => 2 );

### Main ###
# uname -s prints only the kernel name; uname -a also contains the host
# name, which could itself contain "Linux" or "SunOS".
my $os = `uname -s`;
$os = '' unless defined $os;
chomp $os;

my $snmptrap;
my @findings;
if ($os eq 'SunOS') {
    ### If Solaris System use this check ###
    $snmptrap = '/usr/sfw/bin/snmptrap';
    push @findings, check_raidctl() if -e $raidctl;
}
elsif ($os eq 'Linux') {
    ### If Linux system use these checks ###
    $snmptrap = '/usr/bin/snmptrap';
    # If system has LSI controller, then mpt-status should be installed
    push @findings, check_mptstatus() if -e $mptstatus;
    # If system has Adaptec controller, then arcconf should be installed
    push @findings, check_arcconf() if -e $arcconf;
}

my $worst = worst_finding(@findings);
report_state($snmptrap, @{$worst}) if defined $worst;
exit;

### Helpers ###

# Map one line of raidctl / mpt-status output to a state name.
# Returns 'CRITICAL', 'WARNING' or 'OK', or nothing when the line carries
# none of the DEGRADED / SYNC / OPTIMAL keywords.
sub classify_volume_line {
    my ($line) = @_;
    return 'CRITICAL' if $line =~ /DEGRADED/;
    return 'WARNING'  if $line =~ /SYNC/;
    return 'OK'       if $line =~ /OPTIMAL/;
    return;
}

# Return the first finding with the highest rank, or undef for an empty
# list.  The strict '>' keeps the earliest finding among equals.
sub worst_finding {
    my @candidates = @_;
    my $worst_so_far;
    foreach my $candidate (@candidates) {
        if (!defined $worst_so_far
            || $rank_of{ $candidate->[0] } > $rank_of{ $worst_so_far->[0] }) {
            $worst_so_far = $candidate;
        }
    }
    return $worst_so_far;
}

# Solaris: parse "raidctl -S".  For a matching line the fields are read as
#   <first field> <disk count> <one field per disk> <raid level> ... <status>
# so the RAID level sits at index (disk count + 2).
# Returns a list of [ STATE, STATUS_TEXT ] findings.
sub check_raidctl {
    my @found;
    foreach my $line (`$raidctl -S`) {
        chomp $line;
        my $state = classify_volume_line($line);
        next unless defined $state;

        my @fields = split ' ', $line;
        my $disks = defined $fields[1] ? $fields[1] : '';
        my $raid = '';
        # Only index by the disk count when it really is a number.
        if ($disks =~ /^\d+$/ && defined $fields[ $disks + 2 ]) {
            $raid = $fields[ $disks + 2 ];
        }
        # The trailing newline is part of the established trap payload.
        push @found, [
            $state,
            "$fields[-1] - Controller: $fields[0], RAID: $raid, Number of Disks: $disks\n",
        ];
    }
    return @found;
}

# True when lsmod lists the mptctl kernel module.
sub mptctl_loaded {
    return scalar grep { /mptctl/ } `/sbin/lsmod`;
}

# Linux / LSI: make sure mptctl is loaded, probe for the SCSI id with
# "mpt-status -p -s", then classify every line of "mpt-status -i ID -s".
# Every finding carries the complete tool output (each line prefixed with
# one space) as its status text.
# Prints an ABORT message and exits when mptctl cannot be loaded.
# Returns a list of [ STATE, STATUS_TEXT ] findings.
sub check_mptstatus {
    if (!mptctl_loaded()) {
        system('/sbin/modprobe', 'mptctl');
        if (!mptctl_loaded()) {
            print "ABORT: Failed to load mptctl module.\n";
            exit;
        }
    }

    # The probe is expected to print "Found SCSI id=N, ..."; use the first.
    my ($id) = map { /Found SCSI id=(\d+)/ ? $1 : () } `$mptstatus -p -s`;
    # No id reported: nothing to query, and "-i" must not be run without one.
    return if !defined $id;

    my @lines = `$mptstatus -i $id -s`;
    chomp @lines;
    my $status = join '', map { " $_" } @lines;

    my @found;
    foreach my $line (@lines) {
        my $state = classify_volume_line($line);
        push @found, [ $state, $status ] if defined $state;
    }
    return @found;
}

# Linux / Adaptec: inspect the "Status ... :" lines of "arcconf getconfig 1".
# A line containing "Optimal" is an OK finding; otherwise the kind of line
# decides the state and the fixed status text.
# Returns a list of [ STATE, STATUS_TEXT ] findings.
sub check_arcconf {
    my @found;
    foreach my $line (grep { /Status/ && /:/ } `$arcconf getconfig 1`) {
        if ($line =~ /Optimal/) {
            push @found, [ 'OK', "Hardware RAID - OK" ];
        }
        elsif ($line =~ /Controller Status/) {
            push @found, [ 'CRITICAL', "Controller not optimal" ];
        }
        elsif ($line =~ /  Status  /) {
            # A bare "Status" label is treated as the battery status.
            push @found, [ 'WARNING', "Battery not optimal" ];
        }
        elsif ($line =~ /Status of logical device/) {
            push @found, [ 'CRITICAL', "Logical HW RAID Volume not optimal" ];
        }
    }
    return @found;
}

# Record $state in /tmp and send the matching trap.
#   $snmptrap - path of the snmptrap binary to run
#   $state    - 'CRITICAL', 'WARNING' or 'OK'
#   $status   - text sent as the second string varbind
# An OK state whose marker file already exists is not reported again.
sub report_state {
    my ($snmptrap, $state, $status) = @_;
    my $marker = "/tmp/$prefix.$state";
    return if $state eq 'OK' && -e $marker;

    # Drop every previous marker, then create the current one.
    unlink glob("/tmp/$prefix.*");
    if (open my $fh, '>', $marker) {
        close $fh;
    }
    else {
        warn "Cannot create $marker: $!\n";
    }

    # List form: no shell is involved, so quotes, '$' or backticks inside
    # the tool output cannot break or alter the command.
    system($snmptrap, '-v', '2c', '-c', $community, $manager, '',
        "$oid_base.1.0.$trap_number_for{$state}",
        $oid_base, 's', $servicechk,
        $oid_base, 's', $status);
    return;
}