#!/usr/bin/perl -wT
# author: Al Tobey <albert.tobey@priority-health.com>
# what:   monitor a process using the host-resources mib
# license: GPL - http://www.fsf.org/licenses/gpl.txt
#
# Todo:
# * implement memory and cpu utilization checks
# * maybe cache pids in DBM files if snmp agents get overworked
###############################################################################
# to get a list of processes over snmp try this command:
# snmptable -v2c -c public hostname hrSWRunTable
# for just a list of valid arguments for the '-e' option:
# snmpwalk -v2c -c public hostname hrSWRunName |perl -pe 's:.*/::'
###############################################################################

require 5.006;
use strict;
use lib utils.pm ;
use Plugin;
use Plugin::Parameter qw (:DEFAULT :snmp :thresholds $configfileparameter);

use Net::SNMP;

use Storable;

use vars qw( $exit $opt_version $opt_timeout $opt_help $opt_command $opt_host $opt_community
	     $opt_verbose $opt_warning $opt_critical $opt_memory $opt_cpu $opt_port $opt_regex
	     $opt_warn $opt_crit $opt_t $cachefile
	     $opt_stats $opt_cache $opt_nocache $cache_exp $interpreters $snmp_session $PROGNAME );

use utils qw(%ERRORS &usage);

# --------------------------------------------------------------------------- #
# Command line parameter definitions.
#
# $H_opt, $C_opt, $w_opt, $c_opt, $p_opt, $t_opt, $v_opt, $h_opts and $V_opts
# are not declared in this file; presumably they are exported by
# Plugin::Parameter (:DEFAULT :snmp :thresholds) - verify against that module.
# Every ->binding() / -binding value must be a REFERENCE to the variable that
# is to receive the parsed option value.
# --------------------------------------------------------------------------- #
$H_opt->binding(\$opt_host);
$C_opt->binding(\$opt_community);

my $e_opt = Plugin::Parameter->new(
    -name          => "command",
    -flags         => [ 'e', 'command' ],
    -optional      => "yes",
    -valueoptional => "no",
    -type          => "COMMANDNAME",
    -binding       => \$opt_command,
    -description   => "what command should be monitored? (ps -e style)");

my $s_opt = Plugin::Parameter->new(
    -name          => "statistics",
    -flags         => [ 's', 'statistics' ],
    -optional      => "yes",
    -valueoptional => "yes",
    -type          => "NONE",
    -binding       => \$opt_stats,
    -description   => "Output Nagios statistics");

my $memory_opt = Plugin::Parameter->new(
    -name          => "memory",
    -flags         => [ 'memory' ],
    -optional      => "yes",
    -valueoptional => "yes",
    -type          => "NONE",
    -binding       => \$opt_memory,
    -description   => "combined with '-s', will print the number of bytes of real memory used by process");

my $cpu_opt = Plugin::Parameter->new(
    -name          => "cpu",
    -flags         => [ 'cpu' ],
    -optional      => "yes",
    -valueoptional => "yes",
    -type          => "NONE",
    -binding       => \$opt_cpu,
    -description   => "combined with '-s', will print the number of seconds of cpu time consumed by process");

my $nocache_opt = Plugin::Parameter->new(
    -name          => "nocache",
    -flags         => [ 'nocache' ],
    -optional      => "yes",
    -valueoptional => "yes",
    -type          => "NONE",
    -binding       => \$opt_nocache,
    -description   => "Do not cache SNMP data");

# The binding has to be a reference like all the others; passing the (still
# undefined) value of $opt_regex meant that -r could never be stored.
my $r_opt = Plugin::Parameter->new(
    -name          => "regex",
    -flags         => [ 'r', 'regex' ],
    -optional      => "yes",
    -valueoptional => "no",
    -type          => "REGEX",
    -binding       => \$opt_regex,
    -description   => "use a perl regular expression to find your process");

$w_opt->binding(\$opt_warn);
$w_opt->description("minimum and maximum number of processes before a warning is issued");
$w_opt->optional("yes");
$c_opt->binding(\$opt_crit);
$c_opt->description("minimum and maximum number of processes before a critical is issued");
$c_opt->optional("yes");
$p_opt->binding(\$opt_port);
$p_opt->default(161);
$v_opt->binding(\$opt_verbose);

# The "config file" parameter is reused to name the cache directory; the main
# program later appends "/<host>.proc" to the value.
my $cf_opt = $configfileparameter;
$cf_opt->flags([ 'cf', 'config-file' ]);
$cf_opt->binding(\$cachefile);
$cf_opt->default("/var/tmp/nagios/");

# Turn a "MIN,MAX" or "MIN" threshold argument into [ MIN, MAX ]; a missing
# MAX becomes -1.  Returns nothing when the text is not made of integers.
my $parse_threshold = sub {
    my ($text) = @_;
    return [ $1, $2 ] if $text =~ m/^(-?\d+),(-?\d+)$/;
    return [ $1, -1 ] if $text =~ m/^(-?\d+)$/;
    return;
};

my $plugin = Plugin->new(
    -revision     => '$Revision: 1.2 $',
    -copyright    => "2002 Al Tobey <albert.tobey\@priority-health.com>, 2004 Howard Wilkinson <howard\@cohtech.com>",
    -shortcomment => "SNMP Process Monitor plugin for Nagios",
    -longcomment  => "One of -e (command) or -r (regex) must be specified",
    # Validation callback: checks the option combination and converts the raw
    # -w / -c strings into the [ min, max ] pairs used by the main program.
    -checker      => sub {
        my ($plugin) = @_;

        if (!defined $opt_command && !defined $opt_regex) {
            $plugin->usage();
            usage("$PROGNAME UNKNOWN: Specify either -e or -r\n");
        }

        if (defined $opt_warn) {
            my $range = $parse_threshold->($opt_warn);
            if (defined $range) {
                $opt_warning = $range;
            } else {
                $plugin->usage();
                usage("$PROGNAME UNKNOWN: Warning values must be integers\n");
            }
        }

        if (defined $opt_crit) {
            my $range = $parse_threshold->($opt_crit);
            if (defined $range) {
                $opt_critical = $range;
            } else {
                $plugin->usage();
                usage("$PROGNAME UNKNOWN: Critical values must be integers\n");
            }
        }

        # --memory / --cpu double as accumulators: reset them to 0 when the
        # flag was given, leave them undefined otherwise.
        $opt_memory = 0 if (defined $opt_memory);
        $opt_cpu    = 0 if (defined $opt_cpu);
        $opt_cache  = 0 if (defined $opt_nocache);
    },
    -parameterlists => [ [ $H_opt, $C_opt, $e_opt, $r_opt, $w_opt, $c_opt, $t_opt, $s_opt, $memory_opt, $cpu_opt, $nocache_opt, $p_opt, $cf_opt, $v_opt ], $h_opts, $V_opts ]);

# Run-time defaults.  These are assigned before $plugin->init() is called in
# the main section further down.

# [ minimum, maximum ] process counts; a value that is not greater than zero
# switches the corresponding bound off in the threshold tests of the main
# program.
( $opt_warning, $opt_critical ) = ( [ 1, -1 ], [ 1, -1 ] );

# caching is on by default and a cache is considered fresh for $cache_exp
# seconds
( $opt_cache, $cache_exp ) = ( 1, 600 );

# exit status reported when no threshold is violated
$exit = $ERRORS{OK};

# processes whose name ends in one of these are looked up through their
# runtime parameters instead (see update_cache)
$interpreters = '(perl|/bin/sh|/usr/bin/sh|/bin/bash|/bin/ksh|python)';

# --------------------------------------------------------------------------- #
# Numeric object identifiers, built up one arc at a time.
# --------------------------------------------------------------------------- #

# path from the root down to the host resources subtree: 1.3.6.1.2.1.25
my $isoMIB      = '1';
my $orgMIB      = $isoMIB . '.3';
my $dodMIB      = $orgMIB . '.6';
my $internetMIB = $dodMIB . '.1';
my $mgmtMIB     = $internetMIB . '.2';
my $mib2MIB     = $mgmtMIB . '.1';
my $hostMIB     = $mib2MIB . '.25';

# groups directly below the host resources subtree
my $hrSystemMIB       = $hostMIB . '.1';
my $hrStorageMIB      = $hostMIB . '.2';
my $hrDeviceMIB       = $hostMIB . '.3';
my $hrSWRunMIB        = $hostMIB . '.4';
my $hrSWRunPerfMIB    = $hostMIB . '.5';
my $hrSWInstalled     = $hostMIB . '.6';
my $hrMIBAdminInfoMIB = $hostMIB . '.7';

# scalar instance (note the .0) that update_cache reads as the process count
my $hrSystemProcessesMIB = $hrSystemMIB . '.6.0';

# storage table columns
my $hrStorageTableMIB = $hrStorageMIB . '.3';
my $hrStorageEntryMIB = $hrStorageTableMIB . '.1';
my $hrStorageIndexMIB = $hrStorageEntryMIB . '.1';
my $hrStorageTypeMIB  = $hrStorageEntryMIB . '.2';
my $hrStorageDescrMIB = $hrStorageEntryMIB . '.3';
# NOTE(review): unlike every other OID here the next three end in a dot -
# possibly so that an index can be appended directly; confirm before use.
my $hrStorageAllocationUnitsMIB    = $hrStorageEntryMIB . '.4.';
my $hrStorageSizeMIB               = $hrStorageEntryMIB . '.5.';
my $hrStorageUsedMIB               = $hrStorageEntryMIB . '.6.';
my $hrStorageAllocationFailuresMIB = $hrStorageEntryMIB . '.7';

# running software table columns
my $hrSWRunTableMIB      = $hrSWRunMIB . '.2';
my $hrSWRunEntryMIB      = $hrSWRunTableMIB . '.1';
my $hrSWRunIndexMIB      = $hrSWRunEntryMIB . '.1';
my $hrSWRunNameMIB       = $hrSWRunEntryMIB . '.2';
my $hrSWRunIDMIB         = $hrSWRunEntryMIB . '.3';
my $hrSWRunPathMIB       = $hrSWRunEntryMIB . '.4';
my $hrSWRunParametersMIB = $hrSWRunEntryMIB . '.5';
my $hrSWRunTypeMIB       = $hrSWRunEntryMIB . '.6';
my $hrSWRunStatusMIB     = $hrSWRunEntryMIB . '.7';

# running software performance table columns
my $hrSWRunPerfTableMIB = $hrSWRunPerfMIB . '.1';
my $hrSWRunPerfEntryMIB = $hrSWRunPerfTableMIB . '.1';
my $hrSWRunPerfCPUMIB   = $hrSWRunPerfEntryMIB . '.1';
my $hrSWRunPerfMemMIB   = $hrSWRunPerfEntryMIB . '.2';

# process table shared by the cache routines: table index => { name,
# abs_name, pid }, plus the special key __last_update (epoch seconds)
our %processes = ();

# Print the given list on the currently selected output handle, but only
# when the verbose option ($opt_verbose) has been set; otherwise do nothing.
sub verbose (@) {
    my @text = @_;
    unless ( defined $opt_verbose ) {
        return;
    }
    print @text;
}

# Populate %processes from the on-disk cache.
#
# Reads the globals $opt_cache and $cachefile.  %processes is left empty when
# caching is disabled, when the cache file is missing or unreadable, or when
# its contents cannot be loaded.  Takes no arguments, returns nothing useful.
sub init_cache {
    %processes = ();

    # $opt_cache is always defined (it starts out as 1 and --nocache sets it
    # to 0), so it has to be tested for truth: a defined() test never skipped
    # the stale cache file when caching was switched off.
    return if ( !$opt_cache );

    return if ( !defined($cachefile) || !-r $cachefile );

    eval {
        verbose("loading cache from $cachefile\n");
        my $cached = retrieve( $cachefile );
        # anything but a hash reference cannot be a cache written by us
        die "unexpected cache contents\n" if ( ref($cached) ne 'HASH' );
        %processes = %{$cached};
    };
    if ( $@ ) {
        verbose("cache loading failed - using blank cache: $@\n");
        %processes = ();
    }
    return;
}

# Fetch one or more objects with a single SNMP get request.
#
#   $session - session object; its get_request() and error() methods are used
#   $oids    - array reference of OID strings; a single OID string is accepted
#              too and wrapped automatically
#
# Returns whatever get_request() returned (callers index it by OID).  When the
# request fails, the session's error text is handed to usage() as an UNKNOWN
# message.
sub snmpget ($$) {
  my ($session, $oids) = @_;

  # tolerate callers that pass a bare OID instead of an array reference
  $oids = [ $oids ] if ( ref($oids) ne 'ARRAY' );

  my $response = $session->get_request(-varbindlist => $oids);
  if (!defined $response) {
    usage("$PROGNAME UNKNOWN: error retrieving SNMP data " . $session->error . "\n");
  }
  return $response;
}

# Bring %processes up to date.
#
#   $session - SNMP session used for the get/get_table requests
#
# Returns 1 when the already loaded cache was recent enough and was kept,
# 0 when the process table was fetched from the remote agent.
#
# On a refresh every running-software name is read; for interpreters (or for
# every process when -r is in use) the runtime parameters are fetched as well
# so that scripts can be told apart.  The program exits with UNKNOWN when the
# table cannot be retrieved or is clearly incomplete.
sub update_cache ($) {
  my ($session) = @_;

  # expire the cache after $cache_exp seconds
  if ( $opt_cache != 0 && exists($processes{__last_update})
       && $processes{__last_update} >= time - $cache_exp )  {
    verbose("cache file is recent enough - using it\n");
    return 1;
  }

  verbose("retrieving full listing of processes from $opt_host\n");
  my $process_count = snmpget( $session, [ $hrSystemProcessesMIB ] )->{$hrSystemProcessesMIB};

  # retrieve the data from the remote host
  my $names = $session->get_table(-baseoid => $hrSWRunNameMIB);
  if (!defined $names) {
    usage("$PROGNAME UNKNOWN: error retrieving the SNMP Running Software Names " . $session->error . "\n");
  }

  # make sure the number of processes from the table walk is close to hrSystemProcesses
  my $returned = scalar keys %{$names};
  if ( $returned + 10 < $process_count ) {
    print "UNKNOWN - only ", $returned, " of ",$process_count, " processes returned\n";
    exit $ERRORS{UNKNOWN};
  }

  # The table just fetched is complete, so entries left over from an expired
  # cache are dropped instead of lingering (and being written back) forever.
  %processes = ();

  # sort through the process names and create a nice hash of processes
  foreach my $oid (keys %{$names} ) {
    # the last sub-identifier is the table index of the process
    my ($iid) = $oid =~ m/(\d+)$/;
    next if ( !defined $iid );

    my %hash = ();
    $hash{name}     = $names->{$oid};
    $hash{abs_name} = $names->{$oid}; # Probably should get the path from elsewhere!
    $hash{name}     =~ s#.*/##; # strip path
    $hash{pid}      = $iid;

    # remember which interpreter matched rather than relying on a leftover $1
    my ($interpreter) = $names->{$oid} =~ m#$interpreters$#;

    if ( defined($opt_regex) ||
	 (defined($interpreter)
	  && $opt_command !~ m#$interpreters$#) ) {
      # fetch the runtime parameters of the process
      my $parameters = snmpget( $session, [ "$hrSWRunParametersMIB.$iid" ] )->{"$hrSWRunParametersMIB.$iid"};

      if (defined $parameters) {
	# only strip if we're looking for a specific command
	if ( defined($opt_command) ) {
	  verbose("process ",$iid," uses $interpreter as an interpreter - getting parameters\n")
	    if ( defined $interpreter );
	  $hash{name} = $parameters;
	  $hash{name} =~ s#.*/##;    # strip path name off the front
	  $hash{name} =~ s/\s+.*$//; # strip everything from the first space to the end
	} else {
	  # use the full 'ps -efl' style listing for regular expression matching;
	  # snmpget needs the OID list as an array reference
	  my $path = snmpget( $session, [ "$hrSWRunPathMIB.$iid" ] )->{"$hrSWRunPathMIB.$iid"};
	  $path = '' if ( !defined $path );
	  $hash{name} = "$path $parameters";
	}
      }
    }
    # store in the global hash
    $processes{$iid} = \%hash;
  }

  # update the timestamp so the cache can expire
  $processes{__last_update} = time;
  return 0;
}

# Walk the %processes hash and count the entries that match our command (-e)
# or regex (-r).
#
#   $session - SNMP session used to re-check cached entries and to read the
#              per-process memory / cpu figures
#
# Returns the number of matching processes.  As a side effect $opt_memory and
# $opt_cpu (when defined) are increased by the values read for each match.
sub check_for_matches ($) {
  my ($session) = @_;
  my $total = 0;

  ENTRY:
  foreach my $index ( keys(%processes) ) {
    # the timestamp lives in the same hash but is not a process
    next ENTRY if ( $index eq '__last_update' );

    my $hit = 0;
    if ( defined($opt_command) && $processes{$index}->{name} eq $opt_command ) {
      # static matches are letter-for-letter (-e)
      $hit = 1;
    }
    elsif ( defined($opt_regex) && $processes{$index}->{name} =~ /$opt_regex/o ) {
      # /o makes sure the user-supplied regex (-r) is only compiled once
      $hit = 1;
    }
    next ENTRY unless ( $hit );

    my $entry = $processes{$index};

    # with caching enabled the entry may be stale: ask the agent for the
    # current name at this index and drop the match if it differs
    if ( $opt_cache != 0 ) {
      my $name_oid = "$hrSWRunNameMIB.$index";
      my $current  = snmpget( $session, [ $name_oid ] )->{$name_oid};
      next ENTRY if ( !$current || $current ne $entry->{abs_name} );
    }

    # add up the process memory usage if requested
    if ( defined($opt_memory) ) {
      my $mem_oid = "$hrSWRunPerfMemMIB.$index";
      $opt_memory += snmpget( $session, [ $mem_oid ] )->{$mem_oid};
    }

    # add up the process cpu usage if requested
    if ( defined($opt_cpu) ) {
      my $cpu_oid = "$hrSWRunPerfCPUMIB.$index";
      $opt_cpu += snmpget( $session, [ $cpu_oid ] )->{$cpu_oid};
    }

    verbose "process '$entry->{name}' has pid $entry->{pid} and index $index\n";

    $total++;
  }

  return $total;
}
# =========================================================================== #
# =====> MAIN
# =========================================================================== #

$plugin->init();

# complete the cachefile's name
$cachefile .= '/' . $opt_host . '.proc';

# initialize the cache, if it's enabled
init_cache();

# create a session for conversing with the remote SNMP agent; a timeout is
# only passed on when one has actually been set
my %session_args = ( '-hostname'  => $opt_host,
                     '-community' => $opt_community,
                     '-port'      => $opt_port,
                     '-version'   => '2c' );
$session_args{'-timeout'} = $opt_t if ( defined $opt_t );

my ($session, $error) = Net::SNMP->session(%session_args);

# without a session every later request would die on an undefined object
if ( !defined $session ) {
    $error = 'unknown error' if ( !defined $error );
    print "$PROGNAME UNKNOWN: could not create SNMP session: $error\n";
    exit $ERRORS{UNKNOWN};
}

my $usage = update_cache($session);
my $count = check_for_matches($session);

# always try twice if caching is enabled - once with cache and once without
if ( $usage != 0 && $opt_cache != 0 && $count <= 0 ) {
    verbose "did not find process in cache - trying a refresh\n";
    %processes = ();
    update_cache($session);
    $count = check_for_matches($session);
}

$session->close();

# what we were looking for, and how many of them turned up; the thresholds
# can also trip because there are too MANY processes, so the problem text
# must not claim that none were found in that case
my $target = ($opt_command || $opt_regex);
my $found  = $count > 0 ? "$count process(es) found" : "no processes found";

# the default, OK message
my $message = "$PROGNAME OK: $count process(es) found resembling '". $target;

# warning, critical: element 0 is the minimum, element 1 the maximum; a bound
# that is not greater than zero is ignored
if ( ($opt_warning->[0] > 0 && $opt_warning->[0]  >  $count)
  || ($opt_warning->[1] > 0 && $opt_warning->[1]  <= $count) ) {
    $message = "$PROGNAME WARNING: $found resembling '". $target;
    $exit = $ERRORS{WARNING};
}
if ( ($opt_critical->[0] > 0 && $opt_critical->[0]  >  $count)
  || ($opt_critical->[1] > 0 && $opt_critical->[1]  <= $count) ) {
    $message = "$PROGNAME CRITICAL: $found resembling '". $target;
    $exit = $ERRORS{CRITICAL};
}

# output the status message
print $message, "'";

# print the number of processes if statistics are requested
if ( defined($opt_stats) ) {
    print "|count=$count";
    if ( defined($opt_memory) ) {
        print ":memory=", $opt_memory;
    }
    if ( defined($opt_cpu) ) {
        # the accumulated value is divided by 100 to report seconds
        $opt_cpu = $opt_cpu / 100;
        printf ":cpu=%.2f", $opt_cpu;
    }
}

# finish the status line before anything else can be written
print "\n";

# store a copy of the %processes hash if we're using caching; failing to
# write the cache is not fatal, but it is reported in verbose mode
if ( $exit == $ERRORS{OK} && $opt_cache != 0 ) {
    eval {
        unlink( $cachefile ) if ( -e $cachefile );
        store( \%processes, $cachefile );
    };
    verbose "could not store cache in $cachefile: $@\n" if ( $@ );
}

exit $exit;


