[Nagiosplug-checkins] CVS: nagiosplug/contrib check_snmp_process_monitor.pl,1.1,1.2

Subhendu Ghosh sghosh at users.sourceforge.net
Thu Aug 22 15:09:03 CEST 2002


Update of /cvsroot/nagiosplug/nagiosplug/contrib
In directory usw-pr-cvs1:/tmp/cvs-serv28536

Modified Files:
	check_snmp_process_monitor.pl 
Log Message:
updates

Index: check_snmp_process_monitor.pl
===================================================================
RCS file: /cvsroot/nagiosplug/nagiosplug/contrib/check_snmp_process_monitor.pl,v
retrieving revision 1.1
retrieving revision 1.2
diff -C2 -r1.1 -r1.2
*** check_snmp_process_monitor.pl	22 Aug 2002 14:42:43 -0000	1.1
--- check_snmp_process_monitor.pl	22 Aug 2002 22:08:46 -0000	1.2
***************
*** 20,24 ****
  use SNMP 5.0;
  use Getopt::Long;
! use vars qw( $exit $opt_version $opt_timeout $opt_help $opt_command $opt_host $opt_community $opt_verbose $opt_warning $opt_critical $opt_memory $opt_cpu $opt_port $opt_regex $opt_stats %processes $snmp_session $PROGNAME $TIMEOUT );
  
  $PROGNAME      = "snmp_process_monitor.pl";
--- 20,25 ----
  use SNMP 5.0;
  use Getopt::Long;
! use Storable;
! use vars qw( $exit $opt_version $opt_timeout $opt_help $opt_command $opt_host $opt_community $opt_verbose $opt_warning $opt_critical $opt_memory $opt_cpu $opt_port $opt_regex $opt_stats $opt_cache $opt_nocache $cache_exp $interpreters $snmp_session $PROGNAME $TIMEOUT );
  
  $PROGNAME      = "snmp_process_monitor.pl";
***************
*** 32,37 ****
  $opt_cpu       = undef;
  $opt_port      = 161;
! %processes     = ();
! $exit          = 'OK';
  
  sub process_options {
--- 33,43 ----
  $opt_cpu       = undef;
  $opt_port      = 161;
! $opt_cache     = 1;
! $opt_nocache   = undef;
! $cache_exp     = 600;
! $exit          = $ERRORS{OK};
! $interpreters  = '(perl|/bin/sh|/usr/bin/sh|/bin/bash|/bin/ksh|python)';
! our $cachefile = '/var/opt/nagios/tmp/'; # completed later
! our %processes = ();
  
  sub process_options {
***************
*** 43,46 ****
--- 49,53 ----
          'h'     => \$opt_help,          'help'        => \$opt_help,
          's'     => \$opt_stats,         'statistics'  => \$opt_stats,
+                                         'nocache'     => \$opt_nocache,
          'H:s'   => \$opt_host,          'hostname:s'  => \$opt_host,
          'p:i'   => \$opt_port,          'port:i'      => \$opt_port,
***************
*** 76,79 ****
--- 83,92 ----
          }
      }
+     if ( defined($opt_memory) ) { $opt_memory = 0 }
+     if ( defined($opt_cpu)    ) { $opt_cpu    = 0 }
+     if ( defined($opt_nocache)) { $opt_cache  = 0 }
+ 
+     # complete the cachefile's name
+     $cachefile .= $opt_host . '.proc';
  }
  
***************
*** 83,87 ****
  
  sub print_usage {
!     print "Usage: $PROGNAME -H <host> -C <snmp_community> -e <command> [-w <low>,<high>] [-c <low>,<high>] [-t <timeout>]\n";
  }
  
--- 96,100 ----
  
  sub print_usage {
!     print "Usage: $PROGNAME -H <host> -C <snmp_community> -e <command> [-w <low>,<high>] [-c <low>,<high>] [-t <timeout>] [-s|--statistics] [--memory] [--cpu] [--nocache]\n";
  }
  
***************
*** 108,111 ****
--- 121,128 ----
  -c, --critical=INTEGER[,INTEGER]
     minimum and maximum number of processes before a critical is issued (Default 1,-1)
+ --memory
+    combined with '-s', will print the number of bytes of real memory used by process
+ --cpu
+    combined with '-s', will print the number of seconds of cpu time consumed by process
  EOT
  }
***************
*** 118,121 ****
--- 135,139 ----
  sub check_for_errors {
      if ( $snmp_session->{ErrorNum} ) {
+         %processes = ();
          print "UNKNOWN - error retrieving SNMP data: $snmp_session->{ErrorStr}\n";
          exit $ERRORS{UNKNOWN};
***************
*** 123,126 ****
--- 141,261 ----
  }
  
+ sub init_cache {
+     if ( !defined($opt_cache) ) {
+         %processes = ();
+         return;
+     }
+     if ( -r $cachefile ) {
+         eval {
+             verbose "loading cache from $cachefile\n";
+             %processes = %{ retrieve( $cachefile ) };
+         };
+         if ( $@ ) {
+             verbose "cache loading failed - using blank cache: $@\n";
+             %processes = ()
+         }
+     }
+     else {
+         %processes = ();
+     }
+ }
+ 
+ sub snmpget {
+     my $tmpvar = SNMP::Varbind->new( shift );
+     $snmp_session->get( $tmpvar );
+     check_for_errors();
+     return $tmpvar->val;
+ }
+ 
+ sub update_cache {
+     # expire the cache after $cache_exp seconds
+     if ( $opt_cache != 0 && exists($processes{__last_update})
+       && $processes{__last_update} >= time - $cache_exp )  {
+         verbose "cache file is recent enough - using it\n";
+         return 1;
+     }
+ 
+     verbose "retrieving full listing of processes from $opt_host\n";
+     my $process_count = snmpget( ['hrSystemProcesses', 0] );
+ 
+     # retrieve the data from the remote host
+ 	my ($names) = $snmp_session->bulkwalk( 0, $process_count + 1, [['hrSWRunName']] );
+ 	check_for_errors();
+ 
+     # make sure the number of processes from the bulkwalk is close to hrSystemProcesses
+     if ( scalar(@$names) + 10 < $process_count ) {
+         print "UNKNOWN - only ", scalar(@$names), " of ",$process_count, " processes returned\n";;
+         exit $ERRORS{UNKNOWN};
+     }
+ 
+     # sort through the process names and create a nice hash of processes
+ 	foreach my $row ( @$names ) {
+         my %hash = {};
+ 	    $hash{name}     = $row->val;
+         $hash{abs_name} = $row->val;
+ 	    $hash{name}     =~ s#.*/##; # strip path
+ 	
+ 	    if ( defined($opt_regex) ||
+ 	        ($row->val =~ m#$interpreters$#
+ 	        && $opt_command !~ m#$interpreters$#) ) {
+ 	
+ 	        # fetch the runtime parameters of the process
+             my $parameters = snmpget( ['hrSWRunParameters', $row->iid] );
+ 	
+ 	        # only strip if we're looking for a specific command
+ 	        if ( defined($opt_command) ) {
+ 	            verbose "process ",$row->iid," uses $1 as an interpreter - getting parameters\n";
+ 	            $hash{name} = $parameters;
+ 	            $hash{name} =~ s#.*/##;    # strip path name off the front
+ 	            $hash{name} =~ s/\s+.*$//; # strip everything from the first space to the end
+ 	        }
+ 	        else {
+ 	            # use the full 'ps -efl' style listing for regular expression matching
+                 my $path = snmpget( ['hrSWRunPath', $row->iid] );
+ 	            $hash{name} = "$path $parameters";
+ 	        }
+ 	    }
+         # store in the global hash
+         $processes{$row->iid} = \%hash;
+ 	}
+ 
+     # update the timestamp so the cache can expire
+     $processes{__last_update} = time;
+     return 0;
+ }
+ 
+ # process the %processes hash and see if there any matches for our command or regex
+ sub check_for_matches {
+     my $ret_match = 0;
+ 	foreach my $key ( keys(%processes) ) {
+         next if ( $key eq '__last_update' );
+         my $match = 0;
+ 
+         # static matches are letter-for-letter (-e)
+ 	    if ( defined($opt_command)  && $processes{$key}->{name} eq  $opt_command ) { $match++; }
+         # use /o to make sure the user-supplied regex (-r) is only compiled once
+ 	    elsif ( defined($opt_regex) && $processes{$key}->{name} =~ /$opt_regex/o ) { $match++; }
+ 
+         # verify the cache's entry by doing an snmpget
+         if ( $match > 0 && $opt_cache != 0 ) {
+             my $proc = snmpget( ['hrSWRunName', $key] );
+             --$match if ( !$proc || $proc ne $processes{$key}->{abs_name} );
+         }
+         # get the process memory usage if requested
+         if ( $match > 0 && defined($opt_memory) ) {
+             $opt_memory += snmpget( ['hrSWRunPerfMem', $key] );
+         }
+         # get the process cpu usage if requested
+         if ( $match > 0 && defined($opt_cpu) ) {
+             $opt_cpu += snmpget( ['hrSWRunPerfCPU', $key] );
+         }
+ 
+ 	    verbose "process '$processes{$key}->{name}' has pid $processes{$key}->{pid} and index $key\n"
+             if ( $match > 0 );
+ 
+         $ret_match += $match;
+ 	}
+ 	return $ret_match;
+ }
  # =========================================================================== #
  # =====> MAIN
***************
*** 130,133 ****
--- 265,272 ----
  alarm( $TIMEOUT ); # make sure we don't hang Nagios
  
+ # intialize the cache, if it's enabled
+ init_cache();
+ 
+ # create a session for conversing with the remote SNMP agent
  $snmp_session = new SNMP::Session(
      DestHost => $opt_host,
***************
*** 137,227 ****
  );
  
! my $process_count = SNMP::Varbind->new( ['hrSystemProcesses', 0] );
! $snmp_session->get( $process_count ); 
! check_for_errors();
! 
! # retrieve the data from the remote host
! my( $names, $index ) = $snmp_session->bulkwalk( 0, $process_count->val, [['hrSWRunName'], ['hrSWRunIndex']] );
! check_for_errors();
! 
! alarm( 0 ); # all done with the network connection
! 
! my %namecount = ();
! foreach my $row ( @$names ) {
!     $processes{$row->iid}->{name} = $row->val;
!     $processes{$row->iid}->{name} =~ s#.*/##; # strip path
! 
!     if ( defined($opt_regex) ||
!         ($row->val =~ /(perl|\/usr\/bin\/sh|\/bin\/bash|\/bin\/sh)$/
!         && $opt_command !~ /(perl|\/usr\/bin\/sh|\/bin\/bash|\/bin\/sh)$/) ) {
! 
!         # fetch the runtime parameters of the process
!         my $parm_var = SNMP::Varbind->new( ['hrSWRunParameters', $row->iid] );
!         $snmp_session->get( $parm_var );
!         check_for_errors();
! 
!         # only strip if we're looking for a specific command
!         if ( defined($opt_command) ) {
!             verbose "process ",$row->iid," uses $1 as an interpreter - getting parameters\n";
!             $processes{$row->iid}->{name} = $parm_var->val;
!             # strip path name off the front
!             $processes{$row->iid}->{name} =~ s#.*/##;
!             # strip everything from the first space to the end
!             $processes{$row->iid}->{name} =~ s/\s+.*$//;
!         }
!         else {
!             # get the longer full-path style listing
!             my $path_var = SNMP::Varbind->new( ['hrSWRunPath', $row->iid] );
!             $snmp_session->get( $path_var );
!             check_for_errors();
  
!             # use the full 'ps -efl' style listing for regular expression matching
!             $processes{$row->iid}->{name} = $path_var->val.' '.$parm_var->val;
!         }
!     }
! }
! foreach my $row ( @$index ) {
!     $processes{$row->iid}->{pid}  = $row->val;
  }
  
! my @pids    = ();
! my @matches = ();
! foreach my $key ( keys(%processes) ) {
!     if ( defined($opt_command) && $processes{$key}->{name} eq $opt_command ) {
!         push( @matches, $processes{$key} );
!         push( @pids, $processes{$key}->{pid} );
!         verbose "process '$processes{$key}->{name}' has pid ",
!             "$processes{$key}->{pid} and index $key\n";
!     }
!     elsif ( defined($opt_regex) && $processes{$key}->{name} =~ /$opt_regex/o ) {
!         push( @matches, $processes{$key} );
!         push( @pids, $processes{$key}->{pid} );
!         verbose "process '$processes{$key}->{name}' has pid ",
!             "$processes{$key}->{pid} and index $key\n";
!     }
! }
! my $count = @matches;
  
  # warning, critical
  if ( ($opt_warning->[0] > 0 && $opt_warning->[0]  >  $count)
    || ($opt_warning->[1] > 0 && $opt_warning->[1]  <= $count) ) {
!     $exit = 'WARNING';
  }
  if ( ($opt_critical->[0] > 0 && $opt_critical->[0]  >  $count)
    || ($opt_critical->[1] > 0 && $opt_critical->[1]  <= $count) ) {
!     $exit = 'CRITICAL';
  }
  
! print "$exit - $count processes with pid(s) ",join(',', at pids);
  
  # print the number of processes if statistics are requested
  if ( defined($opt_stats) ) {
!     print "|count:$count\n";
  }
! else {
!     print "\n";
  }
  
! exit $ERRORS{$exit};
  
  
--- 276,331 ----
  );
  
! my $usage = update_cache();
! my $count = check_for_matches();
  
! # always try twice if caching is enabled - once with cache and once without
! if ( $usage != 0 && $opt_cache != 0 && $count <= 0 ) {
!     verbose "did not find process in cache - trying a refresh\n";
!     %processes = ();
!     update_cache();
!     $count = check_for_matches();
  }
  
! 
! # the default, OK message
! my $message = "OK - $count process(es) found resembling '". ($opt_command || $opt_regex);
  
  # warning, critical
  if ( ($opt_warning->[0] > 0 && $opt_warning->[0]  >  $count)
    || ($opt_warning->[1] > 0 && $opt_warning->[1]  <= $count) ) {
!     $message = "WARNING - no processes found resembling '". ($opt_command || $opt_regex);
!     $exit = $ERRORS{WARNING};
  }
  if ( ($opt_critical->[0] > 0 && $opt_critical->[0]  >  $count)
    || ($opt_critical->[1] > 0 && $opt_critical->[1]  <= $count) ) {
!     $message = "CRITICAL - no processes found resembling '". ($opt_command || $opt_regex);
!     $exit = $ERRORS{CRITICAL};
  }
  
! # output the status message
! print $message, "'";
  
  # print the number of processes if statistics are requested
  if ( defined($opt_stats) ) {
!     print "|count=$count";
!     if ( defined($opt_memory) ) {
!         print ":memory=", $opt_memory;
!     }
!     if ( defined($opt_cpu) ) {
!         $opt_cpu = $opt_cpu / 100;
!         printf ":cpu=%.2f", $opt_cpu;
!     }
  }
! 
! # store a copy of the %processes hash if we're using caching
! if ( $exit == $ERRORS{OK} && $opt_cache != 0 ) {
!     eval {
!         unlink( $cachefile ) if ( -e $cachefile );
!         store( \%processes, $cachefile );
!     };
  }
  
! print "\n";
! exit $exit;
  
  





More information about the Commits mailing list