From 0b6423f9c99d9edf8c96fefd0f6c453859395aa1 Mon Sep 17 00:00:00 2001 From: Holger Weiss Date: Mon, 30 Sep 2013 00:03:24 +0200 Subject: Import Nagios Plugins site Import the Nagios Plugins web site, Cronjobs, infrastructure scripts, and configuration files. --- web/attachments/327688-check_procs.c.patch | 796 +++++++++++++++++++++++++++++ 1 file changed, 796 insertions(+) create mode 100644 web/attachments/327688-check_procs.c.patch (limited to 'web/attachments/327688-check_procs.c.patch') diff --git a/web/attachments/327688-check_procs.c.patch b/web/attachments/327688-check_procs.c.patch new file mode 100644 index 0000000..2c262f7 --- /dev/null +++ b/web/attachments/327688-check_procs.c.patch @@ -0,0 +1,796 @@ +--- check_procs.c 2009-02-21 09:59:24.000000000 +0000 ++++ check_procs.c.new 2009-05-19 10:41:14.000000000 +0000 +@@ -27,7 +27,8 @@ + * + * You should have received a copy of the GNU General Public License + * along with this program. If not, see . +-* ++* ++* State file stuff originally by: Alain Williams + * + *****************************************************************************/ + +@@ -42,13 +43,7 @@ + #include "regex.h" + + #include +- +-int process_arguments (int, char **); +-int validate_arguments (void); +-int check_thresholds (int); +-int convert_to_seconds (char *); +-void print_help (void); +-void print_usage (void); ++#include + + int wmax = -1; + int cmax = -1; +@@ -77,6 +72,7 @@ + METRIC_ELAPSED + }; + enum metric metric = METRIC_PROCS; ++char metric_state_name = 'P'; /* Metric name in the state file */ + + int verbose = 0; + int uid; +@@ -92,9 +88,98 @@ + char *fmt; + char *fails; + char tmp[MAX_INPUT_BUFFER]; ++time_t now; ++time_t state_limit_start; + + FILE *ps_input = NULL; + ++/* Optionally trigger an alert if a process has been in a state for ++ * some time. This time will be measured in minutes, ie much longer than ++ * this program runs for - thus a state file is needed to store this ++ * between runs of this program. 
++ * This happens if --state-file is specified. ++ * ++ * The state file records information about processes that exceed some criteria ++ * for a warning or critical notice. ++ * The file will contain one 'V' line. ++ * If the metric is PROCS there will be one N line, else zero or more P lines. ++ * Format of the state file: ++ * Max line length of 500 ++ * Empty lines and lines starting '#' are ignored ++ * Lines consist of a type character, a space and optional extra information ++ * V version_number ++ * P pid ppid name MS secs ++ * name is limited to a max 20 characters ++ * M is the metric: ++ * V virtual memory size ++ * R resident set memory size ++ * C percentage CPU ++ * E time elapsed in seconds ++ * S is the state: ++ * W Warning ++ * C Critical ++ * N MS secs ++ * M will be 'P' ++ * S is the state as above ++ * secs is the epoch time the metric was first exceeded - hex number ++ * The 'MS secs' or 'S secs' may appear twice as it is possible for a process (or the ++ * max # processes) to exceed both the warning and critical thresholds but for different times. ++ * If something is C then it is implicitly W. ++ * ++ * There might be a trailing space on a N line. ++ * ++ * Eg: ++ * P 1234 1200 cpu_hog CW 4a05a817 CC 4a05a91f ++ * N PW 4a05a91f ++ */ ++#define STATE_VERSION 1 /* Change me if the file format changes */ ++#define STATE_MAX_LINE 500 /* Longest line */ ++#define MAX_PROG_NAME 20 /* Longest name of program - search for this if you change it */ ++#define METRIC_CODES "PVRCE" /* For input validation */ ++#define STATE_CODES "WC" /* For input validation */ ++#define STATE2state(x) ((x) == STATE_WARNING ? 'W' : 'C') /* Convert STATE_WARNING or STATE_CRITICAL to 'W' or 'C' */ ++ ++/* A process can exceed various limits. 
This describes on of them ++ */ ++typedef struct plimit { ++ struct plimit* pl_next; /* NULL terminated list */ ++ time_t pl_when; /* When it first exceeded this limit */ ++ int pl_state; /* STATE_WARNING or STATE_CRITICAL */ ++ int pl_seen; /* Exceeded this run */ ++ char pl_metric; /* What is exceeded - as in file */ ++} PLimit; ++ ++/* Something to describe a process that is exceeding something ++ */ ++typedef struct exproc { ++ struct exproc* ep_next; /* NULL terminated list */ ++ pid_t ep_pid; /* Process ID */ ++ pid_t ep_ppid; /* Parent PID */ ++ char* ep_prog; /* Program name */ ++ PLimit* ep_limits; /* Limits exceeded list */ ++ int ep_seen; /* Updated/noticed this run */ ++} ExProc; ++ ++char* state_filename; /* File that we store this in */ ++int state_time = 5; /* Trigger time - minutes */ ++ExProc* state_list; /* Used for process specific metrics - ie metric is *not* PROCS */ ++PLimit* state_nprocs; /* Info on # procs exceeded - used if metric is PROCS */ ++int state_changed; /* Ie need to write back to file */ ++int must_rewrite; /* Set this if there is a syntax error in the file, or ++ * some other reason which means we must rewrite it */ ++ ++int process_arguments (int, char **); ++int validate_arguments (void); ++int check_thresholds (int); ++int convert_to_seconds (char *); ++void print_help (void); ++void print_usage (void); ++void read_state_file(void); ++void write_state_file(char** argv); ++void record_state(pid_t procpid, pid_t procppid, char* procprog, char prog_metric, int state, time_t start_time); ++void record_limit(PLimit** l_ref, int state, char proc_metric, time_t start_time); ++void read_limit_line(const char* in_line, PLimit** ppl, char* state_filename, int line_no); ++int check_limit(PLimit* pl); + + int + main (int argc, char **argv) +@@ -129,13 +214,16 @@ + int result = STATE_UNKNOWN; + output chld_out, chld_err; + ++ now = time(NULL); ++ + setlocale (LC_ALL, ""); + bindtextdomain (PACKAGE, LOCALEDIR); + textdomain (PACKAGE); + 
setlocale(LC_NUMERIC, "POSIX"); + +- input_buffer = malloc (MAX_INPUT_BUFFER); +- procprog = malloc (MAX_INPUT_BUFFER); ++ if( ! (input_buffer = malloc (MAX_INPUT_BUFFER)) || ++ ! (procprog = malloc (MAX_INPUT_BUFFER))) ++ die(STATE_UNKNOWN, _("Out of memory in startup\n")); + + asprintf (&metric_name, "PROCS"); + metric = METRIC_PROCS; +@@ -168,6 +256,9 @@ + result = cmd_file_read( input_filename, &chld_out, 0); + } + ++ /* What do we remember from last time ? */ ++ read_state_file(); ++ + /* flush first line: j starts at 1 */ + for (j = 1; j < chld_out.lines; j++) { + input_line = chld_out.line[j]; +@@ -237,6 +328,10 @@ + procetime, procprog, procargs); + } + ++ /* This is all made simpler because metric can only talk about ++ * one metric, ie can't check more than one thing at a time. ++ * This means that metric_state_name is the char equivalent of metric. ++ */ + if (metric == METRIC_VSZ) + i = check_thresholds (procvsz); + else if (metric == METRIC_RSS) +@@ -248,15 +343,29 @@ + i = check_thresholds (procseconds); + + if (metric != METRIC_PROCS) { +- if (i == STATE_WARNING) { +- warn++; +- asprintf (&fails, "%s%s%s", fails, (strcmp(fails,"") ? ", " : ""), procprog); +- result = max_state (result, i); +- } +- if (i == STATE_CRITICAL) { +- crit++; +- asprintf (&fails, "%s%s%s", fails, (strcmp(fails,"") ? ", " : ""), procprog); +- result = max_state (result, i); ++ if(state_filename) { ++ /* State is being stored - ie don't report immediately. ++ * Note what we have found: ++ */ ++ if(i == STATE_WARNING || i == STATE_CRITICAL) ++ record_state(procpid, procppid, procprog, metric_state_name, i, now); ++ } else { ++ if (i == STATE_WARNING) { ++ char* str = fails; ++ warn++; ++ asprintf (&fails, "%s%s%s", fails, (strcmp(fails,"") ? ", " : ""), procprog); ++ result = max_state (result, i); ++ if(str) ++ free(str); ++ } ++ if (i == STATE_CRITICAL) { ++ char* str = fails; ++ crit++; ++ asprintf (&fails, "%s%s%s", fails, (strcmp(fails,"") ? 
", " : ""), procprog); ++ result = max_state (result, i); ++ if(str) ++ free(str); ++ } + } + } + } +@@ -276,7 +385,59 @@ + + /* Needed if procs found, but none match filter */ + if ( metric == METRIC_PROCS ) { +- result = max_state (result, check_thresholds (procs) ); ++ int threshold = check_thresholds(procs); ++ int putative_result = max_state(result, threshold); ++ ++ if(state_filename) { /* Do not report immediately - note what we found */ ++ /* Only record something if we may need to report it */ ++ if(putative_result == STATE_WARNING || putative_result == STATE_CRITICAL) ++ record_limit(&state_nprocs, putative_result, 'P', now); ++ } else ++ result = putative_result; ++ } ++ ++ /* If we have a state file, the above has just stored the results away, so have ++ * a look and see if there is anything that we should note. ++ * The slight subtlety is that we could have something recorded as both a warning ++ * & a critical - in this case only report the critical. ++ */ ++ if(state_filename) { ++ /* Compute the start time of any state that we must report. ++ * Ie any state younger than this we keep quiet about. ++ */ ++ state_limit_start = (time_t)((unsigned long)now - state_time * 60); ++ ++ if(verbose >= 3) ++ printf("Checking metric %c, limit_start %s", metric_state_name, ctime(&state_limit_start)); ++ ++ if(metric == METRIC_PROCS) { ++ result = check_limit(state_nprocs); ++ } else { ++ ExProc* pp; ++ ++ for(pp = state_list; pp; pp = pp->ep_next) { ++ char* str = fails; ++ ++ /* What is the state of this recorded process ? */ ++ int res = check_limit(pp->ep_limits); ++ ++ switch(res) { ++ case STATE_OK: ++ continue; /* Don't do the stuff below */ ++ case STATE_WARNING: ++ warn++; ++ break; ++ case STATE_CRITICAL: ++ crit++; ++ break; ++ } ++ ++ asprintf (&fails, "%s%s%s", fails, (strcmp(fails,"") ? 
", " : ""), pp->ep_prog); ++ result = max_state(result, res); ++ if(str) ++ free(str); ++ } ++ } + } + + if ( result == STATE_OK ) { +@@ -302,6 +463,9 @@ + printf (" [%s]", fails); + + printf ("\n"); ++ ++ write_state_file(argv); ++ + return result; + } + +@@ -336,6 +500,8 @@ + {"verbose", no_argument, 0, 'v'}, + {"ereg-argument-array", required_argument, 0, CHAR_MAX+1}, + {"input-file", required_argument, 0, CHAR_MAX+2}, ++ {"state-file", required_argument, 0, 'S'}, ++ {"state-time", required_argument, 0, 'T'}, + {0, 0, 0, 0} + }; + +@@ -344,7 +510,7 @@ + strcpy (argv[c], "-t"); + + while (1) { +- c = getopt_long (argc, argv, "Vvht:c:w:p:s:u:C:a:z:r:m:P:", ++ c = getopt_long (argc, argv, "Vvht:c:w:p:s:u:C:a:z:r:m:P:S:T:", + longopts, &option); + + if (c == -1 || c == EOF) +@@ -479,22 +645,27 @@ + asprintf (&metric_name, "%s", optarg); + if ( strcmp(optarg, "PROCS") == 0) { + metric = METRIC_PROCS; ++ metric_state_name = 'P'; + break; + } + else if ( strcmp(optarg, "VSZ") == 0) { + metric = METRIC_VSZ; ++ metric_state_name = 'V'; + break; + } + else if ( strcmp(optarg, "RSS") == 0 ) { + metric = METRIC_RSS; ++ metric_state_name = 'R'; + break; + } + else if ( strcmp(optarg, "CPU") == 0 ) { + metric = METRIC_CPU; ++ metric_state_name = 'C'; + break; + } + else if ( strcmp(optarg, "ELAPSED") == 0) { + metric = METRIC_ELAPSED; ++ metric_state_name = 'E'; + break; + } + +@@ -505,6 +676,16 @@ + case CHAR_MAX+2: + input_filename = optarg; + break; ++ case 'S': /* state-file */ ++ state_filename = optarg; ++ break; ++ case 'T': /* state-time */ ++ if (!is_integer (optarg)) ++ usage2 (_("state-time must be a positive integer"), optarg); ++ else ++ if((state_time = atoi (optarg)) < 0) /* Treat -ve time as zero */ ++ state_time = 0; ++ break; + } + } + +@@ -727,6 +908,12 @@ + printf (" %s\n", "-C, --command=COMMAND"); + printf (" %s\n", _("Only scan for exact matches of COMMAND (without path).")); + ++ printf ("\n"); ++ printf ( "%s\n", "State memory (complain if a process 
exceeds a limit for a long time):"); ++ printf ( " %s\n", "-S, --state-file=StateFile"); ++ printf ( " %s\n", _("Store process information in this file")); ++ printf ( " %s\n", "-T, --state-time=minutes"); ++ + printf(_("\n\ + RANGEs are specified 'min:max' or 'min:' or ':max' (or 'max'). If\n\ + specified 'max:min', a warning status will be generated if the\n\ +@@ -755,7 +942,9 @@ + printf (" %s\n", "check_procs -w 50000 -c 100000 --metric=VSZ"); + printf (" %s\n\n", _("Alert if VSZ of any processes over 50K or 100K")); + printf (" %s\n", "check_procs -w 10 -c 20 --metric=CPU"); +- printf (" %s\n", _("Alert if CPU of any processes over 10%% or 20%%")); ++ printf (" %s\n\n", _("Alert if CPU of any processes over 10% or 20%")); ++ printf (" %s\n", "check_procs -w 80 -c 90 --metric=CPU --state-time=10 --state-file=/tmp/CPU-state"); ++ printf (" %s\n", _("Alert if CPU of any processes over 80% or 90% for at least 10 minutes, record state in /tmp/CPU-state")); + + printf (_(UT_SUPPORT)); + } +@@ -766,5 +955,423 @@ + printf (_("Usage: ")); + printf ("%s -w -c [-m metric] [-s state] [-p ppid]\n", progname); + printf (" [-u user] [-r rss] [-z vsz] [-P %%cpu] [-a argument-array]\n"); +- printf (" [-C command] [-t timeout] [-v]\n"); ++ printf (" [-C command] [-t timeout] [-v] [-S state_file] [-T state_time_minutes]\n"); ++} ++ ++/* Read the state file - if there is one. ++ * ++ * This file is not locked. On a horribly over loaded system it might happen that reads & writes ++ * could overlap in the wrong way. Locking could make things worse, you may end up with many ++ * instances of this program waiting on the lock. ++ */ ++void ++read_state_file(void) ++{ ++ FILE* sf; ++ char* errstr; ++ char* str; ++ char in_buf[STATE_MAX_LINE]; /* Input buffer */ ++ int line_no = 0; ++ char prog_name[MAX_PROG_NAME + 1]; ++ int eaten; ++ ExProc* pp; ++ ++ if( ! state_filename) /* No file specified */ ++ return; ++ ++ if( ! 
(sf = fopen(state_filename, "r"))) { ++ /* It is OK if it doesn't exist, we just haven't created it yet */ ++ if(errno == ENOENT) { ++ must_rewrite = 1; /* Force it to be created */ ++ return; ++ } ++ ++ /* Anything else - should not happen */ ++ errstr = strerror(errno); ++ die(STATE_UNKNOWN, _("Can't open %s for reading as: %s"), state_filename, errstr); ++ } ++ ++ /* Read a line at a time */ ++ while(fgets(in_buf, STATE_MAX_LINE, sf)) { ++ line_no++; ++ if( ! (str = strchr(in_buf, '\n'))) { ++ printf(_("State file %s corrupt, line too long, at line %d\n"), state_filename, line_no); ++ must_rewrite = 1; /* Force write */ ++ goto read_off; /* Will be fixed when we rewrite it in a moment */ ++ } ++ *str = '\0'; ++ ++ /* Empty line or comment ? */ ++ if(in_buf[0] == '\0' || in_buf[0] == '#') ++ continue; ++ ++ if(in_buf[1] != ' ') { ++ printf(_("State file %s corrupt, no space at position 1, at line %d\n"), state_filename, line_no); ++ must_rewrite = 1; /* Force write */ ++ goto read_off; ++ } ++ ++ /* What line type ? */ ++ switch(in_buf[0]) { ++ case 'V': /* In case we are running 1st time after upgrade */ ++ if(atoi(in_buf + 2) != STATE_VERSION) { ++ printf(_("State file %s is wrong version, expecting %d. File ignored\n"), state_filename, STATE_VERSION); ++ must_rewrite = 1; /* Force write */ ++ goto read_off; ++ } ++ break; ++ case 'P': /* Info about a process */ ++ /* P 1234 1200 cpu_hog CW 4a05a817 CC 4a05a92f */ ++ if( ! (pp = calloc(sizeof(ExProc), 1))) ++ die(STATE_UNKNOWN, _("Out of memory reading %s line %d"), state_filename, line_no); ++ ++ pp->ep_next = state_list; ++ state_list = pp; ++ ++ /* MAX_PROG_NAME on next line */ ++ if(sscanf(in_buf, "P %d %d %20s%n", &pp->ep_pid, &pp->ep_ppid, prog_name, &eaten) != 3) { ++ printf(_("State file corrupt, bad process line, file %s line %d\n"), state_filename, line_no); ++ must_rewrite = 1; /* Force write */ ++ goto read_off; ++ } ++ ++ if( ! 
(pp->ep_prog = strdup(prog_name))) ++ die(STATE_UNKNOWN, _("Out of memory reading %s line %d\n"), state_filename, line_no); ++ ++ if(verbose >= 3) ++ printf("Read pid %d ppid %d proc %s\n", pp->ep_pid, pp->ep_ppid, pp->ep_prog); ++ ++ read_limit_line(in_buf + eaten, &pp->ep_limits, state_filename, line_no); ++ break; ++ case 'N': /* Number of procs exceeded */ ++ if(verbose >= 3) ++ printf("Read N:\n"); ++ read_limit_line(in_buf + 1, &state_nprocs, state_filename, line_no); ++ break; ++ default: ++ printf(_("State file %s corrupt, unknown line type, at line %d\n"), state_filename, line_no); ++ must_rewrite = 1; /* Force write */ ++ goto read_off; ++ } ++ } ++ ++ /* Ignore changes so far */ ++read_off: ++ state_changed = 0; ++ ++ fclose(sf); ++} ++ ++/* Read a line (or rest of) a process or global limit line. ++ * Expect the first character of in_line to be NUL or a space. ++ * Args: ++ * in_line the line to read ++ * ppl pointer to pointer to linked list where to store what is read ++ * filename the name of the file being read ++ * line_no that was read ++ * ++ * Read lines like: ++ * PW 4a05a91f PC 4a05a817 ++ */ ++void ++read_limit_line(const char* in_line, PLimit** ppl, char* state_filename, int line_no) ++{ ++ PLimit* pl; ++ unsigned long when; ++ int eaten; ++ ++ while(*in_line) { ++ if(*in_line == ' ') { /* Ignore spaces */ ++ in_line++; ++ continue; ++ } ++ ++ /* We have found something, allocate somewhere to put it */ ++ if( ! (pl = calloc(sizeof(PLimit), 1))) ++ die(STATE_UNKNOWN, _("Out of memory reading %s line %d\n"), state_filename, line_no); ++ ++ /* Read a metric code */ ++ if( ! strchr(METRIC_CODES, *in_line)) { ++ printf(_("State file %s corrupt, unknown metric code, at line %d\n"), state_filename, line_no); ++ must_rewrite = 1; ++ free(pl); ++ return; ++ } ++ pl->pl_metric = *in_line++; ++ ++ /* Read the state code */ ++ if(*in_line == '\0' || ! 
strchr(STATE_CODES, *in_line)) { ++ printf(_("State file %s corrupt, unknown state code, at line %d\n"), state_filename, line_no); ++ must_rewrite = 1; ++ free(pl); ++ return; ++ } ++ pl->pl_state = *in_line++ == 'W' ? STATE_WARNING : STATE_CRITICAL; ++ ++ /* Read the time */ ++ if(sscanf(in_line, " %lx%n", &when, &eaten) != 1) { ++ printf(_("State file %s corrupt, bad time, at line %d\n"), state_filename, line_no); ++ must_rewrite = 1; ++ free(pl); ++ return; ++ } ++ pl->pl_when = (time_t)when; ++ in_line += eaten; ++ ++ if(verbose >= 3) ++ printf(" metric=%c state=%c since %s", pl->pl_metric, STATE2state(pl->pl_state), ctime(&pl->pl_when)); ++ ++ /* Link it in */ ++ pl->pl_next = *ppl; ++ *ppl = pl; ++ } ++ ++ if(verbose >= 3) ++ printf("\n"); ++} ++ ++/* Write back to the state file ++ */ ++void ++write_state_file(char** argv) ++{ ++ FILE* sf; ++ char* errstr; ++ ExProc* pp; ++ PLimit* pl; ++ ++ if( ! state_filename) /* No file specified */ ++ return; ++ ++ /* Work out if we are going to write back what we read in. ++ * If there is something that has not been seen then it was read in ++ * from the file - need to write back to lose the entry, scan for that. ++ * Otherwise: state_changed will tell us what we want to know and was set ++ * when a change was made. ++ */ ++ if(metric == METRIC_PROCS) { ++ for(pl = state_nprocs; pl; pl = pl->pl_next) ++ if( ! pl->pl_seen) ++ state_changed = 1; ++ } else { ++ for(pp = state_list; pp; pp = pp->ep_next) { ++ if( ! pp->ep_seen) ++ state_changed = 1; ++ ++ for(pl = pp->ep_limits; pl; pl = pl->pl_next) ++ if( ! pl->pl_seen) ++ state_changed = 1; ++ } ++ } ++ ++ if(verbose >= 3) ++ printf("Write state, changed=%d\n", state_changed); ++ ++ /* No change to the state file ? */ ++ if( ! state_changed && ! must_rewrite) ++ return; ++ ++ if( ! 
(sf = fopen(state_filename, "w"))) { ++ errstr = strerror(errno); ++ die(STATE_UNKNOWN, _("Can't open %s for writing as: %s"), state_filename, errstr); ++ } ++ ++ fprintf(sf, "# Process state file written by %s - DO NOT HAND EDIT\n", progname); ++ fprintf(sf, "# Args:"); ++ for(; *argv; argv++) ++ fprintf(sf, " %s", *argv); ++ fprintf(sf, "\n"); ++ fprintf(sf, "V %d\n", STATE_VERSION); ++ ++ if(metric != METRIC_PROCS) { ++ /* Traverse the processes that we know about */ ++ for(pp = state_list; pp; pp = pp->ep_next) { ++ if( ! pp->ep_seen) ++ continue; ++ ++ fprintf(sf, "P %d %d %.*s", pp->ep_pid, pp->ep_ppid, MAX_PROG_NAME, pp->ep_prog); ++ for(pl = pp->ep_limits; pl; pl = pl->pl_next) ++ if(pl->pl_seen) ++ fprintf(sf, " %c%c %lx", pl->pl_metric, STATE2state(pl->pl_state), (unsigned long)pl->pl_when); ++ ++ fprintf(sf, "\n"); ++ } ++ } else { ++ /* Print when the # processes is what is being checked */ ++ fprintf(sf, "N"); ++ for(pl = state_nprocs; pl; pl = pl->pl_next) ++ if(pl->pl_seen) ++ fprintf(sf, " %c%c %lx", pl->pl_metric, STATE2state(pl->pl_state), (unsigned long)pl->pl_when); ++ ++ /* Space before \n is important - else get error on read if no limits follow -- which ++ * will happen if all is well. ++ */ ++ fprintf(sf, " \n"); ++ } ++ ++ fclose(sf); ++} ++ ++/* Record a state for a program. ++ * Create a new entry if we need to, or update an existing one. ++ * Program must match on the first 3 args to update. 
++ * Args: ++ * procpid Process ID ++ * procppid Parent process ID ++ * procprog Program name ++ * prog_metric What we are measuring (METRIC_something but represented as the character in the file) ++ * state Error or warning (STATE_something) ++ */ ++void ++record_state(pid_t procpid, pid_t procppid, char* procprog, char prog_metric, int state, time_t start_time) ++{ ++ ExProc* pp; ++ ++ /* Look for the process */ ++ for(pp = state_list; pp; pp = pp->ep_next) { ++ if(pp->ep_pid != procpid) ++ continue; ++ ++ /* Right process, but if it has mutated - throw it away and start again. ++ * This doesn't detect processes that exec() a lot w/out fork(), but that is rare. ++ */ ++ if(pp->ep_ppid != procppid || strcmp(pp->ep_prog, procprog)) { ++ PLimit* pl; ++ while(pl = pp->ep_limits) { ++ pp->ep_limits = pl->pl_next; ++ free(pl); ++ } ++ pp->ep_ppid = procppid; ++ if(strcmp(pp->ep_prog, procprog)) { ++ free(pp->ep_prog); ++ if( ! (pp->ep_prog = strdup(procprog))) ++ die(STATE_UNKNOWN, _("Out of memory")); ++ } ++ ++ state_changed = 1; ++ } ++ ++ if(verbose >= 3) ++ printf("Record found: pid %d %s\n", pp->ep_pid, pp->ep_prog); ++ ++ break; ++ } ++ ++ /* Didn't find the process, allocate a new entry */ ++ if( ! pp) { ++ if( ! (pp = calloc(sizeof(ExProc), 1))) ++ die(STATE_UNKNOWN, _("Out of memory")); ++ pp->ep_pid = procpid; ++ pp->ep_ppid = procppid; ++ if( ! 
(pp->ep_prog = strdup(procprog))) ++ die(STATE_UNKNOWN, _("Out of memory")); ++ ++ pp->ep_next = state_list; ++ state_list = pp; ++ state_changed = 1; ++ ++ if(verbose >= 3) ++ printf("Record alloc: pid %d %s\n", pp->ep_pid, pp->ep_prog); ++ } ++ ++ pp->ep_seen = 1; /* Ensure that this gets written out */ ++ ++ record_limit(&pp->ep_limits, state, prog_metric, start_time); ++} ++ ++/* Store a limit ++ * l_ref address of head of limits chain ++ * prog_metric What we are measuring (METRIC_something but represented as the character in the file) ++ * state Error or warning (STATE_something) ++ * start_time The time to record when it started, if we already record this - don't change the time ++ * unless this is older. ++ * ++ * If something is C then it is implicitly W. This is important: if something goes from ++ * W to C, it might remain at C for less than the state time (which it might do W -> C ++ * & back again several times) - but the time above the W level might be notifiable. ++ */ ++void ++record_limit(PLimit** l_ref, int state, char proc_metric, time_t start_time) ++{ ++ PLimit* pl; ++ PLimit* pl_found = NULL; ++ int seen_warning = 0; ++ ++ /* Find the individual process limit. ++ * Scan the whole lot since we want to 'seen' a Warning if we have Critical. ++ */ ++ for(pl = *l_ref; pl; pl = pl->pl_next) ++ if(pl->pl_metric == proc_metric) { ++ if(state == STATE_CRITICAL && pl->pl_state == STATE_WARNING) { ++ pl->pl_seen = 1; /* Ensure that it is output */ ++ seen_warning = 1; ++ } ++ ++ if(pl->pl_state == state) ++ pl_found = pl; /* We found what we were looking for */ ++ } ++ ++ pl = pl_found; ++ ++ /* Didn't find it, allocate a new one */ ++ if( ! pl) { ++ if( ! 
(pl = calloc(sizeof(PLimit), 1))) ++ die(STATE_UNKNOWN, _("Out of memory")); ++ pl->pl_next = *l_ref; ++ *l_ref = pl; ++ pl->pl_when = start_time; ++ pl->pl_state = state; ++ pl->pl_metric = proc_metric; ++ ++ state_changed = 1; ++ } else /* It is possible that the time was set earlier when a 'C' generated ++ * an implicit 'W'. Since the 'W' probably started earlier than the 'C' ++ * we may have recorded the later 'C' time rather than the 'W' time. ++ */ ++ if(pl->pl_when > start_time) ++ pl->pl_when = start_time; ++ ++ pl->pl_seen = 1; /* Ensure that it is output */ ++ ++ if(verbose >= 3) ++ printf("Record limit: metric=%c state=%c since %s", pl->pl_metric, STATE2state(pl->pl_state), ctime(&pl->pl_when)); ++ ++ /* If this is a critical, but we didn't see the warning - generate the warning */ ++ if(state == 'C' && ! seen_warning) ++ record_limit(l_ref, 'W', proc_metric, start_time); ++} ++ ++/* Check if limits have been exceeded for at least the state-time ++ * Check only metric_state_name regardless of what is stored. ++ * ++ * Args: ++ * pl List of limit values ++ * ++ * Return: STATE_OK, STATE_WARNING or STATE_CRITICAL ++ */ ++int ++check_limit(PLimit* pl) ++{ ++ int result = STATE_OK; ++ ++ for(; pl; pl = pl->pl_next) { ++ if( ! pl->pl_seen) ++ continue; /* Of no interest, not updated this run */ ++ ++ if(pl->pl_metric != metric_state_name) ++ continue; /* Not what we are looking for */ ++ ++ /* Is this something that has been going on for long enough that we ++ * are to report it ? ++ */ ++ if(pl->pl_when <= state_limit_start) { ++ if(result == STATE_OK) ++ result = pl->pl_state; ++ ++ if(result == STATE_WARNING && pl->pl_state == STATE_CRITICAL) ++ result = STATE_CRITICAL; ++ } ++ } ++ ++ return(result); + } -- cgit v1.2.3-74-g34f1