From 08a475a14fff2d0eee2e49f4765cb286cac8d2e2 Mon Sep 17 00:00:00 2001 From: Lorenz Kästle <12514511+RincewindsHat@users.noreply.github.com> Date: Thu, 13 Mar 2025 23:41:12 +0100 Subject: Refactor check_load + new ouput --- plugins/check_load.c | 420 +++++++++++++++++++++++++++++---------------------- 1 file changed, 243 insertions(+), 177 deletions(-) (limited to 'plugins/check_load.c') diff --git a/plugins/check_load.c b/plugins/check_load.c index e3a45f58..57be8c69 100644 --- a/plugins/check_load.c +++ b/plugins/check_load.c @@ -28,6 +28,9 @@ * *****************************************************************************/ +#include "output.h" +#include "perfdata.h" +#include "thresholds.h" const char *progname = "check_load"; const char *copyright = "1999-2022"; const char *email = "devel@monitoring-plugins.org"; @@ -36,6 +39,10 @@ const char *email = "devel@monitoring-plugins.org"; #include "./runcmd.h" #include "./utils.h" #include "./popen.h" +#include "states.h" +#include "check_load.d/config.h" + +#include "../gl/stdlib.h" #include @@ -50,70 +57,68 @@ const char *email = "devel@monitoring-plugins.org"; # define LOADAVG_15MIN 2 #endif /* !defined LOADAVG_1MIN */ -static int process_arguments(int argc, char **argv); -static int validate_arguments(void); +typedef struct { + int errorcode; + check_load_config config; +} check_load_config_wrapper; +static check_load_config_wrapper process_arguments(int argc, char **argv); +static check_load_config_wrapper validate_arguments(check_load_config_wrapper /*config_wrapper*/); + void print_help(void); void print_usage(void); -static int print_top_consuming_processes(); - -static int n_procs_to_show = 0; - -/* strictly for pretty-print usage in loops */ -static const int nums[3] = {1, 5, 15}; - -/* provide some fairly sane defaults */ -double wload[3] = {0.0, 0.0, 0.0}; -double cload[3] = {0.0, 0.0, 0.0}; -#define la1 la[0] -#define la5 la[1] -#define la15 la[2] - -char *status_line; -bool take_into_account_cpus = false; - -static void get_threshold(char *arg, double *th) { - size_t i, n; - int valid = 0; - char *str = arg, *p; - - n = strlen(arg); - for (i = 0; i < 3; i++) { - th[i] = strtod(str, &p); - if (p == str) { +typedef struct { + int errorcode; + char **top_processes; +} top_processes_result; +static top_processes_result print_top_consuming_processes(int /*n_procs_to_show*/); + +typedef struct { + mp_range load[3]; +} parsed_thresholds; +static parsed_thresholds get_threshold(char *arg) { + size_t index; + char *str = arg; + char *tmp_pointer; + bool valid = false; + + parsed_thresholds result = { + .load[0] = mp_range_init(), + .load[1] = mp_range_init(), + .load[2] = mp_range_init(), + }; + + size_t arg_length = strlen(arg); + for (index = 0; index < 3; index++) { + double tmp = strtod(str, &tmp_pointer); + if (tmp_pointer == str) { break; } - valid = 1; - str = p + 1; - if (n <= (size_t)(str - arg)) { + result.load[index] = mp_range_set_end(result.load[index], mp_create_pd_value(tmp)); + + valid = true; + str = tmp_pointer + 1; + if (arg_length <= (size_t)(str - arg)) { break; } } /* empty argument or non-floatish, so warn about it and die */ - if (!i && !valid) { + if (!index && !valid) { usage(_("Warning threshold must be float or float triplet!\n")); } - if (i != 2) { + if (index != 2) { /* one or more numbers were given, so fill array with last * we got (most likely to NOT produce the least expected result) */ - for (n = i; n < 3; n++) { - th[n] = th[i]; + for (size_t tmp_index = index; tmp_index < 3; tmp_index++) { + result.load[tmp_index] = result.load[index]; } } + return result; } int main(int argc, char **argv) { - int result = -1; - int i; - long numcpus; - - double la[3] = {0.0, 0.0, 0.0}; /* NetBSD complains about uninitialized arrays */ -#ifndef HAVE_GETLOADAVG - char input_buffer[MAX_INPUT_BUFFER]; -#endif - setlocale(LC_ALL, ""); bindtextdomain(PACKAGE, LOCALEDIR); textdomain(PACKAGE); @@ -122,112 +127,138 @@ int main(int argc, char **argv) { /* Parse extra opts if any */ argv = np_extra_opts(&argc, argv, progname); - if (process_arguments(argc, argv) == ERROR) { + check_load_config_wrapper tmp_config = process_arguments(argc, argv); + if (tmp_config.errorcode == ERROR) { usage4(_("Could not parse arguments")); } -#ifdef HAVE_GETLOADAVG - result = getloadavg(la, 3); - if (result != 3) { - return STATE_UNKNOWN; - } -#else - child_process = spopen(PATH_TO_UPTIME); - if (child_process == NULL) { - printf(_("Error opening %s\n"), PATH_TO_UPTIME); - return STATE_UNKNOWN; - } - child_stderr = fdopen(child_stderr_array[fileno(child_process)], "r"); - if (child_stderr == NULL) { - printf(_("Could not open stderr for %s\n"), PATH_TO_UPTIME); - } - fgets(input_buffer, MAX_INPUT_BUFFER - 1, child_process); - if (strstr(input_buffer, "load average:")) { - sscanf(input_buffer, "%*[^l]load average: %lf, %lf, %lf", &la1, &la5, &la15); - } else if (strstr(input_buffer, "load averages:")) { - sscanf(input_buffer, "%*[^l]load averages: %lf, %lf, %lf", &la1, &la5, &la15); - } else { - printf(_("could not parse load from uptime %s: %d\n"), PATH_TO_UPTIME, result); - return STATE_UNKNOWN; - } + const check_load_config config = tmp_config.config; - result = spclose(child_process); - if (result) { - printf(_("Error code %d returned in %s\n"), result, PATH_TO_UPTIME); - return STATE_UNKNOWN; - } -#endif + double load_values[3] = {0, 0, 0}; - if ((la[0] < 0.0) || (la[1] < 0.0) || (la[2] < 0.0)) { -#ifdef HAVE_GETLOADAVG - printf(_("Error in getloadavg()\n")); -#else - printf(_("Error processing %s\n"), PATH_TO_UPTIME); -#endif - return STATE_UNKNOWN; + int error = getloadavg(load_values, 3); + if (error != 3) { + die(STATE_UNKNOWN, _("Failed to retrieve load values")); } - /* we got this far, so assume OK until we've measured */ - result = STATE_OK; - - xasprintf(&status_line, _("load average: %.2f, %.2f, %.2f"), la1, la5, la15); - xasprintf(&status_line, ("total %s"), status_line); + mp_check overall = mp_check_init(); + if (config.output_format_set) { + mp_set_format(config.output_format); + } - double scaled_la[3] = {0.0, 0.0, 0.0}; bool is_using_scaled_load_values = false; - - if (take_into_account_cpus == true && (numcpus = GET_NUMBER_OF_CPUS()) > 0) { + long numcpus; + if (config.take_into_account_cpus && ((numcpus = GET_NUMBER_OF_CPUS()) > 0)) { is_using_scaled_load_values = true; - scaled_la[0] = la[0] / numcpus; - scaled_la[1] = la[1] / numcpus; - scaled_la[2] = la[2] / numcpus; + double scaled_la[3] = {0.0, 0.0, 0.0}; + scaled_la[0] = load_values[0] / numcpus; + scaled_la[1] = load_values[1] / numcpus; + scaled_la[2] = load_values[2] / numcpus; + + mp_subcheck scaled_load_sc = mp_subcheck_init(); + scaled_load_sc = mp_set_subcheck_default_state(scaled_load_sc, STATE_OK); + scaled_load_sc.output = "Scaled Load (divided by number of CPUs"; + + mp_perfdata pd_scaled_load1 = perfdata_init(); + pd_scaled_load1.label = "scaled_load1"; + pd_scaled_load1 = mp_set_pd_value(pd_scaled_load1, scaled_la[0]); + pd_scaled_load1 = mp_pd_set_thresholds(pd_scaled_load1, config.th_load[0]); + + mp_subcheck scaled_load_sc1 = mp_subcheck_init(); + scaled_load_sc1 = mp_set_subcheck_state(scaled_load_sc1, mp_get_pd_status(pd_scaled_load1)); + mp_add_perfdata_to_subcheck(&scaled_load_sc1, pd_scaled_load1); + xasprintf(&scaled_load_sc1.output, "1 Minute: %s", pd_value_to_string(pd_scaled_load1.value)); + mp_add_subcheck_to_subcheck(&scaled_load_sc, scaled_load_sc1); + + mp_perfdata pd_scaled_load5 = perfdata_init(); + pd_scaled_load5.label = "scaled_load5"; + pd_scaled_load5 = mp_set_pd_value(pd_scaled_load5, scaled_la[1]); + pd_scaled_load5 = mp_pd_set_thresholds(pd_scaled_load5, config.th_load[1]); + + mp_subcheck scaled_load_sc5 = mp_subcheck_init(); + scaled_load_sc5 = mp_set_subcheck_state(scaled_load_sc5, mp_get_pd_status(pd_scaled_load5)); + mp_add_perfdata_to_subcheck(&scaled_load_sc5, pd_scaled_load5); + xasprintf(&scaled_load_sc5.output, "5 Minutes: %s", pd_value_to_string(pd_scaled_load5.value)); + mp_add_subcheck_to_subcheck(&scaled_load_sc, scaled_load_sc5); + + mp_perfdata pd_scaled_load15 = perfdata_init(); + pd_scaled_load15.label = "scaled_load15"; + pd_scaled_load15 = mp_set_pd_value(pd_scaled_load15, scaled_la[2]); + pd_scaled_load15 = mp_pd_set_thresholds(pd_scaled_load15, config.th_load[2]); + + mp_subcheck scaled_load_sc15 = mp_subcheck_init(); + scaled_load_sc15 = mp_set_subcheck_state(scaled_load_sc15, mp_get_pd_status(pd_scaled_load15)); + mp_add_perfdata_to_subcheck(&scaled_load_sc15, pd_scaled_load15); + xasprintf(&scaled_load_sc15.output, "15 Minutes: %s", pd_value_to_string(pd_scaled_load15.value)); + mp_add_subcheck_to_subcheck(&scaled_load_sc, scaled_load_sc15); + + mp_add_subcheck_to_check(&overall, scaled_load_sc); + } + + mp_subcheck load_sc = mp_subcheck_init(); + load_sc = mp_set_subcheck_default_state(load_sc, STATE_OK); + load_sc.output = "Total Load"; - char *tmp = NULL; - xasprintf(&tmp, _("load average: %.2f, %.2f, %.2f"), scaled_la[0], scaled_la[1], scaled_la[2]); - xasprintf(&status_line, "scaled %s - %s", tmp, status_line); + mp_perfdata pd_load1 = perfdata_init(); + pd_load1.label = "load1"; + pd_load1 = mp_set_pd_value(pd_load1, load_values[0]); + if (!is_using_scaled_load_values) { + pd_load1 = mp_pd_set_thresholds(pd_load1, config.th_load[0]); } - for (i = 0; i < 3; i++) { - if (is_using_scaled_load_values) { - if (scaled_la[i] > cload[i]) { - result = STATE_CRITICAL; - break; - } else if (scaled_la[i] > wload[i]) { - result = STATE_WARNING; - } - } else { - if (la[i] > cload[i]) { - result = STATE_CRITICAL; - break; - } else if (la[i] > wload[i]) { - result = STATE_WARNING; - } - } + mp_subcheck load_sc1 = mp_subcheck_init(); + load_sc1 = mp_set_subcheck_state(load_sc1, mp_get_pd_status(pd_load1)); + mp_add_perfdata_to_subcheck(&load_sc1, pd_load1); + xasprintf(&load_sc1.output, "1 Minute: %s", pd_value_to_string(pd_load1.value)); + mp_add_subcheck_to_subcheck(&load_sc, load_sc1); + + mp_perfdata pd_load5 = perfdata_init(); + pd_load5.label = "load5"; + pd_load5 = mp_set_pd_value(pd_load5, load_values[1]); + if (!is_using_scaled_load_values) { + pd_load5 = mp_pd_set_thresholds(pd_load5, config.th_load[1]); } - printf("LOAD %s - %s|", state_text(result), status_line); - for (i = 0; i < 3; i++) { - if (is_using_scaled_load_values) { - printf("load%d=%.3f;;;0; ", nums[i], la[i]); - printf("scaled_load%d=%.3f;%.3f;%.3f;0; ", nums[i], scaled_la[i], wload[i], cload[i]); - } else { - printf("load%d=%.3f;%.3f;%.3f;0; ", nums[i], la[i], wload[i], cload[i]); - } + mp_subcheck load_sc5 = mp_subcheck_init(); + load_sc5 = mp_set_subcheck_state(load_sc5, mp_get_pd_status(pd_load5)); + mp_add_perfdata_to_subcheck(&load_sc5, pd_load5); + xasprintf(&load_sc5.output, "5 Minutes: %s", pd_value_to_string(pd_load5.value)); + mp_add_subcheck_to_subcheck(&load_sc, load_sc5); + + mp_perfdata pd_load15 = perfdata_init(); + pd_load15.label = "load15"; + pd_load15 = mp_set_pd_value(pd_load15, load_values[2]); + if (!is_using_scaled_load_values) { + pd_load15 = mp_pd_set_thresholds(pd_load15, config.th_load[2]); } - putchar('\n'); - if (n_procs_to_show > 0) { - print_top_consuming_processes(); + mp_subcheck load_sc15 = mp_subcheck_init(); + load_sc15 = mp_set_subcheck_state(load_sc15, mp_get_pd_status(pd_load15)); + mp_add_perfdata_to_subcheck(&load_sc15, pd_load15); + xasprintf(&load_sc15.output, "15 Minutes: %s", pd_value_to_string(pd_load15.value)); + mp_add_subcheck_to_subcheck(&load_sc, load_sc15); + + mp_add_subcheck_to_check(&overall, load_sc); + + if (config.n_procs_to_show > 0) { + mp_subcheck top_proc_sc = mp_subcheck_init(); + top_proc_sc = mp_set_subcheck_state(top_proc_sc, STATE_OK); + top_processes_result top_proc = print_top_consuming_processes(config.n_procs_to_show); + top_proc_sc.output = ""; + + if (top_proc.errorcode == OK) { + for (int i = 0; i < config.n_procs_to_show; i++) { + xasprintf(&top_proc_sc.output, "%s\n%s", top_proc_sc.output, top_proc.top_processes[i]); + } + } } - return result; + + mp_exit(overall); } /* process command-line arguments */ -static int process_arguments(int argc, char **argv) { - int c = 0; - - int option = 0; +static check_load_config_wrapper process_arguments(int argc, char **argv) { static struct option longopts[] = {{"warning", required_argument, 0, 'w'}, {"critical", required_argument, 0, 'c'}, {"percpu", no_argument, 0, 'r'}, @@ -236,26 +267,45 @@ static int process_arguments(int argc, char **argv) { {"procs-to-show", required_argument, 0, 'n'}, {0, 0, 0, 0}}; + check_load_config_wrapper result = { + .errorcode = OK, + .config = check_load_config_init(), + }; + if (argc < 2) { - return ERROR; + result.errorcode = ERROR; + return result; } - while (1) { - c = getopt_long(argc, argv, "Vhrc:w:n:", longopts, &option); + while (true) { + int option = 0; + int option_index = getopt_long(argc, argv, "Vhrc:w:n:", longopts, &option); - if (c == -1 || c == EOF) { + if (option_index == -1 || option_index == EOF) { break; } - switch (c) { - case 'w': /* warning time threshold */ - get_threshold(optarg, wload); - break; - case 'c': /* critical time threshold */ - get_threshold(optarg, cload); - break; + switch (option_index) { + case 'w': /* warning time threshold */ { + parsed_thresholds warning_range = get_threshold(optarg); + result.config.th_load[0].warning = warning_range.load[0]; + result.config.th_load[0].warning_is_set = true; + result.config.th_load[1].warning = warning_range.load[1]; + result.config.th_load[1].warning_is_set = true; + result.config.th_load[2].warning = warning_range.load[2]; + result.config.th_load[2].warning_is_set = true; + } break; + case 'c': /* critical time threshold */ { + parsed_thresholds critical_range = get_threshold(optarg); + result.config.th_load[0].critical = critical_range.load[0]; + result.config.th_load[0].critical_is_set = true; + result.config.th_load[1].critical = critical_range.load[1]; + result.config.th_load[1].critical_is_set = true; + result.config.th_load[2].critical = critical_range.load[2]; + result.config.th_load[2].critical_is_set = true; + } break; case 'r': /* Divide load average by number of CPUs */ - take_into_account_cpus = true; + result.config.take_into_account_cpus = true; break; case 'V': /* version */ print_revision(progname, NP_VERSION); @@ -264,49 +314,49 @@ static int process_arguments(int argc, char **argv) { print_help(); exit(STATE_UNKNOWN); case 'n': - n_procs_to_show = atoi(optarg); + result.config.n_procs_to_show = atoi(optarg); break; case '?': /* help */ usage5(); } } - c = optind; - if (c == argc) { - return validate_arguments(); + int index = optind; + if (index == argc) { + return validate_arguments(result); } /* handle the case if both arguments are missing, * but not if only one is given without -c or -w flag */ - if (c - argc == 2) { - get_threshold(argv[c++], wload); - get_threshold(argv[c++], cload); - } else if (c - argc == 1) { - get_threshold(argv[c++], cload); + if (index - argc == 2) { + parsed_thresholds warning_range = get_threshold(argv[index++]); + result.config.th_load[0].warning = warning_range.load[0]; + result.config.th_load[0].warning_is_set = true; + result.config.th_load[1].warning = warning_range.load[1]; + result.config.th_load[1].warning_is_set = true; + result.config.th_load[2].warning = warning_range.load[2]; + result.config.th_load[2].warning_is_set = true; + parsed_thresholds critical_range = get_threshold(argv[index++]); + result.config.th_load[0].critical = critical_range.load[0]; + result.config.th_load[0].critical_is_set = true; + result.config.th_load[1].critical = critical_range.load[1]; + result.config.th_load[1].critical_is_set = true; + result.config.th_load[2].critical = critical_range.load[2]; + result.config.th_load[2].critical_is_set = true; + } else if (index - argc == 1) { + parsed_thresholds critical_range = get_threshold(argv[index++]); + result.config.th_load[0].critical = critical_range.load[0]; + result.config.th_load[0].critical_is_set = true; + result.config.th_load[1].critical = critical_range.load[1]; + result.config.th_load[1].critical_is_set = true; + result.config.th_load[2].critical = critical_range.load[2]; + result.config.th_load[2].critical_is_set = true; } - return validate_arguments(); + return validate_arguments(result); } -static int validate_arguments(void) { - int i = 0; - - /* match cload first, as it will give the most friendly error message - * if user hasn't given the -c switch properly */ - for (i = 0; i < 3; i++) { - if (cload[i] < 0) { - die(STATE_UNKNOWN, _("Critical threshold for %d-minute load average is not specified\n"), nums[i]); - } - if (wload[i] < 0) { - die(STATE_UNKNOWN, _("Warning threshold for %d-minute load average is not specified\n"), nums[i]); - } - if (wload[i] > cload[i]) { - die(STATE_UNKNOWN, _("Parameter inconsistency: %d-minute \"warning load\" is greater than \"critical load\"\n"), nums[i]); - } - } - - return OK; -} +static check_load_config_wrapper validate_arguments(check_load_config_wrapper config_wrapper) { return config_wrapper; } void print_help(void) { print_revision(progname, NP_VERSION); @@ -363,23 +413,39 @@ int cmpstringp(const void *p1, const void *p2) { } #endif /* PS_USES_PROCPCPU */ -static int print_top_consuming_processes() { - int i = 0; - struct output chld_out, chld_err; +static top_processes_result print_top_consuming_processes(int n_procs_to_show) { + top_processes_result result = { + .errorcode = OK, + }; + struct output chld_out; + struct output chld_err; if (np_runcmd(PS_COMMAND, &chld_out, &chld_err, 0) != 0) { fprintf(stderr, _("'%s' exited with non-zero status.\n"), PS_COMMAND); - return STATE_UNKNOWN; + result.errorcode = ERROR; + return result; } + if (chld_out.lines < 2) { fprintf(stderr, _("some error occurred getting procs list.\n")); - return STATE_UNKNOWN; + result.errorcode = ERROR; + return result; } + #ifdef PS_USES_PROCPCPU qsort(chld_out.line + 1, chld_out.lines - 1, sizeof(char *), cmpstringp); #endif /* PS_USES_PROCPCPU */ int lines_to_show = chld_out.lines < (size_t)(n_procs_to_show + 1) ? (int)chld_out.lines : n_procs_to_show + 1; - for (i = 0; i < lines_to_show; i += 1) { - printf("%s\n", chld_out.line[i]); + + result.top_processes = calloc(lines_to_show, sizeof(char *)); + if (result.top_processes == NULL) { + // Failed allocation + result.errorcode = ERROR; + return result; } - return OK; + + for (int i = 0; i < lines_to_show; i += 1) { + xasprintf(&result.top_processes[i], "%s", chld_out.line[i]); + } + + return result; } -- cgit v1.2.3-74-g34f1