3534 lines
115 KiB
C
3534 lines
115 KiB
C
/*****************************************************************************
|
|
*
|
|
* CHECKS.C - Service and host check functions for Nagios
|
|
*
|
|
*
|
|
* License:
|
|
*
|
|
* This program is free software; you can redistribute it and/or modify
|
|
* it under the terms of the GNU General Public License version 2 as
|
|
* published by the Free Software Foundation.
|
|
*
|
|
* This program is distributed in the hope that it will be useful,
|
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
|
* GNU General Public License for more details.
|
|
*
|
|
* You should have received a copy of the GNU General Public License
|
|
* along with this program; if not, write to the Free Software
|
|
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
|
*
|
|
*****************************************************************************/
|
|
|
|
#include "../include/config.h"
|
|
#include "../include/comments.h"
|
|
#include "../include/common.h"
|
|
#include "../include/statusdata.h"
|
|
#include "../include/downtime.h"
|
|
#include "../include/macros.h"
|
|
#include "../include/nagios.h"
|
|
#include "../include/broker.h"
|
|
#include "../include/perfdata.h"
|
|
#include "../include/workers.h"
|
|
|
|
/*#define DEBUG_CHECKS*/
|
|
/*#define DEBUG_HOST_CHECKS 1*/
|
|
|
|
/*
 * Replace every ';' in the NUL-terminated string `output` with ':' in place.
 *
 * output - string to rewrite; handed to strchr(), so it must not be NULL
 * ptr    - modifiable char * lvalue used as the scan cursor; it is NULL once
 *          the macro has finished (strchr() found no further ';')
 *
 * Arguments are parenthesized so that expressions such as `obj->field` or
 * `base + off` expand safely.
 */
#define replace_semicolons(output, ptr) \
	do { \
		(ptr) = (output); \
		while (((ptr) = strchr((ptr), ';')) != NULL) { \
			*(ptr) = ':'; \
		} \
	} while (0)
|
|
|
|
#ifdef USE_EVENT_BROKER
|
|
#include "../include/neberrors.h"
|
|
#endif
|
|
|
|
/******************************************************************/
|
|
/********************** CHECK REAPER FUNCTIONS ********************/
|
|
/******************************************************************/
|
|
|
|
/* reaps host and service check results */
|
|
int reap_check_results(void)
|
|
{
|
|
int reaped_checks = 0;
|
|
|
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "reap_check_results() start\n");
|
|
|
|
/* process files in the check result queue */
|
|
reaped_checks = process_check_result_queue(check_result_path);
|
|
|
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "reap_check_results() reaped %d checks end\n", reaped_checks);
|
|
|
|
return OK;
|
|
}
|
|
|
|
|
|
|
|
|
|
/******************************************************************/
|
|
/****************** SERVICE MONITORING FUNCTIONS ******************/
|
|
/******************************************************************/
|
|
|
|
/* executes a scheduled service check */
|
|
int run_scheduled_service_check(service *svc, int check_options, double latency)
|
|
{
|
|
int result = OK;
|
|
time_t current_time = 0L;
|
|
time_t preferred_time = 0L;
|
|
time_t next_valid_time = 0L;
|
|
int time_is_valid = TRUE;
|
|
|
|
if (svc == NULL) {
|
|
return ERROR;
|
|
}
|
|
|
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "run_scheduled_service_check() start\n");
|
|
log_debug_info(DEBUGL_CHECKS, 0, "Attempting to run scheduled check of service '%s' on host '%s': check options=%d, latency=%lf\n", svc->description, svc->host_name, check_options, latency);
|
|
|
|
/*
|
|
* reset the next_check_event so we know it's
|
|
* no longer in the scheduling queue
|
|
*/
|
|
svc->next_check_event = NULL;
|
|
|
|
/* attempt to run the check */
|
|
result = run_async_service_check(svc, check_options, latency, TRUE, TRUE, &time_is_valid, &preferred_time);
|
|
|
|
/* an error occurred, so reschedule the check */
|
|
if (result == ERROR) {
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 1, "Unable to run scheduled service check at this time\n");
|
|
|
|
/* only attempt to (re)schedule checks that should get checked... */
|
|
if (svc->should_be_scheduled == TRUE) {
|
|
|
|
/* get current time */
|
|
time(¤t_time);
|
|
|
|
/* determine next time we should check the service if needed */
|
|
/* if service has no check interval, schedule it again for 5 minutes from now */
|
|
if (current_time >= preferred_time) {
|
|
preferred_time = current_time + ((svc->check_interval <= 0) ? 300 : (svc->check_interval * interval_length));
|
|
}
|
|
|
|
/* make sure we rescheduled the next service check at a valid time */
|
|
get_next_valid_time(preferred_time, &next_valid_time, svc->check_period_ptr);
|
|
|
|
/*
|
|
* If we really can't reschedule the service properly, we
|
|
* just push the check to preferred_time plus some reasonable
|
|
* random value and try again then.
|
|
*/
|
|
if (time_is_valid == FALSE && check_time_against_period(next_valid_time, svc->check_period_ptr) == ERROR) {
|
|
|
|
svc->next_check = preferred_time + ranged_urand(0, check_window(svc));
|
|
|
|
logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Check of service '%s' on host '%s' could not be rescheduled properly. Scheduling check for %s...\n", svc->description, svc->host_name, ctime(&preferred_time));
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 1, "Unable to find any valid times to reschedule the next service check!\n");
|
|
}
|
|
|
|
/* this service could be rescheduled... */
|
|
else {
|
|
svc->next_check = next_valid_time;
|
|
if (next_valid_time > preferred_time) {
|
|
/* Next valid time is further in the future because of
|
|
* timeperiod constraints. Add a random amount so we
|
|
* don't get all checks subject to that timeperiod
|
|
* constraint scheduled at the same time
|
|
*/
|
|
svc->next_check += ranged_urand(0, check_window(svc));
|
|
}
|
|
svc->should_be_scheduled = TRUE;
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 1, "Rescheduled next service check for %s", ctime(&next_valid_time));
|
|
}
|
|
}
|
|
|
|
/*
|
|
* reschedule the next service check - unless we couldn't
|
|
* find a valid next check time, but keep original options
|
|
*/
|
|
if (svc->should_be_scheduled == TRUE) {
|
|
schedule_service_check(svc, svc->next_check, check_options);
|
|
}
|
|
|
|
/* update the status log */
|
|
update_service_status(svc, FALSE);
|
|
|
|
return ERROR;
|
|
}
|
|
|
|
return OK;
|
|
}
|
|
|
|
|
|
/* forks a child process to run a service check, but does not wait for the service check result */
|
|
int run_async_service_check(service *svc, int check_options, double latency, int scheduled_check, int reschedule_check, int *time_is_valid, time_t *preferred_time)
|
|
{
|
|
nagios_macros mac;
|
|
char *raw_command = NULL;
|
|
char *processed_command = NULL;
|
|
struct timeval start_time, end_time;
|
|
host *temp_host = NULL;
|
|
double old_latency = 0.0;
|
|
check_result *cr;
|
|
int runchk_result = OK;
|
|
int macro_options = STRIP_ILLEGAL_MACRO_CHARS | ESCAPE_MACRO_CHARS;
|
|
#ifdef USE_EVENT_BROKER
|
|
int neb_result = OK;
|
|
#endif
|
|
|
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "run_async_service_check()\n");
|
|
|
|
/* make sure we have something */
|
|
if (svc == NULL) {
|
|
return ERROR;
|
|
}
|
|
|
|
/* is the service check viable at this time? */
|
|
if (check_service_check_viability(svc, check_options, time_is_valid, preferred_time) == ERROR) {
|
|
return ERROR;
|
|
}
|
|
|
|
/* find the host associated with this service */
|
|
if ((temp_host = svc->host_ptr) == NULL) {
|
|
return ERROR;
|
|
}
|
|
|
|
/******** GOOD TO GO FOR A REAL SERVICE CHECK AT THIS POINT ********/
|
|
|
|
#ifdef USE_EVENT_BROKER
|
|
/* initialize start/end times */
|
|
start_time.tv_sec = 0L;
|
|
start_time.tv_usec = 0L;
|
|
end_time.tv_sec = 0L;
|
|
end_time.tv_usec = 0L;
|
|
|
|
/* send data to event broker */
|
|
neb_result = broker_service_check(NEBTYPE_SERVICECHECK_ASYNC_PRECHECK, NEBFLAG_NONE, NEBATTR_NONE, svc, CHECK_TYPE_ACTIVE, start_time, end_time, svc->check_command, svc->latency, 0.0, 0, FALSE, 0, NULL, NULL, NULL);
|
|
|
|
/* neb module wants to cancel the service check - the check will be rescheduled for a later time by the scheduling logic */
|
|
if (neb_result == NEBERROR_CALLBACKCANCEL) {
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 0, "Check of service '%s' on host '%s' (id=%u) was cancelled by a module\n", svc->description, svc->host_name, svc->id);
|
|
|
|
if (preferred_time) {
|
|
*preferred_time += (svc->check_interval * interval_length);
|
|
}
|
|
return ERROR;
|
|
}
|
|
|
|
/* neb module wants to override (or cancel) the service check - perhaps it will check the service itself */
|
|
/* NOTE: if a module does this, it has to do a lot of the stuff found below to make sure things don't get whacked out of shape! */
|
|
/* NOTE: if would be easier for modules to override checks when the NEBTYPE_SERVICECHECK_INITIATE event is called (later) */
|
|
if (neb_result == NEBERROR_CALLBACKOVERRIDE) {
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 0, "Check of service '%s' on host '%s' (id=%u) was overridden by a module\n", svc->description, svc->host_name, svc->id);
|
|
return OK;
|
|
}
|
|
#endif
|
|
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 0, "Checking service '%s' on host '%s'...\n", svc->description, svc->host_name);
|
|
|
|
/* clear check options - we don't want old check options retained */
|
|
/* only clear check options for scheduled checks - ondemand checks shouldn't affected retained check options */
|
|
if (scheduled_check == TRUE) {
|
|
svc->check_options = CHECK_OPTION_NONE;
|
|
}
|
|
|
|
/* update latency for macros, event broker, save old value for later */
|
|
old_latency = svc->latency;
|
|
svc->latency = latency;
|
|
|
|
/* grab the host and service macro variables */
|
|
memset(&mac, 0, sizeof(mac));
|
|
grab_host_macros_r(&mac, temp_host);
|
|
grab_service_macros_r(&mac, svc);
|
|
|
|
/* get the raw command line */
|
|
get_raw_command_line_r(&mac, svc->check_command_ptr, svc->check_command, &raw_command, macro_options);
|
|
if (raw_command == NULL) {
|
|
clear_volatile_macros_r(&mac);
|
|
log_debug_info(DEBUGL_CHECKS, 0, "Raw check command for service '%s' on host '%s' was NULL - aborting.\n", svc->description, svc->host_name);
|
|
if (preferred_time) {
|
|
*preferred_time += (svc->check_interval * interval_length);
|
|
}
|
|
svc->latency = old_latency;
|
|
return ERROR;
|
|
}
|
|
|
|
/* process any macros contained in the argument */
|
|
process_macros_r(&mac, raw_command, &processed_command, macro_options);
|
|
my_free(raw_command);
|
|
if (processed_command == NULL) {
|
|
clear_volatile_macros_r(&mac);
|
|
log_debug_info(DEBUGL_CHECKS, 0, "Processed check command for service '%s' on host '%s' was NULL - aborting.\n", svc->description, svc->host_name);
|
|
if (preferred_time) {
|
|
*preferred_time += (svc->check_interval * interval_length);
|
|
}
|
|
svc->latency = old_latency;
|
|
return ERROR;
|
|
}
|
|
|
|
/* get the command start time */
|
|
gettimeofday(&start_time, NULL);
|
|
|
|
cr = calloc(1, sizeof(*cr));
|
|
if (!cr) {
|
|
clear_volatile_macros_r(&mac);
|
|
svc->latency = old_latency;
|
|
my_free(processed_command);
|
|
return ERROR;
|
|
}
|
|
init_check_result(cr);
|
|
|
|
/* save check info */
|
|
cr->object_check_type = SERVICE_CHECK;
|
|
cr->check_type = CHECK_TYPE_ACTIVE;
|
|
cr->check_options = check_options;
|
|
cr->scheduled_check = scheduled_check;
|
|
cr->reschedule_check = reschedule_check;
|
|
cr->latency = latency;
|
|
cr->start_time = start_time;
|
|
cr->finish_time = start_time;
|
|
cr->early_timeout = FALSE;
|
|
cr->exited_ok = TRUE;
|
|
cr->return_code = STATE_OK;
|
|
cr->output = NULL;
|
|
cr->host_name = (char *)strdup(svc->host_name);
|
|
cr->service_description = (char *)strdup(svc->description);
|
|
|
|
#ifdef USE_EVENT_BROKER
|
|
/* send data to event broker */
|
|
neb_result = broker_service_check(NEBTYPE_SERVICECHECK_INITIATE, NEBFLAG_NONE, NEBATTR_NONE, svc, CHECK_TYPE_ACTIVE, start_time, end_time, svc->check_command, svc->latency, 0.0, service_check_timeout, FALSE, 0, processed_command, NULL, cr);
|
|
|
|
/* neb module wants to override the service check - perhaps it will check the service itself */
|
|
if (neb_result == NEBERROR_CALLBACKOVERRIDE) {
|
|
clear_volatile_macros_r(&mac);
|
|
svc->latency = old_latency;
|
|
free_check_result(cr);
|
|
my_free(processed_command);
|
|
return OK;
|
|
}
|
|
#endif
|
|
|
|
/* reset latency (permanent value will be set later) */
|
|
svc->latency = old_latency;
|
|
|
|
/* paw off the check to a worker to run */
|
|
runchk_result = wproc_run_check(cr, processed_command, &mac);
|
|
if (runchk_result == ERROR) {
|
|
logit(NSLOG_RUNTIME_ERROR, TRUE, "Unable to run check for service '%s' on host '%s'\n", svc->description, svc->host_name);
|
|
}
|
|
else {
|
|
/* do the book-keeping */
|
|
currently_running_service_checks++;
|
|
svc->is_executing = TRUE;
|
|
update_check_stats((scheduled_check == TRUE) ? ACTIVE_SCHEDULED_SERVICE_CHECK_STATS : ACTIVE_ONDEMAND_SERVICE_CHECK_STATS, start_time.tv_sec);
|
|
}
|
|
|
|
/* free memory */
|
|
my_free(processed_command);
|
|
clear_volatile_macros_r(&mac);
|
|
|
|
return runchk_result;
|
|
}
|
|
|
|
/* Start of inline helper functions for handle async host/service check result functions
|
|
BH 03 Dec 2017
|
|
The giant monolithic async helper functions were becoming difficult to maintain
|
|
So I broke them out into smaller manageable chunks where a side-by-side comparison
|
|
of the two functions is a bit more reasonable. This would have been a lot easier with
|
|
dynamic type casting, or even some macro magic - but at least this way it isn't as
|
|
messy as it would have been with macros.
|
|
|
|
When introducing new inline helper functions, the goal is so that both of the functions
|
|
can sit on the same screen at the same time - for obvious reasons (I hope).
|
|
Try to keep them concise */
|
|
|
|
/* Bit of logic for determining an adequate return code */
|
|
int get_service_check_return_code(service *svc, check_result *cr)
|
|
{
|
|
int rc;
|
|
|
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "get_service_check_return_code()\n");
|
|
|
|
if (NULL == svc || NULL == cr) {
|
|
return STATE_UNKNOWN;
|
|
}
|
|
|
|
/* return now if it's a passive check */
|
|
if (cr->check_type != CHECK_TYPE_ACTIVE) {
|
|
return cr->return_code;
|
|
}
|
|
|
|
/* grab the return code */
|
|
rc = cr->return_code;
|
|
|
|
/* did the check result have an early timeout? */
|
|
if (cr->early_timeout == TRUE) {
|
|
|
|
my_free(svc->plugin_output);
|
|
my_free(svc->long_plugin_output);
|
|
my_free(svc->perf_data);
|
|
|
|
logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Check of service '%s' on host '%s' timed out after %.3fs!\n", svc->description, svc->host_name, svc->execution_time);
|
|
asprintf(&svc->plugin_output, "(Service check timed out after %.2lf seconds)", svc->execution_time);
|
|
|
|
rc = service_check_timeout_state;
|
|
}
|
|
|
|
/* if there was some error running the command, just skip it (this shouldn't be happening) */
|
|
else if (cr->exited_ok == FALSE) {
|
|
|
|
my_free(svc->plugin_output);
|
|
my_free(svc->long_plugin_output);
|
|
my_free(svc->perf_data);
|
|
|
|
logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Check of service '%s' on host '%s' did not exit properly!\n", svc->description, svc->host_name);
|
|
svc->plugin_output = (char *)strdup("(Service check did not exit properly)");
|
|
|
|
rc = STATE_CRITICAL;
|
|
}
|
|
|
|
/* 126 is a return code for non-executable */
|
|
else if (cr->return_code == 126) {
|
|
|
|
my_free(svc->plugin_output);
|
|
my_free(svc->long_plugin_output);
|
|
my_free(svc->perf_data);
|
|
|
|
logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Return code of 126 for service '%s' on host '%s' may indicate a non-executable plugin.\n",
|
|
svc->description, svc->host_name);
|
|
svc->plugin_output = strdup("(Return code of 126 is out of bounds. Check if plugin is executable)");
|
|
rc = STATE_CRITICAL;
|
|
}
|
|
|
|
/* 127 is a return code for non-existent */
|
|
else if (cr->return_code == 127) {
|
|
|
|
my_free(svc->plugin_output);
|
|
my_free(svc->long_plugin_output);
|
|
my_free(svc->perf_data);
|
|
|
|
logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Return code of 127 for service '%s' on host '%s' may indicate this plugin doesn't exist.\n",
|
|
svc->description, svc->host_name);
|
|
svc->plugin_output = strdup("(Return code of 127 is out of bounds. Check if plugin exists)");
|
|
rc = STATE_CRITICAL;
|
|
}
|
|
|
|
/* make sure the return code is within bounds */
|
|
else if (cr->return_code < 0 || cr->return_code > 3) {
|
|
|
|
logit(NSLOG_RUNTIME_WARNING, TRUE,
|
|
"Warning: Return code of %d for check of service '%s' on host '%s' was out of bounds.\n",
|
|
cr->return_code,
|
|
svc->description,
|
|
svc->host_name);
|
|
|
|
asprintf(&svc->plugin_output, "(Return code of %d for service '%s' on host '%s' was out of bounds)",
|
|
cr->return_code,
|
|
svc->description,
|
|
svc->host_name);
|
|
|
|
rc = STATE_CRITICAL;
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
/* Bit of logic for determining an adequate return code */
|
|
int get_host_check_return_code(host *hst, check_result *cr)
|
|
{
|
|
int rc;
|
|
|
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "get_host_check_return_code()\n");
|
|
|
|
if (hst == NULL || cr == NULL) {
|
|
return HOST_UNREACHABLE;
|
|
}
|
|
|
|
/* return now if it's a passive check */
|
|
if (cr->check_type != CHECK_TYPE_ACTIVE) {
|
|
return cr->return_code;
|
|
}
|
|
|
|
/* get the unprocessed return code */
|
|
rc = cr->return_code;
|
|
|
|
/* did the check result have an early timeout? */
|
|
if (cr->early_timeout) {
|
|
|
|
my_free(hst->plugin_output);
|
|
my_free(hst->long_plugin_output);
|
|
my_free(hst->perf_data);
|
|
|
|
logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Check of host '%s' timed out after %.2lf seconds\n", hst->name, hst->execution_time);
|
|
asprintf(&hst->plugin_output, "(Host check timed out after %.2lf seconds)", hst->execution_time);
|
|
|
|
rc = HOST_UNREACHABLE;
|
|
}
|
|
|
|
/* if there was some error running the command, just skip it (this shouldn't be happening) */
|
|
else if (cr->exited_ok == FALSE) {
|
|
|
|
my_free(hst->plugin_output);
|
|
my_free(hst->long_plugin_output);
|
|
my_free(hst->perf_data);
|
|
|
|
logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Check of host '%s' did not exit properly!\n", hst->name);
|
|
hst->plugin_output = (char *)strdup("(Host check did not exit properly)");
|
|
|
|
rc = HOST_UNREACHABLE;
|
|
}
|
|
|
|
/* 126 is a return code for non-executable */
|
|
else if (cr->return_code == 126) {
|
|
|
|
my_free(hst->plugin_output);
|
|
my_free(hst->long_plugin_output);
|
|
my_free(hst->perf_data);
|
|
|
|
logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Return code of 126 for host '%s' may indicate a non-executable plugin.\n",
|
|
hst->name);
|
|
hst->plugin_output = strdup("(Return code of 126 is out of bounds. Check if plugin is executable)");
|
|
rc = HOST_UNREACHABLE;
|
|
}
|
|
|
|
/* 127 is a return code for non-existent */
|
|
else if (cr->return_code == 127) {
|
|
|
|
my_free(hst->plugin_output);
|
|
my_free(hst->long_plugin_output);
|
|
my_free(hst->perf_data);
|
|
|
|
logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Return code of 127 for host '%s' may indicate this plugin doesn't exist.\n",
|
|
hst->name);
|
|
hst->plugin_output = strdup("(Return code of 127 is out of bounds. Check if plugin exists)");
|
|
rc = HOST_UNREACHABLE;
|
|
}
|
|
|
|
/* make sure the return code is within bounds */
|
|
else if (cr->return_code < 0 || cr->return_code > 3) {
|
|
|
|
my_free(hst->plugin_output);
|
|
my_free(hst->long_plugin_output);
|
|
my_free(hst->perf_data);
|
|
|
|
logit(NSLOG_RUNTIME_WARNING, TRUE,
|
|
"Warning: Return code of %d for check of host '%s' was out of bounds.\n",
|
|
cr->return_code,
|
|
hst->name);
|
|
|
|
asprintf(&hst->plugin_output, "(Return code of %d for host '%s' was out of bounds)",
|
|
cr->return_code,
|
|
hst->name);
|
|
|
|
rc = HOST_UNREACHABLE;
|
|
}
|
|
|
|
/* a NULL host check command means we should assume the host is UP */
|
|
if (hst->check_command == NULL) {
|
|
my_free(hst->plugin_output);
|
|
hst->plugin_output = (char *)strdup("(Host assumed to be UP)");
|
|
rc = HOST_UP;
|
|
}
|
|
|
|
/* if we're not doing aggressive host checking, let WARNING states indicate the host is up (fake the result to be HOST_UP) */
|
|
else if (use_aggressive_host_checking == FALSE && rc == STATE_WARNING) {
|
|
rc = HOST_UP;
|
|
}
|
|
|
|
/* any problem state indicates the host is not UP */
|
|
else if (rc != HOST_UP) {
|
|
rc = HOST_DOWN;
|
|
}
|
|
|
|
return rc;
|
|
}
|
|
|
|
/******************************************************************************
|
|
******* Calculate check result exec time
|
|
*****************************************************************************/
|
|
/*
 * Computes how long a check ran, in seconds, from cr->start_time and
 * cr->finish_time (seconds plus microseconds).
 *
 * Returns 0.0 when cr is NULL or when the computed duration is negative.
 */
static inline double calculate_check_result_execution_time(check_result *cr)
{
	double begin_sec;
	double begin_usec;
	double end_sec;
	double end_usec;
	double elapsed;

	if (cr == NULL) {
		return 0.0;
	}

	begin_sec = cr->start_time.tv_sec;
	begin_usec = cr->start_time.tv_usec;
	end_sec = cr->finish_time.tv_sec;
	end_usec = cr->finish_time.tv_usec;

	/* whole seconds plus the microsecond difference scaled down to seconds */
	elapsed = ((end_sec - begin_sec) + ((end_usec - begin_usec) / 1000.0) / 1000.0);

	/* a finish time before the start time is reported as no time at all */
	return (elapsed < 0.0) ? 0.0 : elapsed;
}
|
|
|
|
/******************************************************************************
|
|
******* Last state ended
|
|
*****************************************************************************/
|
|
/*
 * Stamps the "last time in state X" field matching svc->last_state with
 * svc->last_check.  States other than OK, WARNING, UNKNOWN and CRITICAL
 * leave the service untouched.
 */
static inline void record_last_service_state_ended(service * svc)
{
	if (svc->last_state == STATE_OK) {
		svc->last_time_ok = svc->last_check;
	}
	else if (svc->last_state == STATE_WARNING) {
		svc->last_time_warning = svc->last_check;
	}
	else if (svc->last_state == STATE_UNKNOWN) {
		svc->last_time_unknown = svc->last_check;
	}
	else if (svc->last_state == STATE_CRITICAL) {
		svc->last_time_critical = svc->last_check;
	}
}
|
|
/*****************************************************************************/
|
|
/*
 * Stamps the "last time in state X" field matching hst->last_state with
 * hst->last_check.  States other than UP, DOWN and UNREACHABLE leave the
 * host untouched.
 */
static inline void record_last_host_state_ended(host * hst)
{
	if (hst->last_state == HOST_UP) {
		hst->last_time_up = hst->last_check;
	}
	else if (hst->last_state == HOST_DOWN) {
		hst->last_time_down = hst->last_check;
	}
	else if (hst->last_state == HOST_UNREACHABLE) {
		hst->last_time_unreachable = hst->last_check;
	}
}
|
|
|
|
/******************************************************************************
|
|
******* Logic chunks for when an object is passive
|
|
*****************************************************************************/
|
|
/*
 * Bookkeeping for a passive service check result.
 *
 * Returns FALSE (after a debug message) when passive service checks are
 * disabled globally or for this service.  Otherwise marks the service's
 * check type as passive, updates the passive check statistics with the
 * result's start time, optionally logs the passive check, and returns TRUE.
 */
static inline int service_is_passive(service *svc, check_result *cr)
{
	/* global switch */
	if (accept_passive_service_checks == FALSE) {
		log_debug_info(DEBUGL_CHECKS, 0, "Discarding passive service check result because passive service checks are disabled globally.\n");
		return FALSE;
	}

	/* per-service switch */
	if (svc->accept_passive_checks == FALSE) {
		log_debug_info(DEBUGL_CHECKS, 0, "Discarding passive service check result because passive checks are disabled for this service.\n");
		return FALSE;
	}

	svc->check_type = CHECK_TYPE_PASSIVE;

	/* count this result in the passive check statistics */
	update_check_stats(PASSIVE_SERVICE_CHECK_STATS, cr->start_time.tv_sec);

	/* nothing more to do unless passive checks are being logged */
	if (log_passive_checks != TRUE) {
		return TRUE;
	}

	/* logged here because some results bypass external commands by being dropped in the checkresults dir */
	logit(NSLOG_PASSIVE_CHECK, FALSE, "PASSIVE SERVICE CHECK: %s;%s;%d;%s\n",
	      svc->host_name,
	      svc->description,
	      svc->current_state,
	      svc->plugin_output);

	return TRUE;
}
|
|
/*****************************************************************************/
|
|
/*
 * Bookkeeping for a passive host check result.
 *
 * Returns FALSE (after a debug message) when passive host checks are
 * disabled globally or for this host.  Otherwise marks the host's check
 * type as passive, updates the passive check statistics with the result's
 * start time, optionally logs the passive check, and returns TRUE.
 */
static inline int host_is_passive(host *hst, check_result *cr)
{
	/* global switch */
	if (accept_passive_host_checks == FALSE) {
		log_debug_info(DEBUGL_CHECKS, 0, "Discarding passive host check result because passive host checks are disabled globally.\n");
		return FALSE;
	}

	/* per-host switch */
	if (hst->accept_passive_checks == FALSE) {
		log_debug_info(DEBUGL_CHECKS, 0, "Discarding passive host check result because passive checks are disabled for this host.\n");
		return FALSE;
	}

	hst->check_type = CHECK_TYPE_PASSIVE;

	/* count this result in the passive check statistics */
	update_check_stats(PASSIVE_HOST_CHECK_STATS, cr->start_time.tv_sec);

	/* nothing more to do unless passive checks are being logged */
	if (log_passive_checks != TRUE) {
		return TRUE;
	}

	/* logged here because some results bypass external commands by being dropped in the checkresults dir */
	/* TODO: check whether current_state is the right value to log here - it may not be */
	logit(NSLOG_PASSIVE_CHECK, FALSE, "PASSIVE HOST CHECK: %s;%d;%s\n",
	      hst->name,
	      hst->current_state,
	      hst->plugin_output);

	return TRUE;
}
|
|
|
|
/******************************************************************************
|
|
******* Logic chunks for when an object is active
|
|
*****************************************************************************/
|
|
/*
 * Bookkeeping for an active service check result: marks the service's check
 * type as active, clears its is_executing flag and lowers the global count
 * of running service checks (only while that count is above zero).
 */
static inline void service_is_active(service *svc)
{
	svc->check_type = CHECK_TYPE_ACTIVE;
	svc->is_executing = FALSE;

	/* one fewer service check outstanding */
	if (currently_running_service_checks > 0) {
		--currently_running_service_checks;
	}
}
|
|
/*****************************************************************************/
|
|
/*
 * Bookkeeping for an active host check result: marks the host's check type
 * as active, clears its is_executing flag and lowers the global count of
 * running host checks (only while that count is above zero).
 */
static inline void host_is_active(host *hst)
{
	hst->check_type = CHECK_TYPE_ACTIVE;
	hst->is_executing = FALSE;

	/* one fewer host check outstanding */
	if (currently_running_host_checks > 0) {
		--currently_running_host_checks;
	}
}
|
|
|
|
/******************************************************************************
|
|
******* Generic debugging functions
|
|
*****************************************************************************/
|
|
/*
 * Writes two debug log entries describing a service check result that is
 * about to be handled: which service/host it is for and where it came from,
 * followed by the result's options, flags, return code and raw output.
 *
 * svc and cr are dereferenced unconditionally and must not be NULL.
 */
static inline void debug_async_service(service *svc, check_result *cr)
{
	log_debug_info(DEBUGL_CHECKS, 0, "** Handling %s async check result for service '%s' on host '%s' from '%s'...\n",
	               (cr->check_type == CHECK_TYPE_ACTIVE) ? "ACTIVE" : "PASSIVE",
	               svc->description,
	               svc->host_name,
	               check_result_source(cr));

	/*
	 * Guard the output string itself: cr has already been dereferenced
	 * above, so testing cr for NULL at this point protects nothing, while a
	 * NULL cr->output would be handed to a %s conversion.
	 */
	log_debug_info(DEBUGL_CHECKS, 1,
	               " * OPTIONS: %d, SCHEDULED: %d, RESCHEDULE: %d, EXITED OK: %d, RETURN CODE: %d, OUTPUT:\n%s\n",
	               cr->check_options,
	               cr->scheduled_check,
	               cr->reschedule_check,
	               cr->exited_ok,
	               cr->return_code,
	               (cr->output == NULL) ? "NULL" : cr->output);
}
|
|
/*****************************************************************************/
|
|
/*
 * Writes two debug log entries describing a host check result that is about
 * to be handled: which host it is for and where it came from, followed by
 * the result's options, flags, return code and raw output.
 *
 * hst and cr are dereferenced unconditionally and must not be NULL.
 */
static inline void debug_async_host(host *hst, check_result *cr)
{
	log_debug_info(DEBUGL_CHECKS, 0, "** Handling %s async check result for host '%s' from '%s'...\n",
	               (cr->check_type == CHECK_TYPE_ACTIVE) ? "ACTIVE" : "PASSIVE",
	               hst->name,
	               check_result_source(cr));

	/*
	 * Guard the output string itself: cr has already been dereferenced
	 * above, so testing cr for NULL at this point protects nothing, while a
	 * NULL cr->output would be handed to a %s conversion.
	 */
	log_debug_info(DEBUGL_CHECKS, 1,
	               " * OPTIONS: %d, SCHEDULED: %d, RESCHEDULE: %d, EXITED OK: %d, RETURN CODE: %d, OUTPUT:\n%s\n",
	               cr->check_options,
	               cr->scheduled_check,
	               cr->reschedule_check,
	               cr->exited_ok,
	               cr->return_code,
	               (cr->output == NULL) ? "NULL" : cr->output);
}
|
|
|
|
/******************************************************************************
|
|
******* Logic chunks for checking object freshness
|
|
*****************************************************************************/
|
|
/*
 * Freshness handling for a service check result.
 *
 * Only acts on results flagged with CHECK_OPTION_FRESHNESS_CHECK.  If the
 * service is already fresh again (a passive result may have arrived between
 * the start of the forced check and the handling of its result), a debug
 * message is logged and nothing else happens.  Otherwise the service's
 * is_being_freshened flag is cleared.
 *
 * NOTE(review): the function returns void, so the "discarding" branch has no
 * way to tell the caller to drop the result - confirm the caller's intent.
 */
static inline void service_fresh_check(service *svc, check_result *cr, time_t current_time)
{
	/* results that did not come from a freshness check need no handling here */
	if (!(cr->check_options & CHECK_OPTION_FRESHNESS_CHECK)) {
		return;
	}

	/* the service became fresh in the meantime - leave the freshening flag alone */
	if (is_service_result_fresh(svc, current_time, FALSE) == TRUE) {
		log_debug_info(DEBUGL_CHECKS, 0, "Discarding service freshness check result because the service is currently fresh (race condition avoided).\n");
		return;
	}

	/* still stale: the forced check has now reported, so clear the freshening flag */
	svc->is_being_freshened = FALSE;
}
|
|
/*****************************************************************************/
|
|
/*
 * Freshness handling for a host check result.
 *
 * Only acts on results flagged with CHECK_OPTION_FRESHNESS_CHECK.  If the
 * host is already fresh again (a passive result may have arrived between
 * the start of the forced check and the handling of its result), a debug
 * message is logged and nothing else happens.  Otherwise the host's
 * is_being_freshened flag is cleared.
 *
 * NOTE(review): the function returns void, so the "discarding" branch has no
 * way to tell the caller to drop the result - confirm the caller's intent.
 */
static inline void host_fresh_check(host *hst, check_result *cr, time_t current_time)
{
	/* results that did not come from a freshness check need no handling here */
	if (!(cr->check_options & CHECK_OPTION_FRESHNESS_CHECK)) {
		return;
	}

	/* the host became fresh in the meantime - leave the freshening flag alone */
	if (is_host_result_fresh(hst, current_time, FALSE) == TRUE) {
		log_debug_info(DEBUGL_CHECKS, 0, "Discarding host freshness check result because the host is currently fresh (race condition avoided).\n");
		return;
	}

	/* still stale: the forced check has now reported, so clear the freshening flag */
	hst->is_being_freshened = FALSE;
}
|
|
|
|
/******************************************************************************
|
|
******* Logic chunks for setting some of the initial flags, etc.
|
|
*****************************************************************************/
|
|
/*
 * First stage of applying a check result to a service.
 *
 * svc               - service being updated
 * cr                - check result supplying output, latency and times
 * old_plugin_output - receives a strdup() copy of the previous short output;
 *                     left untouched when the service had none
 *
 * Replaces the service's short output, long output and perf data with the
 * values parsed from cr->output, copies latency, execution time, last check
 * time and the reschedule flag from the result, then moves current_state to
 * last_state and computes the new current_state.
 */
static inline void service_initial_handling(service *svc, check_result *cr, char **old_plugin_output)
{
	char *cursor = NULL;

	/* keep a copy of the previous short output for the caller */
	if (svc->plugin_output) {
		*old_plugin_output = (char *)strdup(svc->plugin_output);
	}

	/* drop everything left over from the previous check */
	my_free(svc->plugin_output);
	my_free(svc->long_plugin_output);
	my_free(svc->perf_data);

	/* split the raw output into short output, long output and perf data */
	parse_check_output(cr->output, &svc->plugin_output, &svc->long_plugin_output, &svc->perf_data, TRUE, FALSE);

	if (svc->plugin_output != NULL) {
		/* turn semicolons in the short output into colons */
		replace_semicolons(svc->plugin_output, cursor);
	}
	else {
		/* never leave the short output empty */
		svc->plugin_output = (char *)strdup("(No output returned from plugin)");
	}

	log_debug_info(DEBUGL_CHECKS, 2,
	               "Parsing check output...\n"
	               "Short Output: %s\n"
	               "Long Output: %s\n"
	               "Perf Data: %s\n",
	               svc->plugin_output,
	               (svc->long_plugin_output == NULL) ? "NULL" : svc->long_plugin_output,
	               (svc->perf_data == NULL) ? "NULL" : svc->perf_data);

	/* timing and scheduling data taken from the result */
	svc->latency = cr->latency;
	svc->execution_time = calculate_check_result_execution_time(cr);
	svc->last_check = cr->start_time.tv_sec;
	svc->should_be_scheduled = cr->reschedule_check;

	/* remember the old state before working out the new one */
	svc->last_state = svc->current_state;
	svc->current_state = get_service_check_return_code(svc, cr);
}
|
|
/*****************************************************************************/
|
|
/*
 * First stage of applying a check result to a host.
 *
 * hst               - host being updated
 * cr                - check result supplying output, latency and times
 * old_plugin_output - receives a strdup() copy of the previous short output;
 *                     left untouched when the host had none
 *
 * Replaces the host's short output, long output and perf data with the
 * values parsed from cr->output, copies latency, execution time, last check
 * time and the reschedule flag from the result, then moves current_state to
 * last_state and computes the new current_state.
 */
static inline void host_initial_handling(host *hst, check_result *cr, char **old_plugin_output)
{
	char *cursor = NULL;

	/* keep a copy of the previous short output for the caller */
	if (hst->plugin_output) {
		*old_plugin_output = (char *)strdup(hst->plugin_output);
	}

	/* drop everything left over from the previous check */
	my_free(hst->plugin_output);
	my_free(hst->long_plugin_output);
	my_free(hst->perf_data);

	/* split the raw output into short output, long output and perf data */
	parse_check_output(cr->output, &hst->plugin_output, &hst->long_plugin_output, &hst->perf_data, TRUE, FALSE);

	if (hst->plugin_output != NULL) {
		/* turn semicolons in the short output into colons */
		replace_semicolons(hst->plugin_output, cursor);
	}
	else {
		/* never leave the short output empty */
		hst->plugin_output = (char *)strdup("(No output returned from host check)");
	}

	log_debug_info(DEBUGL_CHECKS, 2,
	               "Parsing check output...\n"
	               "Short Output: %s\n"
	               "Long Output: %s\n"
	               "Perf Data: %s\n",
	               hst->plugin_output,
	               (hst->long_plugin_output == NULL) ? "NULL" : hst->long_plugin_output,
	               (hst->perf_data == NULL) ? "NULL" : hst->perf_data);

	/* timing and scheduling data taken from the result */
	hst->latency = cr->latency;
	hst->execution_time = calculate_check_result_execution_time(cr);
	hst->last_check = cr->start_time.tv_sec;
	hst->should_be_scheduled = cr->reschedule_check;

	/* remember the old state before working out the new one */
	hst->last_state = hst->current_state;
	hst->current_state = get_host_check_return_code(hst, cr);
}
|
|
|
|
/******************************************************************************
|
|
******* Logic for when an object has a notable change
|
|
******* Removes acknowledgement, advances event_id, etc.
|
|
*****************************************************************************/
|
|
/*
 * Bookkeeping for a service whose state and/or hard state type changed.
 *
 * state_change      - TRUE when the service state changed (soft or hard)
 * hard_state_change - TRUE when the service entered a new hard state
 * log_event         - out: set to TRUE when either kind of change happened
 * handle_event      - out: set to TRUE when either kind of change happened
 *
 * When neither flag is TRUE the service and both out-parameters are left
 * untouched.
 */
static inline void service_state_or_hard_state_type_change(service * svc, int state_change, int hard_state_change, int * log_event, int * handle_event)
{
	int clear_ack = FALSE;

	if (state_change == TRUE) {

		svc->last_state_change = svc->last_check;

		/* every state change consumes a fresh event id */
		svc->last_event_id = svc->current_event_id;
		svc->current_event_id = next_event_id;
		next_event_id++;

		/* leaving OK starts a new problem; last_problem_id is deliberately
		   not reset here, or it would be zero the next time a problem is
		   encountered */
		if (svc->last_state == STATE_OK) {
			svc->current_problem_id = next_problem_id;
			next_problem_id++;
		}

		svc->state_type = SOFT_STATE;
	}

	if (hard_state_change == TRUE) {

		svc->last_hard_state_change = svc->last_check;
		svc->last_state_change = svc->last_check;
		svc->last_hard_state = svc->current_state;
		svc->state_type = HARD_STATE;
	}

	/* nothing changed at all, so there is nothing further to update */
	if (state_change != TRUE && hard_state_change != TRUE) {
		return;
	}

	/* a pending flexible downtime may start now */
	if (svc->pending_flex_downtime > 0) {
		check_pending_flex_service_downtime(svc);
	}

	/* reset notification times and the suppression option */
	svc->last_notification = (time_t)0;
	svc->next_notification = (time_t)0;
	svc->no_more_notifications = FALSE;

	/* a normal ack is dropped on a state change (or when this is not a hard
	   state change); a sticky ack is dropped only once the service is OK */
	if (svc->acknowledgement_type == ACKNOWLEDGEMENT_NORMAL
	    && (state_change == TRUE || hard_state_change == FALSE)) {
		clear_ack = TRUE;
	}
	if (svc->acknowledgement_type == ACKNOWLEDGEMENT_STICKY
	    && svc->current_state == STATE_OK) {
		clear_ack = TRUE;
	}

	if (clear_ack) {
		/* remove any non-persistent comments associated with the ack */
		svc->problem_has_been_acknowledged = FALSE;
		svc->acknowledgement_type = ACKNOWLEDGEMENT_NONE;
		delete_service_acknowledgement_comments(svc);
	}

	svc->should_be_scheduled = TRUE;

	*log_event = TRUE;
	*handle_event = TRUE;
}
|
|
/*****************************************************************************/
|
|
/*
 * Bookkeeping for a host whose state and/or hard state type changed.
 *
 * state_change      - TRUE when the host state changed (soft or hard)
 * hard_state_change - TRUE when the host entered a new hard state; forced to
 *                     TRUE locally for a passive check that changed state
 *                     while passive_host_checks_are_soft is FALSE
 * log_event         - out: set to TRUE when either kind of change happened
 * handle_event      - out: set to TRUE when either kind of change happened
 * send_notification - out: set to TRUE on a hard state change
 *
 * When no change applies, the out-parameters are left untouched.
 */
static inline void host_state_or_hard_state_type_change(host * hst, int state_change, int hard_state_change, int * log_event, int * handle_event, int * send_notification)
{
	int state_or_type_change = FALSE;

	/* passive results are treated as hard when passive host checks are not soft */
	if (hst->check_type == CHECK_TYPE_PASSIVE && passive_host_checks_are_soft == FALSE) {

		/* message corrected: this branch runs when passive checks are NOT soft */
		log_debug_info(DEBUGL_CHECKS, 2, "Check type passive and passive host checks aren't soft\n");

		if (state_change == TRUE) {
			hst->current_attempt = 1;
			hard_state_change = TRUE;
		}

		hst->state_type = HARD_STATE;
	}

	/* update event and problem ids */
	if (state_change == TRUE) {

		hst->last_state_change = hst->last_check;

		/* always update the event id on a state change */
		hst->last_event_id = hst->current_event_id;
		hst->current_event_id = next_event_id;
		next_event_id++;

		/* update the problem id when transitioning to a problem state */
		if (hst->last_state == HOST_UP) {
			/* don't reset last problem id, or it will be zero the next time a problem is encountered */
			hst->current_problem_id = next_problem_id;
			next_problem_id++;
		}

		/* previous state was not UP: retire the current problem id */
		else {
			hst->last_problem_id = hst->current_problem_id;
			hst->current_problem_id = 0L;
		}

		/* may be overridden by the hard state handling just below */
		hst->state_type = SOFT_STATE;

		state_or_type_change = TRUE;
	}

	if (hard_state_change == TRUE) {

		hst->last_hard_state_change = hst->last_check;
		hst->last_hard_state = hst->current_state;
		hst->state_type = HARD_STATE;

		state_or_type_change = TRUE;

		/* this is in the host func, but not the service
		   because it can easily be missed if a passive check
		   comes in and passive_host_checks_are_soft == FALSE */
		*send_notification = TRUE;
	}

	if (state_or_type_change) {

		/* check if host should go into downtime from flexible downtime */
		check_pending_flex_host_downtime(hst);

		/* reset notification times and suppression option */
		hst->last_notification = (time_t)0;
		hst->next_notification = (time_t)0;
		hst->no_more_notifications = FALSE;

		/* a normal ack is dropped on a state change (or when this is not a
		   hard state change); a sticky ack is dropped only once the host is
		   UP again.  HOST_UP is used instead of the service constant
		   STATE_OK so host states are compared with host constants, as in
		   the rest of this function */
		if ((hst->acknowledgement_type == ACKNOWLEDGEMENT_NORMAL && (state_change == TRUE || hard_state_change == FALSE))
		    || (hst->acknowledgement_type == ACKNOWLEDGEMENT_STICKY && hst->current_state == HOST_UP)) {

			/* remove any non-persistent comments associated with the ack */
			hst->problem_has_been_acknowledged = FALSE;
			hst->acknowledgement_type = ACKNOWLEDGEMENT_NONE;
			delete_host_acknowledgement_comments(hst);
		}

		hst->should_be_scheduled = TRUE;

		*log_event = TRUE;
		*handle_event = TRUE;
	}
}
|
|
|
|
/******************************************************************************
|
|
******* Logic for setting default state change times, etc.
|
|
*****************************************************************************/
|
|
/*
 * Seeds the "last (hard) state change" timestamps of a service and of its
 * host with the service's last check time.  Only timestamps that are still
 * zero are touched.
 */
static inline void initialize_last_service_state_change_times(service * svc, host * hst)
{
	const time_t unset = (time_t)0;

	/* service side */
	if (svc->last_state_change == unset) {
		svc->last_state_change = svc->last_check;
	}
	if (svc->last_hard_state_change == unset) {
		svc->last_hard_state_change = svc->last_check;
	}

	/* host side - note that the service's check time is used here as well */
	if (hst->last_state_change == unset) {
		hst->last_state_change = svc->last_check;
	}
	if (hst->last_hard_state_change == unset) {
		hst->last_hard_state_change = svc->last_check;
	}
}
|
|
/*****************************************************************************/
|
|
/*
 * Seeds the host's "last (hard) state change" timestamps with its last
 * check time.  Only timestamps that are still zero are touched.
 */
static inline void initialize_last_host_state_change_times(host * hst)
{
	const time_t unset = (time_t)0;

	if (hst->last_state_change == unset) {
		hst->last_state_change = hst->last_check;
	}

	if (hst->last_hard_state_change == unset) {
		hst->last_hard_state_change = hst->last_check;
	}
}
|
|
|
|
|
|
/******************************************************************************
|
|
******* Logic chunks propagating checks to host parents/children
|
|
*****************************************************************************/
|
|
/*
 * Queues dependency checks for the immediate parents of hst.
 *
 * parent_host_up - TRUE:  only parents currently HOST_UP are checked
 *                  FALSE: only parents currently not HOST_UP are checked
 *                  (any other value selects no parent at all)
 * current_time   - time handed to schedule_host_check()
 */
static inline void host_propagate_checks_to_immediate_parents(host * hst, int parent_host_up, time_t current_time)
{
	hostsmember *member = NULL;

	log_debug_info(DEBUGL_CHECKS, 1, "Propagating checks to parent host(s)...\n");

	member = hst->parent_hosts;
	while (member != NULL) {

		host *parent = member->host_ptr;
		int parent_is_up = (parent->current_state == HOST_UP);
		int selected = FALSE;

		if (parent_host_up == TRUE) {
			selected = parent_is_up;
		}
		else if (parent_host_up == FALSE) {
			selected = !parent_is_up;
		}

		if (selected) {
			log_debug_info(DEBUGL_CHECKS, 1, "Check of parent host '%s' queued.\n", parent->name);
			schedule_host_check(parent, current_time, CHECK_OPTION_DEPENDENCY_CHECK);
		}

		member = member->next;
	}
}
|
|
/*
 * Queues dependency checks for the immediate children of hst.
 *
 * children_none_up          - TRUE: check every child that is not HOST_UP
 * children_none_unreachable - TRUE: check every child that is not
 *                             HOST_UNREACHABLE
 * current_time              - time handed to schedule_host_check()
 *
 * A child is queued when at least one of the two criteria selects it.
 */
static inline void host_propagate_checks_to_immediate_children(host * hst, int children_none_up, int children_none_unreachable, time_t current_time)
{
	hostsmember *member = NULL;

	log_debug_info(DEBUGL_CHECKS, 1, "Propagating checks to child host(s)...\n");

	member = hst->child_hosts;
	while (member != NULL) {

		host *child = member->host_ptr;
		int picked_as_not_up = (children_none_up == TRUE && child->current_state != HOST_UP);
		int picked_as_not_unreachable = (children_none_unreachable == TRUE && child->current_state != HOST_UNREACHABLE);

		if (picked_as_not_up || picked_as_not_unreachable) {
			log_debug_info(DEBUGL_CHECKS, 1, "Check of child host '%s' queued.\n", child->name);
			schedule_host_check(child, current_time, CHECK_OPTION_DEPENDENCY_CHECK);
		}

		member = member->next;
	}
}
|
|
|
|
/******************************************************************************
|
|
******* Logic chunks propagating dependency checks
|
|
*****************************************************************************/
|
|
/*
 * Queues predictive dependency checks for every master service that svc
 * depends on, first through its execution dependencies and then through its
 * notification dependencies.
 *
 * Nothing happens unless the service is exactly one attempt short of
 * max_attempts, service checks are being executed and predictive service
 * dependency checks are enabled.
 */
static inline void service_propagate_dependency_checks(service * svc, time_t current_time)
{
	objectlist *node = NULL;
	servicedependency *dep = NULL;
	service *master = NULL;

	if (svc->current_attempt != (svc->max_attempts - 1)
	    || execute_service_checks != TRUE
	    || enable_predictive_service_dependency_checks != TRUE) {
		return;
	}

	log_debug_info(DEBUGL_CHECKS, 1, "Propagating predictive dependency checks to services this one depends on...\n");

	/* check services that THIS ONE depends on for notification AND execution;
	   a notification might go out soon and the dependency logic should be
	   working with accurate data by then */
	for (node = svc->exec_deps; node != NULL; node = node->next) {

		dep = (servicedependency *)node->object_ptr;
		if (dep->dependent_service_ptr != svc || dep->master_service_ptr == NULL) {
			continue;
		}

		master = (service *)dep->master_service_ptr;

		log_debug_info(DEBUGL_CHECKS, 2, "Predictive check of service '%s' on host '%s' queued.\n", master->description, master->host_name);
		schedule_service_check(master, current_time, CHECK_OPTION_DEPENDENCY_CHECK);
	}

	for (node = svc->notify_deps; node != NULL; node = node->next) {

		dep = (servicedependency *)node->object_ptr;
		if (dep->dependent_service_ptr != svc || dep->master_service_ptr == NULL) {
			continue;
		}

		master = (service *)dep->master_service_ptr;

		log_debug_info(DEBUGL_CHECKS, 2, "Predictive check of service '%s' on host '%s' queued.\n", master->description, master->host_name);
		schedule_service_check(master, current_time, CHECK_OPTION_DEPENDENCY_CHECK);
	}
}
|
|
/*****************************************************************************/
|
|
/*
 * Queues checks for every master host that hst depends on, first through its
 * notification dependencies and then through its execution dependencies.
 * This helps the dependency data to be accurate before it comes time to
 * notify.
 *
 * Nothing happens unless the host is exactly one attempt short of
 * max_attempts, host checks are being executed and predictive host
 * dependency checks are enabled.
 */
static inline void host_propagate_dependency_checks(host * hst, time_t current_time)
{
	objectlist *node = NULL;
	hostdependency *link = NULL;
	host *master = NULL;

	if (hst->current_attempt != (hst->max_attempts - 1)
	    || execute_host_checks != TRUE
	    || enable_predictive_host_dependency_checks != TRUE) {
		return;
	}

	log_debug_info(DEBUGL_CHECKS, 1, "Propagating predictive dependency checks to hosts this one depends on...\n");

	for (node = hst->notify_deps; node != NULL; node = node->next) {

		link = (hostdependency *)node->object_ptr;
		if (link->dependent_host_ptr != hst || link->master_host_ptr == NULL) {
			continue;
		}

		master = (host *)link->master_host_ptr;

		log_debug_info(DEBUGL_CHECKS, 1, "Check of host '%s' queued.\n", master->name);
		schedule_host_check(master, current_time, CHECK_OPTION_NONE);
	}

	for (node = hst->exec_deps; node != NULL; node = node->next) {

		link = (hostdependency *)node->object_ptr;
		if (link->dependent_host_ptr != hst || link->master_host_ptr == NULL) {
			continue;
		}

		master = (host *)link->master_host_ptr;

		log_debug_info(DEBUGL_CHECKS, 1, "Check of host '%s' queued.\n", master->name);
		schedule_host_check(master, current_time, CHECK_OPTION_NONE);
	}
}
|
|
|
|
/******************************************************************************
|
|
******* One stop shop for determining if check_result data is valid
|
|
*****************************************************************************/
|
|
/*
 * Sanity check shared by the check result handlers.
 *
 * Returns TRUE when both the host and the check result are present; returns
 * FALSE (after a debug message naming the first missing piece) otherwise.
 */
static inline int is_valid_check_result_data(host * hst, check_result * cr)
{
	int usable = TRUE;

	if (hst == NULL) {
		log_debug_info(DEBUGL_CHECKS, 2, "No host associated with service, bailing!\n");
		usable = FALSE;
	}
	else if (cr == NULL) {
		log_debug_info(DEBUGL_CHECKS, 2, "No check result specified, bailing!\n");
		usable = FALSE;
	}

	return usable;
}
|
|
/******************************************************************************
|
|
********** Fin. ************************************************************
|
|
*****************************************************************************/
|
|
|
|
/* handles asynchronous service check results */
|
|
/*
 * Handles an asynchronous service check result.
 *
 * svc - the service the result belongs to
 * cr  - the check result to process
 *
 * Returns ERROR when svc, its host or cr is missing, or when a passive
 * result is rejected by service_is_passive(); returns OK otherwise.
 *
 * In order, the function:
 *   1. performs the initial handling of the result (output, timing, state),
 *   2. decides whether the host needs an on-demand check or whether cached
 *      host data can be used,
 *   3. works out soft/hard state changes and the current attempt,
 *   4. reschedules the next service check,
 *   5. sends notifications, logs and runs event handlers as required,
 *   6. updates status and performance data.
 *
 * The order of the steps matters: later steps read fields written by
 * earlier ones.
 */
int handle_async_service_check_result(service *svc, check_result *cr)
{
	time_t current_time = 0L;
	time_t next_check = 0L;
	time_t preferred_time = 0L;
	time_t next_valid_time = 0L;

	int state_change = FALSE;
	int hard_state_change = FALSE;
	int send_notification = FALSE;
	int handle_event = FALSE;
	int log_event = FALSE;
	int check_host = FALSE;
	int update_host_stats = FALSE;

	char * old_plugin_output = NULL;

	host * hst = NULL;

	log_debug_info(DEBUGL_FUNCTIONS, 0, "handle_async_service_check_result()\n");

	if (svc == NULL) {
		log_debug_info(DEBUGL_CHECKS, 2, "No service specified, bailing!\n");
		return ERROR;
	}
	hst = svc->host_ptr;
	if (is_valid_check_result_data(hst, cr) == FALSE) {
		return ERROR;
	}

	if (cr->check_type == CHECK_TYPE_PASSIVE) {
		if (service_is_passive(svc, cr) == FALSE) {
			return ERROR;
		}
	}
	else {
		service_is_active(svc);
	}

	/* fixed: the argument had been corrupted to a non-compiling token */
	time(&current_time);
	initialize_last_service_state_change_times(svc, hst);

	debug_async_service(svc, cr);
	service_fresh_check(svc, cr, current_time);
	service_initial_handling(svc, cr, &old_plugin_output);

	/* reschedule the next check at the regular interval - may be overridden */
	next_check = (time_t)(svc->last_check + (svc->check_interval * interval_length));

	/***********************************************/
	/********** SCHEDULE SERVICE CHECK LOGIC **********/
	/***********************************************/
	if (svc->current_state == STATE_OK) {

		log_debug_info(DEBUGL_CHECKS, 1, "Service is OK\n");

		if (hst->has_been_checked == FALSE) {

			log_debug_info(DEBUGL_CHECKS, 1, "Host has not been checked yet\n");

			if (hst->next_check == 0L
			    || hst->initial_state != HOST_UP
			    || hst->next_check < hst->check_interval * interval_length + current_time) {

				log_debug_info(DEBUGL_CHECKS, 2, "Service ok, but host hasn't been checked recently, scheduling host check\n");

				check_host = TRUE;
			}
		}

		else if (hst->current_state != HOST_UP) {

			log_debug_info(DEBUGL_CHECKS, 1, "Host is NOT UP, so we'll check it to see if it recovered...\n");

			if (svc->last_state == STATE_OK
			    && hst->has_been_checked == TRUE
			    && current_time - hst->last_check < cached_host_check_horizon) {

				log_debug_info(DEBUGL_CHECKS, 2, "Service ok, but host isn't up (and has been checked). Using cached host data.\n");

				update_host_stats = TRUE;

			} else {

				log_debug_info(DEBUGL_CHECKS, 2, "Service ok, but host isn't up and cached data isn't valid here, scheduling host check\n");

				check_host = TRUE;
			}

			svc->host_problem_at_last_check = TRUE;
		}

	}
	else {

		log_debug_info(DEBUGL_CHECKS, 1, "Service is in a non-OK state!\n");

		if (hst->current_state == HOST_UP) {

			log_debug_info(DEBUGL_CHECKS, 1, "Host is currently UP, so we'll recheck its state to make sure...\n");

			if (execute_host_checks == TRUE
			    && svc->last_state != svc->current_state
			    && hst->last_check + cached_host_check_horizon < current_time) {

				log_debug_info(DEBUGL_CHECKS, 2, "Service not ok, host is up but cached data isn't valid, scheduling host check\n");

				check_host = TRUE;

			} else {

				log_debug_info(DEBUGL_CHECKS, 2, "Service not ok, host is up, using cached host data\n");

				update_host_stats = TRUE;
			}

			/* give the service a chance to recover */
			if (svc->host_problem_at_last_check == TRUE
			    && svc->state_type == SOFT_STATE) {

				log_debug_info(DEBUGL_CHECKS, 2, "Service had a host problem at last check and is SOFT, so we'll reset current_attempt to 1 to give it a chance\n");

				svc->current_attempt = 1;
			}

			svc->host_problem_at_last_check = FALSE;
		}
		else {

			log_debug_info(DEBUGL_CHECKS, 1, "Host is currently not UP...\n");

			if (execute_host_checks == FALSE || svc->current_state == svc->last_state) {

				log_debug_info(DEBUGL_CHECKS, 2, "Host checks aren't enabled, so send a notification\n");

				/* fake a host check */
				if (hst->has_been_checked == FALSE) {

					log_debug_info(DEBUGL_CHECKS, 2, "Host has never been checked, fake a host check\n");

					hst->has_been_checked = TRUE;
					hst->last_check = svc->last_check;
				}

				/* possibly re-send host notifications... */
				host_notification(hst, NOTIFICATION_NORMAL, NULL, NULL, NOTIFICATION_OPTION_NONE);
			}

			svc->host_problem_at_last_check = TRUE;
		}
	}

	/* service hard state change, because if host is down/unreachable
	   the docs say we have a hard state change (but no notification) */
	if (hst->current_state != HOST_UP && svc->last_hard_state != svc->current_state) {

		log_debug_info(DEBUGL_CHECKS, 2, "Host is down or unreachable, forcing service hard state change\n");

		hard_state_change = TRUE;
		svc->state_type = HARD_STATE;
		svc->last_hard_state = svc->current_state;
	}

	if (check_host == TRUE) {
		schedule_host_check(hst, current_time, CHECK_OPTION_DEPENDENCY_CHECK);
	}

	if (update_host_stats == TRUE) {
		update_check_stats(ACTIVE_ONDEMAND_HOST_CHECK_STATS, current_time);
		update_check_stats(ACTIVE_CACHED_HOST_CHECK_STATS, current_time);
	}

	/**************************************/
	/******* SERVICE CHECK OK LOGIC *******/
	/**************************************/
	if (svc->last_state == STATE_OK) {

		log_debug_info(DEBUGL_CHECKS, 1, "Service was OK at last check.\n");

		/***** SERVICE IS STILL OK *****/
		if (svc->current_state == STATE_OK) {

			log_debug_info(DEBUGL_CHECKS, 1, "Service is still OK.\n");

			svc->state_type = HARD_STATE;
			svc->current_attempt = 1;
		}

		/***** SERVICE IS NOW IN PROBLEM STATE *****/
		else {

			log_debug_info(DEBUGL_CHECKS, 1, "Service is a non-OK state (%s)!\n", service_state_name(svc->current_state));

			/* skip this service check if host is down/unreachable and state change happened */
			if (svc->host_problem_at_last_check == FALSE && hard_state_change == FALSE) {
				svc->state_type = SOFT_STATE;
			}

			svc->current_attempt = 1;

			handle_event = TRUE;
		}
	}

	/*******************************************/
	/******* SERVICE CHECK PROBLEM LOGIC *******/
	/*******************************************/
	else {

		log_debug_info(DEBUGL_CHECKS, 1, "Service was NOT OK at last check (%s).\n", service_state_name(svc->last_state));

		/***** SERVICE IS NOW OK *****/
		if (svc->current_state == STATE_OK) {

			handle_event = TRUE;

			if (svc->state_type == HARD_STATE) {

				log_debug_info(DEBUGL_CHECKS, 1, "Service experienced a HARD recovery.\n");

				send_notification = TRUE;
			}
			else {

				log_debug_info(DEBUGL_CHECKS, 1, "Service experienced a SOFT recovery.\n");
			}

			/* there was a state change, soft or hard */
			state_change = TRUE;
		}

		/***** SERVICE IS STILL IN PROBLEM STATE *****/
		else {

			log_debug_info(DEBUGL_CHECKS, 1, "Service is still in a non-OK state (%s)!\n", service_state_name(svc->current_state));

			if (svc->state_type == SOFT_STATE) {

				log_debug_info(DEBUGL_CHECKS, 2, "Service state type is soft, using retry_interval\n");

				handle_event = TRUE;
				next_check = (unsigned long) (current_time + svc->retry_interval * interval_length);
			}

			/* check if host is down/unreachable and don't send notifications */
			else if (svc->host_problem_at_last_check == TRUE) {

				log_debug_info(DEBUGL_CHECKS, 2, "Service state type is hard, but host is down or unreachable, not sending notification\n");

			} else {

				log_debug_info(DEBUGL_CHECKS, 2, "Service state type is hard, sending a notification\n");

				send_notification = TRUE;
			}
		}
	}

	/* soft states should be using retry_interval */
	if (svc->state_type == SOFT_STATE) {

		log_debug_info(DEBUGL_CHECKS, 2, "Service state type is soft, using retry_interval\n");

		next_check = (unsigned long) (current_time + svc->retry_interval * interval_length);
	}

	/* check for a state change */
	if (svc->current_state != svc->last_state || (svc->current_state == STATE_OK && svc->state_type == SOFT_STATE)) {

		log_debug_info(DEBUGL_CHECKS, 2, "Service experienced a state change\n");

		state_change = TRUE;
	}

	/* adjust the current attempt */
	if (svc->state_type == SOFT_STATE) {

		/* this has to be first so we don't reset every time a new non-ok state comes
		   in (and triggers the state_change == TRUE) */
		if (svc->last_state != STATE_OK && svc->current_attempt < svc->max_attempts) {

			svc->current_attempt++;
		}

		/* historically, a soft recovery would actually get up to 2 attempts
		   and then immediately reset once the next check result came in */
		else if (state_change == TRUE && svc->current_state != STATE_OK) {

			svc->current_attempt = 1;
		}

		/* otherwise, just increase the attempt */
		else if (svc->current_attempt < svc->max_attempts) {

			svc->current_attempt++;
		}
	}

	if (svc->current_attempt >= svc->max_attempts &&
	    (svc->current_state != svc->last_hard_state || svc->state_type == SOFT_STATE)) {

		log_debug_info(DEBUGL_CHECKS, 2, "Service had a HARD STATE CHANGE!!\n");

		next_check = (unsigned long)(current_time + (svc->check_interval * interval_length));

		/* set both states changed, this may have been missed... */
		hard_state_change = TRUE;

		/* this is missed earlier */
		send_notification = TRUE;
	}

	/* handle some acknowledgement things and update last_state_change */
	service_state_or_hard_state_type_change(svc, state_change, hard_state_change, &log_event, &handle_event);

	/* fix edge cases where log_event wouldn't have been set or won't be */
	if (svc->current_state != STATE_OK && svc->state_type == SOFT_STATE) {
		log_event = TRUE;
	}

	record_last_service_state_ended(svc);

	check_for_service_flapping(svc, TRUE, TRUE);
	check_for_host_flapping(hst, TRUE, FALSE, TRUE);

	/* service with active checks disabled do not get rescheduled */
	if (svc->checks_enabled == FALSE) {
		svc->should_be_scheduled = FALSE;
	}

	/* services with non-recurring intervals do not get rescheduled if we're in a HARD or OK state */
	else if (svc->check_interval == 0 && (svc->state_type == HARD_STATE || svc->current_state == STATE_OK)) {
		svc->should_be_scheduled = FALSE;
	}

	/* schedule a non-forced check if we can */
	else if (svc->should_be_scheduled == TRUE) {

		log_debug_info(DEBUGL_CHECKS, 1, "Rescheduling next check of service at %s", ctime(&next_check));

		/* next check time was calculated above */
		/* make sure we don't get ourselves into too much trouble... */
		if (current_time > next_check) {
			svc->next_check = current_time;
		} else {
			svc->next_check = next_check;
		}

		/* make sure we rescheduled the next service check at a valid time */
		preferred_time = svc->next_check;
		get_next_valid_time(preferred_time, &next_valid_time, svc->check_period_ptr);
		svc->next_check = next_valid_time;

		/* Next valid time is further in the future because of timeperiod
		   constraints. Add a random amount so we don't get all checks
		   subject to that timeperiod constraint scheduled at the same time */
		if (next_valid_time > preferred_time) {
			svc->next_check += ranged_urand(0, check_window(svc));
		}

		schedule_service_check(svc, svc->next_check, CHECK_OPTION_NONE);
	}

	/* volatile service gets everything in non-ok hard state */
	if ((svc->current_state != STATE_OK)
	    && (svc->state_type == HARD_STATE)
	    && (svc->is_volatile == TRUE)) {

		log_debug_info(DEBUGL_CHECKS, 2, "Service is volatile, and we're in a non-ok hard state..\n");

		send_notification = TRUE;
		log_event = TRUE;
		handle_event = TRUE;
	}

	if (send_notification == TRUE) {

		/* send notification */
		if (service_notification(svc, NOTIFICATION_NORMAL, NULL, NULL, NOTIFICATION_OPTION_NONE) == OK) {

			/* log state due to notification event when stalking_options N is set */
			if (should_stalk_notifications(svc)) {
				log_event = TRUE;
			}
		}
	}

	/* the service recovered, so reset the current notification number and state flags (after the recovery notification has gone out) */
	if (svc->current_state == STATE_OK && svc->state_type == HARD_STATE && hard_state_change == TRUE) {
		svc->current_notification_number = 0;
		svc->notified_on = 0;
	}

	if (obsess_over_services == TRUE) {
		obsessive_compulsive_service_check_processor(svc);
	}

	/* if we're stalking this state type AND the plugin output changed since last check, log it now.. */
	if (should_stalk(svc) && compare_strings(old_plugin_output, svc->plugin_output)) {

		/* old_plugin_output stays NULL when the service had no previous
		   output; never hand a NULL pointer to a %s conversion */
		log_debug_info(DEBUGL_CHECKS, 2, "Logging due to state stalking, old: [%s], new: [%s]\n",
		               (old_plugin_output == NULL) ? "NULL" : old_plugin_output,
		               (svc->plugin_output == NULL) ? "NULL" : svc->plugin_output);
		log_event = TRUE;
	}

	if (log_event == TRUE) {
		log_service_event(svc);
	}

	if (handle_event == TRUE) {
		handle_service_event(svc);
	}

	/* Update OK states since they send out a soft alert but then they
	   switch into a HARD state and reset the attempts */
	if (svc->current_state == STATE_OK && state_change == TRUE) {

		/* Reset attempts and problem state */
		if (hard_state_change == TRUE) {
			svc->last_problem_id = svc->current_problem_id;
			svc->current_problem_id = 0L;
			svc->current_notification_number = 0;
			svc->host_problem_at_last_check = FALSE;
		}

		/* Set OK to a hard state */
		svc->last_hard_state_change = svc->last_check;
		svc->last_hard_state = svc->current_state;
		svc->current_attempt = 1;
		svc->state_type = HARD_STATE;
	}

	log_debug_info(DEBUGL_CHECKS, 2,
	               "STATE: %d, TYPE: %s, CUR: %d, MAX: %d, LAST_STATE: %d, LAST_HARD: %d, NOTIFY: %d, LOGGED: %d, HANDLED: %d\n",
	               svc->current_state,
	               (svc->state_type == SOFT_STATE) ? "SOFT" : "HARD",
	               svc->current_attempt,
	               svc->max_attempts,
	               svc->last_state,
	               svc->last_hard_state,
	               send_notification,
	               log_event,
	               handle_event);

#ifdef USE_EVENT_BROKER
	broker_service_check(NEBTYPE_SERVICECHECK_PROCESSED, NEBFLAG_NONE, NEBATTR_NONE, svc, svc->check_type, cr->start_time, cr->finish_time, NULL, svc->latency, svc->execution_time, service_check_timeout, cr->early_timeout, cr->return_code, NULL, NULL, cr);
#endif

	svc->has_been_checked = TRUE;
	update_service_status(svc, FALSE);
	update_service_performance_data(svc);

	/* the saved copy of the previous output is only needed for stalking */
	my_free(old_plugin_output);

	return OK;
}
|
|
|
|
/* schedules an immediate or delayed service check */
|
|
/*
 * Schedules an immediate or delayed check of a service.
 *
 * svc        - service to check (NULL is ignored)
 * check_time - requested run time of the check
 * options    - CHECK_OPTION_* flags for the new check
 *
 * The request is dropped when active checks are disabled and the check is
 * not forced, or when it is a pure dependency check and the last result is
 * still inside the cached service check horizon.
 *
 * If the service already has a check event queued, either that event or the
 * new request survives:
 *   - existing forced:     replaced only by an earlier forced request
 *   - existing non-forced: replaced by any forced request, or by an earlier
 *                          non-forced request
 * The status data of the service is updated in either case.
 */
inline void schedule_service_check(service *svc, time_t check_time, int options)
{
	timed_event *pending = NULL;
	int keep_pending = FALSE;

	log_debug_info(DEBUGL_FUNCTIONS, 0, "schedule_service_check()\n");

	if (svc == NULL) {
		return;
	}

	log_debug_info(DEBUGL_CHECKS, 0, "Scheduling a %s, active check of service '%s' on host '%s' @ %s", (options & CHECK_OPTION_FORCE_EXECUTION) ? "forced" : "non-forced", svc->description, svc->host_name, ctime(&check_time));

	/* disabled active checks can only be overridden by a forced check */
	if (svc->checks_enabled == FALSE && !(options & CHECK_OPTION_FORCE_EXECUTION)) {
		log_debug_info(DEBUGL_CHECKS, 0, "Active checks of this service are disabled.\n");
		return;
	}

	/* a dependency check is pointless while the cached result is still fresh */
	if (options == CHECK_OPTION_DEPENDENCY_CHECK
	    && svc->last_check + cached_service_check_horizon > check_time) {
		log_debug_info(DEBUGL_CHECKS, 0, "Last check result is recent enough (%s)", ctime(&svc->last_check));
		return;
	}

	pending = (timed_event *)svc->next_check_event;

	/* with a check already queued, decide which of the two events wins */
	if (pending != NULL) {

		const int pending_forced = ((pending->event_options & CHECK_OPTION_FORCE_EXECUTION) != 0);
		const int request_forced = ((options & CHECK_OPTION_FORCE_EXECUTION) != 0);
		const int request_earlier = (check_time < pending->run_time);

		log_debug_info(DEBUGL_CHECKS, 2, "Found another service check event for this service @ %s", ctime(&pending->run_time));

		/* the queued event stays unless one of the cases below says otherwise */
		keep_pending = TRUE;

		if (pending_forced) {
			/* a forced event only yields to an earlier forced request */
			if (request_forced && request_earlier) {
				keep_pending = FALSE;
				log_debug_info(DEBUGL_CHECKS, 2, "New service check event is forced and occurs before the existing event, so the new event will be used instead.\n");
			}
		}
		else if (request_forced) {
			/* forced beats non-forced regardless of timing */
			keep_pending = FALSE;
			log_debug_info(DEBUGL_CHECKS, 2, "New service check event is forced, so it will be used instead of the existing event.\n");
		}
		else if (request_earlier) {
			/* both non-forced: the earlier one wins */
			keep_pending = FALSE;
			log_debug_info(DEBUGL_CHECKS, 2, "New service check event occurs before the existing (older) event, so it will be used instead.\n");
		}
		else {
			/* both non-forced and the request is not earlier: ignore the request */
			log_debug_info(DEBUGL_CHECKS, 2, "New service check event occurs after the existing event, so we'll ignore it.\n");
		}
	}

	if (keep_pending == TRUE) {

		/* reset the next check time (it may be out of sync) */
		if (pending != NULL) {
			svc->next_check = pending->run_time;
		}

		log_debug_info(DEBUGL_CHECKS, 2, "Keeping original service check event (ignoring the new one).\n");
	}
	else {

		if (pending != NULL) {
			/* take the old event off the queue; its storage is reused below */
			remove_event(nagios_squeue, pending);
		}
		else {
			/* nothing queued yet, so a fresh event item is needed */
			pending = (timed_event *)calloc(1, sizeof(*pending));
			if (pending == NULL) {
				logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Could not reschedule check of service '%s' on host '%s'!\n", svc->description, svc->host_name);
				return;
			}
		}

		log_debug_info(DEBUGL_CHECKS, 2, "Scheduling new service check event.\n");

		/* record the event, its time and (for retention) its options on the service */
		svc->next_check_event = pending;
		svc->next_check = check_time;
		svc->check_options = options;

		/* fill in the event and put it on the queue */
		pending->event_type = EVENT_SERVICE_CHECK;
		pending->event_data = (void *)svc;
		pending->event_args = (void *)NULL;
		pending->event_options = options;
		pending->run_time = svc->next_check;
		pending->recurring = FALSE;
		pending->event_interval = 0L;
		pending->timing_func = NULL;
		pending->compensate_for_time_change = TRUE;
		add_event(nagios_squeue, pending);
	}

	/* update the status log */
	update_service_status(svc, FALSE);
}
|
|
|
|
|
|
|
|
/* checks viability of performing a service check */
|
|
inline int check_service_check_viability(service *svc, int check_options, int *time_is_valid, time_t *new_time)
|
|
{
|
|
int perform_check = TRUE;
|
|
time_t current_time = 0L;
|
|
time_t preferred_time = 0L;
|
|
int check_interval = 0;
|
|
host *temp_host = NULL;
|
|
|
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "check_service_check_viability()\n");
|
|
|
|
/* make sure we have a service */
|
|
if (svc == NULL) {
|
|
return ERROR;
|
|
}
|
|
|
|
/* get the check interval to use if we need to reschedule the check */
|
|
if (svc->state_type == SOFT_STATE && svc->current_state != STATE_OK) {
|
|
check_interval = (svc->retry_interval * interval_length);
|
|
}
|
|
else {
|
|
check_interval = (svc->check_interval * interval_length);
|
|
}
|
|
|
|
/* get the current time */
|
|
time(¤t_time);
|
|
|
|
/* initialize the next preferred check time */
|
|
preferred_time = current_time;
|
|
|
|
/* can we check the host right now? */
|
|
if (!(check_options & CHECK_OPTION_FORCE_EXECUTION)) {
|
|
|
|
/* if checks of the service are currently disabled... */
|
|
if (svc->checks_enabled == FALSE) {
|
|
preferred_time = current_time + check_interval;
|
|
perform_check = FALSE;
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 2, "Active checks of the service are currently disabled.\n");
|
|
}
|
|
|
|
/* make sure this is a valid time to check the service */
|
|
if (check_time_against_period((unsigned long)current_time, svc->check_period_ptr) == ERROR) {
|
|
preferred_time = current_time;
|
|
if (time_is_valid) {
|
|
*time_is_valid = FALSE;
|
|
}
|
|
perform_check = FALSE;
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 2, "This is not a valid time for this service to be actively checked.\n");
|
|
}
|
|
|
|
/* check service dependencies for execution */
|
|
if (check_service_dependencies(svc, EXECUTION_DEPENDENCY) == DEPENDENCIES_FAILED) {
|
|
preferred_time = current_time + check_interval;
|
|
perform_check = FALSE;
|
|
if (service_skip_check_dependency_status >= 0) {
|
|
svc->current_state = service_skip_check_dependency_status;
|
|
}
|
|
log_debug_info(DEBUGL_CHECKS, 2, "Execution dependencies for this service failed, so it will not be actively checked.\n");
|
|
}
|
|
}
|
|
|
|
/* check if parent service is OK */
|
|
if (check_service_parents(svc) == DEPENDENCIES_FAILED) {
|
|
preferred_time = current_time + check_interval;
|
|
perform_check = FALSE;
|
|
if (service_skip_check_parent_status >= 0) {
|
|
svc->current_state = service_skip_check_parent_status;
|
|
}
|
|
log_debug_info(DEBUGL_CHECKS, 2, "Execution parents for this service failed, so it will not be actively checked.\n");
|
|
}
|
|
|
|
/* check if host is up - if not, do not perform check */
|
|
if (host_down_disable_service_checks) {
|
|
if ((temp_host = svc->host_ptr) == NULL) {
|
|
log_debug_info(DEBUGL_CHECKS, 2, "Host pointer NULL in check_service_check_viability().\n");
|
|
return ERROR;
|
|
}
|
|
else {
|
|
if (temp_host->current_state != HOST_UP) {
|
|
log_debug_info(DEBUGL_CHECKS, 2, "Host state not UP, so service check will not be performed - will be rescheduled as normal.\n");
|
|
perform_check = FALSE;
|
|
if (service_skip_check_host_down_status >= 0) {
|
|
svc->current_state = service_skip_check_host_down_status;
|
|
}
|
|
}
|
|
}
|
|
}
|
|
|
|
/* pass back the next viable check time */
|
|
if (new_time) {
|
|
*new_time = preferred_time;
|
|
}
|
|
|
|
if (perform_check == TRUE) {
|
|
return OK;
|
|
}
|
|
return ERROR;
|
|
}
|
|
|
|
|
|
|
|
/* checks service parents */
|
|
int check_service_parents(service *svc)
|
|
{
|
|
servicesmember *temp_servicesmember = NULL;
|
|
int state = STATE_OK;
|
|
|
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "check_service_parents()\n");
|
|
|
|
/* check all parents... */
|
|
for(temp_servicesmember = svc->parents; temp_servicesmember; temp_servicesmember = temp_servicesmember->next) {
|
|
service *parent_service;
|
|
|
|
/* find the service we depend on... */
|
|
if ((parent_service = temp_servicesmember->service_ptr) == NULL) {
|
|
logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: service '%s' on host '%s' is NULL ptr\n",
|
|
temp_servicesmember->service_description, temp_servicesmember->host_name);
|
|
continue;
|
|
}
|
|
|
|
state = parent_service->last_hard_state;
|
|
|
|
/* is the service we depend on in a state that fails the dependency tests? */
|
|
if ((state == STATE_CRITICAL) || (state == STATE_UNKNOWN))
|
|
return DEPENDENCIES_FAILED;
|
|
|
|
if (check_service_parents(parent_service) != DEPENDENCIES_OK)
|
|
return DEPENDENCIES_FAILED;
|
|
}
|
|
|
|
return DEPENDENCIES_OK;
|
|
}
|
|
|
|
|
|
|
|
/* checks service dependencies */
|
|
int check_service_dependencies(service *svc, int dependency_type)
|
|
{
|
|
objectlist *list;
|
|
int state = STATE_OK;
|
|
time_t current_time = 0L;
|
|
|
|
|
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "check_service_dependencies()\n");
|
|
|
|
/* only check dependencies of the desired type */
|
|
if (dependency_type == NOTIFICATION_DEPENDENCY)
|
|
list = svc->notify_deps;
|
|
else
|
|
list = svc->exec_deps;
|
|
|
|
/* check all dependencies of the desired type... */
|
|
for(; list; list = list->next) {
|
|
service *temp_service;
|
|
servicedependency *temp_dependency = (servicedependency *)list->object_ptr;
|
|
|
|
/* find the service we depend on... */
|
|
if ((temp_service = temp_dependency->master_service_ptr) == NULL) {
|
|
continue;
|
|
}
|
|
|
|
/* skip this dependency if it has a timeperiod and the current time isn't valid */
|
|
time(¤t_time);
|
|
if (temp_dependency->dependency_period != NULL
|
|
&& (check_time_against_period(current_time, temp_dependency->dependency_period_ptr) == ERROR)) {
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
/* get the status to use (use last hard state if its currently in a soft state) */
|
|
if (temp_service->state_type == SOFT_STATE && soft_state_dependencies == FALSE) {
|
|
state = temp_service->last_hard_state;
|
|
}
|
|
else {
|
|
state = temp_service->current_state;
|
|
}
|
|
|
|
/* is the service we depend on in state that fails the dependency tests? */
|
|
if (flag_isset(temp_dependency->failure_options, 1 << state)) {
|
|
return DEPENDENCIES_FAILED;
|
|
}
|
|
|
|
/* immediate dependencies ok at this point - check parent dependencies if necessary */
|
|
if (temp_dependency->inherits_parent == TRUE) {
|
|
if (check_service_dependencies(temp_service, dependency_type) != DEPENDENCIES_OK) {
|
|
return DEPENDENCIES_FAILED;
|
|
}
|
|
}
|
|
}
|
|
|
|
return DEPENDENCIES_OK;
|
|
}
|
|
|
|
|
|
|
|
/* check for services that never returned from a check... */
|
|
void check_for_orphaned_services(void)
|
|
{
|
|
service *temp_service = NULL;
|
|
time_t current_time = 0L;
|
|
time_t expected_time = 0L;
|
|
|
|
|
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "check_for_orphaned_services()\n");
|
|
|
|
/* get the current time */
|
|
time(¤t_time);
|
|
|
|
/* check all services... */
|
|
for(temp_service = service_list; temp_service != NULL; temp_service = temp_service->next) {
|
|
|
|
/* skip services that are not currently executing */
|
|
if (temp_service->is_executing == FALSE) {
|
|
continue;
|
|
}
|
|
|
|
/* determine the time at which the check results should have come in (allow 10 minutes slack time) */
|
|
expected_time = (time_t)(temp_service->next_check + temp_service->latency + service_check_timeout + check_reaper_interval + 600);
|
|
|
|
/* this service was supposed to have executed a while ago, but for some reason the results haven't come back in... */
|
|
if (expected_time < current_time) {
|
|
|
|
/* log a warning */
|
|
logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: The check of service '%s' on host '%s' looks like it was orphaned (results never came back; last_check=%lu; next_check=%lu). I'm scheduling an immediate check of the service...\n", temp_service->description, temp_service->host_name, temp_service->last_check, temp_service->next_check);
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 1, "Service '%s' on host '%s' was orphaned, so we're scheduling an immediate check...\n", temp_service->description, temp_service->host_name);
|
|
log_debug_info(DEBUGL_CHECKS, 1, " next_check=%lu (%s); last_check=%lu (%s);\n",
|
|
temp_service->next_check, ctime(&temp_service->next_check),
|
|
temp_service->last_check, ctime(&temp_service->last_check));
|
|
|
|
/* decrement the number of running service checks */
|
|
if (currently_running_service_checks > 0) {
|
|
currently_running_service_checks--;
|
|
}
|
|
|
|
/* disable the executing flag */
|
|
temp_service->is_executing = FALSE;
|
|
|
|
/* schedule an immediate check of the service */
|
|
schedule_service_check(temp_service, current_time, CHECK_OPTION_ORPHAN_CHECK);
|
|
}
|
|
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
|
|
/* check freshness of service results */
|
|
void check_service_result_freshness(void)
|
|
{
|
|
service *temp_service = NULL;
|
|
time_t current_time = 0L;
|
|
|
|
|
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "check_service_result_freshness()\n");
|
|
log_debug_info(DEBUGL_CHECKS, 1, "Checking the freshness of service check results...\n");
|
|
|
|
/* bail out if we're not supposed to be checking freshness */
|
|
if (check_service_freshness == FALSE) {
|
|
log_debug_info(DEBUGL_CHECKS, 1, "Service freshness checking is disabled.\n");
|
|
return;
|
|
}
|
|
|
|
/* get the current time */
|
|
time(¤t_time);
|
|
|
|
/* check all services... */
|
|
for(temp_service = service_list; temp_service != NULL; temp_service = temp_service->next) {
|
|
|
|
/* skip services we shouldn't be checking for freshness */
|
|
if (temp_service->check_freshness == FALSE) {
|
|
continue;
|
|
}
|
|
|
|
/* skip services that are currently executing (problems here will be caught by orphaned service check) */
|
|
if (temp_service->is_executing == TRUE) {
|
|
continue;
|
|
}
|
|
|
|
/* skip services that have both active and passive checks disabled */
|
|
if (temp_service->checks_enabled == FALSE && temp_service->accept_passive_checks == FALSE) {
|
|
continue;
|
|
}
|
|
|
|
/* skip services that are already being freshened */
|
|
if (temp_service->is_being_freshened == TRUE) {
|
|
continue;
|
|
}
|
|
|
|
/* see if the time is right... */
|
|
if (check_time_against_period(current_time, temp_service->check_period_ptr) == ERROR) {
|
|
continue;
|
|
}
|
|
|
|
/* EXCEPTION */
|
|
/* don't check freshness of services without regular check intervals if we're using auto-freshness threshold */
|
|
if ((temp_service->check_interval == 0) && (temp_service->freshness_threshold == 0)) {
|
|
continue;
|
|
}
|
|
|
|
/* the results for the last check of this service are stale! */
|
|
if (is_service_result_fresh(temp_service, current_time, TRUE) == FALSE) {
|
|
|
|
/* set the freshen flag */
|
|
temp_service->is_being_freshened = TRUE;
|
|
|
|
/* schedule an immediate forced check of the service */
|
|
schedule_service_check(temp_service, current_time, CHECK_OPTION_FORCE_EXECUTION | CHECK_OPTION_FRESHNESS_CHECK);
|
|
}
|
|
|
|
}
|
|
|
|
return;
|
|
}
|
|
|
|
|
|
|
|
/* tests whether or not a service's check results are fresh */
|
|
int is_service_result_fresh(service *temp_service, time_t current_time, int log_this)
|
|
{
|
|
int freshness_threshold = 0;
|
|
time_t expiration_time = 0L;
|
|
int days = 0;
|
|
int hours = 0;
|
|
int minutes = 0;
|
|
int seconds = 0;
|
|
int tdays = 0;
|
|
int thours = 0;
|
|
int tminutes = 0;
|
|
int tseconds = 0;
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 2, "Checking freshness of service '%s' on host '%s'...\n", temp_service->description, temp_service->host_name);
|
|
|
|
/* use user-supplied freshness threshold or auto-calculate a freshness threshold to use? */
|
|
if (temp_service->freshness_threshold == 0) {
|
|
if (temp_service->state_type == HARD_STATE || temp_service->current_state == STATE_OK) {
|
|
freshness_threshold = (temp_service->check_interval * interval_length) + temp_service->latency + additional_freshness_latency;
|
|
}
|
|
else {
|
|
freshness_threshold = (temp_service->retry_interval * interval_length) + temp_service->latency + additional_freshness_latency;
|
|
}
|
|
}
|
|
else {
|
|
freshness_threshold = temp_service->freshness_threshold;
|
|
}
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 2, "Freshness thresholds: service=%d, use=%d\n", temp_service->freshness_threshold, freshness_threshold);
|
|
|
|
/* calculate expiration time */
|
|
/*
|
|
* CHANGED 11/10/05 EG -
|
|
* program start is only used in expiration time calculation
|
|
* if > last check AND active checks are enabled, so active checks
|
|
* can become stale immediately upon program startup
|
|
*/
|
|
/*
|
|
* CHANGED 02/25/06 SG -
|
|
* passive checks also become stale, so remove dependence on active
|
|
* check logic
|
|
*/
|
|
if (temp_service->has_been_checked == FALSE) {
|
|
expiration_time = (time_t)(event_start + freshness_threshold);
|
|
}
|
|
/*
|
|
* CHANGED 06/19/07 EG -
|
|
* Per Ton's suggestion (and user requests), only use program start
|
|
* time over last check if no specific threshold has been set by user.
|
|
* Problems can occur if Nagios is restarted more frequently that
|
|
* freshness threshold intervals (services never go stale).
|
|
*/
|
|
/*
|
|
* CHANGED 10/07/07 EG:
|
|
* Only match next condition for services that
|
|
* have active checks enabled...
|
|
*/
|
|
/*
|
|
* CHANGED 10/07/07 EG:
|
|
* Added max_service_check_spread to expiration time as suggested
|
|
* by Altinity
|
|
*/
|
|
else if ((temp_service->checks_enabled == TRUE)
|
|
&& (event_start > temp_service->last_check)
|
|
&& (temp_service->freshness_threshold == 0)) {
|
|
|
|
expiration_time = (time_t)(event_start + freshness_threshold + (max_service_check_spread * interval_length));
|
|
}
|
|
else {
|
|
expiration_time = (time_t)(temp_service->last_check + freshness_threshold);
|
|
}
|
|
|
|
/*
|
|
* If the check was last done passively, we assume it's going
|
|
* to continue that way and we need to handle the fact that
|
|
* Nagios might have been shut off for quite a long time. If so,
|
|
* we mustn't spam freshness notifications but use event_start
|
|
* instead of last_check to determine freshness expiration time.
|
|
* The threshold for "long time" is determined as 61.8% of the normal
|
|
* freshness threshold based on vast heuristical research (ie, "some
|
|
* guy once told me the golden ratio is good for loads of stuff").
|
|
*/
|
|
if ((temp_service->check_type == CHECK_TYPE_PASSIVE)
|
|
&& (temp_service->last_check < event_start)
|
|
&& (event_start - last_program_stop > freshness_threshold * 0.618)) {
|
|
|
|
expiration_time = event_start + freshness_threshold;
|
|
}
|
|
log_debug_info(DEBUGL_CHECKS, 2, "HBC: %d, PS: %lu, ES: %lu, LC: %lu, CT: %lu, ET: %lu\n", temp_service->has_been_checked, (unsigned long)program_start, (unsigned long)event_start, (unsigned long)temp_service->last_check, (unsigned long)current_time, (unsigned long)expiration_time);
|
|
|
|
/* the results for the last check of this service are stale */
|
|
if (expiration_time < current_time) {
|
|
|
|
get_time_breakdown((current_time - expiration_time), &days, &hours, &minutes, &seconds);
|
|
get_time_breakdown(freshness_threshold, &tdays, &thours, &tminutes, &tseconds);
|
|
|
|
/* log a warning */
|
|
if (log_this == TRUE) {
|
|
logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: The results of service '%s' on host '%s' are stale by %dd %dh %dm %ds (threshold=%dd %dh %dm %ds). I'm forcing an immediate check of the service.\n", temp_service->description, temp_service->host_name, days, hours, minutes, seconds, tdays, thours, tminutes, tseconds);
|
|
}
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 1, "Check results for service '%s' on host '%s' are stale by %dd %dh %dm %ds (threshold=%dd %dh %dm %ds). Forcing an immediate check of the service...\n", temp_service->description, temp_service->host_name, days, hours, minutes, seconds, tdays, thours, tminutes, tseconds);
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 1, "Check results for service '%s' on host '%s' are fresh.\n", temp_service->description, temp_service->host_name);
|
|
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
/*
 * Processes the result of an asynchronous host check.
 *
 * hst - host the result belongs to.
 * cr  - the check result to process.
 *
 * Returns ERROR when is_valid_check_result_data() rejects the arguments or
 * when a passive result is rejected by host_is_passive(); OK otherwise.
 *
 * After the initial handling helpers have run, the function decides the
 * state type (SOFT/HARD) and current attempt from the last and current state,
 * computes the next check time, reschedules the host when allowed, and then
 * sends notifications, logs, runs event handlers, informs the event broker
 * and updates status/performance data.
 */
int handle_async_host_check_result(host *hst, check_result *cr)
{
    time_t current_time = 0L;
    time_t next_check = 0L;
    time_t preferred_time = 0L;
    time_t next_valid_time = 0L;

    int state_change = FALSE;
    int hard_state_change = FALSE;
    int send_notification = FALSE;
    int handle_event = FALSE;
    int log_event = FALSE;

    /* handed to host_initial_handling(), compared for stalking below, released with my_free() at the end */
    char * old_plugin_output = NULL;

    log_debug_info(DEBUGL_FUNCTIONS, 0, "handle_async_host_check_result()\n");

    if (is_valid_check_result_data(hst, cr) == FALSE) {
        return ERROR;
    }

    if (cr->check_type == CHECK_TYPE_PASSIVE) {
        if (host_is_passive(hst, cr) == FALSE) {
            return ERROR;
        }
    }
    else {
        host_is_active(hst);
    }
    time(&current_time);
    initialize_last_host_state_change_times(hst);

    debug_async_host(hst, cr);
    host_fresh_check(hst, cr, current_time);
    host_initial_handling(hst, cr, &old_plugin_output);

    /* reschedule the next check at the regular interval - may be overridden */
    next_check = (time_t)(hst->last_check + (hst->check_interval * interval_length));

    /**************************************/
    /********* HOST CHECK OK LOGIC ********/
    /**************************************/
    if (hst->last_state == HOST_UP) {

        log_debug_info(DEBUGL_CHECKS, 1, "Host was UP.\n");

        /***** HOST IS STILL UP *****/
        if (hst->current_state == HOST_UP) {

            log_debug_info(DEBUGL_CHECKS, 1, "Host is still UP.\n");

            hst->state_type = HARD_STATE;
            hst->current_attempt = 1;
        }

        /***** HOST IS NOW DOWN/UNREACHABLE *****/
        else {

            log_debug_info(DEBUGL_CHECKS, 1, "Host is no longer UP (%s)!\n", host_state_name(hst->current_state));

            hst->state_type = SOFT_STATE;
            hst->current_attempt = 1;

            /* propagate checks to immediate parents if they are UP */
            host_propagate_checks_to_immediate_parents(hst, FALSE, current_time);

            /* propagate checks to immediate children if they are not UNREACHABLE */
            host_propagate_checks_to_immediate_children(hst, FALSE, TRUE, current_time);

            /* propagate checks to hosts that THIS ONE depends on for notifications AND execution */
            host_propagate_dependency_checks(hst, current_time);

            /* we need to handle this event */
            handle_event = TRUE;
        }
    }

    /**************************************/
    /****** HOST CHECK PROBLEM LOGIC ******/
    /**************************************/
    else {

        log_debug_info(DEBUGL_CHECKS, 1, "Host was not UP last time.\n");

        /***** HOST IS NOW UP *****/
        if (hst->current_state == HOST_UP) {

            log_debug_info(DEBUGL_CHECKS, 1, "Host is UP now.\n");

            /* propagate checks to immediate parents if they are not UP */
            host_propagate_checks_to_immediate_parents(hst, TRUE, current_time);

            /* propagate checks to immediate children if they are not UP */
            host_propagate_checks_to_immediate_children(hst, TRUE, FALSE, current_time);

            /* we need to handle this event */
            handle_event = TRUE;

            /* but a soft recovery is not something we notify for */
            if (hst->state_type == HARD_STATE) {

                log_debug_info(DEBUGL_CHECKS, 1, "Host experienced a HARD recovery.\n");

                send_notification = TRUE;
                hard_state_change = TRUE;

                hst->current_attempt = 1;
            }
            else {

                log_debug_info(DEBUGL_CHECKS, 1, "Host experienced a SOFT recovery.\n");
            }
        }

        /***** HOST IS STILL DOWN/UNREACHABLE *****/
        else {

            log_debug_info(DEBUGL_CHECKS, 1, "Host is still not UP (%s)!\n", host_state_name(hst->current_state));

            if (hst->state_type == SOFT_STATE) {

                log_debug_info(DEBUGL_CHECKS, 2, "Host state type is soft, using retry_interval\n");

                handle_event = TRUE;
                next_check = (unsigned long) (current_time + hst->retry_interval * interval_length);
            }

            /* if the state_type is hard, then send a notification */
            else {

                log_debug_info(DEBUGL_CHECKS, 2, "Host state type is hard, sending a notification\n");

                send_notification = TRUE;
            }
        }
    }

    /* translate host state between DOWN/UNREACHABLE (only for passive checks if enabled) */
    if (hst->current_state != HOST_UP && (hst->check_type == CHECK_TYPE_ACTIVE || translate_passive_host_checks == TRUE)) {

        hst->current_state = determine_host_reachability(hst);
        if (hst->state_type == SOFT_STATE)
            next_check = (unsigned long)(current_time + (hst->retry_interval * interval_length));

    }

    /* check for state change (an UP result while still SOFT also counts) */
    if (hst->current_state != hst->last_state || (hst->current_state == HOST_UP && hst->state_type == SOFT_STATE)) {

        log_debug_info(DEBUGL_CHECKS, 2, "Host experienced a state change\n");

        state_change = TRUE;
    }

    /* adjust the current attempt */
    if (hst->state_type == SOFT_STATE) {

        /* this is an edge case for non-up states, it needs to be checked first */
        if (hst->last_state != HOST_UP && hst->current_state != HOST_UP && hst->current_attempt < hst->max_attempts) {
            hst->current_attempt++;
        }

        /* reset it to 1 */
        else if (state_change == TRUE || hst->current_state == HOST_UP) {
            hst->current_attempt = 1;
        }

        /* or increment if we can */
        else if (hst->current_attempt < hst->max_attempts) {
            hst->current_attempt++;
        }
    }

    /* attempts exhausted in a state that differs from the last hard state */
    if (hst->current_attempt >= hst->max_attempts && hst->current_state != hst->last_hard_state) {

        log_debug_info(DEBUGL_CHECKS, 2, "Host had a HARD STATE CHANGE!!\n");

        next_check = (unsigned long)(current_time + (hst->check_interval * interval_length));

        hard_state_change = TRUE;
        send_notification = TRUE;
    }

    /* handle some acknowledgement things and update last_state_change */
    host_state_or_hard_state_type_change(hst, state_change, hard_state_change, &log_event, &handle_event, &send_notification);

    record_last_host_state_ended(hst);

    check_for_host_flapping(hst, TRUE, TRUE, TRUE);

    /* host with active checks disabled do not get rescheduled */
    if (hst->checks_enabled == FALSE) {
        hst->should_be_scheduled = FALSE;
    }

    /* hosts with non-recurring intervals do not get rescheduled if we're in a HARD or UP state */
    else if (hst->check_interval == 0 && (hst->state_type == HARD_STATE || hst->current_state == HOST_UP)) {
        hst->should_be_scheduled = FALSE;
    }

    /* schedule a non-forced check if we can */
    else if (hst->should_be_scheduled == TRUE) {

        log_debug_info(DEBUGL_CHECKS, 1, "Rescheduling next check of host at %s", ctime(&next_check));

        /* next check time was calculated above */
        /* make sure we don't get ourselves into too much trouble... */
        if (current_time > next_check) {
            hst->next_check = current_time;
        } else {
            hst->next_check = next_check;
        }

        /* make sure we rescheduled the next host check at a valid time */
        preferred_time = hst->next_check;
        get_next_valid_time(preferred_time, &next_valid_time, hst->check_period_ptr);
        hst->next_check = next_valid_time;

        /* Next valid time is further in the future because of timeperiod
           constraints. Add a random amount so we don't get all checks
           subject to that timeperiod constraint scheduled at the same time */
        if (next_valid_time > preferred_time) {
            hst->next_check += ranged_urand(0, check_window(hst));
        }

        schedule_host_check(hst, hst->next_check, CHECK_OPTION_NONE);
    }

    /*
     * NOTE(review): this compares the attempt counter against the state
     * constant HOST_UP. As written it resets the counter to 1 whenever it
     * equals HOST_UP's numeric value - confirm whether current_state was
     * intended before changing it.
     */
    if (hst->current_attempt == HOST_UP) {
        hst->current_attempt = 1;
    }

    if (send_notification == TRUE) {

        /* send notifications */
        if (host_notification(hst, NOTIFICATION_NORMAL, NULL, NULL, NOTIFICATION_OPTION_NONE) == OK) {

            /* log state due to notification event when stalking_options N is set */
            if (should_stalk_notifications(hst)) {
                log_event = TRUE;
            }
        }
    }

    /* the host recovered, so reset the current notification number and state flags (after the recovery notification has gone out) */
    if (hst->current_state == HOST_UP && hst->state_type == HARD_STATE && hard_state_change == TRUE) {
        hst->current_notification_number = 0;
        hst->notified_on = 0;
    }

    if (obsess_over_hosts == TRUE) {
        obsessive_compulsive_host_check_processor(hst);
    }

    /* if we're stalking this state type AND the plugin output changed since last check, log it now.. */
    if (should_stalk(hst) && compare_strings(old_plugin_output, hst->plugin_output)) {
        log_event = TRUE;
    }

    /* if log_host_retries is set to true, we have to log soft states too */
    if (hst->state_type == SOFT_STATE && log_host_retries == TRUE) {
        log_event = TRUE;
    }

    if (log_event == TRUE) {
        log_host_event(hst);
    }

    if (handle_event == TRUE) {
        handle_host_event(hst);
    }

    log_debug_info(DEBUGL_CHECKS, 2,
                   "STATE: %d, TYPE: %s, CUR: %d, MAX: %d, LAST_STATE: %d, LAST_HARD: %d, NOTIFY: %d, LOGGED: %d, HANDLED: %d\n",
                   hst->current_state,
                   (hst->state_type == SOFT_STATE) ? "SOFT" : "HARD",
                   hst->current_attempt,
                   hst->max_attempts,
                   hst->last_state,
                   hst->last_hard_state,
                   send_notification,
                   log_event,
                   handle_event);

#ifdef USE_EVENT_BROKER
    broker_host_check(NEBTYPE_HOSTCHECK_PROCESSED, NEBFLAG_NONE, NEBATTR_NONE, hst, hst->check_type, hst->current_state, hst->state_type, cr->start_time, cr->finish_time, hst->check_command, hst->latency, hst->execution_time, host_check_timeout, cr->early_timeout, cr->return_code, NULL, hst->plugin_output, hst->long_plugin_output, hst->perf_data, NULL, cr);
#endif

    hst->has_been_checked = TRUE;
    update_host_status(hst, FALSE);
    update_host_performance_data(hst);

    /* free memory */
    my_free(old_plugin_output);

    return OK;
}
|
|
|
|
/******************************************************************/
|
|
/*************** COMMON ROUTE/HOST CHECK FUNCTIONS ****************/
|
|
/******************************************************************/
|
|
|
|
/* schedules an immediate or delayed host check */
|
|
inline void schedule_host_check(host *hst, time_t check_time, int options)
|
|
{
|
|
timed_event *temp_event = NULL;
|
|
|
|
/* use the originally scheduled check unless we decide otherwise */
|
|
int use_original_event = TRUE;
|
|
|
|
|
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "schedule_host_check()\n");
|
|
|
|
|
|
if (hst == NULL) {
|
|
return;
|
|
}
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 0, "Scheduling a %s, active check of host '%s' @ %s",
|
|
(options & CHECK_OPTION_FORCE_EXECUTION) ? "forced" : "non-forced",
|
|
hst->name,
|
|
ctime(&check_time));
|
|
|
|
/* don't schedule a check if active checks of this host are disabled */
|
|
if (hst->checks_enabled == FALSE && !(options & CHECK_OPTION_FORCE_EXECUTION)) {
|
|
log_debug_info(DEBUGL_CHECKS, 0, "Active checks are disabled for this host.\n");
|
|
return;
|
|
}
|
|
|
|
if ((options == CHECK_OPTION_DEPENDENCY_CHECK)
|
|
&& (hst->last_check + cached_host_check_horizon > check_time)) {
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 0, "Last check result is recent enough (%s)\n", ctime(&hst->last_check));
|
|
return;
|
|
}
|
|
|
|
temp_event = (timed_event *)hst->next_check_event;
|
|
|
|
if (temp_event == NULL) {
|
|
use_original_event = FALSE;
|
|
}
|
|
|
|
/*
|
|
* If the host already had a check scheduled we need
|
|
* to decide which check event to use
|
|
*/
|
|
else {
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 2, "Found another host check event for this host @ %s", ctime(&temp_event->run_time));
|
|
|
|
/* the original event is a forced check... */
|
|
if ((temp_event->event_options & CHECK_OPTION_FORCE_EXECUTION)) {
|
|
|
|
/* the new event is also forced and its execution time is earlier than the original, so use it instead */
|
|
if ((options & CHECK_OPTION_FORCE_EXECUTION) && (check_time < temp_event->run_time)) {
|
|
log_debug_info(DEBUGL_CHECKS, 2, "New host check event is forced and occurs before the existing event, so the new event be used instead.\n");
|
|
use_original_event = FALSE;
|
|
}
|
|
}
|
|
|
|
/* the original event is not a forced check... */
|
|
else {
|
|
|
|
/* the new event is a forced check, so use it instead */
|
|
if ((options & CHECK_OPTION_FORCE_EXECUTION)) {
|
|
log_debug_info(DEBUGL_CHECKS, 2, "New host check event is forced, so it will be used instead of the existing event.\n");
|
|
use_original_event = FALSE;
|
|
}
|
|
|
|
/* the new event is not forced either and its execution time is earlier than the original, so use it instead */
|
|
else if (check_time < temp_event->run_time) {
|
|
log_debug_info(DEBUGL_CHECKS, 2, "New host check event occurs before the existing (older) event, so it will be used instead.\n");
|
|
use_original_event = FALSE;
|
|
}
|
|
|
|
/* the new event is older, so override the existing one */
|
|
else {
|
|
log_debug_info(DEBUGL_CHECKS, 2, "New host check event occurs after the existing event, so we'll ignore it.\n");
|
|
}
|
|
}
|
|
}
|
|
|
|
/* use the new event */
|
|
if (use_original_event == FALSE) {
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 2, "Scheduling new host check event.\n");
|
|
|
|
/* possibly allocate memory for a new event item */
|
|
if (temp_event) {
|
|
remove_event(nagios_squeue, temp_event);
|
|
}
|
|
else if ((temp_event = (timed_event *)calloc(1, sizeof(timed_event))) == NULL) {
|
|
logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Could not reschedule check of host '%s'!\n", hst->name);
|
|
return;
|
|
}
|
|
|
|
/* set the next host check event and time */
|
|
hst->next_check_event = temp_event;
|
|
hst->next_check = check_time;
|
|
|
|
/* save check options for retention purposes */
|
|
hst->check_options = options;
|
|
|
|
/* place the new event in the event queue */
|
|
temp_event->event_type = EVENT_HOST_CHECK;
|
|
temp_event->event_data = (void *)hst;
|
|
temp_event->event_args = (void *)NULL;
|
|
temp_event->event_options = options;
|
|
temp_event->run_time = hst->next_check;
|
|
temp_event->recurring = FALSE;
|
|
temp_event->event_interval = 0L;
|
|
temp_event->timing_func = NULL;
|
|
temp_event->compensate_for_time_change = TRUE;
|
|
add_event(nagios_squeue, temp_event);
|
|
}
|
|
|
|
else {
|
|
/* reset the next check time (it may be out of sync) */
|
|
if (temp_event != NULL) {
|
|
hst->next_check = temp_event->run_time;
|
|
}
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 2, "Keeping original host check event (ignoring the new one).\n");
|
|
}
|
|
|
|
/* update the status log */
|
|
update_host_status(hst, FALSE);
|
|
}
|
|
|
|
|
|
|
|
/* checks host dependencies */
|
|
int check_host_dependencies(host *hst, int dependency_type)
|
|
{
|
|
hostdependency *temp_dependency = NULL;
|
|
objectlist *list;
|
|
host *temp_host = NULL;
|
|
int state = HOST_UP;
|
|
time_t current_time = 0L;
|
|
|
|
|
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "check_host_dependencies()\n");
|
|
|
|
if (dependency_type == NOTIFICATION_DEPENDENCY) {
|
|
list = hst->notify_deps;
|
|
}
|
|
else {
|
|
list = hst->exec_deps;
|
|
}
|
|
|
|
/* check all dependencies... */
|
|
for(; list; list = list->next) {
|
|
temp_dependency = (hostdependency *)list->object_ptr;
|
|
|
|
/* find the host we depend on... */
|
|
if ((temp_host = temp_dependency->master_host_ptr) == NULL) {
|
|
continue;
|
|
}
|
|
|
|
/* skip this dependency if it has a timeperiod and the current time isn't valid */
|
|
time(¤t_time);
|
|
if ((temp_dependency->dependency_period != NULL)
|
|
&& (check_time_against_period(current_time, temp_dependency->dependency_period_ptr) == ERROR)) {
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
/* get the status to use (use last hard state if its currently in a soft state) */
|
|
if (temp_host->state_type == SOFT_STATE && soft_state_dependencies == FALSE) {
|
|
state = temp_host->last_hard_state;
|
|
}
|
|
else {
|
|
state = temp_host->current_state;
|
|
}
|
|
|
|
/* is the host we depend on in state that fails the dependency tests? */
|
|
if (flag_isset(temp_dependency->failure_options, 1 << state)) {
|
|
return DEPENDENCIES_FAILED;
|
|
}
|
|
|
|
/* immediate dependencies ok at this point - check parent dependencies if necessary */
|
|
if ((temp_dependency->inherits_parent == TRUE)
|
|
&& (check_host_dependencies(temp_host, dependency_type) != DEPENDENCIES_OK)) {
|
|
|
|
return DEPENDENCIES_FAILED;
|
|
}
|
|
}
|
|
|
|
return DEPENDENCIES_OK;
|
|
}
|
|
|
|
|
|
|
|
/* check for hosts that never returned from a check... */
|
|
void check_for_orphaned_hosts(void)
|
|
{
|
|
host *temp_host = NULL;
|
|
time_t current_time = 0L;
|
|
time_t expected_time = 0L;
|
|
|
|
|
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "check_for_orphaned_hosts()\n");
|
|
|
|
/* get the current time */
|
|
time(¤t_time);
|
|
|
|
/* check all hosts... */
|
|
for(temp_host = host_list; temp_host != NULL; temp_host = temp_host->next) {
|
|
|
|
/* skip hosts that don't have a set check interval (on-demand checks are missed by the orphan logic) */
|
|
if (temp_host->next_check == (time_t)0L) {
|
|
continue;
|
|
}
|
|
|
|
/* skip hosts that are not currently executing */
|
|
if (temp_host->is_executing == FALSE) {
|
|
continue;
|
|
}
|
|
|
|
/* determine the time at which the check results should have come in (allow 10 minutes slack time) */
|
|
expected_time = (time_t)(temp_host->next_check + temp_host->latency + host_check_timeout + check_reaper_interval + 600);
|
|
|
|
/* this host was supposed to have executed a while ago, but for some reason the results haven't come back in... */
|
|
if (expected_time < current_time) {
|
|
|
|
/* log a warning */
|
|
logit(NSLOG_RUNTIME_WARNING, TRUE,
|
|
"Warning: The check of host '%s' looks like it was orphaned (results never came back). I'm scheduling an immediate check of the host...\n",
|
|
temp_host->name);
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 1,
|
|
"Host '%s' was orphaned, so we're scheduling an immediate check...\n",
|
|
temp_host->name);
|
|
|
|
/* decrement the number of running host checks */
|
|
if (currently_running_host_checks > 0) {
|
|
currently_running_host_checks--;
|
|
}
|
|
|
|
/* disable the executing flag */
|
|
temp_host->is_executing = FALSE;
|
|
|
|
/* schedule an immediate check of the host */
|
|
schedule_host_check(temp_host, current_time, CHECK_OPTION_ORPHAN_CHECK);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/* check freshness of host results */
|
|
void check_host_result_freshness(void)
|
|
{
|
|
host *temp_host = NULL;
|
|
time_t current_time = 0L;
|
|
|
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "check_host_result_freshness()\n");
|
|
log_debug_info(DEBUGL_CHECKS, 2, "Attempting to check the freshness of host check results...\n");
|
|
|
|
/* bail out if we're not supposed to be checking freshness */
|
|
if (check_host_freshness == FALSE) {
|
|
log_debug_info(DEBUGL_CHECKS, 2, "Host freshness checking is disabled.\n");
|
|
return;
|
|
}
|
|
|
|
/* get the current time */
|
|
time(¤t_time);
|
|
|
|
/* check all hosts... */
|
|
for(temp_host = host_list; temp_host != NULL; temp_host = temp_host->next) {
|
|
|
|
/* skip hosts we shouldn't be checking for freshness */
|
|
if (temp_host->check_freshness == FALSE) {
|
|
continue;
|
|
}
|
|
|
|
/* skip hosts that have both active and passive checks disabled */
|
|
if (temp_host->checks_enabled == FALSE && temp_host->accept_passive_checks == FALSE) {
|
|
continue;
|
|
}
|
|
|
|
/* skip hosts that are currently executing (problems here will be caught by orphaned host check) */
|
|
if (temp_host->is_executing == TRUE) {
|
|
continue;
|
|
}
|
|
|
|
/* skip hosts that are already being freshened */
|
|
if (temp_host->is_being_freshened == TRUE) {
|
|
continue;
|
|
}
|
|
|
|
/* see if the time is right... */
|
|
if (check_time_against_period(current_time, temp_host->check_period_ptr) == ERROR) {
|
|
continue;
|
|
}
|
|
|
|
/* the results for the last check of this host are stale */
|
|
if (is_host_result_fresh(temp_host, current_time, TRUE) == FALSE) {
|
|
|
|
/* set the freshen flag */
|
|
temp_host->is_being_freshened = TRUE;
|
|
|
|
/* schedule an immediate forced check of the host */
|
|
schedule_host_check(temp_host, current_time, CHECK_OPTION_FORCE_EXECUTION | CHECK_OPTION_FRESHNESS_CHECK);
|
|
}
|
|
}
|
|
}
|
|
|
|
|
|
|
|
/* checks to see if a hosts's check results are fresh */
|
|
int is_host_result_fresh(host *temp_host, time_t current_time, int log_this)
|
|
{
|
|
time_t expiration_time = 0L;
|
|
int freshness_threshold = 0;
|
|
int days = 0;
|
|
int hours = 0;
|
|
int minutes = 0;
|
|
int seconds = 0;
|
|
int tdays = 0;
|
|
int thours = 0;
|
|
int tminutes = 0;
|
|
int tseconds = 0;
|
|
double interval = 0;
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 2, "Checking freshness of host '%s'...\n", temp_host->name);
|
|
|
|
/* use user-supplied freshness threshold or auto-calculate a freshness threshold to use? */
|
|
if (temp_host->freshness_threshold == 0) {
|
|
|
|
if (temp_host->state_type == HARD_STATE || temp_host->current_state == STATE_OK) {
|
|
interval = temp_host->check_interval;
|
|
}
|
|
else {
|
|
interval = temp_host->retry_interval;
|
|
}
|
|
|
|
freshness_threshold = (interval * interval_length) + temp_host->latency + additional_freshness_latency;
|
|
}
|
|
else {
|
|
freshness_threshold = temp_host->freshness_threshold;
|
|
}
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 2, "Freshness thresholds: host=%d, use=%d\n", temp_host->freshness_threshold, freshness_threshold);
|
|
|
|
/* calculate expiration time */
|
|
/*
|
|
* CHANGED 11/10/05 EG:
|
|
* program start is only used in expiration time calculation
|
|
* if > last check AND active checks are enabled, so active checks
|
|
* can become stale immediately upon program startup
|
|
*/
|
|
if (temp_host->has_been_checked == FALSE) {
|
|
expiration_time = (time_t)(event_start + freshness_threshold);
|
|
}
|
|
/*
|
|
* CHANGED 06/19/07 EG:
|
|
* Per Ton's suggestion (and user requests), only use program start
|
|
* time over last check if no specific threshold has been set by user.
|
|
* Problems can occur if Nagios is restarted more frequently that
|
|
* freshness threshold intervals (hosts never go stale).
|
|
*/
|
|
/*
|
|
* CHANGED 10/07/07 EG:
|
|
* Added max_host_check_spread to expiration time as suggested by
|
|
* Altinity
|
|
*/
|
|
else if ((temp_host->checks_enabled == TRUE)
|
|
&& (event_start > temp_host->last_check)
|
|
&& (temp_host->freshness_threshold == 0)) {
|
|
|
|
expiration_time = (time_t)(event_start + freshness_threshold + (max_host_check_spread * interval_length));
|
|
}
|
|
else {
|
|
expiration_time = (time_t)(temp_host->last_check + freshness_threshold);
|
|
}
|
|
|
|
/*
|
|
* If the check was last done passively, we assume it's going
|
|
* to continue that way and we need to handle the fact that
|
|
* Nagios might have been shut off for quite a long time. If so,
|
|
* we mustn't spam freshness notifications but use event_start
|
|
* instead of last_check to determine freshness expiration time.
|
|
* The threshold for "long time" is determined as 61.8% of the normal
|
|
* freshness threshold based on vast heuristical research (ie, "some
|
|
* guy once told me the golden ratio is good for loads of stuff").
|
|
*/
|
|
if ((temp_host->check_type == CHECK_TYPE_PASSIVE)
|
|
&& (temp_host->last_check < event_start)
|
|
&& (event_start - last_program_stop > freshness_threshold * 0.618)) {
|
|
|
|
expiration_time = event_start + freshness_threshold;
|
|
}
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 2,
|
|
"HBC: %d, PS: %lu, ES: %lu, LC: %lu, CT: %lu, ET: %lu\n",
|
|
temp_host->has_been_checked,
|
|
(unsigned long)program_start,
|
|
(unsigned long)event_start,
|
|
(unsigned long)temp_host->last_check,
|
|
(unsigned long)current_time,
|
|
(unsigned long)expiration_time);
|
|
|
|
/* the results for the last check of this host are stale */
|
|
if (expiration_time < current_time) {
|
|
|
|
get_time_breakdown((current_time - expiration_time), &days, &hours, &minutes, &seconds);
|
|
get_time_breakdown(freshness_threshold, &tdays, &thours, &tminutes, &tseconds);
|
|
|
|
/* log a warning */
|
|
if (log_this == TRUE) {
|
|
logit(NSLOG_RUNTIME_WARNING, TRUE,
|
|
"Warning: The results of host '%s' are stale by %dd %dh %dm %ds (threshold=%dd %dh %dm %ds). I'm forcing an immediate check of the host.\n",
|
|
temp_host->name,
|
|
days,
|
|
hours,
|
|
minutes,
|
|
seconds,
|
|
tdays,
|
|
thours,
|
|
tminutes,
|
|
tseconds);
|
|
}
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 1,
|
|
"Check results for host '%s' are stale by %dd %dh %dm %ds (threshold=%dd %dh %dm %ds). Forcing an immediate check of the host...\n",
|
|
temp_host->name,
|
|
days,
|
|
hours,
|
|
minutes,
|
|
seconds,
|
|
tdays,
|
|
thours,
|
|
tminutes,
|
|
tseconds);
|
|
|
|
return FALSE;
|
|
}
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 1, "Check results for host '%s' are fresh.\n", temp_host->name);
|
|
return TRUE;
|
|
}
|
|
|
|
|
|
/* run a scheduled host check asynchronously */
|
|
int run_scheduled_host_check(host *hst, int check_options, double latency)
|
|
{
|
|
int result = OK;
|
|
time_t current_time = 0L;
|
|
time_t preferred_time = 0L;
|
|
time_t next_valid_time = 0L;
|
|
int time_is_valid = TRUE;
|
|
|
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "run_scheduled_host_check()\n");
|
|
|
|
if (hst == NULL) {
|
|
return ERROR;
|
|
}
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 0, "Attempting to run scheduled check of host '%s': check options=%d, latency=%lf\n", hst->name, check_options, latency);
|
|
|
|
/*
|
|
* reset the next_check_event so we know this host
|
|
* check is no longer in the scheduling queue
|
|
*/
|
|
hst->next_check_event = NULL;
|
|
|
|
/* attempt to run the check */
|
|
result = run_async_host_check(hst, check_options, latency, TRUE, TRUE, &time_is_valid, &preferred_time);
|
|
|
|
/* an error occurred, so reschedule the check */
|
|
if (result == ERROR) {
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 1, "Unable to run scheduled host check at this time\n");
|
|
|
|
/* only attempt to (re)schedule checks that should get checked... */
|
|
if (hst->should_be_scheduled == TRUE) {
|
|
|
|
/* get current time */
|
|
time(¤t_time);
|
|
|
|
/* determine next time we should check the host if needed */
|
|
/* if host has no check interval, schedule it again for 5 minutes from now */
|
|
if (current_time >= preferred_time) {
|
|
preferred_time = current_time + ((hst->check_interval <= 0) ? 300 : (hst->check_interval * interval_length));
|
|
}
|
|
|
|
/* make sure we rescheduled the next host check at a valid time */
|
|
get_next_valid_time(preferred_time, &next_valid_time, hst->check_period_ptr);
|
|
|
|
/*
|
|
* If the host really can't be rescheduled properly we
|
|
* set next check time to preferred_time and try again then
|
|
*/
|
|
if ((time_is_valid == FALSE)
|
|
&& (check_time_against_period(next_valid_time, hst->check_period_ptr) == ERROR)) {
|
|
|
|
hst->next_check = preferred_time + ranged_urand(0, check_window(hst));
|
|
|
|
logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Check of host '%s' could not be rescheduled properly. Scheduling check for %s...\n", hst->name, ctime(&preferred_time));
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 1, "Unable to find any valid times to reschedule the next host check!\n");
|
|
}
|
|
|
|
/* this service could be rescheduled... */
|
|
else {
|
|
hst->next_check = next_valid_time;
|
|
if (next_valid_time > preferred_time) {
|
|
/* Next valid time is further in the future because of
|
|
* timeperiod constraints. Add a random amount so we
|
|
* don't get all checks subject to that timeperiod
|
|
* constraint scheduled at the same time
|
|
*/
|
|
hst->next_check += ranged_urand(0, check_window(hst));
|
|
}
|
|
hst->should_be_scheduled = TRUE;
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 1, "Rescheduled next host check for %s", ctime(&next_valid_time));
|
|
}
|
|
}
|
|
|
|
/* update the status log */
|
|
update_host_status(hst, FALSE);
|
|
|
|
/* reschedule the next host check - unless we couldn't find a valid next check time */
|
|
/* 10/19/07 EG - keep original check options */
|
|
if (hst->should_be_scheduled == TRUE) {
|
|
schedule_host_check(hst, hst->next_check, check_options);
|
|
}
|
|
|
|
return ERROR;
|
|
}
|
|
|
|
return OK;
|
|
}
|
|
|
|
|
|
|
|
/* perform an asynchronous check of a host */
|
|
/* scheduled host checks will use this, as will some checks that result from on-demand checks... */
|
|
int run_async_host_check(host *hst, int check_options, double latency, int scheduled_check, int reschedule_check, int *time_is_valid, time_t *preferred_time)
|
|
{
|
|
nagios_macros mac;
|
|
char *raw_command = NULL;
|
|
char *processed_command = NULL;
|
|
struct timeval start_time, end_time;
|
|
double old_latency = 0.0;
|
|
check_result *cr;
|
|
int runchk_result = OK;
|
|
int macro_options = STRIP_ILLEGAL_MACRO_CHARS | ESCAPE_MACRO_CHARS;
|
|
#ifdef USE_EVENT_BROKER
|
|
int neb_result = OK;
|
|
#endif
|
|
|
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "run_async_host_check(%s ...)\n", hst ? hst->name : "(NULL host!)");
|
|
|
|
/* make sure we have a host */
|
|
if (hst == NULL)
|
|
return ERROR;
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 0, "** Running async check of host '%s'...\n", hst->name);
|
|
|
|
/* abort if check is already running or was recently completed */
|
|
if (!(check_options & CHECK_OPTION_FORCE_EXECUTION)) {
|
|
if (hst->is_executing == TRUE) {
|
|
log_debug_info(DEBUGL_CHECKS, 1, "A check of this host is already being executed, so we'll pass for the moment...\n");
|
|
return ERROR;
|
|
}
|
|
|
|
if (hst->last_check + cached_host_check_horizon > time(NULL)) {
|
|
log_debug_info(DEBUGL_CHECKS, 0, "Host '%s' was last checked within its cache horizon. Aborting check\n", hst->name);
|
|
return ERROR;
|
|
}
|
|
}
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 0, "Host '%s' passed first hurdle (caching/execution)\n", hst->name);
|
|
/* is the host check viable at this time? */
|
|
if (check_host_check_viability(hst, check_options, time_is_valid, preferred_time) == ERROR) {
|
|
log_debug_info(DEBUGL_CHECKS, 0, "Host check isn't viable at this point.\n");
|
|
return ERROR;
|
|
}
|
|
|
|
/******** GOOD TO GO FOR A REAL HOST CHECK AT THIS POINT ********/
|
|
|
|
#ifdef USE_EVENT_BROKER
|
|
/* initialize start/end times */
|
|
start_time.tv_sec = 0L;
|
|
start_time.tv_usec = 0L;
|
|
end_time.tv_sec = 0L;
|
|
end_time.tv_usec = 0L;
|
|
|
|
/* send data to event broker */
|
|
neb_result = broker_host_check(NEBTYPE_HOSTCHECK_ASYNC_PRECHECK, NEBFLAG_NONE, NEBATTR_NONE, hst, CHECK_TYPE_ACTIVE, hst->current_state, hst->state_type, start_time, end_time, hst->check_command, hst->latency, 0.0, host_check_timeout, FALSE, 0, NULL, NULL, NULL, NULL, NULL, NULL);
|
|
|
|
/* neb module wants to cancel the host check - the check will be rescheduled for a later time by the scheduling logic */
|
|
if (neb_result == NEBERROR_CALLBACKCANCEL) {
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 0, "Check of host '%s' (id=%u) was cancelled by a module\n", hst->name, hst->id);
|
|
|
|
if (preferred_time) {
|
|
*preferred_time += check_window(hst);
|
|
}
|
|
return ERROR;
|
|
}
|
|
|
|
/* neb module wants to override the host check - perhaps it will check the host itself */
|
|
/* NOTE: if a module does this, it has to do a lot of the stuff found below to make sure things don't get whacked out of shape! */
|
|
/* NOTE: if would be easier for modules to override checks when the NEBTYPE_SERVICECHECK_INITIATE event is called (later) */
|
|
if (neb_result == NEBERROR_CALLBACKOVERRIDE) {
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 0, "Check of host '%s' (id=%u) was overridden by a module\n", hst->name, hst->id);
|
|
return OK;
|
|
}
|
|
#endif
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 0, "Checking host '%s'...\n", hst->name);
|
|
|
|
/* clear check options - we don't want old check options retained */
|
|
/* only clear options if this was a scheduled check - on demand check options shouldn't affect retained info */
|
|
if (scheduled_check == TRUE) {
|
|
hst->check_options = CHECK_OPTION_NONE;
|
|
}
|
|
|
|
/* set latency (temporarily) for macros and event broker */
|
|
old_latency = hst->latency;
|
|
hst->latency = latency;
|
|
|
|
/* grab the host macro variables */
|
|
memset(&mac, 0, sizeof(mac));
|
|
grab_host_macros_r(&mac, hst);
|
|
|
|
/* get the raw command line */
|
|
get_raw_command_line_r(&mac, hst->check_command_ptr, hst->check_command, &raw_command, macro_options);
|
|
if (raw_command == NULL) {
|
|
clear_volatile_macros_r(&mac);
|
|
log_debug_info(DEBUGL_CHECKS, 0, "Raw check command for host '%s' was NULL - aborting.\n", hst->name);
|
|
return ERROR;
|
|
}
|
|
|
|
/* process any macros contained in the argument */
|
|
process_macros_r(&mac, raw_command, &processed_command, macro_options);
|
|
my_free(raw_command);
|
|
if (processed_command == NULL) {
|
|
clear_volatile_macros_r(&mac);
|
|
log_debug_info(DEBUGL_CHECKS, 0, "Processed check command for host '%s' was NULL - aborting.\n", hst->name);
|
|
return ERROR;
|
|
}
|
|
|
|
/* get the command start time */
|
|
gettimeofday(&start_time, NULL);
|
|
|
|
cr = calloc(1, sizeof(*cr));
|
|
if (!cr) {
|
|
log_debug_info(DEBUGL_CHECKS, 0, "Failed to allocate checkresult struct\n");
|
|
clear_volatile_macros_r(&mac);
|
|
clear_host_macros_r(&mac);
|
|
return ERROR;
|
|
}
|
|
init_check_result(cr);
|
|
|
|
/* save check info */
|
|
cr->object_check_type = HOST_CHECK;
|
|
cr->host_name = (char *)strdup(hst->name);
|
|
cr->service_description = NULL;
|
|
cr->check_type = CHECK_TYPE_ACTIVE;
|
|
cr->check_options = check_options;
|
|
cr->scheduled_check = scheduled_check;
|
|
cr->reschedule_check = reschedule_check;
|
|
cr->latency = latency;
|
|
cr->start_time = start_time;
|
|
cr->finish_time = start_time;
|
|
cr->early_timeout = FALSE;
|
|
cr->exited_ok = TRUE;
|
|
cr->return_code = STATE_OK;
|
|
cr->output = NULL;
|
|
|
|
#ifdef USE_EVENT_BROKER
|
|
/* send data to event broker */
|
|
neb_result = broker_host_check(NEBTYPE_HOSTCHECK_INITIATE, NEBFLAG_NONE, NEBATTR_NONE, hst, CHECK_TYPE_ACTIVE, hst->current_state, hst->state_type, start_time, end_time, hst->check_command, hst->latency, 0.0, host_check_timeout, FALSE, 0, processed_command, NULL, NULL, NULL, NULL, cr);
|
|
|
|
/* neb module wants to override the service check - perhaps it will check the service itself */
|
|
if (neb_result == NEBERROR_CALLBACKOVERRIDE) {
|
|
clear_volatile_macros_r(&mac);
|
|
hst->latency = old_latency;
|
|
free_check_result(cr);
|
|
my_free(processed_command);
|
|
return OK;
|
|
}
|
|
#endif
|
|
|
|
/* reset latency (permanent value for this check will get set later) */
|
|
hst->latency = old_latency;
|
|
|
|
runchk_result = wproc_run_check(cr, processed_command, &mac);
|
|
if (runchk_result == ERROR) {
|
|
logit(NSLOG_RUNTIME_ERROR, TRUE, "Unable to send check for host '%s' to worker (ret=%d)\n", hst->name, runchk_result);
|
|
}
|
|
else {
|
|
/* do the book-keeping */
|
|
currently_running_host_checks++;
|
|
hst->is_executing = TRUE;
|
|
update_check_stats((scheduled_check == TRUE) ? ACTIVE_SCHEDULED_HOST_CHECK_STATS : ACTIVE_ONDEMAND_HOST_CHECK_STATS, start_time.tv_sec);
|
|
update_check_stats(PARALLEL_HOST_CHECK_STATS, start_time.tv_sec);
|
|
}
|
|
|
|
|
|
/* free memory */
|
|
clear_volatile_macros_r(&mac);
|
|
my_free(processed_command);
|
|
|
|
return runchk_result;
|
|
}
|
|
|
|
/* process results of an asynchronous host check */
|
|
|
|
|
|
/* checks viability of performing a host check */
|
|
int check_host_check_viability(host *hst, int check_options, int *time_is_valid, time_t *new_time)
|
|
{
|
|
int perform_check = TRUE;
|
|
time_t current_time = 0L;
|
|
time_t preferred_time = 0L;
|
|
int check_interval = 0;
|
|
|
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "check_host_check_viability()\n");
|
|
|
|
/* make sure we have a host */
|
|
if (hst == NULL) {
|
|
return ERROR;
|
|
}
|
|
|
|
/* get the check interval to use if we need to reschedule the check */
|
|
if (hst->state_type == SOFT_STATE && hst->current_state != HOST_UP) {
|
|
check_interval = (hst->retry_interval * interval_length);
|
|
}
|
|
else {
|
|
check_interval = (hst->check_interval * interval_length);
|
|
}
|
|
|
|
/* make sure check interval is positive - otherwise use 5 minutes out for next check */
|
|
if (check_interval <= 0) {
|
|
check_interval = 300;
|
|
}
|
|
|
|
/* get the current time */
|
|
time(¤t_time);
|
|
|
|
/* initialize the next preferred check time */
|
|
preferred_time = current_time;
|
|
|
|
/* can we check the host right now? */
|
|
if (!(check_options & CHECK_OPTION_FORCE_EXECUTION)) {
|
|
|
|
/* if checks of the host are currently disabled... */
|
|
if (hst->checks_enabled == FALSE) {
|
|
preferred_time = current_time + check_interval;
|
|
perform_check = FALSE;
|
|
}
|
|
|
|
/* make sure this is a valid time to check the host */
|
|
if (check_time_against_period((unsigned long)current_time, hst->check_period_ptr) == ERROR) {
|
|
log_debug_info(DEBUGL_CHECKS, 0, "Timeperiod check failed\n");
|
|
preferred_time = current_time;
|
|
if (time_is_valid) {
|
|
*time_is_valid = FALSE;
|
|
}
|
|
perform_check = FALSE;
|
|
}
|
|
|
|
/* check host dependencies for execution */
|
|
if (check_host_dependencies(hst, EXECUTION_DEPENDENCY) == DEPENDENCIES_FAILED) {
|
|
log_debug_info(DEBUGL_CHECKS, 0, "Host check dependencies failed\n");
|
|
preferred_time = current_time + check_interval;
|
|
perform_check = FALSE;
|
|
if (host_skip_check_dependency_status >= 0) {
|
|
hst->current_state = host_skip_check_dependency_status;
|
|
}
|
|
}
|
|
}
|
|
|
|
/* pass back the next viable check time */
|
|
if (new_time) {
|
|
*new_time = preferred_time;
|
|
}
|
|
|
|
if (perform_check == TRUE) {
|
|
return OK;
|
|
}
|
|
|
|
return ERROR;
|
|
}
|
|
|
|
|
|
|
|
/* determination of the host's state based on route availability*/
|
|
/* used only to determine difference between DOWN and UNREACHABLE states */
|
|
inline int determine_host_reachability(host *hst)
|
|
{
|
|
host *parent_host = NULL;
|
|
hostsmember *temp_hostsmember = NULL;
|
|
|
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "determine_host_reachability(host=%s)\n", hst ? hst->name : "(NULL host!)");
|
|
|
|
if (hst == NULL)
|
|
return HOST_DOWN;
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 2, "Determining state of host '%s': current state=%d (%s)\n", hst->name, hst->current_state, host_state_name(hst->current_state));
|
|
|
|
/* host is UP - no translation needed */
|
|
if (hst->current_state == HOST_UP) {
|
|
log_debug_info(DEBUGL_CHECKS, 2, "Host is UP, no state translation needed.\n");
|
|
return HOST_UP;
|
|
}
|
|
|
|
/* host has no parents, so it is DOWN */
|
|
if (hst->check_type == CHECK_TYPE_PASSIVE && hst->current_state == HOST_UNREACHABLE) {
|
|
log_debug_info(DEBUGL_CHECKS, 2, "Passive check so keep it UNREACHABLE.\n");
|
|
return HOST_UNREACHABLE;
|
|
}
|
|
else if (hst->parent_hosts == NULL) {
|
|
log_debug_info(DEBUGL_CHECKS, 2, "Host has no parents, so it is DOWN.\n");
|
|
return HOST_DOWN;
|
|
}
|
|
|
|
/* check all parent hosts to see if we're DOWN or UNREACHABLE */
|
|
else {
|
|
for(temp_hostsmember = hst->parent_hosts; temp_hostsmember != NULL; temp_hostsmember = temp_hostsmember->next) {
|
|
parent_host = temp_hostsmember->host_ptr;
|
|
log_debug_info(DEBUGL_CHECKS, 2, " Parent '%s' is %s\n", parent_host->name, host_state_name(parent_host->current_state));
|
|
/* bail out as soon as we find one parent host that is UP */
|
|
if (parent_host->current_state == HOST_UP) {
|
|
/* set the current state */
|
|
log_debug_info(DEBUGL_CHECKS, 2, "At least one parent (%s) is up, so host is DOWN.\n", parent_host->name);
|
|
return HOST_DOWN;
|
|
}
|
|
}
|
|
}
|
|
|
|
log_debug_info(DEBUGL_CHECKS, 2, "No parents were up, so host is UNREACHABLE.\n");
|
|
return HOST_UNREACHABLE;
|
|
}
|
|
|
|
|
|
|
|
/******************************************************************/
|
|
/****************** HOST STATE HANDLER FUNCTIONS ******************/
|
|
/******************************************************************/
|
|
|
|
|
|
|
|
/* Parses raw plugin output and returns: short and long output, perf data. */
|
|
int parse_check_output(char *buf, char **short_output, char **long_output, char **perf_data, int escape_newlines_please, int newlines_are_escaped)
|
|
{
|
|
int current_line = 0;
|
|
int eof = FALSE;
|
|
int in_perf_data = FALSE;
|
|
const int dbuf_chunk = 1024;
|
|
dbuf long_text;
|
|
dbuf perf_text;
|
|
char *ptr = NULL;
|
|
int x = 0;
|
|
int y = 0;
|
|
|
|
/* Initialize output values. */
|
|
if (short_output) {
|
|
*short_output = NULL;
|
|
}
|
|
if (long_output) {
|
|
*long_output = NULL;
|
|
}
|
|
if (perf_data) {
|
|
*perf_data = NULL;
|
|
}
|
|
|
|
/* No input provided or no output requested, nothing to do. */
|
|
if (!buf
|
|
|| !*buf
|
|
|| (!short_output && !long_output && !perf_data)) {
|
|
|
|
return OK;
|
|
}
|
|
|
|
/* Initialize dynamic buffers (1KB chunk size). */
|
|
dbuf_init(&long_text, dbuf_chunk);
|
|
dbuf_init(&perf_text, dbuf_chunk);
|
|
|
|
/* We should never need to worry about unescaping here again. We assume a
|
|
* common internal plugin output format that is newline delimited. */
|
|
if (newlines_are_escaped) {
|
|
for (x = 0, y = 0; buf[x]; x++) {
|
|
if (buf[x] == '\\' && buf[x + 1] == '\\') {
|
|
x++;
|
|
buf[y++] = buf[x];
|
|
}
|
|
else if (buf[x] == '\\' && buf[x + 1] == 'n') {
|
|
x++;
|
|
buf[y++] = '\n';
|
|
}
|
|
else {
|
|
buf[y++] = buf[x];
|
|
}
|
|
}
|
|
buf[y] = '\0';
|
|
}
|
|
|
|
/* Process each line of input. */
|
|
for (x = 0; !eof && buf[0]; x++) {
|
|
|
|
/* Continue on until we reach the end of a line (or input). */
|
|
if (buf[x] == '\n') {
|
|
buf[x] = '\0';
|
|
}
|
|
else if (buf[x] == '\0') {
|
|
eof = TRUE;
|
|
}
|
|
else {
|
|
continue;
|
|
}
|
|
|
|
/* Handle this line of input. */
|
|
current_line++;
|
|
|
|
/* The first line contains short plugin output and optional perf data. */
|
|
if (current_line == 1) {
|
|
|
|
/* Get the short plugin output. If buf[0] is '|', strtok() will
|
|
* return buf+1 or NULL if buf[1] is '\0'. We use my_strtok_with_free()
|
|
* instead which returns a pointer to '\0' in this case. */
|
|
ptr = my_strtok_with_free(buf, "|", FALSE);
|
|
if (ptr != NULL) {
|
|
|
|
if (short_output) {
|
|
|
|
/* Remove leading and trailing whitespace. */
|
|
strip(ptr);
|
|
*short_output = strdup(ptr);
|
|
}
|
|
|
|
/* Get the optional perf data. */
|
|
ptr = my_strtok_with_free(NULL, "\n", FALSE);
|
|
if (ptr != NULL) {
|
|
dbuf_strcat(&perf_text, ptr);
|
|
}
|
|
|
|
/* free anything we've allocated */
|
|
my_strtok_with_free(NULL, NULL, TRUE);
|
|
}
|
|
}
|
|
|
|
/* Additional lines contain long plugin output and optional perf data.
|
|
* Once we've hit perf data, the rest of the output is perf data. */
|
|
else if (in_perf_data) {
|
|
if (perf_text.buf && *perf_text.buf) {
|
|
dbuf_strcat(&perf_text, " ");
|
|
}
|
|
dbuf_strcat(&perf_text, buf);
|
|
}
|
|
|
|
/* Look for the perf data separator. */
|
|
else if (strchr(buf, '|')) {
|
|
in_perf_data = TRUE;
|
|
|
|
ptr = my_strtok_with_free(buf, "|", FALSE);
|
|
if (ptr != NULL) {
|
|
|
|
/* Get the remaining long plugin output. */
|
|
if (current_line > 2) {
|
|
dbuf_strcat(&long_text, "\n");
|
|
}
|
|
dbuf_strcat(&long_text, ptr);
|
|
|
|
/* Get the perf data. */
|
|
ptr = my_strtok_with_free(NULL, "\n", FALSE);
|
|
if (ptr != NULL) {
|
|
if (perf_text.buf && *perf_text.buf) {
|
|
dbuf_strcat(&perf_text, " ");
|
|
}
|
|
dbuf_strcat(&perf_text, ptr);
|
|
}
|
|
|
|
/* free anything we've allocated */
|
|
my_strtok_with_free(NULL, NULL, TRUE);
|
|
}
|
|
}
|
|
|
|
/* Otherwise it's still just long output. */
|
|
else {
|
|
if (current_line > 2) {
|
|
dbuf_strcat(&long_text, "\n");
|
|
}
|
|
dbuf_strcat(&long_text, buf);
|
|
}
|
|
|
|
/* Point buf to the start of the next line. *(buf+x+1) will be a valid
|
|
* memory reference on our next iteration or we are at the end of input
|
|
* (eof == TRUE) and *(buf+x+1) will never be referenced. */
|
|
buf += x + 1;
|
|
|
|
/* x will be incremented to 0 by the loop update. */
|
|
x = -1;
|
|
}
|
|
|
|
/* Save long output. */
|
|
if (long_output && long_text.buf && *long_text.buf) {
|
|
|
|
/* Escape newlines (and backslashes) in long output if requested. */
|
|
if (escape_newlines_please) {
|
|
*long_output = escape_newlines(long_text.buf);
|
|
}
|
|
else {
|
|
*long_output = strdup(long_text.buf);
|
|
}
|
|
}
|
|
|
|
/* Save perf data. */
|
|
if (perf_data && perf_text.buf && *perf_text.buf) {
|
|
|
|
/* Remove leading and trailing whitespace. */
|
|
strip(perf_text.buf);
|
|
*perf_data = strdup(perf_text.buf);
|
|
}
|
|
|
|
/* free dynamic buffers */
|
|
dbuf_free(&long_text);
|
|
dbuf_free(&perf_text);
|
|
|
|
return OK;
|
|
}
|