2017-05-19 22:22:40 +02:00
/*****************************************************************************
 *
 * CHECKS.C - Service and host check functions for Nagios
 *
 *
 * License:
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 *
 * This program is distributed in the hope that it will be useful,
 * but WITHOUT ANY WARRANTY; without even the implied warranty of
 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the
 * GNU General Public License for more details.
 *
 * You should have received a copy of the GNU General Public License
 * along with this program; if not, write to the Free Software
 * Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
 *
 *****************************************************************************/
# include "../include/config.h"
# include "../include/comments.h"
# include "../include/common.h"
# include "../include/statusdata.h"
# include "../include/downtime.h"
# include "../include/macros.h"
# include "../include/nagios.h"
# include "../include/broker.h"
# include "../include/perfdata.h"
2017-05-19 23:37:19 +02:00
# include "../include/workers.h"
2017-05-19 22:22:40 +02:00
/*#define DEBUG_CHECKS*/
/*#define DEBUG_HOST_CHECKS 1*/
2019-04-18 17:09:18 +02:00
/*
 * Replace every ';' in the NUL-terminated string `output` with ':' in place.
 *
 * `ptr` is a caller-supplied char * lvalue used as the scan cursor; it is
 * NULL once the macro has finished. `output` is evaluated exactly once.
 * A NULL `output` is a no-op instead of being handed to strchr().
 */
#define replace_semicolons(output, ptr) do { \
		(ptr) = (output); \
		while ((ptr) != NULL && ((ptr) = strchr((ptr), ';')) != NULL) { \
			*(ptr) = ':'; \
		} \
	} while (0)
2017-05-19 22:22:40 +02:00
# ifdef USE_EVENT_BROKER
# include "../include/neberrors.h"
# endif
/******************************************************************/
/********************** CHECK REAPER FUNCTIONS ********************/
/******************************************************************/
/* reaps host and service check results */
2019-04-18 17:09:18 +02:00
int reap_check_results ( void )
{
2017-05-19 22:22:40 +02:00
int reaped_checks = 0 ;
log_debug_info ( DEBUGL_FUNCTIONS , 0 , " reap_check_results() start \n " ) ;
/* process files in the check result queue */
2017-05-19 23:37:19 +02:00
reaped_checks = process_check_result_queue ( check_result_path ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_FUNCTIONS , 0 , " reap_check_results() reaped %d checks end \n " , reaped_checks ) ;
2017-05-19 22:22:40 +02:00
return OK ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/******************************************************************/
/****************** SERVICE MONITORING FUNCTIONS ******************/
/******************************************************************/
/* executes a scheduled service check */
2019-04-18 17:09:18 +02:00
int run_scheduled_service_check ( service * svc , int check_options , double latency )
{
2017-05-19 22:22:40 +02:00
int result = OK ;
time_t current_time = 0L ;
time_t preferred_time = 0L ;
time_t next_valid_time = 0L ;
int time_is_valid = TRUE ;
2019-04-18 17:09:18 +02:00
if ( svc = = NULL ) {
2017-05-19 22:22:40 +02:00
return ERROR ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
log_debug_info ( DEBUGL_FUNCTIONS , 0 , " run_scheduled_service_check() start \n " ) ;
log_debug_info ( DEBUGL_CHECKS , 0 , " Attempting to run scheduled check of service '%s' on host '%s': check options=%d, latency=%lf \n " , svc - > description , svc - > host_name , check_options , latency ) ;
/*
* reset the next_check_event so we know it ' s
* no longer in the scheduling queue
*/
svc - > next_check_event = NULL ;
/* attempt to run the check */
result = run_async_service_check ( svc , check_options , latency , TRUE , TRUE , & time_is_valid , & preferred_time ) ;
/* an error occurred, so reschedule the check */
2019-04-18 17:09:18 +02:00
if ( result = = ERROR ) {
2017-05-19 22:22:40 +02:00
log_debug_info ( DEBUGL_CHECKS , 1 , " Unable to run scheduled service check at this time \n " ) ;
/* only attempt to (re)schedule checks that should get checked... */
2019-04-18 17:09:18 +02:00
if ( svc - > should_be_scheduled = = TRUE ) {
2017-05-19 22:22:40 +02:00
/* get current time */
time ( & current_time ) ;
/* determine next time we should check the service if needed */
/* if service has no check interval, schedule it again for 5 minutes from now */
2019-04-18 17:09:18 +02:00
if ( current_time > = preferred_time ) {
2017-05-19 22:22:40 +02:00
preferred_time = current_time + ( ( svc - > check_interval < = 0 ) ? 300 : ( svc - > check_interval * interval_length ) ) ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* make sure we rescheduled the next service check at a valid time */
get_next_valid_time ( preferred_time , & next_valid_time , svc - > check_period_ptr ) ;
/*
2017-05-19 23:37:19 +02:00
* If we really can ' t reschedule the service properly , we
* just push the check to preferred_time plus some reasonable
* random value and try again then .
*/
2019-04-18 17:09:18 +02:00
if ( time_is_valid = = FALSE & & check_time_against_period ( next_valid_time , svc - > check_period_ptr ) = = ERROR ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
svc - > next_check = preferred_time + ranged_urand ( 0 , check_window ( svc ) ) ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
logit ( NSLOG_RUNTIME_WARNING , TRUE , " Warning: Check of service '%s' on host '%s' could not be rescheduled properly. Scheduling check for %s... \n " , svc - > description , svc - > host_name , ctime ( & preferred_time ) ) ;
2017-05-19 22:22:40 +02:00
log_debug_info ( DEBUGL_CHECKS , 1 , " Unable to find any valid times to reschedule the next service check! \n " ) ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* this service could be rescheduled... */
else {
svc - > next_check = next_valid_time ;
2019-04-18 17:09:18 +02:00
if ( next_valid_time > preferred_time ) {
2017-05-19 23:37:19 +02:00
/* Next valid time is further in the future because of
* timeperiod constraints . Add a random amount so we
* don ' t get all checks subject to that timeperiod
* constraint scheduled at the same time
*/
2019-08-03 18:28:19 +02:00
svc - > next_check = reschedule_within_timeperiod ( next_valid_time , svc - > check_period_ptr , check_window ( svc ) ) ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
svc - > should_be_scheduled = TRUE ;
log_debug_info ( DEBUGL_CHECKS , 1 , " Rescheduled next service check for %s " , ctime ( & next_valid_time ) ) ;
}
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/*
* reschedule the next service check - unless we couldn ' t
* find a valid next check time , but keep original options
*/
2019-04-18 17:09:18 +02:00
if ( svc - > should_be_scheduled = = TRUE ) {
2017-05-19 22:22:40 +02:00
schedule_service_check ( svc , svc - > next_check , check_options ) ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* update the status log */
update_service_status ( svc , FALSE ) ;
return ERROR ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
return OK ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* forks a child process to run a service check, but does not wait for the service check result */
2019-04-18 17:09:18 +02:00
int run_async_service_check ( service * svc , int check_options , double latency , int scheduled_check , int reschedule_check , int * time_is_valid , time_t * preferred_time )
{
2017-05-19 22:22:40 +02:00
nagios_macros mac ;
char * raw_command = NULL ;
char * processed_command = NULL ;
struct timeval start_time , end_time ;
host * temp_host = NULL ;
double old_latency = 0.0 ;
2017-05-19 23:37:19 +02:00
check_result * cr ;
int runchk_result = OK ;
int macro_options = STRIP_ILLEGAL_MACRO_CHARS | ESCAPE_MACRO_CHARS ;
2017-05-19 22:22:40 +02:00
# ifdef USE_EVENT_BROKER
int neb_result = OK ;
# endif
log_debug_info ( DEBUGL_FUNCTIONS , 0 , " run_async_service_check() \n " ) ;
/* make sure we have something */
2019-04-18 17:09:18 +02:00
if ( svc = = NULL ) {
2017-05-19 22:22:40 +02:00
return ERROR ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* is the service check viable at this time? */
2019-04-18 17:09:18 +02:00
if ( check_service_check_viability ( svc , check_options , time_is_valid , preferred_time ) = = ERROR ) {
2017-05-19 22:22:40 +02:00
return ERROR ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* find the host associated with this service */
2019-04-18 17:09:18 +02:00
if ( ( temp_host = svc - > host_ptr ) = = NULL ) {
2017-05-19 22:22:40 +02:00
return ERROR ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/******** GOOD TO GO FOR A REAL SERVICE CHECK AT THIS POINT ********/
# ifdef USE_EVENT_BROKER
/* initialize start/end times */
start_time . tv_sec = 0L ;
start_time . tv_usec = 0L ;
end_time . tv_sec = 0L ;
end_time . tv_usec = 0L ;
/* send data to event broker */
2017-05-19 23:37:19 +02:00
neb_result = broker_service_check ( NEBTYPE_SERVICECHECK_ASYNC_PRECHECK , NEBFLAG_NONE , NEBATTR_NONE , svc , CHECK_TYPE_ACTIVE , start_time , end_time , svc - > check_command , svc - > latency , 0.0 , 0 , FALSE , 0 , NULL , NULL , NULL ) ;
2017-05-19 22:22:40 +02:00
/* neb module wants to cancel the service check - the check will be rescheduled for a later time by the scheduling logic */
2019-04-18 17:09:18 +02:00
if ( neb_result = = NEBERROR_CALLBACKCANCEL ) {
log_debug_info ( DEBUGL_CHECKS , 0 , " Check of service '%s' on host '%s' (id=%u) was cancelled by a module \n " , svc - > description , svc - > host_name , svc - > id ) ;
if ( preferred_time ) {
2017-05-19 22:22:40 +02:00
* preferred_time + = ( svc - > check_interval * interval_length ) ;
}
2019-04-18 17:09:18 +02:00
return ERROR ;
}
2017-05-19 22:22:40 +02:00
/* neb module wants to override (or cancel) the service check - perhaps it will check the service itself */
/* NOTE: if a module does this, it has to do a lot of the stuff found below to make sure things don't get whacked out of shape! */
/* NOTE: if would be easier for modules to override checks when the NEBTYPE_SERVICECHECK_INITIATE event is called (later) */
2019-04-18 17:09:18 +02:00
if ( neb_result = = NEBERROR_CALLBACKOVERRIDE ) {
log_debug_info ( DEBUGL_CHECKS , 0 , " Check of service '%s' on host '%s' (id=%u) was overridden by a module \n " , svc - > description , svc - > host_name , svc - > id ) ;
2017-05-19 22:22:40 +02:00
return OK ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
# endif
log_debug_info ( DEBUGL_CHECKS , 0 , " Checking service '%s' on host '%s'... \n " , svc - > description , svc - > host_name ) ;
/* clear check options - we don't want old check options retained */
/* only clear check options for scheduled checks - ondemand checks shouldn't affected retained check options */
2019-04-18 17:09:18 +02:00
if ( scheduled_check = = TRUE ) {
2017-05-19 22:22:40 +02:00
svc - > check_options = CHECK_OPTION_NONE ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* update latency for macros, event broker, save old value for later */
old_latency = svc - > latency ;
svc - > latency = latency ;
/* grab the host and service macro variables */
memset ( & mac , 0 , sizeof ( mac ) ) ;
grab_host_macros_r ( & mac , temp_host ) ;
grab_service_macros_r ( & mac , svc ) ;
/* get the raw command line */
2017-05-19 23:37:19 +02:00
get_raw_command_line_r ( & mac , svc - > check_command_ptr , svc - > check_command , & raw_command , macro_options ) ;
2019-04-18 17:09:18 +02:00
if ( raw_command = = NULL ) {
2017-05-19 22:22:40 +02:00
clear_volatile_macros_r ( & mac ) ;
log_debug_info ( DEBUGL_CHECKS , 0 , " Raw check command for service '%s' on host '%s' was NULL - aborting. \n " , svc - > description , svc - > host_name ) ;
2019-04-18 17:09:18 +02:00
if ( preferred_time ) {
2017-05-19 22:22:40 +02:00
* preferred_time + = ( svc - > check_interval * interval_length ) ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
svc - > latency = old_latency ;
return ERROR ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* process any macros contained in the argument */
2017-05-19 23:37:19 +02:00
process_macros_r ( & mac , raw_command , & processed_command , macro_options ) ;
2017-05-19 22:22:40 +02:00
my_free ( raw_command ) ;
2019-04-18 17:09:18 +02:00
if ( processed_command = = NULL ) {
2017-05-19 22:22:40 +02:00
clear_volatile_macros_r ( & mac ) ;
log_debug_info ( DEBUGL_CHECKS , 0 , " Processed check command for service '%s' on host '%s' was NULL - aborting. \n " , svc - > description , svc - > host_name ) ;
2019-04-18 17:09:18 +02:00
if ( preferred_time ) {
2017-05-19 22:22:40 +02:00
* preferred_time + = ( svc - > check_interval * interval_length ) ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
svc - > latency = old_latency ;
return ERROR ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* get the command start time */
gettimeofday ( & start_time , NULL ) ;
2017-05-19 23:37:19 +02:00
cr = calloc ( 1 , sizeof ( * cr ) ) ;
if ( ! cr ) {
clear_volatile_macros_r ( & mac ) ;
svc - > latency = old_latency ;
my_free ( processed_command ) ;
return ERROR ;
}
init_check_result ( cr ) ;
/* save check info */
cr - > object_check_type = SERVICE_CHECK ;
cr - > check_type = CHECK_TYPE_ACTIVE ;
cr - > check_options = check_options ;
cr - > scheduled_check = scheduled_check ;
cr - > reschedule_check = reschedule_check ;
cr - > latency = latency ;
cr - > start_time = start_time ;
cr - > finish_time = start_time ;
cr - > early_timeout = FALSE ;
cr - > exited_ok = TRUE ;
cr - > return_code = STATE_OK ;
cr - > output = NULL ;
cr - > host_name = ( char * ) strdup ( svc - > host_name ) ;
cr - > service_description = ( char * ) strdup ( svc - > description ) ;
2017-05-19 22:22:40 +02:00
# ifdef USE_EVENT_BROKER
/* send data to event broker */
2017-05-19 23:37:19 +02:00
neb_result = broker_service_check ( NEBTYPE_SERVICECHECK_INITIATE , NEBFLAG_NONE , NEBATTR_NONE , svc , CHECK_TYPE_ACTIVE , start_time , end_time , svc - > check_command , svc - > latency , 0.0 , service_check_timeout , FALSE , 0 , processed_command , NULL , cr ) ;
2017-05-19 22:22:40 +02:00
/* neb module wants to override the service check - perhaps it will check the service itself */
2019-04-18 17:09:18 +02:00
if ( neb_result = = NEBERROR_CALLBACKOVERRIDE ) {
2017-05-19 22:22:40 +02:00
clear_volatile_macros_r ( & mac ) ;
svc - > latency = old_latency ;
2017-05-19 23:37:19 +02:00
free_check_result ( cr ) ;
2019-08-03 18:28:19 +02:00
my_free ( cr ) ;
2017-05-19 22:22:40 +02:00
my_free ( processed_command ) ;
return OK ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
# endif
/* reset latency (permanent value will be set later) */
svc - > latency = old_latency ;
2017-05-19 23:37:19 +02:00
/* paw off the check to a worker to run */
runchk_result = wproc_run_check ( cr , processed_command , & mac ) ;
if ( runchk_result = = ERROR ) {
logit ( NSLOG_RUNTIME_ERROR , TRUE , " Unable to run check for service '%s' on host '%s' \n " , svc - > description , svc - > host_name ) ;
}
else {
/* do the book-keeping */
currently_running_service_checks + + ;
svc - > is_executing = TRUE ;
update_check_stats ( ( scheduled_check = = TRUE ) ? ACTIVE_SCHEDULED_SERVICE_CHECK_STATS : ACTIVE_ONDEMAND_SERVICE_CHECK_STATS , start_time . tv_sec ) ;
}
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* free memory */
my_free ( processed_command ) ;
clear_volatile_macros_r ( & mac ) ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
return runchk_result ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* Start of inline helper functions for handle async host/service check result functions
   BH 03 Dec 2017
   The giant monolithic async helper functions were becoming difficult to maintain,
   so I broke them out into smaller manageable chunks where a side-by-side comparison
   of the two functions is a bit more reasonable. This would have been a lot easier with
   dynamic type casting, or even some macro magic - but at least this way it isn't as
   messy as it would have been with macros.
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
   When introducing new inline helper functions, the goal is that both of the functions
   can sit on the same screen at the same time - for obvious reasons (I hope).
   Try to keep them concise. */
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/*
 * Determines the state to use for a service from a check result.
 *
 * svc - service the result belongs to
 * cr  - the check result
 *
 * Returns STATE_UNKNOWN when either argument is NULL. For anything other
 * than an active check, cr->return_code is returned untouched. For active
 * checks the return code is returned as-is unless the check timed out early,
 * did not exit properly, or returned a code outside 0..3 (126 and 127 get
 * their own messages). In those cases the service's plugin_output,
 * long_plugin_output and perf_data are discarded, plugin_output is replaced
 * by an explanatory message, and service_check_timeout_state (timeout) or
 * STATE_CRITICAL (everything else) is returned.
 */
int get_service_check_return_code(service *svc, check_result *cr)
{
	int rc;

	log_debug_info(DEBUGL_FUNCTIONS, 0, " get_service_check_return_code() \n ");

	if (NULL == svc || NULL == cr) {
		return STATE_UNKNOWN;
	}

	/* return now if it's a passive check */
	if (cr->check_type != CHECK_TYPE_ACTIVE) {
		return cr->return_code;
	}

	/* grab the return code */
	rc = cr->return_code;

	/* did the check result have an early timeout? */
	if (cr->early_timeout == TRUE) {

		my_free(svc->plugin_output);
		my_free(svc->long_plugin_output);
		my_free(svc->perf_data);

		logit(NSLOG_RUNTIME_WARNING, TRUE, " Warning: Check of service '%s' on host '%s' timed out after %.3fs! \n ", svc->description, svc->host_name, svc->execution_time);
		asprintf(&svc->plugin_output, " (Service check timed out after %.2lf seconds) ", svc->execution_time);

		rc = service_check_timeout_state;
	}

	/* if there was some error running the command, just skip it (this shouldn't be happening) */
	else if (cr->exited_ok == FALSE) {

		my_free(svc->plugin_output);
		my_free(svc->long_plugin_output);
		my_free(svc->perf_data);

		logit(NSLOG_RUNTIME_WARNING, TRUE, " Warning: Check of service '%s' on host '%s' did not exit properly! \n ", svc->description, svc->host_name);
		svc->plugin_output = (char *)strdup(" (Service check did not exit properly) ");

		rc = STATE_CRITICAL;
	}

	/* 126 is a return code for non-executable */
	else if (cr->return_code == 126) {

		my_free(svc->plugin_output);
		my_free(svc->long_plugin_output);
		my_free(svc->perf_data);

		logit(NSLOG_RUNTIME_WARNING, TRUE, " Warning: Return code of 126 for service '%s' on host '%s' may indicate a non-executable plugin. \n ",
		      svc->description, svc->host_name);
		svc->plugin_output = strdup(" (Return code of 126 is out of bounds. Check if plugin is executable) ");

		rc = STATE_CRITICAL;
	}

	/* 127 is a return code for non-existent */
	else if (cr->return_code == 127) {

		my_free(svc->plugin_output);
		my_free(svc->long_plugin_output);
		my_free(svc->perf_data);

		logit(NSLOG_RUNTIME_WARNING, TRUE, " Warning: Return code of 127 for service '%s' on host '%s' may indicate this plugin doesn't exist. \n ",
		      svc->description, svc->host_name);
		svc->plugin_output = strdup(" (Return code of 127 is out of bounds. Check if plugin exists) ");

		rc = STATE_CRITICAL;
	}

	/* make sure the return code is within bounds */
	else if (cr->return_code < 0 || cr->return_code > 3) {

		/*
		 * Release the previous buffers first, as every other branch does:
		 * asprintf() below overwrites the plugin_output pointer, so the old
		 * string would otherwise leak, and stale long output / perf data
		 * would stay attached to the synthetic message.
		 */
		my_free(svc->plugin_output);
		my_free(svc->long_plugin_output);
		my_free(svc->perf_data);

		logit(NSLOG_RUNTIME_WARNING, TRUE,
		      " Warning: Return code of %d for check of service '%s' on host '%s' was out of bounds. \n ",
		      cr->return_code,
		      svc->description,
		      svc->host_name);

		asprintf(&svc->plugin_output, " (Return code of %d for service '%s' on host '%s' was out of bounds) ",
		         cr->return_code,
		         svc->description,
		         svc->host_name);

		rc = STATE_CRITICAL;
	}

	return rc;
}
2017-05-19 23:37:19 +02:00
2019-04-18 17:09:18 +02:00
/*
 * Determines the state to use for a host from a check result.
 *
 * hst - host the result belongs to
 * cr  - the check result
 *
 * Returns HOST_UNREACHABLE when either argument is NULL, and cr->return_code
 * untouched for anything other than an active check. For active checks a
 * timeout, an improper exit or a return code outside 0..3 discards the host's
 * plugin_output, long_plugin_output and perf_data and installs an explanatory
 * plugin_output instead. The final value is HOST_UP when the host has no
 * check command or when a WARNING code is seen without aggressive host
 * checking; any other code that is not HOST_UP becomes HOST_DOWN.
 */
int get_host_check_return_code(host *hst, check_result *cr)
{
	int rc;

	log_debug_info(DEBUGL_FUNCTIONS, 0, " get_host_check_return_code() \n ");

	if (hst == NULL || cr == NULL) {
		return HOST_UNREACHABLE;
	}

	/* passive results are handed back untouched */
	if (cr->check_type != CHECK_TYPE_ACTIVE) {
		return cr->return_code;
	}

	/* start from the unprocessed return code */
	rc = cr->return_code;

	/* 126 and 127 are covered by the "> 3" test */
	if (cr->early_timeout || cr->exited_ok == FALSE || cr->return_code < 0 || cr->return_code > 3) {

		/* every failure case throws away what the plugin reported */
		my_free(hst->plugin_output);
		my_free(hst->long_plugin_output);
		my_free(hst->perf_data);

		if (cr->early_timeout) {
			/* the check result had an early timeout */
			logit(NSLOG_RUNTIME_WARNING, TRUE, " Warning: Check of host '%s' timed out after %.2lf seconds \n ", hst->name, hst->execution_time);
			asprintf(&hst->plugin_output, " (Host check timed out after %.2lf seconds) ", hst->execution_time);
		}
		else if (cr->exited_ok == FALSE) {
			/* there was some error running the command (this shouldn't be happening) */
			logit(NSLOG_RUNTIME_WARNING, TRUE, " Warning: Check of host '%s' did not exit properly! \n ", hst->name);
			hst->plugin_output = strdup(" (Host check did not exit properly) ");
		}
		else if (cr->return_code == 126) {
			/* 126 is a return code for non-executable */
			logit(NSLOG_RUNTIME_WARNING, TRUE, " Warning: Return code of 126 for host '%s' may indicate a non-executable plugin. \n ",
			      hst->name);
			hst->plugin_output = strdup(" (Return code of 126 is out of bounds. Check if plugin is executable) ");
		}
		else if (cr->return_code == 127) {
			/* 127 is a return code for non-existent */
			logit(NSLOG_RUNTIME_WARNING, TRUE, " Warning: Return code of 127 for host '%s' may indicate this plugin doesn't exist. \n ",
			      hst->name);
			hst->plugin_output = strdup(" (Return code of 127 is out of bounds. Check if plugin exists) ");
		}
		else {
			/* any other return code outside the valid range */
			logit(NSLOG_RUNTIME_WARNING, TRUE,
			      " Warning: Return code of %d for check of host '%s' was out of bounds. \n ",
			      cr->return_code,
			      hst->name);

			asprintf(&hst->plugin_output, " (Return code of %d for host '%s' was out of bounds) ",
			         cr->return_code,
			         hst->name);
		}

		rc = HOST_UNREACHABLE;
	}

	/* a NULL host check command means we should assume the host is UP */
	if (hst->check_command == NULL) {
		my_free(hst->plugin_output);
		hst->plugin_output = strdup(" (Host assumed to be UP) ");
		return HOST_UP;
	}

	/* without aggressive host checking, a WARNING state counts as the host being up */
	if (use_aggressive_host_checking == FALSE && rc == STATE_WARNING) {
		return HOST_UP;
	}

	/* any remaining problem state indicates the host is not UP */
	if (rc != HOST_UP) {
		return HOST_DOWN;
	}

	return rc;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/******************************************************************************
* * * * * * * Calculate check result exec time
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Returns the time in seconds between cr->start_time and cr->finish_time.
 * A NULL cr or a negative difference yields 0.0.
 */
static inline double calculate_check_result_execution_time(check_result *cr)
{
	double sec_begin;
	double sec_end;
	double usec_begin;
	double usec_end;
	double elapsed;

	if (cr == NULL) {
		return 0.0;
	}

	sec_begin = cr->start_time.tv_sec;
	usec_begin = cr->start_time.tv_usec;
	sec_end = cr->finish_time.tv_sec;
	usec_end = cr->finish_time.tv_usec;

	/* whole seconds plus the microsecond remainder scaled down to seconds */
	elapsed = ((sec_end - sec_begin) + ((usec_end - usec_begin) / 1000.0) / 1000.0);

	/* never report a negative duration */
	if (elapsed < 0.0) {
		elapsed = 0.0;
	}

	return elapsed;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/******************************************************************************
* * * * * * * Last state ended
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Copies svc->last_check into the last_time_* field that matches
 * svc->last_state. Nothing is written for an unrecognised last_state.
 */
static inline void record_last_service_state_ended(service *svc)
{
	if (svc->last_state == STATE_OK) {
		svc->last_time_ok = svc->last_check;
	}
	else if (svc->last_state == STATE_WARNING) {
		svc->last_time_warning = svc->last_check;
	}
	else if (svc->last_state == STATE_UNKNOWN) {
		svc->last_time_unknown = svc->last_check;
	}
	else if (svc->last_state == STATE_CRITICAL) {
		svc->last_time_critical = svc->last_check;
	}
}
/*****************************************************************************/
/*
 * Copies hst->last_check into the last_time_* field that matches
 * hst->last_state. Nothing is written for an unrecognised last_state.
 */
static inline void record_last_host_state_ended(host *hst)
{
	if (hst->last_state == HOST_UP) {
		hst->last_time_up = hst->last_check;
	}
	else if (hst->last_state == HOST_DOWN) {
		hst->last_time_down = hst->last_check;
	}
	else if (hst->last_state == HOST_UNREACHABLE) {
		hst->last_time_unreachable = hst->last_check;
	}
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/******************************************************************************
* * * * * * * Logic chunks for when an object is passive
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Handles the passive-specific part of processing a service check result.
 *
 * Returns FALSE (after a debug message) when passive service checks are
 * disabled globally or for this service. Otherwise marks the service's
 * check type as passive, updates the passive service check statistics with
 * the result's start time, logs the check if log_passive_checks is TRUE,
 * and returns TRUE.
 */
static inline int service_is_passive(service *svc, check_result *cr)
{
	/* global switch */
	if (accept_passive_service_checks == FALSE) {
		log_debug_info(DEBUGL_CHECKS, 0, " Discarding passive service check result because passive service checks are disabled globally. \n ");
		return FALSE;
	}

	/* per-service switch */
	if (svc->accept_passive_checks == FALSE) {
		log_debug_info(DEBUGL_CHECKS, 0, " Discarding passive service check result because passive checks are disabled for this service. \n ");
		return FALSE;
	}

	svc->check_type = CHECK_TYPE_PASSIVE;

	/* count this result in the passive check statistics */
	update_check_stats(PASSIVE_SERVICE_CHECK_STATS, cr->start_time.tv_sec);

	if (log_passive_checks != TRUE) {
		return TRUE;
	}

	/* logged here because some results bypass external commands by being dropped in the checkresults dir */
	logit(NSLOG_PASSIVE_CHECK, FALSE, " PASSIVE SERVICE CHECK: %s;%s;%d;%s \n ",
	      svc->host_name,
	      svc->description,
	      svc->current_state,
	      svc->plugin_output);

	return TRUE;
}
/*****************************************************************************/
/*
 * Handles the passive-specific part of processing a host check result.
 *
 * Returns FALSE (after a debug message) when passive host checks are
 * disabled globally or for this host. Otherwise marks the host's check type
 * as passive, updates the passive host check statistics with the result's
 * start time, logs the check if log_passive_checks is TRUE, and returns TRUE.
 */
static inline int host_is_passive(host *hst, check_result *cr)
{
	/* global switch */
	if (accept_passive_host_checks == FALSE) {
		log_debug_info(DEBUGL_CHECKS, 0, " Discarding passive host check result because passive host checks are disabled globally. \n ");
		return FALSE;
	}

	/* per-host switch */
	if (hst->accept_passive_checks == FALSE) {
		log_debug_info(DEBUGL_CHECKS, 0, " Discarding passive host check result because passive checks are disabled for this host. \n ");
		return FALSE;
	}

	hst->check_type = CHECK_TYPE_PASSIVE;

	/* count this result in the passive check statistics */
	update_check_stats(PASSIVE_HOST_CHECK_STATS, cr->start_time.tv_sec);

	if (log_passive_checks != TRUE) {
		return TRUE;
	}

	/*
	 * Logged here because some results bypass external commands by being
	 * dropped in the checkresults dir.
	 * TODO: check whether current_state is the right value to log - it may not be.
	 */
	logit(NSLOG_PASSIVE_CHECK, FALSE, " PASSIVE HOST CHECK: %s;%d;%s \n ",
	      hst->name,
	      hst->current_state,
	      hst->plugin_output);

	return TRUE;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/******************************************************************************
* * * * * * * Logic chunks for when an object is active
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Handles the active-specific part of processing a service check result:
 * marks the service's check type as active, clears its is_executing flag and
 * decrements the running service check counter (never below zero).
 */
static inline void service_is_active(service *svc)
{
	svc->check_type = CHECK_TYPE_ACTIVE;
	svc->is_executing = FALSE;

	/* one fewer service check still out there */
	if (currently_running_service_checks > 0) {
		currently_running_service_checks -= 1;
	}
}
/*****************************************************************************/
/*
 * Handles the active-specific part of processing a host check result:
 * marks the host's check type as active, clears its is_executing flag and
 * decrements the running host check counter (never below zero).
 */
static inline void host_is_active(host *hst)
{
	hst->check_type = CHECK_TYPE_ACTIVE;
	hst->is_executing = FALSE;

	/* one fewer host check still out there */
	if (currently_running_host_checks > 0) {
		currently_running_host_checks -= 1;
	}
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/******************************************************************************
* * * * * * * Generic debugging functions
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Write two debug-log entries describing an asynchronous service check
 * result that is about to be handled.
 *
 * The first entry names the check type (ACTIVE/PASSIVE), the service, its
 * host, the result source, and the service's current and last hard state.
 * The second entry dumps the check options, scheduling flags, exit status,
 * return code and raw plugin output carried by the result.
 *
 * svc  service the result belongs to; must not be NULL
 * cr   check result being handled; must not be NULL (it is dereferenced
 *      unconditionally)
 */
static inline void debug_async_service(service *svc, check_result *cr)
{
    const char *type_label = (cr->check_type == CHECK_TYPE_ACTIVE) ? " ACTIVE " : " PASSIVE ";

    /* The result may carry no output at all. Passing a NULL pointer to a
       %s conversion is undefined, so substitute a placeholder. (The old
       code tested cr itself here, after cr had already been dereferenced.) */
    const char *output = (cr->output == NULL) ? " NULL " : cr->output;

    log_debug_info(DEBUGL_CHECKS, 0, " ** Handling %s async check result for service '%s' on host '%s' from '%s'... current state %d last_hard_state %d \n ",
                   type_label,
                   svc->description,
                   svc->host_name,
                   check_result_source(cr),
                   svc->current_state,
                   svc->last_hard_state);

    log_debug_info(DEBUGL_CHECKS, 1,
                   " * OPTIONS: %d, SCHEDULED: %d, RESCHEDULE: %d, EXITED OK: %d, RETURN CODE: %d, OUTPUT: \n %s \n ",
                   cr->check_options,
                   cr->scheduled_check,
                   cr->reschedule_check,
                   cr->exited_ok,
                   cr->return_code,
                   output);
}
/*****************************************************************************/
/*
 * Write two debug-log entries describing an asynchronous host check
 * result that is about to be handled.
 *
 * The first entry names the check type (ACTIVE/PASSIVE), the host and the
 * result source. The second entry dumps the check options, scheduling
 * flags, exit status, return code and raw plugin output carried by the
 * result.
 *
 * hst  host the result belongs to; must not be NULL
 * cr   check result being handled; must not be NULL (it is dereferenced
 *      unconditionally)
 */
static inline void debug_async_host(host *hst, check_result *cr)
{
    const char *type_label = (cr->check_type == CHECK_TYPE_ACTIVE) ? " ACTIVE " : " PASSIVE ";

    /* The result may carry no output at all. Passing a NULL pointer to a
       %s conversion is undefined, so substitute a placeholder. (The old
       code tested cr itself here, after cr had already been dereferenced.) */
    const char *output = (cr->output == NULL) ? " NULL " : cr->output;

    log_debug_info(DEBUGL_CHECKS, 0, " ** Handling %s async check result for host '%s' from '%s'... \n ",
                   type_label,
                   hst->name,
                   check_result_source(cr));

    log_debug_info(DEBUGL_CHECKS, 1,
                   " * OPTIONS: %d, SCHEDULED: %d, RESCHEDULE: %d, EXITED OK: %d, RETURN CODE: %d, OUTPUT: \n %s \n ",
                   cr->check_options,
                   cr->scheduled_check,
                   cr->reschedule_check,
                   cr->exited_ok,
                   cr->return_code,
                   output);
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/******************************************************************************
* * * * * * * Logic chunks for checking object freshness
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Freshness bookkeeping for a service check result.
 *
 * Only acts when the result carries CHECK_OPTION_FRESHNESS_CHECK. When a
 * service goes stale a forced check is started to freshen it; a passive
 * result can arrive between the start of that forced check and the moment
 * its result is processed, which makes the service fresh again. If the
 * service is found to be fresh, a debug message is logged and the
 * freshening flag is left untouched; otherwise the flag is cleared.
 *
 * NOTE(review): this helper returns void, so the early return below only
 * leaves this function; it gives the caller no signal to discard the
 * result.
 *
 * svc           service the result belongs to
 * cr            check result being handled
 * current_time  time used for the freshness test
 */
static inline void service_fresh_check(service *svc, check_result *cr, time_t current_time)
{
    /* not a freshness check: nothing to do */
    if (!(cr->check_options & CHECK_OPTION_FRESHNESS_CHECK)) {
        return;
    }

    /* the service became fresh in the meantime (race described above) */
    if (is_service_result_fresh(svc, current_time, FALSE) == TRUE) {
        log_debug_info(DEBUGL_CHECKS, 0, " Discarding service freshness check result because the service is currently fresh (race condition avoided). \n ");
        return;
    }

    /* still stale: the flag set when staleness was detected can be cleared */
    svc->is_being_freshened = FALSE;
}
/*****************************************************************************/
/*
 * Freshness bookkeeping for a host check result.
 *
 * Only acts when the result carries CHECK_OPTION_FRESHNESS_CHECK. When a
 * host goes stale a forced check is started to freshen it; a passive
 * result can arrive between the start of that forced check and the moment
 * its result is processed, which makes the host fresh again. If the host
 * is found to be fresh, a debug message is logged and the freshening flag
 * is left untouched; otherwise the flag is cleared.
 *
 * NOTE(review): this helper returns void, so the early return below only
 * leaves this function; it gives the caller no signal to discard the
 * result.
 *
 * hst           host the result belongs to
 * cr            check result being handled
 * current_time  time used for the freshness test
 */
static inline void host_fresh_check(host *hst, check_result *cr, time_t current_time)
{
    /* not a freshness check: nothing to do */
    if (!(cr->check_options & CHECK_OPTION_FRESHNESS_CHECK)) {
        return;
    }

    /* the host became fresh in the meantime (race described above) */
    if (is_host_result_fresh(hst, current_time, FALSE) == TRUE) {
        log_debug_info(DEBUGL_CHECKS, 0, " Discarding host freshness check result because the host is currently fresh (race condition avoided). \n ");
        return;
    }

    /* still stale: the flag set when staleness was detected can be cleared */
    hst->is_being_freshened = FALSE;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/******************************************************************************
* * * * * * * Logic chunks for setting some of the initial flags , etc .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * First stage of applying a check result to a service.
 *
 * Copies the previous short plugin output for the caller, replaces the
 * service's short output / long output / perf data with the values parsed
 * from the result, and then copies latency, execution time, check time and
 * reschedule flag from the result. Finally the previous state is saved in
 * last_state and current_state is set from the result's return code.
 *
 * svc                service being updated
 * cr                 check result being applied
 * old_plugin_output  receives a strdup()'d copy of the previous short
 *                    output; left unmodified if the service had none
 */
static inline void service_initial_handling(service *svc, check_result *cr, char **old_plugin_output)
{
    char *cursor = NULL;    /* scratch pointer required by replace_semicolons() */
    const char *long_text = NULL;
    const char *perf_text = NULL;

    /* hand the caller a copy of the old short output before it is freed */
    if (svc->plugin_output != NULL) {
        *old_plugin_output = (char *)strdup(svc->plugin_output);
    }

    /* discard everything left over from the previous check */
    my_free(svc->plugin_output);
    my_free(svc->long_plugin_output);
    my_free(svc->perf_data);

    /* split the raw output into short output, long output and perf data */
    parse_check_output(cr->output, &svc->plugin_output, &svc->long_plugin_output, &svc->perf_data, TRUE, FALSE);

    if (svc->plugin_output != NULL) {
        /* semicolons in the short output are turned into colons */
        replace_semicolons(svc->plugin_output, cursor);
    }
    else {
        /* the short output must never be NULL */
        svc->plugin_output = (char *)strdup(" (No output returned from plugin) ");
    }

    long_text = (svc->long_plugin_output == NULL) ? " NULL " : svc->long_plugin_output;
    perf_text = (svc->perf_data == NULL) ? " NULL " : svc->perf_data;

    log_debug_info(DEBUGL_CHECKS, 2,
                   " Parsing check output... \n "
                   " Short Output: %s \n "
                   " Long Output: %s \n "
                   " Perf Data: %s \n ",
                   svc->plugin_output,
                   long_text,
                   perf_text);

    /* timing and scheduling data come straight from the result */
    svc->latency = cr->latency;
    svc->execution_time = calculate_check_result_execution_time(cr);
    svc->last_check = cr->start_time.tv_sec;
    svc->should_be_scheduled = cr->reschedule_check;

    /* remember the previous state before overwriting it with the new one */
    svc->last_state = svc->current_state;
    svc->current_state = get_service_check_return_code(svc, cr);
}
/*****************************************************************************/
/*
 * First stage of applying a check result to a host.
 *
 * Copies the previous short plugin output for the caller, replaces the
 * host's short output / long output / perf data with the values parsed
 * from the result, and then copies latency, execution time, check time and
 * reschedule flag from the result. Finally the previous state is saved in
 * last_state and current_state is set from the result's return code.
 *
 * hst                host being updated
 * cr                 check result being applied
 * old_plugin_output  receives a strdup()'d copy of the previous short
 *                    output; left unmodified if the host had none
 */
static inline void host_initial_handling(host *hst, check_result *cr, char **old_plugin_output)
{
    char *cursor = NULL;    /* scratch pointer required by replace_semicolons() */
    const char *long_text = NULL;
    const char *perf_text = NULL;

    /* hand the caller a copy of the old short output before it is freed */
    if (hst->plugin_output != NULL) {
        *old_plugin_output = (char *)strdup(hst->plugin_output);
    }

    /* discard everything left over from the previous check */
    my_free(hst->plugin_output);
    my_free(hst->long_plugin_output);
    my_free(hst->perf_data);

    /* split the raw output into short output, long output and perf data */
    parse_check_output(cr->output, &hst->plugin_output, &hst->long_plugin_output, &hst->perf_data, TRUE, FALSE);

    if (hst->plugin_output != NULL) {
        /* semicolons in the short output are turned into colons */
        replace_semicolons(hst->plugin_output, cursor);
    }
    else {
        /* the short output must never be NULL */
        hst->plugin_output = (char *)strdup(" (No output returned from host check) ");
    }

    long_text = (hst->long_plugin_output == NULL) ? " NULL " : hst->long_plugin_output;
    perf_text = (hst->perf_data == NULL) ? " NULL " : hst->perf_data;

    log_debug_info(DEBUGL_CHECKS, 2,
                   " Parsing check output... \n "
                   " Short Output: %s \n "
                   " Long Output: %s \n "
                   " Perf Data: %s \n ",
                   hst->plugin_output,
                   long_text,
                   perf_text);

    /* timing and scheduling data come straight from the result */
    hst->latency = cr->latency;
    hst->execution_time = calculate_check_result_execution_time(cr);
    hst->last_check = cr->start_time.tv_sec;
    hst->should_be_scheduled = cr->reschedule_check;

    /* remember the previous state before overwriting it with the new one */
    hst->last_state = hst->current_state;
    hst->current_state = get_host_check_return_code(hst, cr);
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/******************************************************************************
* * * * * * * Logic for when an object has a notable change
* * * * * * * Removes acknowledgement , advances event_id , etc .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Apply the side effects of a service state change and/or hard state
 * change.
 *
 * On a state change: stamps last_state_change, advances the event id,
 * allocates a new problem id if the previous state was OK, and marks the
 * state SOFT. On a hard state change: stamps both change times, records
 * the current state as last_hard_state and marks the state HARD (this runs
 * second, so HARD wins when both flags are set).
 *
 * If either happened: pending flexible downtime is checked, notification
 * times and suppression are reset, an acknowledgement is removed when
 * appropriate, the service is flagged for scheduling, and *log_event and
 * *handle_event are set to TRUE. If neither happened the outputs are not
 * touched.
 *
 * svc                service being updated
 * state_change       TRUE if the state differs from the previous check
 * hard_state_change  TRUE if a hard state change occurred
 * log_event          out: set to TRUE when something changed
 * handle_event       out: set to TRUE when something changed
 */
static inline void service_state_or_hard_state_type_change(service *svc, int state_change, int hard_state_change, int *log_event, int *handle_event)
{
    const int something_changed = (state_change == TRUE || hard_state_change == TRUE);
    int drop_normal_ack = FALSE;
    int drop_sticky_ack = FALSE;

    if (state_change == TRUE) {

        svc->last_state_change = svc->last_check;

        /* every state change gets a fresh event id */
        svc->last_event_id = svc->current_event_id;
        svc->current_event_id = next_event_id++;

        /* leaving OK starts a new problem; last_problem_id is deliberately
           not reset here, or it would be zero when the next problem starts */
        if (svc->last_state == STATE_OK) {
            svc->current_problem_id = next_problem_id++;
        }

        svc->state_type = SOFT_STATE;
    }

    if (hard_state_change == TRUE) {

        svc->last_hard_state_change = svc->last_check;
        svc->last_state_change = svc->last_check;
        svc->last_hard_state = svc->current_state;
        svc->state_type = HARD_STATE;
    }

    if (!something_changed) {
        return;
    }

    /* a flexible downtime may be waiting for exactly this change */
    if (svc->pending_flex_downtime > 0) {
        check_pending_flex_service_downtime(svc);
    }

    /* start notifications over: clear times and the suppression flag */
    svc->last_notification = (time_t)0;
    svc->next_notification = (time_t)0;
    svc->no_more_notifications = FALSE;

    /* a normal ack is dropped on a state change or when there was no hard
       state change; a sticky ack is dropped only once the service is OK */
    drop_normal_ack = (svc->acknowledgement_type == ACKNOWLEDGEMENT_NORMAL
                       && (state_change == TRUE || hard_state_change == FALSE));
    drop_sticky_ack = (svc->acknowledgement_type == ACKNOWLEDGEMENT_STICKY
                       && svc->current_state == STATE_OK);

    if (drop_normal_ack || drop_sticky_ack) {

        /* clear the ack and remove its non-persistent comments */
        svc->problem_has_been_acknowledged = FALSE;
        svc->acknowledgement_type = ACKNOWLEDGEMENT_NONE;
        delete_service_acknowledgement_comments(svc);
    }

    svc->should_be_scheduled = TRUE;

    *log_event = TRUE;
    *handle_event = TRUE;
}
/*****************************************************************************/
/*
 * Apply the side effects of a host state change and/or hard state change.
 *
 * For passive checks with passive_host_checks_are_soft == FALSE the state
 * is forced HARD, and a state change is additionally treated as a hard
 * state change with current_attempt reset to 1.
 *
 * On a state change: stamps last_state_change, advances the event id,
 * allocates a new problem id if the previous state was UP (otherwise moves
 * the current problem id to last_problem_id and zeroes it), and marks the
 * state SOFT. On a hard state change: stamps last_hard_state_change,
 * records the current state as last_hard_state, marks the state HARD and
 * sets *send_notification.
 *
 * If either happened: pending flexible downtime is checked, notification
 * times and suppression are reset, an acknowledgement is removed when
 * appropriate, the host is flagged for scheduling, and *log_event and
 * *handle_event are set to TRUE. Otherwise those outputs are not touched.
 *
 * hst                host being updated
 * state_change       TRUE if the state differs from the previous check
 * hard_state_change  TRUE if a hard state change occurred (may be forced
 *                    on locally for passive checks, see above)
 * log_event          out: set to TRUE when something changed
 * handle_event       out: set to TRUE when something changed
 * send_notification  out: set to TRUE on a hard state change
 */
static inline void host_state_or_hard_state_type_change(host *hst, int state_change, int hard_state_change, int *log_event, int *handle_event, int *send_notification)
{
    int something_changed = FALSE;
    int drop_normal_ack = FALSE;
    int drop_sticky_ack = FALSE;

    /* passive results are simulated as hard when configured that way */
    if (hst->check_type == CHECK_TYPE_PASSIVE && passive_host_checks_are_soft == FALSE) {
        log_debug_info(DEBUGL_CHECKS, 2, " Check type passive and passive host checks aren't false \n ");
        if (state_change == TRUE) {
            hst->current_attempt = 1;
            hard_state_change = TRUE;
        }
        hst->state_type = HARD_STATE;
    }

    if (state_change == TRUE) {

        hst->last_state_change = hst->last_check;

        /* every state change gets a fresh event id */
        hst->last_event_id = hst->current_event_id;
        hst->current_event_id = next_event_id++;

        if (hst->last_state == HOST_UP) {
            /* leaving UP starts a new problem; last_problem_id is
               deliberately not reset here, or it would be zero when the
               next problem starts */
            hst->current_problem_id = next_problem_id++;
        }
        else {
            /* previous state was not UP: retire the current problem id.
               NOTE(review): this also runs for problem-to-problem
               transitions (e.g. DOWN to UNREACHABLE), not only recoveries
               - confirm that is intended. */
            hst->last_problem_id = hst->current_problem_id;
            hst->current_problem_id = 0L;
        }

        hst->state_type = SOFT_STATE;

        something_changed = TRUE;
    }

    if (hard_state_change == TRUE) {

        hst->last_hard_state_change = hst->last_check;
        hst->last_hard_state = hst->current_state;
        hst->state_type = HARD_STATE;

        something_changed = TRUE;

        /* set here for hosts (the service counterpart does not) because
           it is easily missed when a passive check arrives with
           passive_host_checks_are_soft == FALSE */
        *send_notification = TRUE;
    }

    if (!something_changed) {
        return;
    }

    /* a flexible downtime may be waiting for exactly this change */
    check_pending_flex_host_downtime(hst);

    /* start notifications over: clear times and the suppression flag */
    hst->last_notification = (time_t)0;
    hst->next_notification = (time_t)0;
    hst->no_more_notifications = FALSE;

    /* a normal ack is dropped on a state change or when there was no hard
       state change; a sticky ack is dropped only once the state equals
       STATE_OK (the constant the original code compares against here) */
    drop_normal_ack = (hst->acknowledgement_type == ACKNOWLEDGEMENT_NORMAL
                       && (state_change == TRUE || hard_state_change == FALSE));
    drop_sticky_ack = (hst->acknowledgement_type == ACKNOWLEDGEMENT_STICKY
                       && hst->current_state == STATE_OK);

    if (drop_normal_ack || drop_sticky_ack) {

        /* clear the ack and remove its non-persistent comments */
        hst->problem_has_been_acknowledged = FALSE;
        hst->acknowledgement_type = ACKNOWLEDGEMENT_NONE;
        delete_host_acknowledgement_comments(hst);
    }

    hst->should_be_scheduled = TRUE;

    *log_event = TRUE;
    *handle_event = TRUE;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/******************************************************************************
* * * * * * * Logic for setting default state change times , etc .
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Give the state-change timestamps of a service and its host a starting
 * value.
 *
 * Any of the four timestamps (soft and hard, service and host) that is
 * still zero is set to the service's last_check time; timestamps that are
 * already set are left alone.
 *
 * svc  service whose last_check supplies the value
 * hst  host associated with the service
 */
static inline void initialize_last_service_state_change_times(service *svc, host *hst)
{
    /* host timestamps default to the *service's* last check time */
    if (!hst->last_state_change) {
        hst->last_state_change = svc->last_check;
    }
    if (!hst->last_hard_state_change) {
        hst->last_hard_state_change = svc->last_check;
    }

    /* service timestamps */
    if (!svc->last_state_change) {
        svc->last_state_change = svc->last_check;
    }
    if (!svc->last_hard_state_change) {
        svc->last_hard_state_change = svc->last_check;
    }
}
/*****************************************************************************/
/*
 * Give a host's state-change timestamps a starting value.
 *
 * last_state_change and last_hard_state_change are each set to the host's
 * last_check time if they are still zero; otherwise they are left alone.
 *
 * hst  host to initialise
 */
static inline void initialize_last_host_state_change_times(host *hst)
{
    if (!hst->last_hard_state_change) {
        hst->last_hard_state_change = hst->last_check;
    }
    if (!hst->last_state_change) {
        hst->last_state_change = hst->last_check;
    }
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/******************************************************************************
* * * * * * * Logic chunks propagating checks to host parents / children
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Queue dependency checks for selected immediate parents of a host.
 *
 * Walks hst->parent_hosts and schedules a check (with
 * CHECK_OPTION_DEPENDENCY_CHECK) for every parent selected by
 * parent_host_up:
 *   TRUE   - parents whose current state is HOST_UP
 *   FALSE  - parents whose current state is not HOST_UP
 * Any other value selects no parent.
 *
 * hst             host whose parents are examined
 * parent_host_up  selector described above
 * current_time    time passed to schedule_host_check()
 */
static inline void host_propagate_checks_to_immediate_parents(host *hst, int parent_host_up, time_t current_time)
{
    hostsmember *member = NULL;

    log_debug_info(DEBUGL_CHECKS, 1, " Propagating checks to parent host(s)... \n ");

    for (member = hst->parent_hosts; member != NULL; member = member->next) {
        host *parent = member->host_ptr;
        int want_up_and_is_up = (parent_host_up == TRUE && parent->current_state == HOST_UP);
        int want_down_and_is_down = (parent_host_up == FALSE && parent->current_state != HOST_UP);

        if (!want_up_and_is_up && !want_down_and_is_down) {
            continue;
        }

        log_debug_info(DEBUGL_CHECKS, 1, " Check of parent host '%s' queued. \n ", parent->name);
        schedule_host_check(parent, current_time, CHECK_OPTION_DEPENDENCY_CHECK);
    }
}
/*
 * Queue dependency checks for selected immediate children of a host.
 *
 * Walks hst->child_hosts and schedules a check (with
 * CHECK_OPTION_DEPENDENCY_CHECK) for every child that matches at least
 * one enabled selector:
 *   children_none_up == TRUE           - child is not HOST_UP
 *   children_none_unreachable == TRUE  - child is not HOST_UNREACHABLE
 *
 * hst                        host whose children are examined
 * children_none_up           enable the "not UP" selector
 * children_none_unreachable  enable the "not UNREACHABLE" selector
 * current_time               time passed to schedule_host_check()
 */
static inline void host_propagate_checks_to_immediate_children(host *hst, int children_none_up, int children_none_unreachable, time_t current_time)
{
    hostsmember *member = NULL;

    log_debug_info(DEBUGL_CHECKS, 1, " Propagating checks to child host(s)... \n ");

    for (member = hst->child_hosts; member != NULL; member = member->next) {
        host *child = member->host_ptr;
        int matches_not_up = (children_none_up == TRUE && child->current_state != HOST_UP);
        int matches_not_unreachable = (children_none_unreachable == TRUE && child->current_state != HOST_UNREACHABLE);

        if (!matches_not_up && !matches_not_unreachable) {
            continue;
        }

        log_debug_info(DEBUGL_CHECKS, 1, " Check of child host '%s' queued. \n ", child->name);
        schedule_host_check(child, current_time, CHECK_OPTION_DEPENDENCY_CHECK);
    }
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/******************************************************************************
* * * * * * * Logic chunks propagating dependency checks
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Queue predictive checks of the services this service depends on.
 *
 * Runs only when the service is on its second-to-last attempt
 * (current_attempt == max_attempts - 1) and both execute_service_checks
 * and enable_predictive_service_dependency_checks are TRUE. A notification
 * may be sent soon, so the masters are checked now to keep the dependency
 * logic accurate.
 *
 * The execution dependencies are walked first, then the notification
 * dependencies. For each entry whose dependent service is svc and whose
 * master is set, the master is scheduled with
 * CHECK_OPTION_DEPENDENCY_CHECK. A master that appears in both lists is
 * scheduled once per appearance.
 *
 * svc           service whose dependencies are examined
 * current_time  time passed to schedule_service_check()
 */
static inline void service_propagate_dependency_checks(service *svc, time_t current_time)
{
    int pass = 0;

    if (svc->current_attempt != (svc->max_attempts - 1)
        || execute_service_checks != TRUE
        || enable_predictive_service_dependency_checks != TRUE) {
        return;
    }

    log_debug_info(DEBUGL_CHECKS, 1, " Propagating predictive dependency checks to services this one depends on... \n ");

    /* pass 0: execution dependencies, pass 1: notification dependencies */
    for (pass = 0; pass < 2; pass++) {
        objectlist *node = (pass == 0) ? svc->exec_deps : svc->notify_deps;

        for (; node; node = node->next) {
            servicedependency *dep = (servicedependency *)node->object_ptr;
            service *master = NULL;

            /* only entries where svc is the dependent side and a master exists */
            if (dep->dependent_service_ptr != svc || dep->master_service_ptr == NULL) {
                continue;
            }

            master = (service *)dep->master_service_ptr;

            log_debug_info(DEBUGL_CHECKS, 2, " Predictive check of service '%s' on host '%s' queued. \n ", master->description, master->host_name);
            schedule_service_check(master, current_time, CHECK_OPTION_DEPENDENCY_CHECK);
        }
    }
}
/*****************************************************************************/
/*
 * Queue predictive checks of the hosts this host depends on.
 *
 * Runs only when the host is on its second-to-last attempt
 * (current_attempt == max_attempts - 1) and both execute_host_checks and
 * enable_predictive_host_dependency_checks are TRUE. This helps ensure the
 * dependency checks are accurate before it comes time to notify.
 *
 * The notification dependencies are walked first, then the execution
 * dependencies. For each entry whose dependent host is hst and whose
 * master is set, the master is scheduled with CHECK_OPTION_NONE. A master
 * that appears in both lists is scheduled once per appearance.
 *
 * hst           host whose dependencies are examined
 * current_time  time passed to schedule_host_check()
 */
static inline void host_propagate_dependency_checks(host *hst, time_t current_time)
{
    int pass = 0;

    if (hst->current_attempt != (hst->max_attempts - 1)
        || execute_host_checks != TRUE
        || enable_predictive_host_dependency_checks != TRUE) {
        return;
    }

    log_debug_info(DEBUGL_CHECKS, 1, " Propagating predictive dependency checks to hosts this one depends on... \n ");

    /* pass 0: notification dependencies, pass 1: execution dependencies */
    for (pass = 0; pass < 2; pass++) {
        objectlist *node = (pass == 0) ? hst->notify_deps : hst->exec_deps;

        for (; node; node = node->next) {
            hostdependency *dep = (hostdependency *)node->object_ptr;
            host *master = NULL;

            /* only entries where hst is the dependent side and a master exists */
            if (dep->dependent_host_ptr != hst || dep->master_host_ptr == NULL) {
                continue;
            }

            master = (host *)dep->master_host_ptr;

            log_debug_info(DEBUGL_CHECKS, 1, " Check of host '%s' queued. \n ", master->name);
            schedule_host_check(master, current_time, CHECK_OPTION_NONE);
        }
    }
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/******************************************************************************
* * * * * * * One stop shop for determining if check_result data is valid
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
/*
 * Sanity-check the inputs of a check-result handler.
 *
 * hst  host the result applies to (for a service, its associated host)
 * cr   check result to be handled
 *
 * Returns TRUE when both pointers are non-NULL. Otherwise logs a debug
 * message for the first missing one (host is tested first) and returns
 * FALSE.
 */
static inline int is_valid_check_result_data(host *hst, check_result *cr)
{
    int valid = TRUE;

    if (hst == NULL) {
        log_debug_info(DEBUGL_CHECKS, 2, " No host associated with service, bailing! \n ");
        valid = FALSE;
    }
    else if (cr == NULL) {
        log_debug_info(DEBUGL_CHECKS, 2, " No check result specified, bailing! \n ");
        valid = FALSE;
    }

    return valid;
}
/******************************************************************************
* * * * * * * * * * Fin . * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* handles asynchronous service check results */
int handle_async_service_check_result ( service * svc , check_result * cr )
{
time_t current_time = 0L ;
time_t next_check = 0L ;
time_t preferred_time = 0L ;
time_t next_valid_time = 0L ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
int state_change = FALSE ;
int hard_state_change = FALSE ;
int send_notification = FALSE ;
int handle_event = FALSE ;
int log_event = FALSE ;
int check_host = FALSE ;
int update_host_stats = FALSE ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
char * old_plugin_output = NULL ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
host * hst = NULL ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_FUNCTIONS , 0 , " handle_async_service_check_result() \n " ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
if ( svc = = NULL ) {
log_debug_info ( DEBUGL_CHECKS , 2 , " No service specified, bailing! \n " ) ;
return ERROR ;
}
hst = svc - > host_ptr ;
if ( is_valid_check_result_data ( hst , cr ) = = FALSE ) {
return ERROR ;
}
2017-05-19 22:22:40 +02:00
2019-08-03 18:28:19 +02:00
int new_last_hard_state = svc - > last_hard_state ;
2019-04-18 17:09:18 +02:00
if ( cr - > check_type = = CHECK_TYPE_PASSIVE ) {
if ( service_is_passive ( svc , cr ) = = FALSE ) {
return ERROR ;
}
}
else {
service_is_active ( svc ) ;
2017-05-19 22:22:40 +02:00
}
2019-04-18 17:09:18 +02:00
time ( & current_time ) ;
initialize_last_service_state_change_times ( svc , hst ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
debug_async_service ( svc , cr ) ;
service_fresh_check ( svc , cr , current_time ) ;
service_initial_handling ( svc , cr , & old_plugin_output ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* reschedule the next check at the regular interval - may be overridden */
next_check = ( time_t ) ( svc - > last_check + ( svc - > check_interval * interval_length ) ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/***********************************************/
/********** SCHEDULE SERVICE CHECK LOGIC **********/
/***********************************************/
if ( svc - > current_state = = STATE_OK ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 1 , " Service is OK \n " ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
if ( hst - > has_been_checked = = FALSE ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 1 , " Host has not been checked yet \n " ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
if ( hst - > next_check = = 0L
| | hst - > initial_state ! = HOST_UP
| | hst - > next_check < hst - > check_interval * interval_length + current_time ) {
log_debug_info ( DEBUGL_CHECKS , 2 , " Service ok, but host hasn't been checked recently, scheduling host check \n " ) ;
check_host = TRUE ;
2017-05-19 23:37:19 +02:00
}
}
2019-04-18 17:09:18 +02:00
else if ( hst - > current_state ! = HOST_UP ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 1 , " Host is NOT UP, so we'll check it to see if it recovered... \n " ) ;
if ( svc - > last_state = = STATE_OK
& & hst - > has_been_checked = = TRUE
& & current_time - hst - > last_check < cached_host_check_horizon ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 2 , " Service ok, but host isn't up (and has been checked). Using cached host data. \n " ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
update_host_stats = TRUE ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
} else {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 2 , " Service ok, but host isn't up and cached data isn't valid here, scheduling host check \n " ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
check_host = TRUE ;
2017-05-19 22:22:40 +02:00
}
2019-04-18 17:09:18 +02:00
svc - > host_problem_at_last_check = TRUE ;
}
}
else {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 1 , " Service is in a non-OK state! \n " ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
if ( hst - > current_state = = HOST_UP ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 1 , " Host is currently UP, so we'll recheck its state to make sure... \n " ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
if ( execute_host_checks = = TRUE
& & svc - > last_state ! = svc - > current_state
& & hst - > last_check + cached_host_check_horizon < current_time ) {
log_debug_info ( DEBUGL_CHECKS , 2 , " Service not ok, host is up but cached data isn't valid, scheduling host check \n " ) ;
check_host = TRUE ;
} else {
log_debug_info ( DEBUGL_CHECKS , 2 , " Service not ok, host is up, using cached host data \n " ) ;
update_host_stats = TRUE ;
2017-05-19 22:22:40 +02:00
}
2017-05-19 23:37:19 +02:00
2019-04-18 17:09:18 +02:00
/* give the service a chance to recover */
if ( svc - > host_problem_at_last_check = = TRUE
& & svc - > state_type = = SOFT_STATE ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 2 , " Service had a host problem at last check and is SOFT, so we'll reset current_attempt to 1 to give it a chance \n " ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
svc - > current_attempt = 1 ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
svc - > host_problem_at_last_check = FALSE ;
2017-05-19 22:22:40 +02:00
}
2019-04-18 17:09:18 +02:00
else {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 1 , " Host is currently not UP... \n " ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
if ( execute_host_checks = = FALSE | | svc - > current_state = = svc - > last_state ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 2 , " Host checks aren't enabled, so send a notification \n " ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* fake a host check */
if ( hst - > has_been_checked = = FALSE ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 2 , " Host has never been checked, fake a host check \n " ) ;
hst - > has_been_checked = TRUE ;
hst - > last_check = svc - > last_check ;
}
/* possibly re-send host notifications... */
host_notification ( hst , NOTIFICATION_NORMAL , NULL , NULL , NOTIFICATION_OPTION_NONE ) ;
}
svc - > host_problem_at_last_check = TRUE ;
}
2017-05-19 22:22:40 +02:00
}
2019-04-18 17:09:18 +02:00
/* service hard state change, because if host is down/unreachable
the docs say we have a hard state change ( but no notification ) */
2019-08-03 18:28:19 +02:00
if ( hst - > current_state ! = HOST_UP & & new_last_hard_state ! = svc - > current_state ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 2 , " Host is down or unreachable, forcing service hard state change \n " ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
hard_state_change = TRUE ;
svc - > state_type = HARD_STATE ;
2019-08-03 18:28:19 +02:00
new_last_hard_state = svc - > current_state ;
svc - > current_attempt = svc - > max_attempts ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
if ( check_host = = TRUE ) {
schedule_host_check ( hst , current_time , CHECK_OPTION_DEPENDENCY_CHECK ) ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
if ( update_host_stats = = TRUE ) {
update_check_stats ( ACTIVE_ONDEMAND_HOST_CHECK_STATS , current_time ) ;
update_check_stats ( ACTIVE_CACHED_HOST_CHECK_STATS , current_time ) ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/**************************************/
/******* SERVICE CHECK OK LOGIC *******/
/**************************************/
if ( svc - > last_state = = STATE_OK ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 1 , " Service was OK at last check. \n " ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/***** SERVICE IS STILL OK *****/
if ( svc - > current_state = = STATE_OK ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 1 , " Service is still OK. \n " ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
svc - > state_type = HARD_STATE ;
svc - > current_attempt = 1 ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/***** SERVICE IS NOW IN PROBLEM STATE *****/
else {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 1 , " Service is a non-OK state (%s)! \n " , service_state_name ( svc - > current_state ) ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* skip this service check if host is down/unreachable and state change happened */
if ( svc - > host_problem_at_last_check = = FALSE & & hard_state_change = = FALSE ) {
svc - > state_type = SOFT_STATE ;
2017-05-19 22:22:40 +02:00
}
2019-04-18 17:09:18 +02:00
svc - > current_attempt = 1 ;
2017-05-19 23:37:19 +02:00
2019-04-18 17:09:18 +02:00
handle_event = TRUE ;
2017-05-19 23:37:19 +02:00
}
2017-05-19 22:22:40 +02:00
}
2019-04-18 17:09:18 +02:00
/*******************************************/
/******* SERVICE CHECK PROBLEM LOGIC *******/
/*******************************************/
else {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 1 , " Service was NOT OK at last check (%s). \n " , service_state_name ( svc - > last_state ) ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/***** SERVICE IS NOW OK *****/
if ( svc - > current_state = = STATE_OK ) {
2017-05-19 23:37:19 +02:00
2019-04-18 17:09:18 +02:00
handle_event = TRUE ;
2017-05-19 23:37:19 +02:00
2019-04-18 17:09:18 +02:00
if ( svc - > state_type = = HARD_STATE ) {
2017-05-19 23:37:19 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 1 , " Service experienced a HARD recovery. \n " ) ;
2017-05-19 23:37:19 +02:00
2019-04-18 17:09:18 +02:00
send_notification = TRUE ;
}
else {
2017-05-19 23:37:19 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 1 , " Service experienced a SOFT recovery. \n " ) ;
}
2017-05-19 23:37:19 +02:00
2019-04-18 17:09:18 +02:00
/* there was a state change, soft or hard */
state_change = TRUE ;
}
2017-05-19 23:37:19 +02:00
2019-04-18 17:09:18 +02:00
/***** SERVICE IS STILL IN PROBLEM STATE *****/
else {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 1 , " Service is still in a non-OK state (%s)! \n " , service_state_name ( svc - > current_state ) ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
if ( svc - > state_type = = SOFT_STATE ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 2 , " Service state type is soft, using retry_interval \n " ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
handle_event = TRUE ;
next_check = ( unsigned long ) ( current_time + svc - > retry_interval * interval_length ) ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* check if host is down/unreachable and don't send notifications */
else if ( svc - > host_problem_at_last_check = = TRUE ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 2 , " Service state type is hard, but host is down or unreachable, not sending notification \n " ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
} else {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 2 , " Service state type is hard, sending a notification \n " ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
send_notification = TRUE ;
2017-05-19 22:22:40 +02:00
}
}
}
2019-04-18 17:09:18 +02:00
/* soft states should be using retry_interval */
if ( svc - > state_type = = SOFT_STATE ) {
log_debug_info ( DEBUGL_CHECKS , 2 , " Service state type is soft, using retry_interval \n " ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
next_check = ( unsigned long ) ( current_time + svc - > retry_interval * interval_length ) ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* check for a state change */
if ( svc - > current_state ! = svc - > last_state | | ( svc - > current_state = = STATE_OK & & svc - > state_type = = SOFT_STATE ) ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 2 , " Service experienced a state change \n " ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
state_change = TRUE ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* adjust the current attempt */
if ( svc - > state_type = = SOFT_STATE ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* this has to be first so we don't reset every time a new non-ok state comes
in ( and triggers the state_change = = TRUE ) */
if ( svc - > last_state ! = STATE_OK & & svc - > current_attempt < svc - > max_attempts ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
svc - > current_attempt + + ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* historically, a soft recovery would actually get up to 2 attempts
and then immediately reset once the next check result came in */
else if ( state_change = = TRUE & & svc - > current_state ! = STATE_OK ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
svc - > current_attempt = 1 ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* otherwise, just increase the attempt */
else if ( svc - > current_attempt < svc - > max_attempts ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
svc - > current_attempt + + ;
}
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
if ( svc - > current_attempt > = svc - > max_attempts & &
2019-08-03 18:28:19 +02:00
( svc - > current_state ! = new_last_hard_state | | svc - > state_type = = SOFT_STATE ) ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 2 , " Service had a HARD STATE CHANGE!! \n " ) ;
next_check = ( unsigned long ) ( current_time + ( svc - > check_interval * interval_length ) ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* set both states changed, this may have been missed... */
hard_state_change = TRUE ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* this is missed earlier */
send_notification = TRUE ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* handle some acknowledgement things and update last_state_change */
2019-08-03 18:28:19 +02:00
/* This is a temporary fix that lets us avoid changing any function boundaries in a bugfix release */
/* @fixme 4.5.0 - refactor so that each specific struct member is only modified in */
/* service_state_or_hard_state_type_change() or handle_async_service_check_result(), not both.*/
int original_last_hard_state = svc - > last_hard_state ;
2019-04-18 17:09:18 +02:00
service_state_or_hard_state_type_change ( svc , state_change , hard_state_change , & log_event , & handle_event ) ;
2019-08-03 18:28:19 +02:00
if ( original_last_hard_state ! = svc - > last_hard_state ) {
/* svc->last_hard_state now gets written only after the service status is brokered */
new_last_hard_state = svc - > last_hard_state ;
svc - > last_hard_state = original_last_hard_state ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* fix edge cases where log_event wouldn't have been set or won't be */
if ( svc - > current_state ! = STATE_OK & & svc - > state_type = = SOFT_STATE ) {
log_event = TRUE ;
2017-05-19 22:22:40 +02:00
}
2019-04-18 17:09:18 +02:00
record_last_service_state_ended ( svc ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
check_for_service_flapping ( svc , TRUE , TRUE ) ;
check_for_host_flapping ( hst , TRUE , FALSE , TRUE ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* service with active checks disabled do not get rescheduled */
if ( svc - > checks_enabled = = FALSE ) {
svc - > should_be_scheduled = FALSE ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* hosts with non-recurring intervals do not get rescheduled if we're in a HARD or OK state */
else if ( svc - > check_interval = = 0 & & ( svc - > state_type = = HARD_STATE | | svc - > current_state = = STATE_OK ) ) {
svc - > should_be_scheduled = FALSE ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* schedule a non-forced check if we can */
else if ( svc - > should_be_scheduled = = TRUE ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 1 , " Rescheduling next check of service at %s " , ctime ( & next_check ) ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* next check time was calculated above */
/* make sure we don't get ourselves into too much trouble... */
if ( current_time > next_check ) {
svc - > next_check = current_time ;
} else {
svc - > next_check = next_check ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* make sure we rescheduled the next service check at a valid time */
preferred_time = svc - > next_check ;
get_next_valid_time ( preferred_time , & next_valid_time , svc - > check_period_ptr ) ;
svc - > next_check = next_valid_time ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* Next valid time is further in the future because of timeperiod
constraints . Add a random amount so we don ' t get all checks
subject to that timeperiod constraint scheduled at the same time */
if ( next_valid_time > preferred_time ) {
2019-08-03 18:28:19 +02:00
svc - > next_check = reschedule_within_timeperiod ( next_valid_time , svc - > check_period_ptr , check_window ( svc ) ) ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
schedule_service_check ( svc , svc - > next_check , CHECK_OPTION_NONE ) ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* volatile service gets everything in non-ok hard state */
if ( ( svc - > current_state ! = STATE_OK )
& & ( svc - > state_type = = HARD_STATE )
& & ( svc - > is_volatile = = TRUE ) ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 2 , " Service is volatile, and we're in a non-ok hard state.. \n " ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
send_notification = TRUE ;
log_event = TRUE ;
handle_event = TRUE ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
if ( send_notification = = TRUE ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* send notification */
if ( service_notification ( svc , NOTIFICATION_NORMAL , NULL , NULL , NOTIFICATION_OPTION_NONE ) = = OK ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* log state due to notification event when stalking_options N is set */
if ( should_stalk_notifications ( svc ) ) {
log_event = TRUE ;
2017-05-19 22:22:40 +02:00
}
}
}
2019-04-18 17:09:18 +02:00
/* the service recovered, so reset the current notification number and state flags (after the recovery notification has gone out) */
if ( svc - > current_state = = STATE_OK & & svc - > state_type = = HARD_STATE & & hard_state_change = = TRUE ) {
svc - > current_notification_number = 0 ;
svc - > notified_on = 0 ;
2017-05-19 22:22:40 +02:00
}
2019-04-18 17:09:18 +02:00
if ( obsess_over_services = = TRUE ) {
obsessive_compulsive_service_check_processor ( svc ) ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* if we're stalking this state type AND the plugin output changed since last check, log it now.. */
if ( should_stalk ( svc ) & & compare_strings ( old_plugin_output , svc - > plugin_output ) ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 2 , " Logging due to state stalking, old: [%s], new: [%s] \n " , old_plugin_output , svc - > plugin_output ) ;
log_event = TRUE ;
2017-05-19 23:37:19 +02:00
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
if ( log_event = = TRUE ) {
log_service_event ( svc ) ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
if ( handle_event = = TRUE ) {
2019-08-03 18:28:19 +02:00
log_debug_info ( DEBUGL_CHECKS , 0 , " IS TIME FOR HANDLE THE SERVICE KTHX " ) ;
debug_async_service ( svc , cr ) ;
2019-04-18 17:09:18 +02:00
handle_service_event ( svc ) ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* Update OK states since they send out a soft alert but then they
switch into a HARD state and reset the attempts */
if ( svc - > current_state = = STATE_OK & & state_change = = TRUE ) {
2017-05-19 22:22:40 +02:00
2019-08-03 18:28:19 +02:00
/* Problem state starts regardless of SOFT/HARD status. */
svc - > last_problem_id = svc - > current_problem_id ;
svc - > current_problem_id = 0L ;
/* Reset attempts */
2019-04-18 17:09:18 +02:00
if ( hard_state_change = = TRUE ) {
svc - > current_notification_number = 0 ;
svc - > host_problem_at_last_check = FALSE ;
2017-05-19 22:22:40 +02:00
}
2019-04-18 17:09:18 +02:00
svc - > last_hard_state_change = svc - > last_check ;
2019-08-03 18:28:19 +02:00
new_last_hard_state = svc - > current_state ;
/* Set OK to a hard state */
2019-04-18 17:09:18 +02:00
svc - > current_attempt = 1 ;
svc - > state_type = HARD_STATE ;
2017-05-19 22:22:40 +02:00
}
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 2 ,
" STATE: %d, TYPE: %s, CUR: %d, MAX: %d, LAST_STATE: %d, LAST_HARD: %d, NOTIFY: %d, LOGGED: %d, HANDLED: %d \n " ,
svc - > current_state ,
( svc - > state_type = = SOFT_STATE ) ? " SOFT " : " HARD " ,
svc - > current_attempt ,
svc - > max_attempts ,
svc - > last_state ,
svc - > last_hard_state ,
send_notification ,
log_event ,
handle_event ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
# ifdef USE_EVENT_BROKER
broker_service_check ( NEBTYPE_SERVICECHECK_PROCESSED , NEBFLAG_NONE , NEBATTR_NONE , svc , svc - > check_type , cr - > start_time , cr - > finish_time , NULL , svc - > latency , svc - > execution_time , service_check_timeout , cr - > early_timeout , cr - > return_code , NULL , NULL , cr ) ;
# endif
2017-05-19 22:22:40 +02:00
2019-08-03 18:28:19 +02:00
2019-04-18 17:09:18 +02:00
svc - > has_been_checked = TRUE ;
update_service_status ( svc , FALSE ) ;
update_service_performance_data ( svc ) ;
2017-05-19 22:22:40 +02:00
2019-08-03 18:28:19 +02:00
/* last_hard_state cleanup
* This occurs after being brokered so that last_hard_state refers to the previous logged hard state ,
* rather than the current hard state
*/
svc - > last_hard_state = new_last_hard_state ;
2019-04-18 17:09:18 +02:00
my_free ( old_plugin_output ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
return OK ;
}
/*
 * Schedules an immediate or delayed active check of a service.
 *
 * svc        - service to schedule; a NULL pointer makes the call a no-op
 * check_time - time at which the check should run
 * options    - CHECK_OPTION_* flags for the new check
 *
 * The request is dropped when active checks are disabled and the request is
 * not forced, or when it is a plain dependency check and the last check is
 * still inside cached_service_check_horizon.
 *
 * If the service already has a pending check event, the pending event is kept
 * unless (a) the new request is forced and the pending one is not, or (b) the
 * new request runs earlier and is at least as "forced" as the pending one.
 * The service status is updated whenever the function gets past the early
 * exits.
 */
inline void schedule_service_check(service *svc, time_t check_time, int options)
{
	const int new_is_forced = (options & CHECK_OPTION_FORCE_EXECUTION) != 0;
	timed_event *event = NULL;
	int keep_existing = FALSE;

	log_debug_info(DEBUGL_FUNCTIONS, 0, " schedule_service_check() \n ");

	if (svc == NULL) {
		return;
	}

	log_debug_info(DEBUGL_CHECKS, 0, " Scheduling a %s, active check of service '%s' on host '%s' @ %s ",
	               new_is_forced ? " forced " : " non-forced ",
	               svc->description, svc->host_name, ctime(&check_time));

	/* an unforced request is pointless while active checks are switched off */
	if (!new_is_forced && svc->checks_enabled == FALSE) {
		log_debug_info(DEBUGL_CHECKS, 0, " Active checks of this service are disabled. \n ");
		return;
	}

	/* a plain dependency check is skipped if the last result is recent enough */
	if (options == CHECK_OPTION_DEPENDENCY_CHECK
	        && svc->last_check + cached_service_check_horizon > check_time) {
		log_debug_info(DEBUGL_CHECKS, 0, " Last check result is recent enough (%s) ", ctime(&svc->last_check));
		return;
	}

	/* with no pending event the new one is used; otherwise arbitrate below */
	event = (timed_event *)svc->next_check_event;

	if (event != NULL) {
		const int old_is_forced = (event->event_options & CHECK_OPTION_FORCE_EXECUTION) != 0;
		const int new_is_earlier = (check_time < event->run_time);

		log_debug_info(DEBUGL_CHECKS, 2, " Found another service check event for this service @ %s ", ctime(&event->run_time));

		/* the pending event wins unless one of the cases below says otherwise */
		keep_existing = TRUE;

		if (old_is_forced) {
			/* forced vs. forced: the earlier one wins */
			if (new_is_forced && new_is_earlier) {
				keep_existing = FALSE;
				log_debug_info(DEBUGL_CHECKS, 2, " New service check event is forced and occurs before the existing event, so the new event will be used instead. \n ");
			}
		}
		else if (new_is_forced) {
			/* a forced request always beats an unforced pending event */
			keep_existing = FALSE;
			log_debug_info(DEBUGL_CHECKS, 2, " New service check event is forced, so it will be used instead of the existing event. \n ");
		}
		else if (new_is_earlier) {
			/* neither is forced: the earlier one wins */
			keep_existing = FALSE;
			log_debug_info(DEBUGL_CHECKS, 2, " New service check event occurs before the existing (older) event, so it will be used instead. \n ");
		}
		else {
			/* the new request is not earlier than the pending one, so it is ignored */
			log_debug_info(DEBUGL_CHECKS, 2, " New service check event occurs after the existing event, so we'll ignore it. \n ");
		}
	}

	if (keep_existing == TRUE) {
		/* re-sync the service's next check time with the event that stays queued */
		if (event != NULL) {
			svc->next_check = event->run_time;
		}

		log_debug_info(DEBUGL_CHECKS, 2, " Keeping original service check event (ignoring the new one). \n ");
	}
	else {
		if (event != NULL) {
			/* reuse the existing event object, but take it out of the queue first */
			remove_event(nagios_squeue, event);
		}
		else {
			/* nothing pending yet: allocate a zeroed event */
			event = calloc(1, sizeof(*event));
			if (event == NULL) {
				logit(NSLOG_RUNTIME_WARNING, TRUE, " Warning: Could not reschedule check of service '%s' on host '%s'! \n ", svc->description, svc->host_name);
				return;
			}
		}

		log_debug_info(DEBUGL_CHECKS, 2, " Scheduling new service check event. \n ");

		/* record the event, its time and its options on the service */
		svc->next_check_event = event;
		svc->next_check = check_time;
		svc->check_options = options;

		/* fill in the event and place it in the event queue */
		event->event_type = EVENT_SERVICE_CHECK;
		event->event_data = (void *)svc;
		event->event_args = NULL;
		event->event_options = options;
		event->run_time = svc->next_check;
		event->recurring = FALSE;
		event->event_interval = 0L;
		event->timing_func = NULL;
		event->compensate_for_time_change = TRUE;
		add_event(nagios_squeue, event);
	}

	/* update the status log */
	update_service_status(svc, FALSE);
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/*
 * Checks whether an active check of a service may be performed right now.
 *
 * svc           - service to examine; NULL yields ERROR
 * check_options - CHECK_OPTION_* flags; CHECK_OPTION_FORCE_EXECUTION bypasses
 *                 the "checks enabled", check-period and execution-dependency
 *                 tests (the parent and host-state tests still apply)
 * time_is_valid - optional out-parameter; set to FALSE when the current time
 *                 is outside the service's check period (never set to TRUE
 *                 by this function)
 * new_time      - optional out-parameter; receives the preferred time for the
 *                 next attempt whenever svc is non-NULL
 *
 * Returns OK when the check may run, ERROR otherwise.
 *
 * Side effect: svc->current_state is overwritten with the matching
 * service_skip_check_*_status value when a dependency, parent or host-state
 * test fails and that value is >= 0.
 */
inline int check_service_check_viability(service *svc, int check_options, int *time_is_valid, time_t *new_time)
{
	int perform_check = TRUE;
	time_t current_time = 0L;
	time_t preferred_time = 0L;
	int check_interval = 0;
	host *temp_host = NULL;

	log_debug_info(DEBUGL_FUNCTIONS, 0, " check_service_check_viability() \n ");

	/* make sure we have a service */
	if (svc == NULL) {
		return ERROR;
	}

	/* get the check interval to use if we need to reschedule the check */
	if (svc->state_type == SOFT_STATE && svc->current_state != STATE_OK) {
		check_interval = (svc->retry_interval * interval_length);
	}
	else {
		check_interval = (svc->check_interval * interval_length);
	}

	/* get the current time */
	time(&current_time);

	/* initialize the next preferred check time */
	preferred_time = current_time;

	/* can we check the service right now? */
	if (!(check_options & CHECK_OPTION_FORCE_EXECUTION)) {

		/* if checks of the service are currently disabled... */
		if (svc->checks_enabled == FALSE) {
			preferred_time = current_time + check_interval;
			perform_check = FALSE;

			log_debug_info(DEBUGL_CHECKS, 2, " Active checks of the service are currently disabled. \n ");
		}

		/* make sure this is a valid time to check the service */
		if (check_time_against_period((unsigned long)current_time, svc->check_period_ptr) == ERROR) {
			preferred_time = current_time;
			if (time_is_valid) {
				*time_is_valid = FALSE;
			}
			perform_check = FALSE;

			log_debug_info(DEBUGL_CHECKS, 2, " This is not a valid time for this service to be actively checked. \n ");
		}

		/* check service dependencies for execution */
		if (check_service_dependencies(svc, EXECUTION_DEPENDENCY) == DEPENDENCIES_FAILED) {
			preferred_time = current_time + check_interval;
			perform_check = FALSE;

			if (service_skip_check_dependency_status >= 0) {
				svc->current_state = service_skip_check_dependency_status;
			}

			log_debug_info(DEBUGL_CHECKS, 2, " Execution dependencies for this service failed, so it will not be actively checked. \n ");
		}
	}

	/* check if parent service is OK (applies to forced checks as well) */
	if (check_service_parents(svc) == DEPENDENCIES_FAILED) {
		preferred_time = current_time + check_interval;
		perform_check = FALSE;

		if (service_skip_check_parent_status >= 0) {
			svc->current_state = service_skip_check_parent_status;
		}

		log_debug_info(DEBUGL_CHECKS, 2, " Execution parents for this service failed, so it will not be actively checked. \n ");
	}

	/* check if host is up - if not, do not perform check */
	if (host_down_disable_service_checks) {
		temp_host = svc->host_ptr;

		if (temp_host == NULL) {
			log_debug_info(DEBUGL_CHECKS, 2, " Host pointer NULL in check_service_check_viability(). \n ");

			/* hand back the time computed so far, so the caller's
			   out-parameter is written on this error path too */
			if (new_time) {
				*new_time = preferred_time;
			}
			return ERROR;
		}

		if (temp_host->current_state != HOST_UP) {
			/* preferred_time is deliberately left alone here */
			log_debug_info(DEBUGL_CHECKS, 2, " Host state not UP, so service check will not be performed - will be rescheduled as normal. \n ");
			perform_check = FALSE;

			if (service_skip_check_host_down_status >= 0) {
				svc->current_state = service_skip_check_host_down_status;
			}
		}
	}

	/* pass back the next viable check time */
	if (new_time) {
		*new_time = preferred_time;
	}

	if (perform_check == TRUE) {
		return OK;
	}
	return ERROR;
}
/*
 * Checks the parent services of a service, recursing through their parents.
 *
 * Returns DEPENDENCIES_FAILED as soon as any parent (direct or indirect) has
 * a last_hard_state of STATE_CRITICAL or STATE_UNKNOWN; otherwise returns
 * DEPENDENCIES_OK.  Parent entries whose service pointer is NULL are logged
 * as a runtime warning and skipped.
 */
int check_service_parents(service *svc)
{
	servicesmember *member = NULL;

	log_debug_info(DEBUGL_FUNCTIONS, 0, " check_service_parents() \n ");

	/* walk every parent entry of this service */
	for (member = svc->parents; member != NULL; member = member->next) {
		service *parent = member->service_ptr;
		int parent_hard_state = STATE_OK;

		/* an entry without a service pointer cannot be evaluated */
		if (parent == NULL) {
			logit(NSLOG_RUNTIME_WARNING, TRUE, " Warning: service '%s' on host '%s' is NULL ptr \n ",
			      member->service_description, member->host_name);
			continue;
		}

		parent_hard_state = parent->last_hard_state;

		/* fail on a bad parent state first; only then descend to the parent's own parents */
		if (parent_hard_state == STATE_CRITICAL
		        || parent_hard_state == STATE_UNKNOWN
		        || check_service_parents(parent) != DEPENDENCIES_OK) {
			return DEPENDENCIES_FAILED;
		}
	}

	return DEPENDENCIES_OK;
}
/*
 * Checks the dependencies of a service for one dependency type.
 *
 * svc             - service whose dependencies are examined
 * dependency_type - NOTIFICATION_DEPENDENCY selects svc->notify_deps; any
 *                   other value selects svc->exec_deps
 *
 * Returns DEPENDENCIES_FAILED when a master service is in a state listed in
 * the dependency's failure_options, or when a dependency with inherits_parent
 * set has a master whose own dependencies fail.  Returns DEPENDENCIES_OK
 * otherwise.
 *
 * Dependencies without a master service pointer are skipped.  A dependency
 * that has a dependency_period which does not cover the current time is
 * skipped as well; the remaining dependencies in the list are still checked.
 */
int check_service_dependencies(service *svc, int dependency_type)
{
	objectlist *list;
	int state = STATE_OK;
	time_t current_time = 0L;

	log_debug_info(DEBUGL_FUNCTIONS, 0, " check_service_dependencies() \n ");

	/* only check dependencies of the desired type */
	if (dependency_type == NOTIFICATION_DEPENDENCY) {
		list = svc->notify_deps;
	}
	else {
		list = svc->exec_deps;
	}

	/* check all dependencies of the desired type... */
	for (; list; list = list->next) {
		service *temp_service;
		servicedependency *temp_dependency = (servicedependency *)list->object_ptr;

		/* find the service we depend on... */
		if ((temp_service = temp_dependency->master_service_ptr) == NULL) {
			continue;
		}

		/* skip this dependency if it has a timeperiod and the current time isn't valid.
		   This used to "return FALSE", which ended the walk and ignored every
		   dependency still left in the list. */
		time(&current_time);
		if (temp_dependency->dependency_period != NULL
		        && (check_time_against_period(current_time, temp_dependency->dependency_period_ptr) == ERROR)) {
			continue;
		}

		/* get the status to use (use last hard state if it's currently in a soft state) */
		if (temp_service->state_type == SOFT_STATE && soft_state_dependencies == FALSE) {
			state = temp_service->last_hard_state;
		}
		else {
			state = temp_service->current_state;
		}

		/* is the service we depend on in a state that fails the dependency tests? */
		if (flag_isset(temp_dependency->failure_options, 1 << state)) {
			return DEPENDENCIES_FAILED;
		}

		/* immediate dependencies ok at this point - check parent dependencies if necessary */
		if (temp_dependency->inherits_parent == TRUE) {
			if (check_service_dependencies(temp_service, dependency_type) != DEPENDENCIES_OK) {
				return DEPENDENCIES_FAILED;
			}
		}
	}

	return DEPENDENCIES_OK;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* check for services that never returned from a check... */
void check_for_orphaned_services ( void )
{
service * temp_service = NULL ;
2017-05-19 22:22:40 +02:00
time_t current_time = 0L ;
time_t expected_time = 0L ;
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_FUNCTIONS , 0 , " check_for_orphaned_services() \n " ) ;
2017-05-19 22:22:40 +02:00
/* get the current time */
time ( & current_time ) ;
2019-04-18 17:09:18 +02:00
/* check all services... */
for ( temp_service = service_list ; temp_service ! = NULL ; temp_service = temp_service - > next ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* skip services that are not currently executing */
if ( temp_service - > is_executing = = FALSE ) {
2017-05-19 22:22:40 +02:00
continue ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* determine the time at which the check results should have come in (allow 10 minutes slack time) */
2019-04-18 17:09:18 +02:00
expected_time = ( time_t ) ( temp_service - > next_check + temp_service - > latency + service_check_timeout + check_reaper_interval + 600 ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* this service was supposed to have executed a while ago, but for some reason the results haven't come back in... */
if ( expected_time < current_time ) {
2017-05-19 22:22:40 +02:00
/* log a warning */
2019-04-18 17:09:18 +02:00
logit ( NSLOG_RUNTIME_WARNING , TRUE , " Warning: The check of service '%s' on host '%s' looks like it was orphaned (results never came back; last_check=%lu; next_check=%lu). I'm scheduling an immediate check of the service... \n " , temp_service - > description , temp_service - > host_name , temp_service - > last_check , temp_service - > next_check ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 1 , " Service '%s' on host '%s' was orphaned, so we're scheduling an immediate check... \n " , temp_service - > description , temp_service - > host_name ) ;
log_debug_info ( DEBUGL_CHECKS , 1 , " next_check=%lu (%s); last_check=%lu (%s); \n " ,
temp_service - > next_check , ctime ( & temp_service - > next_check ) ,
temp_service - > last_check , ctime ( & temp_service - > last_check ) ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* decrement the number of running service checks */
if ( currently_running_service_checks > 0 ) {
currently_running_service_checks - - ;
2017-05-19 22:22:40 +02:00
}
2019-04-18 17:09:18 +02:00
/* disable the executing flag */
temp_service - > is_executing = FALSE ;
/* schedule an immediate check of the service */
schedule_service_check ( temp_service , current_time , CHECK_OPTION_ORPHAN_CHECK ) ;
2017-05-19 22:22:40 +02:00
}
}
2019-04-18 17:09:18 +02:00
return ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* check freshness of service results */
void check_service_result_freshness ( void )
{
service * temp_service = NULL ;
2017-05-19 22:22:40 +02:00
time_t current_time = 0L ;
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_FUNCTIONS , 0 , " check_service_result_freshness() \n " ) ;
log_debug_info ( DEBUGL_CHECKS , 1 , " Checking the freshness of service check results... \n " ) ;
2017-05-19 22:22:40 +02:00
/* bail out if we're not supposed to be checking freshness */
2019-04-18 17:09:18 +02:00
if ( check_service_freshness = = FALSE ) {
log_debug_info ( DEBUGL_CHECKS , 1 , " Service freshness checking is disabled. \n " ) ;
2017-05-19 22:22:40 +02:00
return ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* get the current time */
time ( & current_time ) ;
2019-04-18 17:09:18 +02:00
/* check all services... */
for ( temp_service = service_list ; temp_service ! = NULL ; temp_service = temp_service - > next ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* skip services we shouldn't be checking for freshness */
if ( temp_service - > check_freshness = = FALSE ) {
2017-05-19 22:22:40 +02:00
continue ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* skip services that are currently executing (problems here will be caught by orphaned service check) */
if ( temp_service - > is_executing = = TRUE ) {
2017-05-19 22:22:40 +02:00
continue ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* skip services that have both active and passive checks disabled */
if ( temp_service - > checks_enabled = = FALSE & & temp_service - > accept_passive_checks = = FALSE ) {
2017-05-19 22:22:40 +02:00
continue ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* skip services that are already being freshened */
if ( temp_service - > is_being_freshened = = TRUE ) {
2017-05-19 22:22:40 +02:00
continue ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* see if the time is right... */
2019-04-18 17:09:18 +02:00
if ( check_time_against_period ( current_time , temp_service - > check_period_ptr ) = = ERROR ) {
2017-05-19 22:22:40 +02:00
continue ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* EXCEPTION */
/* don't check freshness of services without regular check intervals if we're using auto-freshness threshold */
if ( ( temp_service - > check_interval = = 0 ) & & ( temp_service - > freshness_threshold = = 0 ) ) {
continue ;
}
/* the results for the last check of this service are stale! */
if ( is_service_result_fresh ( temp_service , current_time , TRUE ) = = FALSE ) {
2017-05-19 22:22:40 +02:00
/* set the freshen flag */
2019-04-18 17:09:18 +02:00
temp_service - > is_being_freshened = TRUE ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* schedule an immediate forced check of the service */
schedule_service_check ( temp_service , current_time , CHECK_OPTION_FORCE_EXECUTION | CHECK_OPTION_FRESHNESS_CHECK ) ;
2017-05-19 22:22:40 +02:00
}
}
2019-04-18 17:09:18 +02:00
return ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/*
 * Tests whether or not a service's check results are fresh.
 *
 * temp_service  service to examine
 * current_time  time to compare the computed expiration time against
 * log_this      when TRUE, a runtime warning is written via logit() if the
 *               result is stale (the debug log entry is written either way)
 *
 * Returns TRUE if the result is still fresh, FALSE if it is stale.
 */
int is_service_result_fresh(service *temp_service, time_t current_time, int log_this)
{
	int freshness_threshold = 0;
	time_t expiration_time = 0L;
	int days = 0;
	int hours = 0;
	int minutes = 0;
	int seconds = 0;
	int tdays = 0;
	int thours = 0;
	int tminutes = 0;
	int tseconds = 0;

	log_debug_info(DEBUGL_CHECKS, 2, "Checking freshness of service '%s' on host '%s'...\n", temp_service->description, temp_service->host_name);

	/* use user-supplied freshness threshold or auto-calculate a freshness threshold to use? */
	if (temp_service->freshness_threshold == 0) {
		/* auto-calculate: normal interval for HARD/OK states, retry interval otherwise */
		if (temp_service->state_type == HARD_STATE || temp_service->current_state == STATE_OK) {
			freshness_threshold = (temp_service->check_interval * interval_length) + temp_service->latency + additional_freshness_latency;
		}
		else {
			freshness_threshold = (temp_service->retry_interval * interval_length) + temp_service->latency + additional_freshness_latency;
		}
	}
	else {
		freshness_threshold = temp_service->freshness_threshold;
	}

	log_debug_info(DEBUGL_CHECKS, 2, "Freshness thresholds: service=%d, use=%d\n", temp_service->freshness_threshold, freshness_threshold);

	/* calculate expiration time */
	/*
	 * CHANGED 11/10/05 EG -
	 * program start is only used in expiration time calculation
	 * if > last check AND active checks are enabled, so active checks
	 * can become stale immediately upon program startup
	 */
	/*
	 * CHANGED 02/25/06 SG -
	 * passive checks also become stale, so remove dependence on active
	 * check logic
	 */
	if (temp_service->has_been_checked == FALSE) {
		expiration_time = (time_t)(event_start + freshness_threshold);
	}
	/*
	 * CHANGED 06/19/07 EG -
	 * Per Ton's suggestion (and user requests), only use program start
	 * time over last check if no specific threshold has been set by user.
	 * Problems can occur if Nagios is restarted more frequently than
	 * freshness threshold intervals (services never go stale).
	 */
	/*
	 * CHANGED 10/07/07 EG:
	 * Only match next condition for services that
	 * have active checks enabled...
	 */
	/*
	 * CHANGED 10/07/07 EG:
	 * Added max_service_check_spread to expiration time as suggested
	 * by Altinity
	 */
	else if ((temp_service->checks_enabled == TRUE)
	         && (event_start > temp_service->last_check)
	         && (temp_service->freshness_threshold == 0)) {
		expiration_time = (time_t)(event_start + freshness_threshold + (max_service_check_spread * interval_length));
	}
	else {
		expiration_time = (time_t)(temp_service->last_check + freshness_threshold);
	}

	/*
	 * If the check was last done passively, we assume it's going
	 * to continue that way and we need to handle the fact that
	 * Nagios might have been shut off for quite a long time. If so,
	 * we mustn't spam freshness notifications but use event_start
	 * instead of last_check to determine freshness expiration time.
	 * The threshold for "long time" is determined as 61.8% of the normal
	 * freshness threshold based on vast heuristical research (ie, "some
	 * guy once told me the golden ratio is good for loads of stuff").
	 */
	if ((temp_service->check_type == CHECK_TYPE_PASSIVE)
	    && (temp_service->last_check < event_start)
	    && (event_start - last_program_stop > freshness_threshold * 0.618)) {

		/* this overrides whichever expiration time was chosen above */
		expiration_time = event_start + freshness_threshold;
	}

	log_debug_info(DEBUGL_CHECKS, 2, "HBC: %d, PS: %lu, ES: %lu, LC: %lu, CT: %lu, ET: %lu\n", temp_service->has_been_checked, (unsigned long)program_start, (unsigned long)event_start, (unsigned long)temp_service->last_check, (unsigned long)current_time, (unsigned long)expiration_time);

	/* the results for the last check of this service are stale */
	if (expiration_time < current_time) {

		/* break down both the overdue time and the threshold for the messages below */
		get_time_breakdown((current_time - expiration_time), &days, &hours, &minutes, &seconds);
		get_time_breakdown(freshness_threshold, &tdays, &thours, &tminutes, &tseconds);

		/* log a warning */
		if (log_this == TRUE) {
			logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: The results of service '%s' on host '%s' are stale by %dd %dh %dm %ds (threshold=%dd %dh %dm %ds). I'm forcing an immediate check of the service.\n", temp_service->description, temp_service->host_name, days, hours, minutes, seconds, tdays, thours, tminutes, tseconds);
		}

		log_debug_info(DEBUGL_CHECKS, 1, "Check results for service '%s' on host '%s' are stale by %dd %dh %dm %ds (threshold=%dd %dh %dm %ds). Forcing an immediate check of the service...\n", temp_service->description, temp_service->host_name, days, hours, minutes, seconds, tdays, thours, tminutes, tseconds);

		return FALSE;
	}

	log_debug_info(DEBUGL_CHECKS, 1, "Check results for service '%s' on host '%s' are fresh.\n", temp_service->description, temp_service->host_name);

	return TRUE;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/*
 * Processes the result of an asynchronous host check.
 *
 * hst  host the result belongs to
 * cr   check result to apply
 *
 * Works out state / hard-state changes, adjusts the attempt counter,
 * reschedules the next check, and then notifies, logs, runs event handlers
 * and brokers the result as needed.
 *
 * hst->last_hard_state is deliberately kept at its previous value until the
 * result has been brokered and the status updated; the new value is held in
 * new_last_hard_state and written back at the very end.
 *
 * Returns ERROR if is_valid_check_result_data() rejects the input or a
 * passive result is rejected by host_is_passive(); OK otherwise.
 */
int handle_async_host_check_result(host *hst, check_result *cr)
{
	time_t current_time = 0L;
	time_t next_check = 0L;
	time_t preferred_time = 0L;
	time_t next_valid_time = 0L;

	int state_change = FALSE;
	int hard_state_change = FALSE;
	int send_notification = FALSE;
	int handle_event = FALSE;
	int log_event = FALSE;

	char *old_plugin_output = NULL;

	log_debug_info(DEBUGL_FUNCTIONS, 0, "handle_async_host_check_result()\n");

	if (is_valid_check_result_data(hst, cr) == FALSE) {
		return ERROR;
	}

	/* must come after the validity check above, since it dereferences hst */
	int new_last_hard_state = hst->last_hard_state;

	if (cr->check_type == CHECK_TYPE_PASSIVE) {
		if (host_is_passive(hst, cr) == FALSE) {
			return ERROR;
		}
	}
	else {
		host_is_active(hst);
	}

	time(&current_time);
	initialize_last_host_state_change_times(hst);

	debug_async_host(hst, cr);
	host_fresh_check(hst, cr, current_time);
	host_initial_handling(hst, cr, &old_plugin_output);

	/* reschedule the next check at the regular interval - may be overridden */
	next_check = (time_t)(hst->last_check + (hst->check_interval * interval_length));

	/**************************************/
	/********* HOST CHECK OK LOGIC ********/
	/**************************************/
	if (hst->last_state == HOST_UP) {

		log_debug_info(DEBUGL_CHECKS, 1, "Host was UP.\n");

		/***** HOST IS STILL UP *****/
		if (hst->current_state == HOST_UP) {

			log_debug_info(DEBUGL_CHECKS, 1, "Host is still UP.\n");

			hst->state_type = HARD_STATE;
			hst->current_attempt = 1;
		}

		/***** HOST IS NOW DOWN/UNREACHABLE *****/
		else {

			log_debug_info(DEBUGL_CHECKS, 1, "Host is no longer UP (%s)!\n", host_state_name(hst->current_state));
			hst->state_type = SOFT_STATE;
			hst->current_attempt = 1;

			/* propagate checks to immediate parents if they are UP */
			host_propagate_checks_to_immediate_parents(hst, FALSE, current_time);

			/* propagate checks to immediate children if they are not UNREACHABLE */
			host_propagate_checks_to_immediate_children(hst, FALSE, TRUE, current_time);

			/* propagate checks to hosts that THIS ONE depends on for notifications AND execution */
			host_propagate_dependency_checks(hst, current_time);

			/* we need to handle this event */
			handle_event = TRUE;
		}
	}

	/**************************************/
	/****** HOST CHECK PROBLEM LOGIC ******/
	/**************************************/
	else {

		log_debug_info(DEBUGL_CHECKS, 1, "Host was not UP last time.\n");

		/***** HOST IS NOW UP *****/
		if (hst->current_state == HOST_UP) {

			log_debug_info(DEBUGL_CHECKS, 1, "Host is UP now.\n");

			/* propagate checks to immediate parents if they are not UP */
			host_propagate_checks_to_immediate_parents(hst, TRUE, current_time);

			/* propagate checks to immediate children if they are not UP */
			host_propagate_checks_to_immediate_children(hst, TRUE, FALSE, current_time);

			/* we need to handle this event */
			handle_event = TRUE;

			/* but a soft recovery is not something we notify for */
			if (hst->state_type == HARD_STATE) {
				log_debug_info(DEBUGL_CHECKS, 1, "Host experienced a HARD recovery.\n");
				send_notification = TRUE;
				hard_state_change = TRUE;
				hst->current_attempt = 1;
			}
			else {
				log_debug_info(DEBUGL_CHECKS, 1, "Host experienced a SOFT recovery.\n");
			}
		}

		/***** HOST IS STILL DOWN/UNREACHABLE *****/
		else {

			log_debug_info(DEBUGL_CHECKS, 1, "Host is still not UP (%s)!\n", host_state_name(hst->current_state));

			if (hst->state_type == SOFT_STATE) {

				log_debug_info(DEBUGL_CHECKS, 2, "Host state type is soft, using retry_interval\n");

				handle_event = TRUE;
				next_check = (time_t)(current_time + hst->retry_interval * interval_length);
			}
			/* if the state_type is hard, then send a notification */
			else {
				log_debug_info(DEBUGL_CHECKS, 2, "Host state type is hard, sending a notification\n");
				send_notification = TRUE;
			}
		}
	}

	/* translate host state between DOWN/UNREACHABLE (only for passive checks if enabled) */
	if (hst->current_state != HOST_UP && (hst->check_type == CHECK_TYPE_ACTIVE || translate_passive_host_checks == TRUE)) {

		hst->current_state = determine_host_reachability(hst);
		if (hst->state_type == SOFT_STATE) {
			next_check = (time_t)(current_time + (hst->retry_interval * interval_length));
		}
	}

	/* check for state change (a soft UP state also counts as one) */
	if (hst->current_state != hst->last_state || (hst->current_state == HOST_UP && hst->state_type == SOFT_STATE)) {

		log_debug_info(DEBUGL_CHECKS, 2, "Host experienced a state change\n");

		state_change = TRUE;
	}

	/* adjust the current attempt */
	if (hst->state_type == SOFT_STATE) {

		/* this is an edge case for non-up states, it needs to be checked first */
		if (hst->last_state != HOST_UP && hst->current_state != HOST_UP && hst->current_attempt < hst->max_attempts) {
			hst->current_attempt++;
		}
		/* reset it to 1 */
		else if (state_change == TRUE || hst->current_state == HOST_UP) {
			hst->current_attempt = 1;
		}
		/* or increment if we can */
		else if (hst->current_attempt < hst->max_attempts) {
			hst->current_attempt++;
		}
	}

	/* attempts exhausted and the state differs from the last hard state: hard state change */
	if (hst->current_attempt >= hst->max_attempts && hst->current_state != new_last_hard_state) {

		log_debug_info(DEBUGL_CHECKS, 2, "Host had a HARD STATE CHANGE!!\n");

		next_check = (time_t)(current_time + (hst->check_interval * interval_length));
		hard_state_change = TRUE;
		send_notification = TRUE;
	}

	/* handle some acknowledgement things and update last_state_change */
	/* @fixme 4.5.0 - See similar comment in handle_async_service_check_result() */
	int original_last_hard_state = hst->last_hard_state;
	host_state_or_hard_state_type_change(hst, state_change, hard_state_change, &log_event, &handle_event, &send_notification);
	if (original_last_hard_state != hst->last_hard_state) {
		/* hst->last_hard_state now gets written only after the host status is brokered:
		 * remember the new value and put the old one back for now */
		new_last_hard_state = hst->last_hard_state;
		hst->last_hard_state = original_last_hard_state;
	}

	record_last_host_state_ended(hst);

	check_for_host_flapping(hst, TRUE, TRUE, TRUE);

	/* host with active checks disabled do not get rescheduled */
	if (hst->checks_enabled == FALSE) {
		hst->should_be_scheduled = FALSE;
	}

	/* hosts with non-recurring intervals do not get rescheduled if we're in a HARD or UP state */
	else if (hst->check_interval == 0 && (hst->state_type == HARD_STATE || hst->current_state == HOST_UP)) {
		hst->should_be_scheduled = FALSE;
	}

	/* schedule a non-forced check if we can */
	else if (hst->should_be_scheduled == TRUE) {

		/* ctime() output carries its own trailing newline */
		log_debug_info(DEBUGL_CHECKS, 1, "Rescheduling next check of host at %s", ctime(&next_check));

		/* next check time was calculated above */
		/* make sure we don't get ourselves into too much trouble... */
		if (current_time > next_check) {
			hst->next_check = current_time;
		}
		else {
			hst->next_check = next_check;
		}

		/* make sure we rescheduled the next host check at a valid time */
		preferred_time = hst->next_check;
		get_next_valid_time(preferred_time, &next_valid_time, hst->check_period_ptr);
		hst->next_check = next_valid_time;

		/* Next valid time is further in the future because of timeperiod
		   constraints. Add a random amount so we don't get all checks
		   subject to that timeperiod constraint scheduled at the same time */
		if (next_valid_time > preferred_time) {
			hst->next_check = reschedule_within_timeperiod(next_valid_time, hst->check_period_ptr, check_window(hst));
		}

		schedule_host_check(hst, hst->next_check, CHECK_OPTION_NONE);
	}

	/* NOTE(review): this compares the attempt COUNTER against the HOST_UP state
	 * constant. It may be meant as a guard against a zero attempt count, or be a
	 * typo for current_state - confirm intent before changing it. */
	if (hst->current_attempt == HOST_UP) {
		hst->current_attempt = 1;
	}

	if (send_notification == TRUE) {

		/* send notifications */
		if (host_notification(hst, NOTIFICATION_NORMAL, NULL, NULL, NOTIFICATION_OPTION_NONE) == OK) {

			/* log state due to notification event when stalking_options N is set */
			if (should_stalk_notifications(hst)) {
				log_event = TRUE;
			}
		}
	}

	/* the host recovered, so reset the current notification number and state flags (after the recovery notification has gone out) */
	if (hst->current_state == HOST_UP && hst->state_type == HARD_STATE && hard_state_change == TRUE) {
		hst->current_notification_number = 0;
		hst->notified_on = 0;
	}

	if (obsess_over_hosts == TRUE) {
		obsessive_compulsive_host_check_processor(hst);
	}

	/* if we're stalking this state type AND the plugin output changed since last check, log it now.. */
	if (should_stalk(hst) && compare_strings(old_plugin_output, hst->plugin_output)) {
		log_event = TRUE;
	}

	/* if log_host_retries is set to true, we have to log soft states too */
	if (hst->state_type == SOFT_STATE && log_host_retries == TRUE) {
		log_event = TRUE;
	}

	if (log_event == TRUE) {
		log_host_event(hst);
	}

	if (handle_event == TRUE) {
		handle_host_event(hst);
	}

	log_debug_info(DEBUGL_CHECKS, 2,
	               "STATE: %d, TYPE: %s, CUR: %d, MAX: %d, LAST_STATE: %d, LAST_HARD: %d, NOTIFY: %d, LOGGED: %d, HANDLED: %d\n",
	               hst->current_state,
	               (hst->state_type == SOFT_STATE) ? "SOFT" : "HARD",
	               hst->current_attempt,
	               hst->max_attempts,
	               hst->last_state,
	               hst->last_hard_state,
	               send_notification,
	               log_event,
	               handle_event);

#ifdef USE_EVENT_BROKER
	broker_host_check(NEBTYPE_HOSTCHECK_PROCESSED, NEBFLAG_NONE, NEBATTR_NONE, hst, hst->check_type, hst->current_state, hst->state_type, cr->start_time, cr->finish_time, hst->check_command, hst->latency, hst->execution_time, host_check_timeout, cr->early_timeout, cr->return_code, NULL, hst->plugin_output, hst->long_plugin_output, hst->perf_data, NULL, cr);
#endif

	hst->has_been_checked = TRUE;
	update_host_status(hst, FALSE);
	update_host_performance_data(hst);

	/* last_hard_state cleanup
	 * This occurs after being brokered so that last_hard_state refers to the previous logged hard state,
	 * rather than the current hard state
	 */
	hst->last_hard_state = new_last_hard_state;

	/* free memory */
	my_free(old_plugin_output);

	return OK;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/******************************************************************/
/*************** COMMON ROUTE/HOST CHECK FUNCTIONS ****************/
/******************************************************************/
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/*
 * Schedules an immediate or delayed host check.
 *
 * hst         host to schedule (NULL is ignored)
 * check_time  time at which the check should run
 * options     CHECK_OPTION_* flags; stored on the host and on the event
 *
 * If the host already has a pending check event, the new request replaces it
 * only when the new one is forced and the old one is not, or when both have
 * the same forced-ness and the new one runs earlier. Otherwise the existing
 * event is kept and hst->next_check is resynchronised to it.
 *
 * Nothing is scheduled when active checks are disabled for the host and the
 * request is not forced, or when options is exactly
 * CHECK_OPTION_DEPENDENCY_CHECK and the last check falls within
 * cached_host_check_horizon of check_time.
 */
inline void schedule_host_check(host *hst, time_t check_time, int options)
{
	timed_event *temp_event = NULL;

	/* use the originally scheduled check unless we decide otherwise */
	int use_original_event = TRUE;

	log_debug_info(DEBUGL_FUNCTIONS, 0, "schedule_host_check()\n");

	if (hst == NULL) {
		return;
	}

	/* ctime() output carries its own trailing newline */
	log_debug_info(DEBUGL_CHECKS, 0, "Scheduling a %s, active check of host '%s' @ %s",
	               (options & CHECK_OPTION_FORCE_EXECUTION) ? "forced" : "non-forced",
	               hst->name,
	               ctime(&check_time));

	/* don't schedule a check if active checks of this host are disabled */
	if (hst->checks_enabled == FALSE && !(options & CHECK_OPTION_FORCE_EXECUTION)) {
		log_debug_info(DEBUGL_CHECKS, 0, "Active checks are disabled for this host.\n");
		return;
	}

	/* a pure dependency check is pointless if the last result is recent enough */
	if ((options == CHECK_OPTION_DEPENDENCY_CHECK)
	    && (hst->last_check + cached_host_check_horizon > check_time)) {

		log_debug_info(DEBUGL_CHECKS, 0, "Last check result is recent enough (%s)\n", ctime(&hst->last_check));
		return;
	}

	temp_event = (timed_event *)hst->next_check_event;

	/* no pending event, so the new one is used unconditionally */
	if (temp_event == NULL) {
		use_original_event = FALSE;
	}

	/*
	 * If the host already had a check scheduled we need
	 * to decide which check event to use
	 */
	else {

		log_debug_info(DEBUGL_CHECKS, 2, "Found another host check event for this host @ %s", ctime(&temp_event->run_time));

		/* the original event is a forced check... */
		if ((temp_event->event_options & CHECK_OPTION_FORCE_EXECUTION)) {

			/* the new event is also forced and its execution time is earlier than the original, so use it instead */
			if ((options & CHECK_OPTION_FORCE_EXECUTION) && (check_time < temp_event->run_time)) {
				log_debug_info(DEBUGL_CHECKS, 2, "New host check event is forced and occurs before the existing event, so the new event be used instead.\n");
				use_original_event = FALSE;
			}
		}

		/* the original event is not a forced check... */
		else {

			/* the new event is a forced check, so use it instead */
			if ((options & CHECK_OPTION_FORCE_EXECUTION)) {
				log_debug_info(DEBUGL_CHECKS, 2, "New host check event is forced, so it will be used instead of the existing event.\n");
				use_original_event = FALSE;
			}

			/* the new event is not forced either and its execution time is earlier than the original, so use it instead */
			else if (check_time < temp_event->run_time) {
				log_debug_info(DEBUGL_CHECKS, 2, "New host check event occurs before the existing (older) event, so it will be used instead.\n");
				use_original_event = FALSE;
			}

			/* the new event would not run any earlier, so keep the existing one */
			else {
				log_debug_info(DEBUGL_CHECKS, 2, "New host check event occurs after the existing event, so we'll ignore it.\n");
			}
		}
	}

	/* use the new event */
	if (use_original_event == FALSE) {

		log_debug_info(DEBUGL_CHECKS, 2, "Scheduling new host check event.\n");

		/* reuse the existing event item (after pulling it off the queue), or allocate a new one */
		if (temp_event) {
			remove_event(nagios_squeue, temp_event);
		}
		else if ((temp_event = (timed_event *)calloc(1, sizeof(timed_event))) == NULL) {
			logit(NSLOG_RUNTIME_WARNING, TRUE, "Warning: Could not reschedule check of host '%s'!\n", hst->name);
			return;
		}

		/* set the next host check event and time */
		hst->next_check_event = temp_event;
		hst->next_check = check_time;

		/* save check options for retention purposes */
		hst->check_options = options;

		/* place the new event in the event queue */
		temp_event->event_type = EVENT_HOST_CHECK;
		temp_event->event_data = (void *)hst;
		temp_event->event_args = (void *)NULL;
		temp_event->event_options = options;
		temp_event->run_time = hst->next_check;
		temp_event->recurring = FALSE;
		temp_event->event_interval = 0L;
		temp_event->timing_func = NULL;
		temp_event->compensate_for_time_change = TRUE;
		add_event(nagios_squeue, temp_event);
	}

	else {
		/* reset the next check time (it may be out of sync) */
		if (temp_event != NULL) {
			hst->next_check = temp_event->run_time;
		}

		log_debug_info(DEBUGL_CHECKS, 2, "Keeping original host check event (ignoring the new one).\n");
	}

	/* update the status log */
	update_host_status(hst, FALSE);
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/*
 * Checks host dependencies.
 *
 * hst              host whose dependencies are evaluated
 * dependency_type  NOTIFICATION_DEPENDENCY selects hst->notify_deps; any
 *                  other value selects hst->exec_deps
 *
 * Each dependency is tested against the master host's state (its last hard
 * state when the master is in a soft state and soft_state_dependencies is
 * FALSE). Dependencies with inherits_parent set are followed recursively.
 *
 * Returns DEPENDENCIES_FAILED as soon as one dependency fails, otherwise
 * DEPENDENCIES_OK.
 */
int check_host_dependencies(host *hst, int dependency_type)
{
	hostdependency *temp_dependency = NULL;
	objectlist *list = NULL;
	host *temp_host = NULL;
	int state = HOST_UP;
	time_t current_time = 0L;

	log_debug_info(DEBUGL_FUNCTIONS, 0, "check_host_dependencies()\n");

	if (dependency_type == NOTIFICATION_DEPENDENCY) {
		list = hst->notify_deps;
	}
	else {
		list = hst->exec_deps;
	}

	/* check all dependencies... */
	for (; list; list = list->next) {

		temp_dependency = (hostdependency *)list->object_ptr;

		/* find the host we depend on... */
		if ((temp_host = temp_dependency->master_host_ptr) == NULL) {
			continue;
		}

		/* skip this dependency if it has a timeperiod and the current time isn't valid:
		 * it is simply not in effect right now, so the remaining dependencies must
		 * still be evaluated rather than ending the scan here */
		time(&current_time);
		if ((temp_dependency->dependency_period != NULL)
		    && (check_time_against_period(current_time, temp_dependency->dependency_period_ptr) == ERROR)) {
			continue;
		}

		/* get the status to use (use last hard state if its currently in a soft state) */
		if (temp_host->state_type == SOFT_STATE && soft_state_dependencies == FALSE) {
			state = temp_host->last_hard_state;
		}
		else {
			state = temp_host->current_state;
		}

		/* is the host we depend on in state that fails the dependency tests? */
		if (flag_isset(temp_dependency->failure_options, 1 << state)) {
			return DEPENDENCIES_FAILED;
		}

		/* immediate dependencies ok at this point - check parent dependencies if necessary */
		if ((temp_dependency->inherits_parent == TRUE)
		    && (check_host_dependencies(temp_host, dependency_type) != DEPENDENCIES_OK)) {
			return DEPENDENCIES_FAILED;
		}
	}

	return DEPENDENCIES_OK;
}
/* check for hosts that never returned from a check... */
void check_for_orphaned_hosts ( void )
{
host * temp_host = NULL ;
time_t current_time = 0L ;
time_t expected_time = 0L ;
log_debug_info ( DEBUGL_FUNCTIONS , 0 , " check_for_orphaned_hosts() \n " ) ;
2017-05-19 22:22:40 +02:00
/* get the current time */
time ( & current_time ) ;
2019-04-18 17:09:18 +02:00
/* check all hosts... */
for ( temp_host = host_list ; temp_host ! = NULL ; temp_host = temp_host - > next ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* skip hosts that don't have a set check interval (on-demand checks are missed by the orphan logic) */
if ( temp_host - > next_check = = ( time_t ) 0L ) {
continue ;
2017-05-19 22:22:40 +02:00
}
2019-04-18 17:09:18 +02:00
/* skip hosts that are not currently executing */
if ( temp_host - > is_executing = = FALSE ) {
continue ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* determine the time at which the check results should have come in (allow 10 minutes slack time) */
expected_time = ( time_t ) ( temp_host - > next_check + temp_host - > latency + host_check_timeout + check_reaper_interval + 600 ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* this host was supposed to have executed a while ago, but for some reason the results haven't come back in... */
if ( expected_time < current_time ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* log a warning */
logit ( NSLOG_RUNTIME_WARNING , TRUE ,
" Warning: The check of host '%s' looks like it was orphaned (results never came back). I'm scheduling an immediate check of the host... \n " ,
temp_host - > name ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_CHECKS , 1 ,
" Host '%s' was orphaned, so we're scheduling an immediate check... \n " ,
temp_host - > name ) ;
2017-05-19 23:37:19 +02:00
2019-04-18 17:09:18 +02:00
/* decrement the number of running host checks */
if ( currently_running_host_checks > 0 ) {
currently_running_host_checks - - ;
2017-05-19 22:22:40 +02:00
}
2019-04-18 17:09:18 +02:00
/* disable the executing flag */
temp_host - > is_executing = FALSE ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* schedule an immediate check of the host */
schedule_host_check ( temp_host , current_time , CHECK_OPTION_ORPHAN_CHECK ) ;
}
}
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* check freshness of host results */
void check_host_result_freshness ( void )
{
host * temp_host = NULL ;
time_t current_time = 0L ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_FUNCTIONS , 0 , " check_host_result_freshness() \n " ) ;
log_debug_info ( DEBUGL_CHECKS , 2 , " Attempting to check the freshness of host check results... \n " ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* bail out if we're not supposed to be checking freshness */
if ( check_host_freshness = = FALSE ) {
log_debug_info ( DEBUGL_CHECKS , 2 , " Host freshness checking is disabled. \n " ) ;
return ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* get the current time */
time ( & current_time ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* check all hosts... */
for ( temp_host = host_list ; temp_host ! = NULL ; temp_host = temp_host - > next ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* skip hosts we shouldn't be checking for freshness */
if ( temp_host - > check_freshness = = FALSE ) {
continue ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* skip hosts that have both active and passive checks disabled */
if ( temp_host - > checks_enabled = = FALSE & & temp_host - > accept_passive_checks = = FALSE ) {
continue ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* skip hosts that are currently executing (problems here will be caught by orphaned host check) */
if ( temp_host - > is_executing = = TRUE ) {
continue ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* skip hosts that are already being freshened */
if ( temp_host - > is_being_freshened = = TRUE ) {
continue ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* see if the time is right... */
if ( check_time_against_period ( current_time , temp_host - > check_period_ptr ) = = ERROR ) {
continue ;
2017-05-19 22:22:40 +02:00
}
2019-04-18 17:09:18 +02:00
/* the results for the last check of this host are stale */
if ( is_host_result_fresh ( temp_host , current_time , TRUE ) = = FALSE ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* set the freshen flag */
temp_host - > is_being_freshened = TRUE ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* schedule an immediate forced check of the host */
schedule_host_check ( temp_host , current_time , CHECK_OPTION_FORCE_EXECUTION | CHECK_OPTION_FRESHNESS_CHECK ) ;
}
}
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/*
 * Decides whether the most recent check result of a host is still fresh.
 *
 * temp_host    - host to examine
 * current_time - time the expiration time is compared against
 * log_this     - TRUE to also write a runtime warning when the result is stale
 *
 * Returns TRUE when the result is fresh, FALSE when it is stale.
 */
int is_host_result_fresh(host *temp_host, time_t current_time, int log_this)
{
	time_t expires_at = 0L;
	int threshold = 0;
	int stale_days = 0;
	int stale_hours = 0;
	int stale_minutes = 0;
	int stale_seconds = 0;
	int limit_days = 0;
	int limit_hours = 0;
	int limit_minutes = 0;
	int limit_seconds = 0;
	double interval = 0;
	int passive_after_outage = FALSE;

	log_debug_info(DEBUGL_CHECKS, 2, " Checking freshness of host '%s'... \n ", temp_host->name);

	/* a user-supplied threshold wins; otherwise derive one from the check or retry interval */
	if (temp_host->freshness_threshold != 0) {
		threshold = temp_host->freshness_threshold;
	}
	else {
		interval = (temp_host->state_type == HARD_STATE || temp_host->current_state == STATE_OK)
		           ? temp_host->check_interval
		           : temp_host->retry_interval;

		threshold = (interval * interval_length) + temp_host->latency + additional_freshness_latency;
	}

	log_debug_info(DEBUGL_CHECKS, 2, " Freshness thresholds: host=%d, use=%d \n ", temp_host->freshness_threshold, threshold);

	/*
	 * If the last check was passive we assume it will continue that way.
	 * When the program was stopped for a long time before event_start we
	 * must not spam freshness notifications, so event_start is used
	 * instead of last_check. "Long" means more than 61.8% of the
	 * freshness threshold.
	 */
	passive_after_outage = ((temp_host->check_type == CHECK_TYPE_PASSIVE)
	                        && (temp_host->last_check < event_start)
	                        && (event_start - last_program_stop > threshold * 0.618));

	if (passive_after_outage) {
		expires_at = event_start + threshold;
	}
	/* never checked: count from event_start */
	else if (temp_host->has_been_checked == FALSE) {
		expires_at = (time_t)(event_start + threshold);
	}
	/*
	 * Active checks enabled, last check older than event_start and no
	 * user-supplied threshold: count from event_start and allow for
	 * max_host_check_spread. With a user-supplied threshold last_check is
	 * used instead, because hosts would never go stale if the program is
	 * restarted more often than the threshold.
	 */
	else if ((temp_host->checks_enabled == TRUE)
	         && (event_start > temp_host->last_check)
	         && (temp_host->freshness_threshold == 0)) {
		expires_at = (time_t)(event_start + threshold + (max_host_check_spread * interval_length));
	}
	/* everything else: count from the last check */
	else {
		expires_at = (time_t)(temp_host->last_check + threshold);
	}

	log_debug_info(DEBUGL_CHECKS, 2,
	               " HBC: %d, PS: %lu, ES: %lu, LC: %lu, CT: %lu, ET: %lu \n ",
	               temp_host->has_been_checked,
	               (unsigned long)program_start,
	               (unsigned long)event_start,
	               (unsigned long)temp_host->last_check,
	               (unsigned long)current_time,
	               (unsigned long)expires_at);

	/* not expired yet - the result is fresh */
	if (expires_at >= current_time) {
		log_debug_info(DEBUGL_CHECKS, 1, " Check results for host '%s' are fresh. \n ", temp_host->name);
		return TRUE;
	}

	/* stale: break the overdue time and the threshold down for the messages */
	get_time_breakdown((current_time - expires_at), &stale_days, &stale_hours, &stale_minutes, &stale_seconds);
	get_time_breakdown(threshold, &limit_days, &limit_hours, &limit_minutes, &limit_seconds);

	/* the runtime warning is only written on request */
	if (log_this == TRUE) {
		logit(NSLOG_RUNTIME_WARNING, TRUE,
		      " Warning: The results of host '%s' are stale by %dd %dh %dm %ds (threshold=%dd %dh %dm %ds). I'm forcing an immediate check of the host. \n ",
		      temp_host->name,
		      stale_days, stale_hours, stale_minutes, stale_seconds,
		      limit_days, limit_hours, limit_minutes, limit_seconds);
	}

	log_debug_info(DEBUGL_CHECKS, 1,
	               " Check results for host '%s' are stale by %dd %dh %dm %ds (threshold=%dd %dh %dm %ds). Forcing an immediate check of the host... \n ",
	               temp_host->name,
	               stale_days, stale_hours, stale_minutes, stale_seconds,
	               limit_days, limit_hours, limit_minutes, limit_seconds);

	return FALSE;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/*
 * Runs a scheduled host check asynchronously and, when the check could not
 * be started, works out a new check time and puts the host back into the
 * scheduling queue.
 *
 * hst           - host to check; ERROR is returned when it is NULL
 * check_options - CHECK_OPTION_* flags, also reused when rescheduling
 * latency       - latency value handed through to run_async_host_check()
 *
 * Returns OK when run_async_host_check() did not report ERROR, otherwise
 * ERROR (after the rescheduling attempt).
 */
int run_scheduled_host_check(host *hst, int check_options, double latency)
{
	int result = OK;
	time_t current_time = 0L;
	time_t preferred_time = 0L;
	time_t next_valid_time = 0L;
	time_t scheduled_time = 0L;
	int time_is_valid = TRUE;

	log_debug_info(DEBUGL_FUNCTIONS, 0, " run_scheduled_host_check() \n ");

	if (hst == NULL) {
		return ERROR;
	}

	log_debug_info(DEBUGL_CHECKS, 0, " Attempting to run scheduled check of host '%s': check options=%d, latency=%lf \n ", hst->name, check_options, latency);

	/*
	 * reset the next_check_event so we know this host
	 * check is no longer in the scheduling queue
	 */
	hst->next_check_event = NULL;

	/* attempt to run the check */
	result = run_async_host_check(hst, check_options, latency, TRUE, TRUE, &time_is_valid, &preferred_time);

	/* an error occurred, so reschedule the check */
	if (result == ERROR) {

		log_debug_info(DEBUGL_CHECKS, 1, " Unable to run scheduled host check at this time \n ");

		/* only attempt to (re)schedule checks that should get checked... */
		if (hst->should_be_scheduled == TRUE) {

			/* get current time */
			time(&current_time);

			/* determine next time we should check the host if needed */
			/* if host has no check interval, schedule it again for 5 minutes from now */
			if (current_time >= preferred_time) {
				preferred_time = current_time + ((hst->check_interval <= 0) ? 300 : (hst->check_interval * interval_length));
			}

			/* make sure we rescheduled the next host check at a valid time */
			get_next_valid_time(preferred_time, &next_valid_time, hst->check_period_ptr);

			/*
			 * If the host really can't be rescheduled properly we
			 * take whatever reschedule_within_timeperiod() returns
			 * and try again then
			 */
			if ((time_is_valid == FALSE)
				&& (check_time_against_period(next_valid_time, hst->check_period_ptr) == ERROR)) {

				hst->next_check = reschedule_within_timeperiod(next_valid_time, hst->check_period_ptr, check_window(hst));

				/*
				 * Report the time the check is actually scheduled for.
				 * preferred_time is not the value stored in next_check.
				 */
				scheduled_time = hst->next_check;
				logit(NSLOG_RUNTIME_WARNING, TRUE, " Warning: Check of host '%s' could not be rescheduled properly. Scheduling check for %s... \n ", hst->name, ctime(&scheduled_time));
				log_debug_info(DEBUGL_CHECKS, 1, " Unable to find any valid times to reschedule the next host check! \n ");
			}

			/* this host could be rescheduled... */
			else {
				hst->next_check = next_valid_time;
				if (next_valid_time > preferred_time) {
					/*
					 * Next valid time is further in the future because of
					 * timeperiod constraints. Add a random amount so we
					 * don't get all checks subject to that timeperiod
					 * constraint scheduled at the same time
					 */
					hst->next_check = reschedule_within_timeperiod(next_valid_time, hst->check_period_ptr, check_window(hst));
				}
				hst->should_be_scheduled = TRUE;

				/*
				 * Log next_check rather than next_valid_time: the two
				 * differ whenever the time was moved inside the
				 * timeperiod above. The string returned by ctime()
				 * already ends in a newline.
				 */
				scheduled_time = hst->next_check;
				log_debug_info(DEBUGL_CHECKS, 1, " Rescheduled next host check for %s ", ctime(&scheduled_time));
			}
		}

		/* update the status log */
		update_host_status(hst, FALSE);

		/* reschedule the next host check - unless we couldn't find a valid next check time */
		/* 10/19/07 EG - keep original check options */
		if (hst->should_be_scheduled == TRUE) {
			schedule_host_check(hst, hst->next_check, check_options);
		}

		return ERROR;
	}

	return OK;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/*
 * Performs an asynchronous check of a host by expanding its check command
 * and handing it to wproc_run_check(). Scheduled host checks use this, as
 * do some checks that result from on-demand checks.
 *
 * hst              - host to check; ERROR is returned when it is NULL
 * check_options    - CHECK_OPTION_* flags; CHECK_OPTION_FORCE_EXECUTION
 *                    bypasses the "already executing" and cache horizon tests
 * latency          - latency recorded in the check result; it is also put
 *                    into hst->latency temporarily for macros and the broker
 * scheduled_check  - TRUE for a scheduled check (clears hst->check_options)
 * reschedule_check - stored in the check result
 * time_is_valid    - passed through to check_host_check_viability()
 * preferred_time   - passed through to check_host_check_viability(); moved
 *                    forward by check_window(hst) when a broker module
 *                    cancels the check
 *
 * Returns ERROR when the check was not started, OK when a broker module
 * took the check over, and otherwise the result of wproc_run_check().
 */
int run_async_host_check(host *hst, int check_options, double latency, int scheduled_check, int reschedule_check, int *time_is_valid, time_t *preferred_time)
{
	nagios_macros mac;
	char *raw_command = NULL;
	char *processed_command = NULL;
	struct timeval start_time, end_time;
	double old_latency = 0.0;
	check_result *cr;
	int runchk_result = OK;
	int macro_options = STRIP_ILLEGAL_MACRO_CHARS | ESCAPE_MACRO_CHARS;
#ifdef USE_EVENT_BROKER
	int neb_result = OK;
#endif

	log_debug_info(DEBUGL_FUNCTIONS, 0, " run_async_host_check(%s ...) \n ", hst ? hst->name : " (NULL host!) ");

	/* make sure we have a host */
	if (hst == NULL)
		return ERROR;

	log_debug_info(DEBUGL_CHECKS, 0, " ** Running async check of host '%s'... \n ", hst->name);

	/* abort if check is already running or was recently completed */
	if (!(check_options & CHECK_OPTION_FORCE_EXECUTION)) {
		if (hst->is_executing == TRUE) {
			log_debug_info(DEBUGL_CHECKS, 1, " A check of this host is already being executed, so we'll pass for the moment... \n ");
			return ERROR;
		}

		if (hst->last_check + cached_host_check_horizon > time(NULL)) {
			log_debug_info(DEBUGL_CHECKS, 0, " Host '%s' was last checked within its cache horizon. Aborting check \n ", hst->name);
			return ERROR;
		}
	}

	log_debug_info(DEBUGL_CHECKS, 0, " Host '%s' passed first hurdle (caching/execution) \n ", hst->name);

	/* is the host check viable at this time? */
	if (check_host_check_viability(hst, check_options, time_is_valid, preferred_time) == ERROR) {
		log_debug_info(DEBUGL_CHECKS, 0, " Host check isn't viable at this point. \n ");
		return ERROR;
	}

	/******** GOOD TO GO FOR A REAL HOST CHECK AT THIS POINT ********/

#ifdef USE_EVENT_BROKER
	/* initialize start/end times */
	start_time.tv_sec = 0L;
	start_time.tv_usec = 0L;
	end_time.tv_sec = 0L;
	end_time.tv_usec = 0L;

	/* send data to event broker */
	neb_result = broker_host_check(NEBTYPE_HOSTCHECK_ASYNC_PRECHECK, NEBFLAG_NONE, NEBATTR_NONE, hst, CHECK_TYPE_ACTIVE, hst->current_state, hst->state_type, start_time, end_time, hst->check_command, hst->latency, 0.0, host_check_timeout, FALSE, 0, NULL, NULL, NULL, NULL, NULL, NULL);

	/* neb module wants to cancel the host check - the check will be rescheduled for a later time by the scheduling logic */
	if (neb_result == NEBERROR_CALLBACKCANCEL) {
		log_debug_info(DEBUGL_CHECKS, 0, " Check of host '%s' (id=%u) was cancelled by a module \n ", hst->name, hst->id);
		if (preferred_time) {
			*preferred_time += check_window(hst);
		}
		return ERROR;
	}

	/* neb module wants to override the host check - perhaps it will check the host itself */
	/* NOTE: if a module does this, it has to do a lot of the stuff found below to make sure things don't get whacked out of shape! */
	/* NOTE: it would be easier for modules to override checks when the NEBTYPE_HOSTCHECK_INITIATE event is called (later) */
	if (neb_result == NEBERROR_CALLBACKOVERRIDE) {
		log_debug_info(DEBUGL_CHECKS, 0, " Check of host '%s' (id=%u) was overridden by a module \n ", hst->name, hst->id);
		return OK;
	}
#endif

	log_debug_info(DEBUGL_CHECKS, 0, " Checking host '%s'... \n ", hst->name);

	/* clear check options - we don't want old check options retained */
	/* only clear options if this was a scheduled check - on demand check options shouldn't affect retained info */
	if (scheduled_check == TRUE) {
		hst->check_options = CHECK_OPTION_NONE;
	}

	/*
	 * set latency (temporarily) for macros and event broker; every exit
	 * path below has to put old_latency back
	 */
	old_latency = hst->latency;
	hst->latency = latency;

	/* grab the host macro variables */
	memset(&mac, 0, sizeof(mac));
	grab_host_macros_r(&mac, hst);

	/* get the raw command line */
	get_raw_command_line_r(&mac, hst->check_command_ptr, hst->check_command, &raw_command, macro_options);
	if (raw_command == NULL) {
		clear_volatile_macros_r(&mac);
		/* no check will run, so the temporary latency must not stick */
		hst->latency = old_latency;
		log_debug_info(DEBUGL_CHECKS, 0, " Raw check command for host '%s' was NULL - aborting. \n ", hst->name);
		return ERROR;
	}

	/* process any macros contained in the argument */
	process_macros_r(&mac, raw_command, &processed_command, macro_options);
	my_free(raw_command);
	if (processed_command == NULL) {
		clear_volatile_macros_r(&mac);
		hst->latency = old_latency;
		log_debug_info(DEBUGL_CHECKS, 0, " Processed check command for host '%s' was NULL - aborting. \n ", hst->name);
		return ERROR;
	}

	/* get the command start time */
	gettimeofday(&start_time, NULL);

	cr = calloc(1, sizeof(*cr));
	if (!cr) {
		log_debug_info(DEBUGL_CHECKS, 0, " Failed to allocate checkresult struct \n ");
		clear_volatile_macros_r(&mac);
		clear_host_macros_r(&mac);
		hst->latency = old_latency;
		/* the expanded command line is ours and would leak otherwise */
		my_free(processed_command);
		return ERROR;
	}
	init_check_result(cr);

	/* save check info */
	cr->object_check_type = HOST_CHECK;
	cr->host_name = (char *)strdup(hst->name);
	cr->service_description = NULL;
	cr->check_type = CHECK_TYPE_ACTIVE;
	cr->check_options = check_options;
	cr->scheduled_check = scheduled_check;
	cr->reschedule_check = reschedule_check;
	cr->latency = latency;
	cr->start_time = start_time;
	cr->finish_time = start_time;
	cr->early_timeout = FALSE;
	cr->exited_ok = TRUE;
	cr->return_code = STATE_OK;
	cr->output = NULL;

#ifdef USE_EVENT_BROKER
	/* send data to event broker */
	neb_result = broker_host_check(NEBTYPE_HOSTCHECK_INITIATE, NEBFLAG_NONE, NEBATTR_NONE, hst, CHECK_TYPE_ACTIVE, hst->current_state, hst->state_type, start_time, end_time, hst->check_command, hst->latency, 0.0, host_check_timeout, FALSE, 0, processed_command, NULL, NULL, NULL, NULL, cr);

	/* neb module wants to override the host check - perhaps it will check the host itself */
	if (neb_result == NEBERROR_CALLBACKOVERRIDE) {
		clear_volatile_macros_r(&mac);
		hst->latency = old_latency;
		free_check_result(cr);
		my_free(cr);
		my_free(processed_command);
		return OK;
	}
#endif

	/* reset latency (permanent value for this check will get set later) */
	hst->latency = old_latency;

	/*
	 * NOTE(review): cr is not released in this function after
	 * wproc_run_check(), not even when it reports ERROR; presumably the
	 * worker layer takes ownership of it - confirm.
	 */
	runchk_result = wproc_run_check(cr, processed_command, &mac);
	if (runchk_result == ERROR) {
		logit(NSLOG_RUNTIME_ERROR, TRUE, " Unable to send check for host '%s' to worker (ret=%d) \n ", hst->name, runchk_result);
	}
	else {
		/* do the book-keeping */
		currently_running_host_checks++;
		hst->is_executing = TRUE;
		update_check_stats((scheduled_check == TRUE) ? ACTIVE_SCHEDULED_HOST_CHECK_STATS : ACTIVE_ONDEMAND_HOST_CHECK_STATS, start_time.tv_sec);
		update_check_stats(PARALLEL_HOST_CHECK_STATS, start_time.tv_sec);
	}

	/* free memory */
	clear_volatile_macros_r(&mac);
	my_free(processed_command);

	return runchk_result;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* process results of an asynchronous host check */
2017-05-19 22:22:40 +02:00
/* checks viability of performing a host check */
2019-04-18 17:09:18 +02:00
int check_host_check_viability ( host * hst , int check_options , int * time_is_valid , time_t * new_time )
{
2017-05-19 22:22:40 +02:00
int perform_check = TRUE ;
time_t current_time = 0L ;
time_t preferred_time = 0L ;
int check_interval = 0 ;
2017-05-19 23:37:19 +02:00
log_debug_info ( DEBUGL_FUNCTIONS , 0 , " check_host_check_viability() \n " ) ;
2017-05-19 22:22:40 +02:00
/* make sure we have a host */
2019-04-18 17:09:18 +02:00
if ( hst = = NULL ) {
2017-05-19 22:22:40 +02:00
return ERROR ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* get the check interval to use if we need to reschedule the check */
2019-04-18 17:09:18 +02:00
if ( hst - > state_type = = SOFT_STATE & & hst - > current_state ! = HOST_UP ) {
2017-05-19 22:22:40 +02:00
check_interval = ( hst - > retry_interval * interval_length ) ;
2019-04-18 17:09:18 +02:00
}
else {
2017-05-19 22:22:40 +02:00
check_interval = ( hst - > check_interval * interval_length ) ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* make sure check interval is positive - otherwise use 5 minutes out for next check */
2019-04-18 17:09:18 +02:00
if ( check_interval < = 0 ) {
2017-05-19 22:22:40 +02:00
check_interval = 300 ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* get the current time */
time ( & current_time ) ;
/* initialize the next preferred check time */
preferred_time = current_time ;
/* can we check the host right now? */
2019-04-18 17:09:18 +02:00
if ( ! ( check_options & CHECK_OPTION_FORCE_EXECUTION ) ) {
2017-05-19 22:22:40 +02:00
/* if checks of the host are currently disabled... */
2019-04-18 17:09:18 +02:00
if ( hst - > checks_enabled = = FALSE ) {
2017-05-19 22:22:40 +02:00
preferred_time = current_time + check_interval ;
perform_check = FALSE ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* make sure this is a valid time to check the host */
2019-04-18 17:09:18 +02:00
if ( check_time_against_period ( ( unsigned long ) current_time , hst - > check_period_ptr ) = = ERROR ) {
2017-05-19 23:37:19 +02:00
log_debug_info ( DEBUGL_CHECKS , 0 , " Timeperiod check failed \n " ) ;
2017-05-19 22:22:40 +02:00
preferred_time = current_time ;
2019-04-18 17:09:18 +02:00
if ( time_is_valid ) {
2017-05-19 22:22:40 +02:00
* time_is_valid = FALSE ;
}
2019-04-18 17:09:18 +02:00
perform_check = FALSE ;
}
2017-05-19 22:22:40 +02:00
/* check host dependencies for execution */
2019-04-18 17:09:18 +02:00
if ( check_host_dependencies ( hst , EXECUTION_DEPENDENCY ) = = DEPENDENCIES_FAILED ) {
2017-05-19 23:37:19 +02:00
log_debug_info ( DEBUGL_CHECKS , 0 , " Host check dependencies failed \n " ) ;
2017-05-19 22:22:40 +02:00
preferred_time = current_time + check_interval ;
perform_check = FALSE ;
2019-04-18 17:09:18 +02:00
if ( host_skip_check_dependency_status > = 0 ) {
hst - > current_state = host_skip_check_dependency_status ;
2017-05-19 22:22:40 +02:00
}
}
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* pass back the next viable check time */
2019-04-18 17:09:18 +02:00
if ( new_time ) {
2017-05-19 22:22:40 +02:00
* new_time = preferred_time ;
}
2019-04-18 17:09:18 +02:00
if ( perform_check = = TRUE ) {
return OK ;
2017-05-19 22:22:40 +02:00
}
2019-04-18 17:09:18 +02:00
return ERROR ;
}
2017-05-19 22:22:40 +02:00
/* determination of the host's state based on route availability*/
/* used only to determine difference between DOWN and UNREACHABLE states */
2019-04-18 17:09:18 +02:00
inline int determine_host_reachability ( host * hst )
{
2017-05-19 22:22:40 +02:00
host * parent_host = NULL ;
hostsmember * temp_hostsmember = NULL ;
2017-05-19 23:37:19 +02:00
log_debug_info ( DEBUGL_FUNCTIONS , 0 , " determine_host_reachability(host=%s) \n " , hst ? hst - > name : " (NULL host!) " ) ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
if ( hst = = NULL )
2017-05-19 22:22:40 +02:00
return HOST_DOWN ;
2017-05-19 23:37:19 +02:00
log_debug_info ( DEBUGL_CHECKS , 2 , " Determining state of host '%s': current state=%d (%s) \n " , hst - > name , hst - > current_state , host_state_name ( hst - > current_state ) ) ;
2017-05-19 22:22:40 +02:00
/* host is UP - no translation needed */
2019-04-18 17:09:18 +02:00
if ( hst - > current_state = = HOST_UP ) {
2017-05-19 22:22:40 +02:00
log_debug_info ( DEBUGL_CHECKS , 2 , " Host is UP, no state translation needed. \n " ) ;
2017-05-19 23:37:19 +02:00
return HOST_UP ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* host has no parents, so it is DOWN */
2019-04-18 17:09:18 +02:00
if ( hst - > check_type = = CHECK_TYPE_PASSIVE & & hst - > current_state = = HOST_UNREACHABLE ) {
2017-05-19 23:37:19 +02:00
log_debug_info ( DEBUGL_CHECKS , 2 , " Passive check so keep it UNREACHABLE. \n " ) ;
return HOST_UNREACHABLE ;
2019-04-18 17:09:18 +02:00
}
else if ( hst - > parent_hosts = = NULL ) {
2017-05-19 22:22:40 +02:00
log_debug_info ( DEBUGL_CHECKS , 2 , " Host has no parents, so it is DOWN. \n " ) ;
2017-05-19 23:37:19 +02:00
return HOST_DOWN ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* check all parent hosts to see if we're DOWN or UNREACHABLE */
else {
for ( temp_hostsmember = hst - > parent_hosts ; temp_hostsmember ! = NULL ; temp_hostsmember = temp_hostsmember - > next ) {
2017-05-19 23:37:19 +02:00
parent_host = temp_hostsmember - > host_ptr ;
log_debug_info ( DEBUGL_CHECKS , 2 , " Parent '%s' is %s \n " , parent_host - > name , host_state_name ( parent_host - > current_state ) ) ;
2017-05-19 22:22:40 +02:00
/* bail out as soon as we find one parent host that is UP */
2019-04-18 17:09:18 +02:00
if ( parent_host - > current_state = = HOST_UP ) {
2017-05-19 22:22:40 +02:00
/* set the current state */
log_debug_info ( DEBUGL_CHECKS , 2 , " At least one parent (%s) is up, so host is DOWN. \n " , parent_host - > name ) ;
2017-05-19 23:37:19 +02:00
return HOST_DOWN ;
2017-05-19 22:22:40 +02:00
}
}
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
log_debug_info ( DEBUGL_CHECKS , 2 , " No parents were up, so host is UNREACHABLE. \n " ) ;
return HOST_UNREACHABLE ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/******************************************************************/
/****************** HOST STATE HANDLER FUNCTIONS ******************/
/******************************************************************/
2017-05-19 23:37:19 +02:00
/* Parses raw plugin output and returns: short and long output, perf data. */
2019-04-18 17:09:18 +02:00
int parse_check_output ( char * buf , char * * short_output , char * * long_output , char * * perf_data , int escape_newlines_please , int newlines_are_escaped )
{
2017-05-19 22:22:40 +02:00
int current_line = 0 ;
int eof = FALSE ;
int in_perf_data = FALSE ;
2017-05-19 23:37:19 +02:00
const int dbuf_chunk = 1024 ;
dbuf long_text ;
dbuf perf_text ;
char * ptr = NULL ;
int x = 0 ;
int y = 0 ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* Initialize output values. */
2019-04-18 17:09:18 +02:00
if ( short_output ) {
2017-05-19 22:22:40 +02:00
* short_output = NULL ;
2019-04-18 17:09:18 +02:00
}
if ( long_output ) {
2017-05-19 22:22:40 +02:00
* long_output = NULL ;
2019-04-18 17:09:18 +02:00
}
if ( perf_data ) {
2017-05-19 22:22:40 +02:00
* perf_data = NULL ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* No input provided or no output requested, nothing to do. */
2019-04-18 17:09:18 +02:00
if ( ! buf
| | ! * buf
| | ( ! short_output & & ! long_output & & ! perf_data ) ) {
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
return OK ;
}
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* Initialize dynamic buffers (1KB chunk size). */
dbuf_init ( & long_text , dbuf_chunk ) ;
dbuf_init ( & perf_text , dbuf_chunk ) ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* We should never need to worry about unescaping here again. We assume a
* common internal plugin output format that is newline delimited . */
if ( newlines_are_escaped ) {
for ( x = 0 , y = 0 ; buf [ x ] ; x + + ) {
if ( buf [ x ] = = ' \\ ' & & buf [ x + 1 ] = = ' \\ ' ) {
2017-05-19 22:22:40 +02:00
x + + ;
buf [ y + + ] = buf [ x ] ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 23:37:19 +02:00
else if ( buf [ x ] = = ' \\ ' & & buf [ x + 1 ] = = ' n ' ) {
2017-05-19 22:22:40 +02:00
x + + ;
buf [ y + + ] = ' \n ' ;
2019-04-18 17:09:18 +02:00
}
else {
2017-05-19 22:22:40 +02:00
buf [ y + + ] = buf [ x ] ;
}
}
2019-04-18 17:09:18 +02:00
buf [ y ] = ' \0 ' ;
}
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* Process each line of input. */
for ( x = 0 ; ! eof & & buf [ 0 ] ; x + + ) {
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* Continue on until we reach the end of a line (or input). */
2019-04-18 17:09:18 +02:00
if ( buf [ x ] = = ' \n ' ) {
2017-05-19 23:37:19 +02:00
buf [ x ] = ' \0 ' ;
2019-04-18 17:09:18 +02:00
}
else if ( buf [ x ] = = ' \0 ' ) {
2017-05-19 22:22:40 +02:00
eof = TRUE ;
2019-04-18 17:09:18 +02:00
}
else {
2017-05-19 23:37:19 +02:00
continue ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* Handle this line of input. */
current_line + + ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* The first line contains short plugin output and optional perf data. */
if ( current_line = = 1 ) {
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* Get the short plugin output. If buf[0] is '|', strtok() will
2019-04-18 17:09:18 +02:00
* return buf + 1 or NULL if buf [ 1 ] is ' \0 ' . We use my_strtok_with_free ( )
2017-05-19 23:37:19 +02:00
* instead which returns a pointer to ' \0 ' in this case . */
2019-04-18 17:09:18 +02:00
ptr = my_strtok_with_free ( buf , " | " , FALSE ) ;
if ( ptr ! = NULL ) {
2017-05-19 23:37:19 +02:00
if ( short_output ) {
2019-04-18 17:09:18 +02:00
/* Remove leading and trailing whitespace. */
strip ( ptr ) ;
2017-05-19 23:37:19 +02:00
* short_output = strdup ( ptr ) ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* Get the optional perf data. */
2019-04-18 17:09:18 +02:00
ptr = my_strtok_with_free ( NULL , " \n " , FALSE ) ;
if ( ptr ! = NULL ) {
2017-05-19 23:37:19 +02:00
dbuf_strcat ( & perf_text , ptr ) ;
2017-05-19 22:22:40 +02:00
}
2019-04-18 17:09:18 +02:00
/* free anything we've allocated */
my_strtok_with_free ( NULL , NULL , TRUE ) ;
2017-05-19 22:22:40 +02:00
}
2019-04-18 17:09:18 +02:00
}
2017-05-19 23:37:19 +02:00
/* Additional lines contain long plugin output and optional perf data.
* Once we ' ve hit perf data , the rest of the output is perf data . */
else if ( in_perf_data ) {
2019-04-18 17:09:18 +02:00
if ( perf_text . buf & & * perf_text . buf ) {
2017-05-19 23:37:19 +02:00
dbuf_strcat ( & perf_text , " " ) ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 23:37:19 +02:00
dbuf_strcat ( & perf_text , buf ) ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* Look for the perf data separator. */
else if ( strchr ( buf , ' | ' ) ) {
in_perf_data = TRUE ;
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
ptr = my_strtok_with_free ( buf , " | " , FALSE ) ;
if ( ptr ! = NULL ) {
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* Get the remaining long plugin output. */
2019-04-18 17:09:18 +02:00
if ( current_line > 2 ) {
2017-05-19 23:37:19 +02:00
dbuf_strcat ( & long_text , " \n " ) ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 23:37:19 +02:00
dbuf_strcat ( & long_text , ptr ) ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* Get the perf data. */
2019-04-18 17:09:18 +02:00
ptr = my_strtok_with_free ( NULL , " \n " , FALSE ) ;
if ( ptr ! = NULL ) {
if ( perf_text . buf & & * perf_text . buf ) {
2017-05-19 23:37:19 +02:00
dbuf_strcat ( & perf_text , " " ) ;
2017-05-19 22:22:40 +02:00
}
2019-04-18 17:09:18 +02:00
dbuf_strcat ( & perf_text , ptr ) ;
2017-05-19 22:22:40 +02:00
}
2017-05-19 23:37:19 +02:00
2019-04-18 17:09:18 +02:00
/* free anything we've allocated */
my_strtok_with_free ( NULL , NULL , TRUE ) ;
2017-05-19 22:22:40 +02:00
}
2019-04-18 17:09:18 +02:00
}
2017-05-19 23:37:19 +02:00
/* Otherwise it's still just long output. */
else {
2019-04-18 17:09:18 +02:00
if ( current_line > 2 ) {
2017-05-19 23:37:19 +02:00
dbuf_strcat ( & long_text , " \n " ) ;
}
2019-04-18 17:09:18 +02:00
dbuf_strcat ( & long_text , buf ) ;
}
2017-05-19 23:37:19 +02:00
/* Point buf to the start of the next line. *(buf+x+1) will be a valid
* memory reference on our next iteration or we are at the end of input
* ( eof = = TRUE ) and * ( buf + x + 1 ) will never be referenced . */
buf + = x + 1 ;
2019-04-18 17:09:18 +02:00
/* x will be incremented to 0 by the loop update. */
x = - 1 ;
}
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* Save long output. */
if ( long_output & & long_text . buf & & * long_text . buf ) {
2019-04-18 17:09:18 +02:00
2017-05-19 23:37:19 +02:00
/* Escape newlines (and backslashes) in long output if requested. */
2019-04-18 17:09:18 +02:00
if ( escape_newlines_please ) {
2017-05-19 23:37:19 +02:00
* long_output = escape_newlines ( long_text . buf ) ;
2019-04-18 17:09:18 +02:00
}
else {
2017-05-19 23:37:19 +02:00
* long_output = strdup ( long_text . buf ) ;
}
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* Save perf data. */
if ( perf_data & & perf_text . buf & & * perf_text . buf ) {
2019-04-18 17:09:18 +02:00
/* Remove leading and trailing whitespace. */
strip ( perf_text . buf ) ;
2017-05-19 23:37:19 +02:00
* perf_data = strdup ( perf_text . buf ) ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* free dynamic buffers */
2017-05-19 23:37:19 +02:00
dbuf_free ( & long_text ) ;
dbuf_free ( & perf_text ) ;
2017-05-19 22:22:40 +02:00
return OK ;
2019-04-18 17:09:18 +02:00
}