2017-05-19 22:22:40 +02:00
/*****************************************************************************
*
* EVENTS . C - Timed event functions for Nagios
*
*
* License :
*
* This program is free software ; you can redistribute it and / or modify
* it under the terms of the GNU General Public License version 2 as
* published by the Free Software Foundation .
*
* This program is distributed in the hope that it will be useful ,
* but WITHOUT ANY WARRANTY ; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE . See the
* GNU General Public License for more details .
*
* You should have received a copy of the GNU General Public License
* along with this program ; if not , write to the Free Software
* Foundation , Inc . , 675 Mass Ave , Cambridge , MA 02139 , USA .
*
* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
# include "../include/config.h"
# include "../include/common.h"
# include "../include/downtime.h"
# include "../include/comments.h"
# include "../include/statusdata.h"
# include "../include/nagios.h"
# include "../include/broker.h"
# include "../include/sretention.h"
2017-05-19 23:37:19 +02:00
# include "../include/workers.h"
# include "../lib/squeue.h"
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/*
 * the event we're currently processing.
 * File-scope static without an initializer, so it is NULL until
 * something assigns it.
 */
static timed_event * current_event ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/*
 * Per-event-type counters, indexed by event type (0 .. EVENT_USER_FUNCTION).
 * track_events() adjusts a slot by a signed delta and dump_event_stats()
 * reports the current values.
 */
static unsigned int event_count [ EVENT_USER_FUNCTION + 1 ] ;
2017-05-19 22:22:40 +02:00
/******************************************************************/
/************ EVENT SCHEDULING/HANDLING FUNCTIONS *****************/
/******************************************************************/
2017-05-19 23:37:19 +02:00
/*
 * Write the per-type event counters, followed by the number of entries
 * currently held in nagios_squeue, to the socket descriptor 'sd'.
 *
 * Always returns OK.
 */
int dump_event_stats(int sd)
{
	unsigned int type = 0;

	while (type < ARRAY_SIZE(event_count)) {
		nsock_printf(sd, " %s=%u; ", EVENT_TYPE_STR(type), event_count[type]);

		/*
		 * VERSIONFIX: Make EVENT_SLEEP and EVENT_USER_FUNCTION
		 * appear in linear order in include/nagios.h when we go
		 * from 4.0 -> 4.1, so we can remove this junk.
		 *
		 * Until then, the slot reported after 16 is 98.
		 */
		if (type == 16)
			type = 98;
		else
			type++;
	}

	nsock_printf_nul(sd, " SQUEUE_ENTRIES=%u ", squeue_size(nagios_squeue));

	return OK;
}
/*
 * Adjust the counter for event type 'type' by 'add'.
 *
 * remove_event() passes add == -1 and add_event() passes add == 1.
 * Types that fall outside the event_count table are silently ignored.
 */
static void track_events(unsigned int type, int add)
{
	if (type >= ARRAY_SIZE(event_count))
		return;

	event_count[type] += add;
}
2017-05-19 22:22:40 +02:00
/* initialize the event timing loop before we start monitoring */
void init_timing_loop ( void ) {
host * temp_host = NULL ;
service * temp_service = NULL ;
time_t current_time = 0L ;
int total_interleave_blocks = 0 ;
int current_interleave_block = 1 ;
int interleave_block_index = 0 ;
int mult_factor = 0 ;
int is_valid_time = 0 ;
time_t next_valid_time = 0L ;
int schedule_check = 0 ;
double max_inter_check_delay = 0.0 ;
struct timeval tv [ 9 ] ;
double runtime [ 9 ] ;
2017-05-19 23:37:19 +02:00
struct timeval now ;
unsigned int fixed_hosts = 0 , fixed_services = 0 ;
int check_delay = 0 ;
2017-05-19 22:22:40 +02:00
log_debug_info ( DEBUGL_FUNCTIONS , 0 , " init_timing_loop() start \n " ) ;
2017-05-19 23:37:19 +02:00
/* get the time and seed the prng */
gettimeofday ( & now , NULL ) ;
current_time = now . tv_sec ;
srand ( ( now . tv_sec < < 10 ) ^ now . tv_usec ) ;
2017-05-19 22:22:40 +02:00
/******** GET BASIC HOST/SERVICE INFO ********/
scheduling_info . total_services = 0 ;
scheduling_info . total_scheduled_services = 0 ;
scheduling_info . total_hosts = 0 ;
scheduling_info . total_scheduled_hosts = 0 ;
scheduling_info . average_services_per_host = 0.0 ;
scheduling_info . average_scheduled_services_per_host = 0.0 ;
scheduling_info . average_service_execution_time = 0.0 ;
scheduling_info . service_check_interval_total = 0 ;
scheduling_info . average_service_inter_check_delay = 0.0 ;
scheduling_info . host_check_interval_total = 0 ;
scheduling_info . average_host_inter_check_delay = 0.0 ;
if ( test_scheduling = = TRUE )
gettimeofday ( & tv [ 0 ] , NULL ) ;
/* get info on service checks to be scheduled */
for ( temp_service = service_list ; temp_service ! = NULL ; temp_service = temp_service - > next ) {
schedule_check = TRUE ;
/* service has no check interval */
if ( temp_service - > check_interval = = 0 )
schedule_check = FALSE ;
/* active checks are disabled */
if ( temp_service - > checks_enabled = = FALSE )
schedule_check = FALSE ;
/* are there any valid times this service can be checked? */
is_valid_time = check_time_against_period ( current_time , temp_service - > check_period_ptr ) ;
if ( is_valid_time = = ERROR ) {
get_next_valid_time ( current_time , & next_valid_time , temp_service - > check_period_ptr ) ;
if ( current_time = = next_valid_time )
schedule_check = FALSE ;
}
if ( schedule_check = = TRUE ) {
2017-05-19 23:37:19 +02:00
double exec_time ;
/* get real exec time, or make a pessimistic guess */
exec_time = temp_service - > execution_time ? temp_service - > execution_time : 2.0 ;
2017-05-19 22:22:40 +02:00
scheduling_info . total_scheduled_services + + ;
/* used later in inter-check delay calculations */
scheduling_info . service_check_interval_total + = temp_service - > check_interval ;
/* calculate rolling average execution time (available from retained state information) */
2017-05-19 23:37:19 +02:00
scheduling_info . average_service_execution_time = ( double ) ( ( ( scheduling_info . average_service_execution_time * ( scheduling_info . total_scheduled_services - 1 ) ) + exec_time ) / ( double ) scheduling_info . total_scheduled_services ) ;
2017-05-19 22:22:40 +02:00
}
else {
temp_service - > should_be_scheduled = FALSE ;
log_debug_info ( DEBUGL_EVENTS , 1 , " Service '%s' on host '%s' should not be scheduled. \n " , temp_service - > description , temp_service - > host_name ) ;
}
scheduling_info . total_services + + ;
}
if ( test_scheduling = = TRUE )
gettimeofday ( & tv [ 1 ] , NULL ) ;
/* get info on host checks to be scheduled */
for ( temp_host = host_list ; temp_host ! = NULL ; temp_host = temp_host - > next ) {
schedule_check = TRUE ;
/* host has no check interval */
if ( temp_host - > check_interval = = 0 )
schedule_check = FALSE ;
/* active checks are disabled */
if ( temp_host - > checks_enabled = = FALSE )
schedule_check = FALSE ;
/* are there any valid times this host can be checked? */
is_valid_time = check_time_against_period ( current_time , temp_host - > check_period_ptr ) ;
if ( is_valid_time = = ERROR ) {
get_next_valid_time ( current_time , & next_valid_time , temp_host - > check_period_ptr ) ;
if ( current_time = = next_valid_time )
schedule_check = FALSE ;
}
if ( schedule_check = = TRUE ) {
scheduling_info . total_scheduled_hosts + + ;
/* this is used later in inter-check delay calculations */
scheduling_info . host_check_interval_total + = temp_host - > check_interval ;
}
else {
temp_host - > should_be_scheduled = FALSE ;
log_debug_info ( DEBUGL_EVENTS , 1 , " Host '%s' should not be scheduled. \n " , temp_host - > name ) ;
}
scheduling_info . total_hosts + + ;
}
if ( test_scheduling = = TRUE )
gettimeofday ( & tv [ 2 ] , NULL ) ;
scheduling_info . average_services_per_host = ( double ) ( ( double ) scheduling_info . total_services / ( double ) scheduling_info . total_hosts ) ;
scheduling_info . average_scheduled_services_per_host = ( double ) ( ( double ) scheduling_info . total_scheduled_services / ( double ) scheduling_info . total_hosts ) ;
/* adjust the check interval total to correspond to the interval length */
scheduling_info . service_check_interval_total = ( scheduling_info . service_check_interval_total * interval_length ) ;
/* calculate the average check interval for services */
scheduling_info . average_service_check_interval = ( double ) ( ( double ) scheduling_info . service_check_interval_total / ( double ) scheduling_info . total_scheduled_services ) ;
/******** DETERMINE SERVICE SCHEDULING PARAMS ********/
2017-05-19 23:37:19 +02:00
log_debug_info ( DEBUGL_EVENTS , 2 , " Determining service scheduling parameters... \n " ) ;
2017-05-19 22:22:40 +02:00
/* default max service check spread (in minutes) */
scheduling_info . max_service_check_spread = max_service_check_spread ;
/* how should we determine the service inter-check delay to use? */
switch ( service_inter_check_delay_method ) {
case ICD_NONE :
/* don't spread checks out - useful for testing parallelization code */
scheduling_info . service_inter_check_delay = 0.0 ;
break ;
case ICD_DUMB :
/* be dumb and just schedule checks 1 second apart */
scheduling_info . service_inter_check_delay = 1.0 ;
break ;
case ICD_USER :
/* the user specified a delay, so don't try to calculate one */
break ;
case ICD_SMART :
default :
/* be smart and calculate the best delay to use to minimize local load... */
if ( scheduling_info . total_scheduled_services > 0 & & scheduling_info . service_check_interval_total > 0 ) {
/* calculate the average inter check delay (in seconds) needed to evenly space the service checks out */
scheduling_info . average_service_inter_check_delay = ( double ) ( scheduling_info . average_service_check_interval / ( double ) scheduling_info . total_scheduled_services ) ;
/* set the global inter check delay value */
scheduling_info . service_inter_check_delay = scheduling_info . average_service_inter_check_delay ;
/* calculate max inter check delay and see if we should use that instead */
max_inter_check_delay = ( double ) ( ( scheduling_info . max_service_check_spread * 60.0 ) / ( double ) scheduling_info . total_scheduled_services ) ;
if ( scheduling_info . service_inter_check_delay > max_inter_check_delay )
scheduling_info . service_inter_check_delay = max_inter_check_delay ;
}
else
scheduling_info . service_inter_check_delay = 0.0 ;
log_debug_info ( DEBUGL_EVENTS , 1 , " Total scheduled service checks: %d \n " , scheduling_info . total_scheduled_services ) ;
log_debug_info ( DEBUGL_EVENTS , 1 , " Average service check interval: %0.2f sec \n " , scheduling_info . average_service_check_interval ) ;
log_debug_info ( DEBUGL_EVENTS , 1 , " Service inter-check delay: %0.2f sec \n " , scheduling_info . service_inter_check_delay ) ;
}
/* how should we determine the service interleave factor? */
switch ( service_interleave_factor_method ) {
case ILF_USER :
/* the user supplied a value, so don't do any calculation */
break ;
case ILF_SMART :
default :
/* protect against a divide by zero problem - shouldn't happen, but just in case... */
if ( scheduling_info . total_hosts = = 0 )
scheduling_info . total_hosts = 1 ;
scheduling_info . service_interleave_factor = ( int ) ( ceil ( scheduling_info . average_scheduled_services_per_host ) ) ;
log_debug_info ( DEBUGL_EVENTS , 1 , " Total scheduled service checks: %d \n " , scheduling_info . total_scheduled_services ) ;
log_debug_info ( DEBUGL_EVENTS , 1 , " Total hosts: %d \n " , scheduling_info . total_hosts ) ;
log_debug_info ( DEBUGL_EVENTS , 1 , " Service Interleave factor: %d \n " , scheduling_info . service_interleave_factor ) ;
}
/* calculate number of service interleave blocks */
if ( scheduling_info . service_interleave_factor = = 0 )
total_interleave_blocks = scheduling_info . total_scheduled_services ;
else
total_interleave_blocks = ( int ) ceil ( ( double ) scheduling_info . total_scheduled_services / ( double ) scheduling_info . service_interleave_factor ) ;
scheduling_info . first_service_check = ( time_t ) 0L ;
scheduling_info . last_service_check = ( time_t ) 0L ;
log_debug_info ( DEBUGL_EVENTS , 1 , " Total scheduled services: %d \n " , scheduling_info . total_scheduled_services ) ;
log_debug_info ( DEBUGL_EVENTS , 1 , " Service Interleave factor: %d \n " , scheduling_info . service_interleave_factor ) ;
log_debug_info ( DEBUGL_EVENTS , 1 , " Total service interleave blocks: %d \n " , total_interleave_blocks ) ;
log_debug_info ( DEBUGL_EVENTS , 1 , " Service inter-check delay: %2.1f \n " , scheduling_info . service_inter_check_delay ) ;
if ( test_scheduling = = TRUE )
gettimeofday ( & tv [ 3 ] , NULL ) ;
/******** SCHEDULE SERVICE CHECKS ********/
2017-05-19 23:37:19 +02:00
log_debug_info ( DEBUGL_EVENTS , 2 , " Scheduling service checks... \n " ) ;
2017-05-19 22:22:40 +02:00
/* determine check times for service checks (with interleaving to minimize remote load) */
current_interleave_block = 0 ;
for ( temp_service = service_list ; temp_service ! = NULL & & scheduling_info . service_interleave_factor > 0 ; ) {
log_debug_info ( DEBUGL_EVENTS , 2 , " Current Interleave Block: %d \n " , current_interleave_block ) ;
for ( interleave_block_index = 0 ; interleave_block_index < scheduling_info . service_interleave_factor & & temp_service ! = NULL ; temp_service = temp_service - > next ) {
log_debug_info ( DEBUGL_EVENTS , 2 , " Service '%s' on host '%s' \n " , temp_service - > description , temp_service - > host_name ) ;
/* skip this service if it shouldn't be scheduled */
if ( temp_service - > should_be_scheduled = = FALSE ) {
log_debug_info ( DEBUGL_EVENTS , 2 , " Service check should not be scheduled. \n " ) ;
continue ;
}
/*
* skip services that are already scheduled for the ( near )
* future from retention data , but reschedule ones that
* were supposed to happen while we weren ' t running . . .
* We check to make sure the check isn ' t scheduled to run
* far in the future to make sure checks who ' ve hade their
* timeperiods changed during the restart aren ' t left
* hanging too long without being run .
*/
check_delay = temp_service - > next_check - current_time ;
2017-05-19 23:37:19 +02:00
if ( check_delay > 0 & & check_delay < check_window ( temp_service ) ) {
2017-05-19 22:22:40 +02:00
log_debug_info ( DEBUGL_EVENTS , 2 , " Service is already scheduled to be checked in the future: %s \n " , ctime ( & temp_service - > next_check ) ) ;
continue ;
}
/* interleave block index should only be increased when we find a schedulable service */
/* moved from for() loop 11/05/05 EG */
interleave_block_index + + ;
mult_factor = current_interleave_block + ( interleave_block_index * total_interleave_blocks ) ;
log_debug_info ( DEBUGL_EVENTS , 2 , " CIB: %d, IBI: %d, TIB: %d, SIF: %d \n " , current_interleave_block , interleave_block_index , total_interleave_blocks , scheduling_info . service_interleave_factor ) ;
log_debug_info ( DEBUGL_EVENTS , 2 , " Mult factor: %d \n " , mult_factor ) ;
2017-05-19 23:37:19 +02:00
/*
* set the preferred next check time for the service
* If we end up too far into the future , grab a random
* time within the service ' s window instead .
*/
check_delay =
mult_factor * scheduling_info . service_inter_check_delay ;
if ( check_delay > check_window ( temp_service ) ) {
log_debug_info ( DEBUGL_EVENTS , 0 ,
" Fixing check time %lu secs too far away \n " ,
check_delay - check_window ( temp_service ) ) ;
fixed_services + + ;
check_delay = ranged_urand ( 0 , check_window ( temp_service ) ) ;
log_debug_info ( DEBUGL_EVENTS , 0 , " New check offset: %d \n " ,
check_delay ) ;
}
temp_service - > next_check = ( time_t ) ( current_time + check_delay ) ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
log_debug_info ( DEBUGL_EVENTS , 2 , " Preferred Check Time: %lu --> %s \n " , ( unsigned long ) temp_service - > next_check , ctime ( & temp_service - > next_check ) ) ;
2017-05-19 22:22:40 +02:00
/* make sure the service can actually be scheduled when we want */
is_valid_time = check_time_against_period ( temp_service - > next_check , temp_service - > check_period_ptr ) ;
if ( is_valid_time = = ERROR ) {
2017-05-19 23:37:19 +02:00
log_debug_info ( DEBUGL_EVENTS , 2 , " Preferred Time is Invalid In Timeperiod '%s': %lu --> %s \n " , temp_service - > check_period_ptr - > name , ( unsigned long ) temp_service - > next_check , ctime ( & temp_service - > next_check ) ) ;
2017-05-19 22:22:40 +02:00
get_next_valid_time ( temp_service - > next_check , & next_valid_time , temp_service - > check_period_ptr ) ;
2017-05-19 23:37:19 +02:00
temp_service - > next_check =
( time_t ) ( next_valid_time + check_delay ) ;
2017-05-19 22:22:40 +02:00
}
2017-05-19 23:37:19 +02:00
log_debug_info ( DEBUGL_EVENTS , 2 , " Actual Check Time: %lu --> %s \n " , ( unsigned long ) temp_service - > next_check , ctime ( & temp_service - > next_check ) ) ;
2017-05-19 22:22:40 +02:00
if ( scheduling_info . first_service_check = = ( time_t ) 0 | | ( temp_service - > next_check < scheduling_info . first_service_check ) )
scheduling_info . first_service_check = temp_service - > next_check ;
if ( temp_service - > next_check > scheduling_info . last_service_check )
scheduling_info . last_service_check = temp_service - > next_check ;
}
current_interleave_block + + ;
}
if ( test_scheduling = = TRUE )
gettimeofday ( & tv [ 4 ] , NULL ) ;
/* add scheduled service checks to event queue */
for ( temp_service = service_list ; temp_service ! = NULL ; temp_service = temp_service - > next ) {
/* Nagios XI/NDOUtils MOD */
/* update status of all services (scheduled or not) */
update_service_status ( temp_service , FALSE ) ;
/* skip most services that shouldn't be scheduled */
if ( temp_service - > should_be_scheduled = = FALSE ) {
/* passive checks are an exception if a forced check was scheduled before Nagios was restarted */
if ( ! ( temp_service - > checks_enabled = = FALSE & & temp_service - > next_check ! = ( time_t ) 0L & & ( temp_service - > check_options & CHECK_OPTION_FORCE_EXECUTION ) ) )
continue ;
}
/* create a new service check event */
2017-05-19 23:37:19 +02:00
temp_service - > next_check_event = schedule_new_event ( EVENT_SERVICE_CHECK , FALSE , temp_service - > next_check , FALSE , 0 , NULL , TRUE , ( void * ) temp_service , NULL , temp_service - > check_options ) ;
2017-05-19 22:22:40 +02:00
}
if ( test_scheduling = = TRUE )
gettimeofday ( & tv [ 5 ] , NULL ) ;
/******** DETERMINE HOST SCHEDULING PARAMS ********/
2017-05-19 23:37:19 +02:00
log_debug_info ( DEBUGL_EVENTS , 2 , " Determining host scheduling parameters... \n " ) ;
2017-05-19 22:22:40 +02:00
scheduling_info . first_host_check = ( time_t ) 0L ;
scheduling_info . last_host_check = ( time_t ) 0L ;
/* default max host check spread (in minutes) */
scheduling_info . max_host_check_spread = max_host_check_spread ;
/* how should we determine the host inter-check delay to use? */
switch ( host_inter_check_delay_method ) {
case ICD_NONE :
/* don't spread checks out */
scheduling_info . host_inter_check_delay = 0.0 ;
break ;
case ICD_DUMB :
/* be dumb and just schedule checks 1 second apart */
scheduling_info . host_inter_check_delay = 1.0 ;
break ;
case ICD_USER :
/* the user specified a delay, so don't try to calculate one */
break ;
case ICD_SMART :
default :
/* be smart and calculate the best delay to use to minimize local load... */
if ( scheduling_info . total_scheduled_hosts > 0 & & scheduling_info . host_check_interval_total > 0 ) {
/* adjust the check interval total to correspond to the interval length */
scheduling_info . host_check_interval_total = ( scheduling_info . host_check_interval_total * interval_length ) ;
/* calculate the average check interval for hosts */
scheduling_info . average_host_check_interval = ( double ) ( ( double ) scheduling_info . host_check_interval_total / ( double ) scheduling_info . total_scheduled_hosts ) ;
/* calculate the average inter check delay (in seconds) needed to evenly space the host checks out */
scheduling_info . average_host_inter_check_delay = ( double ) ( scheduling_info . average_host_check_interval / ( double ) scheduling_info . total_scheduled_hosts ) ;
/* set the global inter check delay value */
scheduling_info . host_inter_check_delay = scheduling_info . average_host_inter_check_delay ;
/* calculate max inter check delay and see if we should use that instead */
max_inter_check_delay = ( double ) ( ( scheduling_info . max_host_check_spread * 60.0 ) / ( double ) scheduling_info . total_scheduled_hosts ) ;
if ( scheduling_info . host_inter_check_delay > max_inter_check_delay )
scheduling_info . host_inter_check_delay = max_inter_check_delay ;
}
else
scheduling_info . host_inter_check_delay = 0.0 ;
log_debug_info ( DEBUGL_EVENTS , 2 , " Total scheduled host checks: %d \n " , scheduling_info . total_scheduled_hosts ) ;
log_debug_info ( DEBUGL_EVENTS , 2 , " Host check interval total: %lu \n " , scheduling_info . host_check_interval_total ) ;
log_debug_info ( DEBUGL_EVENTS , 2 , " Average host check interval: %0.2f sec \n " , scheduling_info . average_host_check_interval ) ;
log_debug_info ( DEBUGL_EVENTS , 2 , " Host inter-check delay: %0.2f sec \n " , scheduling_info . host_inter_check_delay ) ;
}
if ( test_scheduling = = TRUE )
gettimeofday ( & tv [ 6 ] , NULL ) ;
/******** SCHEDULE HOST CHECKS ********/
2017-05-19 23:37:19 +02:00
log_debug_info ( DEBUGL_EVENTS , 2 , " Scheduling host checks... \n " ) ;
2017-05-19 22:22:40 +02:00
/* determine check times for host checks */
mult_factor = 0 ;
for ( temp_host = host_list ; temp_host ! = NULL ; temp_host = temp_host - > next ) {
log_debug_info ( DEBUGL_EVENTS , 2 , " Host '%s' \n " , temp_host - > name ) ;
/* skip hosts that shouldn't be scheduled */
if ( temp_host - > should_be_scheduled = = FALSE ) {
log_debug_info ( DEBUGL_EVENTS , 2 , " Host check should not be scheduled. \n " ) ;
continue ;
}
/* skip hosts that are already scheduled for the future (from retention data), but reschedule ones that were supposed to be checked before we started */
if ( temp_host - > next_check > current_time ) {
log_debug_info ( DEBUGL_EVENTS , 2 , " Host is already scheduled to be checked in the future: %s \n " , ctime ( & temp_host - > next_check ) ) ;
continue ;
}
2017-05-19 23:37:19 +02:00
/*
* calculate preferred host check time .
* If it ' s too far into the future , we grab a random time
* within this host ' s max check window instead
*/
check_delay = mult_factor * scheduling_info . host_inter_check_delay ;
if ( check_delay > check_window ( temp_host ) ) {
log_debug_info ( DEBUGL_EVENTS , 1 , " Fixing check time (off by %lu) \n " ,
check_delay - check_window ( temp_host ) ) ;
fixed_hosts + + ;
check_delay = ranged_urand ( 0 , check_window ( temp_host ) ) ;
}
temp_host - > next_check = ( time_t ) ( current_time + check_delay ) ;
2017-05-19 22:22:40 +02:00
log_debug_info ( DEBUGL_EVENTS , 2 , " Preferred Check Time: %lu --> %s " , ( unsigned long ) temp_host - > next_check , ctime ( & temp_host - > next_check ) ) ;
/* make sure the host can actually be scheduled at this time */
is_valid_time = check_time_against_period ( temp_host - > next_check , temp_host - > check_period_ptr ) ;
if ( is_valid_time = = ERROR ) {
get_next_valid_time ( temp_host - > next_check , & next_valid_time , temp_host - > check_period_ptr ) ;
2017-05-19 23:37:19 +02:00
temp_host - > next_check = ( time_t ) ( next_valid_time | check_delay ) ;
2017-05-19 22:22:40 +02:00
}
2017-05-19 23:37:19 +02:00
log_debug_info ( DEBUGL_EVENTS , 2 , " Actual Check Time: %lu --> %s \n " , ( unsigned long ) temp_host - > next_check , ctime ( & temp_host - > next_check ) ) ;
2017-05-19 22:22:40 +02:00
if ( scheduling_info . first_host_check = = ( time_t ) 0 | | ( temp_host - > next_check < scheduling_info . first_host_check ) )
scheduling_info . first_host_check = temp_host - > next_check ;
if ( temp_host - > next_check > scheduling_info . last_host_check )
scheduling_info . last_host_check = temp_host - > next_check ;
mult_factor + + ;
}
2017-05-19 23:37:19 +02:00
log_debug_info ( DEBUGL_EVENTS , 0 , " Fixed scheduling for %u hosts and %u services \n " , fixed_hosts , fixed_services ) ;
2017-05-19 22:22:40 +02:00
if ( test_scheduling = = TRUE )
gettimeofday ( & tv [ 7 ] , NULL ) ;
/* add scheduled host checks to event queue */
for ( temp_host = host_list ; temp_host ! = NULL ; temp_host = temp_host - > next ) {
/* Nagios XI/NDOUtils Mod */
/* update status of all hosts (scheduled or not) */
update_host_status ( temp_host , FALSE ) ;
/* skip most hosts that shouldn't be scheduled */
if ( temp_host - > should_be_scheduled = = FALSE ) {
/* passive checks are an exception if a forced check was scheduled before Nagios was restarted */
if ( ! ( temp_host - > checks_enabled = = FALSE & & temp_host - > next_check ! = ( time_t ) 0L & & ( temp_host - > check_options & CHECK_OPTION_FORCE_EXECUTION ) ) )
continue ;
}
/* schedule a new host check event */
2017-05-19 23:37:19 +02:00
temp_host - > next_check_event = schedule_new_event ( EVENT_HOST_CHECK , FALSE , temp_host - > next_check , FALSE , 0 , NULL , TRUE , ( void * ) temp_host , NULL , temp_host - > check_options ) ;
2017-05-19 22:22:40 +02:00
}
if ( test_scheduling = = TRUE )
gettimeofday ( & tv [ 8 ] , NULL ) ;
/******** SCHEDULE MISC EVENTS ********/
/* add a host and service check rescheduling event */
if ( auto_reschedule_checks = = TRUE )
schedule_new_event ( EVENT_RESCHEDULE_CHECKS , TRUE , current_time + auto_rescheduling_interval , TRUE , auto_rescheduling_interval , NULL , TRUE , NULL , NULL , 0 ) ;
/* add a check result reaper event */
schedule_new_event ( EVENT_CHECK_REAPER , TRUE , current_time + check_reaper_interval , TRUE , check_reaper_interval , NULL , TRUE , NULL , NULL , 0 ) ;
/* add an orphaned check event */
if ( check_orphaned_services = = TRUE | | check_orphaned_hosts = = TRUE )
schedule_new_event ( EVENT_ORPHAN_CHECK , TRUE , current_time + DEFAULT_ORPHAN_CHECK_INTERVAL , TRUE , DEFAULT_ORPHAN_CHECK_INTERVAL , NULL , TRUE , NULL , NULL , 0 ) ;
/* add a service result "freshness" check event */
if ( check_service_freshness = = TRUE )
schedule_new_event ( EVENT_SFRESHNESS_CHECK , TRUE , current_time + service_freshness_check_interval , TRUE , service_freshness_check_interval , NULL , TRUE , NULL , NULL , 0 ) ;
/* add a host result "freshness" check event */
if ( check_host_freshness = = TRUE )
schedule_new_event ( EVENT_HFRESHNESS_CHECK , TRUE , current_time + host_freshness_check_interval , TRUE , host_freshness_check_interval , NULL , TRUE , NULL , NULL , 0 ) ;
/* add a status save event */
2017-05-19 23:37:19 +02:00
schedule_new_event ( EVENT_STATUS_SAVE , TRUE , current_time + status_update_interval , TRUE , status_update_interval , NULL , TRUE , NULL , NULL , 0 ) ;
2017-05-19 22:22:40 +02:00
/* add a log rotation event if necessary */
if ( log_rotation_method ! = LOG_ROTATION_NONE )
schedule_new_event ( EVENT_LOG_ROTATION , TRUE , get_next_log_rotation_time ( ) , TRUE , 0 , ( void * ) get_next_log_rotation_time , TRUE , NULL , NULL , 0 ) ;
/* add a retention data save event if needed */
if ( retain_state_information = = TRUE & & retention_update_interval > 0 )
schedule_new_event ( EVENT_RETENTION_SAVE , TRUE , current_time + ( retention_update_interval * 60 ) , TRUE , ( retention_update_interval * 60 ) , NULL , TRUE , NULL , NULL , 0 ) ;
if ( test_scheduling = = TRUE ) {
runtime [ 0 ] = ( double ) ( ( double ) ( tv [ 1 ] . tv_sec - tv [ 0 ] . tv_sec ) + ( double ) ( ( tv [ 1 ] . tv_usec - tv [ 0 ] . tv_usec ) / 1000.0 ) / 1000.0 ) ;
runtime [ 1 ] = ( double ) ( ( double ) ( tv [ 2 ] . tv_sec - tv [ 1 ] . tv_sec ) + ( double ) ( ( tv [ 2 ] . tv_usec - tv [ 1 ] . tv_usec ) / 1000.0 ) / 1000.0 ) ;
runtime [ 2 ] = ( double ) ( ( double ) ( tv [ 3 ] . tv_sec - tv [ 2 ] . tv_sec ) + ( double ) ( ( tv [ 3 ] . tv_usec - tv [ 2 ] . tv_usec ) / 1000.0 ) / 1000.0 ) ;
runtime [ 3 ] = ( double ) ( ( double ) ( tv [ 4 ] . tv_sec - tv [ 3 ] . tv_sec ) + ( double ) ( ( tv [ 4 ] . tv_usec - tv [ 3 ] . tv_usec ) / 1000.0 ) / 1000.0 ) ;
runtime [ 4 ] = ( double ) ( ( double ) ( tv [ 5 ] . tv_sec - tv [ 4 ] . tv_sec ) + ( double ) ( ( tv [ 5 ] . tv_usec - tv [ 4 ] . tv_usec ) / 1000.0 ) / 1000.0 ) ;
runtime [ 5 ] = ( double ) ( ( double ) ( tv [ 6 ] . tv_sec - tv [ 5 ] . tv_sec ) + ( double ) ( ( tv [ 6 ] . tv_usec - tv [ 5 ] . tv_usec ) / 1000.0 ) / 1000.0 ) ;
runtime [ 6 ] = ( double ) ( ( double ) ( tv [ 7 ] . tv_sec - tv [ 6 ] . tv_sec ) + ( double ) ( ( tv [ 7 ] . tv_usec - tv [ 6 ] . tv_usec ) / 1000.0 ) / 1000.0 ) ;
runtime [ 7 ] = ( double ) ( ( double ) ( tv [ 8 ] . tv_sec - tv [ 7 ] . tv_sec ) + ( double ) ( ( tv [ 8 ] . tv_usec - tv [ 7 ] . tv_usec ) / 1000.0 ) / 1000.0 ) ;
runtime [ 8 ] = ( double ) ( ( double ) ( tv [ 8 ] . tv_sec - tv [ 0 ] . tv_sec ) + ( double ) ( ( tv [ 8 ] . tv_usec - tv [ 0 ] . tv_usec ) / 1000.0 ) / 1000.0 ) ;
printf ( " EVENT SCHEDULING TIMES \n " ) ;
printf ( " ------------------------------------- \n " ) ;
printf ( " Get service info: %.6lf sec \n " , runtime [ 0 ] ) ;
printf ( " Get host info info: %.6lf sec \n " , runtime [ 1 ] ) ;
printf ( " Get service params: %.6lf sec \n " , runtime [ 2 ] ) ;
printf ( " Schedule service times: %.6lf sec \n " , runtime [ 3 ] ) ;
printf ( " Schedule service events: %.6lf sec \n " , runtime [ 4 ] ) ;
printf ( " Get host params: %.6lf sec \n " , runtime [ 5 ] ) ;
printf ( " Schedule host times: %.6lf sec \n " , runtime [ 6 ] ) ;
printf ( " Schedule host events: %.6lf sec \n " , runtime [ 7 ] ) ;
printf ( " ============ \n " ) ;
printf ( " TOTAL: %.6lf sec \n " , runtime [ 8 ] ) ;
printf ( " \n \n " ) ;
}
log_debug_info ( DEBUGL_FUNCTIONS , 0 , " init_timing_loop() end \n " ) ;
return ;
}
/* displays service check scheduling information */
void display_scheduling_info ( void ) {
float minimum_concurrent_checks = 0.0 ;
int suggestions = 0 ;
printf ( " Projected scheduling information for host and service checks \n " ) ;
printf ( " is listed below. This information assumes that you are going \n " ) ;
printf ( " to start running Nagios with your current config files. \n \n " ) ;
printf ( " HOST SCHEDULING INFORMATION \n " ) ;
printf ( " --------------------------- \n " ) ;
printf ( " Total hosts: %d \n " , scheduling_info . total_hosts ) ;
printf ( " Total scheduled hosts: %d \n " , scheduling_info . total_scheduled_hosts ) ;
printf ( " Host inter-check delay method: " ) ;
if ( host_inter_check_delay_method = = ICD_NONE )
printf ( " NONE \n " ) ;
else if ( host_inter_check_delay_method = = ICD_DUMB )
printf ( " DUMB \n " ) ;
else if ( host_inter_check_delay_method = = ICD_SMART ) {
printf ( " SMART \n " ) ;
printf ( " Average host check interval: %.2f sec \n " , scheduling_info . average_host_check_interval ) ;
}
else
printf ( " USER-SUPPLIED VALUE \n " ) ;
printf ( " Host inter-check delay: %.2f sec \n " , scheduling_info . host_inter_check_delay ) ;
printf ( " Max host check spread: %d min \n " , scheduling_info . max_host_check_spread ) ;
printf ( " First scheduled check: %s " , ( scheduling_info . total_scheduled_hosts = = 0 ) ? " N/A \n " : ctime ( & scheduling_info . first_host_check ) ) ;
printf ( " Last scheduled check: %s " , ( scheduling_info . total_scheduled_hosts = = 0 ) ? " N/A \n " : ctime ( & scheduling_info . last_host_check ) ) ;
printf ( " \n \n " ) ;
printf ( " SERVICE SCHEDULING INFORMATION \n " ) ;
printf ( " ------------------------------- \n " ) ;
printf ( " Total services: %d \n " , scheduling_info . total_services ) ;
printf ( " Total scheduled services: %d \n " , scheduling_info . total_scheduled_services ) ;
printf ( " Service inter-check delay method: " ) ;
if ( service_inter_check_delay_method = = ICD_NONE )
printf ( " NONE \n " ) ;
else if ( service_inter_check_delay_method = = ICD_DUMB )
printf ( " DUMB \n " ) ;
else if ( service_inter_check_delay_method = = ICD_SMART ) {
printf ( " SMART \n " ) ;
printf ( " Average service check interval: %.2f sec \n " , scheduling_info . average_service_check_interval ) ;
}
else
printf ( " USER-SUPPLIED VALUE \n " ) ;
printf ( " Inter-check delay: %.2f sec \n " , scheduling_info . service_inter_check_delay ) ;
printf ( " Interleave factor method: %s \n " , ( service_interleave_factor_method = = ILF_USER ) ? " USER-SUPPLIED VALUE " : " SMART " ) ;
if ( service_interleave_factor_method = = ILF_SMART )
printf ( " Average services per host: %.2f \n " , scheduling_info . average_services_per_host ) ;
printf ( " Service interleave factor: %d \n " , scheduling_info . service_interleave_factor ) ;
printf ( " Max service check spread: %d min \n " , scheduling_info . max_service_check_spread ) ;
printf ( " First scheduled check: %s " , ctime ( & scheduling_info . first_service_check ) ) ;
printf ( " Last scheduled check: %s " , ctime ( & scheduling_info . last_service_check ) ) ;
printf ( " \n \n " ) ;
2017-05-19 23:37:19 +02:00
/***** MINIMUM CONCURRENT CHECKS RECOMMENDATION *****/
minimum_concurrent_checks = ceil ( ( ( ( scheduling_info . total_scheduled_services / scheduling_info . average_service_check_interval )
+ ( scheduling_info . total_scheduled_hosts / scheduling_info . average_host_check_interval ) )
* 1.4 * scheduling_info . average_service_execution_time ) ) ;
2017-05-19 22:22:40 +02:00
printf ( " CHECK PROCESSING INFORMATION \n " ) ;
printf ( " ---------------------------- \n " ) ;
2017-05-19 23:37:19 +02:00
printf ( " Average check execution time: %.2fs%s " ,
scheduling_info . average_service_execution_time ,
scheduling_info . average_service_execution_time = = 2.0 ? " (pessimistic guesstimate) \n " : " \n " ) ;
printf ( " Estimated concurrent checks: %.0f (%.2f per cpu core) \n " ,
minimum_concurrent_checks , ( float ) minimum_concurrent_checks / ( float ) online_cpus ( ) ) ;
printf ( " Max concurrent service checks: " ) ;
2017-05-19 22:22:40 +02:00
if ( max_parallel_service_checks = = 0 )
printf ( " Unlimited \n " ) ;
else
printf ( " %d \n " , max_parallel_service_checks ) ;
printf ( " \n \n " ) ;
printf ( " PERFORMANCE SUGGESTIONS \n " ) ;
printf ( " ----------------------- \n " ) ;
2017-05-19 23:37:19 +02:00
/* compare with configured value */
if ( ( ( int ) minimum_concurrent_checks > max_parallel_service_checks ) & & max_parallel_service_checks ! = 0 ) {
printf ( " * Value for 'max_concurrent_checks' option should be >= %d \n " , ( int ) minimum_concurrent_checks ) ;
2017-05-19 22:22:40 +02:00
suggestions + + ;
}
2017-05-19 23:37:19 +02:00
if ( loadctl . nofile_limit * 0.4 < minimum_concurrent_checks ) {
printf ( " * Increase the \" open files \" ulimit for user '%s' \n " , nagios_user ) ;
printf ( " - You can do this by adding \n %s hard nofiles %d \n to /etc/security/limits.conf \n " ,
nagios_user , rup2pof2 ( minimum_concurrent_checks * 2 ) ) ;
suggestions + + ;
}
if ( loadctl . nproc_limit * 0.75 < minimum_concurrent_checks ) {
printf ( " * Increase the \" max user processes \" ulimit for user '%s' \n " , nagios_user ) ;
printf ( " - You can do this by adding \n %s hard nproc %d \n to /etc/security/limits.conf \n " ,
nagios_user , rup2pof2 ( minimum_concurrent_checks ) ) ;
2017-05-19 22:22:40 +02:00
suggestions + + ;
}
2017-05-19 23:37:19 +02:00
if ( minimum_concurrent_checks > online_cpus ( ) * 75 ) {
printf ( " * Aim for a max of 50 concurrent checks / cpu core (current: %.2f) \n " ,
( float ) minimum_concurrent_checks / ( float ) online_cpus ( ) ) ;
2017-05-19 22:22:40 +02:00
suggestions + + ;
}
2017-05-19 23:37:19 +02:00
if ( suggestions ) {
printf ( " \n NOTE: These are just guidelines and *not* hard numbers. \n \n " ) ;
printf ( " Ultimately, only testing will tell if your settings and hardware are \n " ) ;
printf ( " suitable for the types and number of checks you're planning to run. \n " ) ;
}
else {
2017-05-19 22:22:40 +02:00
printf ( " I have no suggestions - things look okay. \n " ) ;
2017-05-19 23:37:19 +02:00
}
2017-05-19 22:22:40 +02:00
printf ( " \n " ) ;
return ;
}
2017-05-19 23:37:19 +02:00
/*
* Create the event queue
* We oversize it somewhat to avoid unnecessary growing
*/
int init_event_queue ( void )
{
unsigned int size ;
size = num_objects . hosts + num_objects . services ;
2019-04-18 17:09:18 +02:00
if ( size < 4096 ) {
2017-05-19 23:37:19 +02:00
size = 4096 ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
nagios_squeue = squeue_create ( size ) ;
return 0 ;
}
/* schedule a new timed event */
2019-04-18 17:09:18 +02:00
timed_event * schedule_new_event ( int event_type , int high_priority , time_t run_time ,
int recurring , unsigned long event_interval , void * timing_func ,
int compensate_for_time_change , void * event_data , void * event_args , int event_options )
{
timed_event * new_event = NULL ;
char run_time_string [ MAX_DATETIME_LENGTH ] = " " ;
get_datetime_string ( & run_time , run_time_string , MAX_DATETIME_LENGTH , SHORT_DATE_TIME ) ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
new_event = ( timed_event * ) calloc ( 1 , sizeof ( timed_event ) ) ;
2019-04-18 17:09:18 +02:00
if ( new_event ! = NULL ) {
new_event - > event_type = event_type ;
new_event - > event_data = event_data ;
new_event - > event_args = event_args ;
new_event - > event_options = event_options ;
new_event - > run_time = run_time ;
new_event - > recurring = recurring ;
new_event - > event_interval = event_interval ;
new_event - > timing_func = timing_func ;
2017-05-19 22:22:40 +02:00
new_event - > compensate_for_time_change = compensate_for_time_change ;
2019-04-18 17:09:18 +02:00
new_event - > priority = high_priority ;
}
else {
2017-05-19 23:37:19 +02:00
return NULL ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 23:37:19 +02:00
2019-04-18 17:09:18 +02:00
log_debug_info ( DEBUGL_FUNCTIONS , 0 ,
" schedule_new_event() \n "
" New Event Details: \n "
" Event ID: %p \n "
" Type: EVENT_%s \n "
" High Priority: %d \n "
" Run Time: %s \n "
" Recurring: %d \n "
" Event Interval: %lu \n "
" Compensate for Time Change: %d \n "
" Event Options: %d \n " ,
new_event ,
EVENT_TYPE_STR ( event_type ) ,
high_priority ,
run_time_string ,
recurring ,
event_interval ,
compensate_for_time_change ,
event_options ) ;
2017-05-19 22:22:40 +02:00
/* add the event to the event list */
2017-05-19 23:37:19 +02:00
add_event ( nagios_squeue , new_event ) ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
return new_event ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* reschedule an event in order of execution time */
2019-04-18 17:09:18 +02:00
void reschedule_event ( squeue_t * sq , timed_event * event )
{
2017-05-19 22:22:40 +02:00
time_t current_time = 0L ;
time_t ( * timingfunc ) ( void ) ;
log_debug_info ( DEBUGL_FUNCTIONS , 0 , " reschedule_event() \n " ) ;
/* reschedule recurring events... */
if ( event - > recurring = = TRUE ) {
/* use custom timing function */
if ( event - > timing_func ! = NULL ) {
2019-04-18 17:09:18 +02:00
timingfunc = event - > timing_func ;
2017-05-19 22:22:40 +02:00
event - > run_time = ( * timingfunc ) ( ) ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* normal recurring events */
else {
2019-04-18 17:09:18 +02:00
2017-05-19 22:22:40 +02:00
event - > run_time = event - > run_time + event - > event_interval ;
time ( & current_time ) ;
2019-04-18 17:09:18 +02:00
if ( event - > run_time < current_time ) {
2017-05-19 22:22:40 +02:00
event - > run_time = current_time ;
}
}
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* add the event to the event list */
2017-05-19 23:37:19 +02:00
add_event ( sq , event ) ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* add an event to list ordered by execution time */
2019-04-18 17:09:18 +02:00
void add_event ( squeue_t * sq , timed_event * event )
{
2017-05-19 22:22:40 +02:00
log_debug_info ( DEBUGL_FUNCTIONS , 0 , " add_event() \n " ) ;
2019-04-18 17:09:18 +02:00
if ( event - > sq_event ! = NULL ) {
2017-05-19 23:37:19 +02:00
logit ( NSLOG_RUNTIME_ERROR , TRUE ,
" Error: Adding %s event that seems to already be scheduled \n " ,
EVENT_TYPE_STR ( event - > event_type ) ) ;
2019-04-18 17:09:18 +02:00
2017-05-19 23:37:19 +02:00
remove_event ( sq , event ) ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
if ( event - > priority > 0 ) {
2017-05-19 23:37:19 +02:00
event - > sq_event = squeue_add_usec ( sq , event - > run_time , event - > priority - 1 , event ) ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
else {
2017-05-19 23:37:19 +02:00
event - > sq_event = squeue_add ( sq , event - > run_time , event ) ;
2019-04-18 17:09:18 +02:00
}
if ( event - > sq_event = = NULL ) {
logit ( NSLOG_RUNTIME_ERROR , TRUE ,
" Error: Failed to add event to squeue '%p' with prio %u: %s \n " ,
sq , event - > priority , strerror ( errno ) ) ;
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
/* this specifies + just to ensure different than `-1` visually */
if ( sq = = nagios_squeue ) {
2017-05-19 23:37:19 +02:00
track_events ( event - > event_type , + 1 ) ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 23:37:19 +02:00
2017-05-19 22:22:40 +02:00
# ifdef USE_EVENT_BROKER
2017-05-19 23:37:19 +02:00
else {
broker_timed_event ( NEBTYPE_TIMEDEVENT_ADD , NEBFLAG_NONE , NEBATTR_NONE , event , NULL ) ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
# endif
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
/* remove an event from the queue */
2019-04-18 17:09:18 +02:00
void remove_event ( squeue_t * sq , timed_event * event )
{
2017-05-19 22:22:40 +02:00
# ifdef USE_EVENT_BROKER
broker_timed_event ( NEBTYPE_TIMEDEVENT_REMOVE , NEBFLAG_NONE , NEBATTR_NONE , event , NULL ) ;
# endif
2019-04-18 17:09:18 +02:00
if ( event = = NULL | | event - > sq_event = = NULL ) {
2017-05-19 22:22:40 +02:00
return ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 22:22:40 +02:00
2019-04-18 17:09:18 +02:00
if ( sq ! = NULL ) {
2017-05-19 23:37:19 +02:00
squeue_remove ( sq , event - > sq_event ) ;
2019-04-18 17:09:18 +02:00
}
else {
2017-05-19 23:37:19 +02:00
logit ( NSLOG_RUNTIME_ERROR , TRUE ,
2019-04-18 17:09:18 +02:00
" Error: remove_event() called for %s event with NULL sq parameter \n " ,
EVENT_TYPE_STR ( event - > event_type ) ) ;
}
2017-05-19 23:37:19 +02:00
2019-04-18 17:09:18 +02:00
if ( sq = = nagios_squeue ) {
2017-05-19 23:37:19 +02:00
track_events ( event - > event_type , - 1 ) ;
2019-04-18 17:09:18 +02:00
}
2017-05-19 23:37:19 +02:00
2019-04-18 17:09:18 +02:00
/* mark this event as unscheduled */
event - > sq_event = NULL ;
2017-05-19 23:37:19 +02:00
/*
* if we catch an event from the queue which gets removed when
* we go polling for input ( as might happen with f . e . downtime
* events that we get " cancel " commands for just as they are
* about to start or expire ) , we must make sure we mark the
* current event as no longer scheduled , or we ' ll run into
* segfaults and memory corruptions for sure .
*/
if ( event = = current_event ) {
current_event = NULL ;
}
2019-04-18 17:09:18 +02:00
}
2017-05-19 23:37:19 +02:00
/*
 * Decides whether an event taken from the main queue may run now.
 *
 * Returns TRUE when the event should be executed.  Returns FALSE when
 * it must not run at this time; for host and service checks blocked by
 * configuration or by the parallel check limit, the event has by then
 * been removed from nagios_squeue and rescheduled for a later time.
 */
static int should_run_event(timed_event *temp_event)
{
	/* only host and service checks start processes; everything else always runs */
	if (temp_event->event_type != EVENT_HOST_CHECK
	    && temp_event->event_type != EVENT_SERVICE_CHECK) {
		return TRUE;
	}

	/* no capacity for another job: spend up to one msec reaping one job and try again later */
	if (wproc_can_spawn(&loadctl) <= 0) {
		wproc_reap(1, 1);
		return FALSE;
	}

	if (temp_event->event_type == EVENT_SERVICE_CHECK) {
		service *svc = (service *)temp_event->event_data;
		int nudge_by = 0;
		int blocked = FALSE;

		/* forced checks override normal check logic */
		if ((svc->check_options & CHECK_OPTION_FORCE_EXECUTION)) {
			return TRUE;
		}

		/* already at the configured limit of parallel service checks? */
		if (max_parallel_service_checks != 0
		    && currently_running_service_checks >= max_parallel_service_checks) {
			nudge_by = ranged_urand(NUDGE_MIN, NUDGE_MAX);
			logit(NSLOG_RUNTIME_WARNING, TRUE,
			      " \t Max concurrent service checks (%d) has been reached. "
			      " Nudging %s:%s by %d seconds... \n ",
			      max_parallel_service_checks, svc->host_name,
			      svc->description, nudge_by);
			blocked = TRUE;
		}

		/* active service checks switched off? */
		if (execute_service_checks == FALSE) {
			log_debug_info(DEBUGL_EVENTS | DEBUGL_CHECKS, 1,
			               " We're not executing service checks right now, so we'll skip check event for service '%s;%s'. \n ",
			               svc->host_name, svc->description);
			blocked = TRUE;
		}

		if (blocked == FALSE) {
			return TRUE;
		}

		/* the check can't run now, so move it to a later time */
		remove_event(nagios_squeue, temp_event);

		if (nudge_by) {
			/* a non-zero nudge was drawn for the parallel limit: push by that amount */
			svc->next_check = (time_t)(svc->next_check + nudge_by);
		}
		else {
			/* otherwise move on by the regular check window */
			svc->next_check += check_window(svc);
		}
		temp_event->run_time = svc->next_check;

		reschedule_event(nagios_squeue, temp_event);
		update_service_status(svc, FALSE);

		return FALSE;
	}
	else {
		/* only EVENT_HOST_CHECK is left at this point */
		host *hst = (host *)temp_event->event_data;

		/* forced checks override normal check logic */
		if ((hst->check_options & CHECK_OPTION_FORCE_EXECUTION)) {
			return TRUE;
		}

		/* nothing else can hold back a host check while active checks are on */
		if (execute_host_checks != FALSE) {
			return TRUE;
		}

		log_debug_info(DEBUGL_EVENTS | DEBUGL_CHECKS, 1,
		               " We're not executing host checks right now, so we'll skip host check event for host '%s'. \n ",
		               hst->name);

		/* move the host check on by the regular check window */
		remove_event(nagios_squeue, temp_event);

		hst->next_check += check_window(hst);
		temp_event->run_time = hst->next_check;

		reschedule_event(nagios_squeue, temp_event);
		update_host_status(hst, FALSE);

		return FALSE;
	}
}
2017-05-19 22:22:40 +02:00
/* this is the main event handler loop */
int event_execution_loop ( void ) {
2017-05-19 23:37:19 +02:00
timed_event * temp_event , * last_event = NULL ;
2017-05-19 22:22:40 +02:00
time_t last_time = 0L ;
time_t current_time = 0L ;
time_t last_status_update = 0L ;
2017-05-19 23:37:19 +02:00
int poll_time_ms ;
2017-05-19 22:22:40 +02:00
log_debug_info ( DEBUGL_FUNCTIONS , 0 , " event_execution_loop() start \n " ) ;
time ( & last_time ) ;
while ( 1 ) {
2017-05-19 23:37:19 +02:00
struct timeval now ;
const struct timeval * event_runtime ;
int inputs ;
/* super-priority (hardcoded) events come first */
2017-05-19 22:22:40 +02:00
/* see if we should exit or restart (a signal was encountered) */
if ( sigshutdown = = TRUE | | sigrestart = = TRUE )
break ;
/* get the current time */
time ( & current_time ) ;
/* hey, wait a second... we traveled back in time! */
if ( current_time < last_time )
compensate_for_system_time_change ( ( unsigned long ) last_time , ( unsigned long ) current_time ) ;
/* else if the time advanced over the specified threshold, try and compensate... */
else if ( ( current_time - last_time ) > = time_change_threshold )
compensate_for_system_time_change ( ( unsigned long ) last_time , ( unsigned long ) current_time ) ;
2017-05-19 23:37:19 +02:00
/* get next scheduled event */
current_event = temp_event = ( timed_event * ) squeue_peek ( nagios_squeue ) ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* if we don't have any events to handle, exit */
if ( ! temp_event ) {
log_debug_info ( DEBUGL_EVENTS , 0 , " There aren't any events that need to be handled! Exiting... \n " ) ;
break ;
}
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* keep track of the last time */
last_time = current_time ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* update status information occasionally - NagVis watches the NDOUtils DB to see if Nagios is alive */
if ( ( unsigned long ) ( current_time - last_status_update ) > 5 ) {
last_status_update = current_time ;
update_program_status ( FALSE ) ;
2017-05-19 22:22:40 +02:00
}
2017-05-19 23:37:19 +02:00
event_runtime = squeue_event_runtime ( temp_event - > sq_event ) ;
if ( temp_event ! = last_event ) {
log_debug_info ( DEBUGL_EVENTS , 1 , " ** Event Check Loop \n " ) ;
log_debug_info ( DEBUGL_EVENTS , 1 , " Next Event Time: %s " , ctime ( & temp_event - > run_time ) ) ;
log_debug_info ( DEBUGL_EVENTS , 1 , " Current/Max Service Checks: %d/%d (%.3lf%% saturation) \n " ,
currently_running_service_checks , max_parallel_service_checks ,
( ( float ) currently_running_service_checks / ( float ) max_parallel_service_checks ) * 100 ) ;
}
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
last_event = temp_event ;
gettimeofday ( & now , NULL ) ;
poll_time_ms = tv_delta_msec ( & now , event_runtime ) ;
if ( poll_time_ms < 0 )
poll_time_ms = 0 ;
else if ( poll_time_ms > = 1500 )
poll_time_ms = 1500 ;
log_debug_info ( DEBUGL_SCHEDULING | DEBUGL_IPC , 1 , " ## Polling %dms; sockets=%d; events=%u; iobs=%p \n " ,
poll_time_ms , iobroker_get_num_fds ( nagios_iobs ) ,
squeue_size ( nagios_squeue ) , nagios_iobs ) ;
inputs = iobroker_poll ( nagios_iobs , poll_time_ms ) ;
if ( inputs < 0 & & errno ! = EINTR ) {
logit ( NSLOG_RUNTIME_ERROR , TRUE , " Error: Polling for input on %p failed: %s " , nagios_iobs , iobroker_strerror ( inputs ) ) ;
break ;
}
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
log_debug_info ( DEBUGL_IPC , 2 , " ## %d descriptors had input \n " , inputs ) ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/*
* if the event we peaked was removed from the queue from
* one of the I / O operations , we must take care not to
* try to run at , as we ' re ( almost ) sure to access free ' d
* or invalid memory if we do .
*/
if ( ! current_event ) {
log_debug_info ( DEBUGL_EVENTS , 0 , " Event was cancelled by iobroker input \n " ) ;
continue ;
2017-05-19 22:22:40 +02:00
}
2017-05-19 23:37:19 +02:00
/* 5 milliseconds allowance for firing off events early */
gettimeofday ( & now , NULL ) ;
if ( tv_delta_msec ( & now , event_runtime ) > 5 )
continue ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* move on if we shouldn't run this event */
if ( should_run_event ( temp_event ) = = FALSE )
continue ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* handle the event */
handle_timed_event ( temp_event ) ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/*
* we must remove the entry we ' ve peeked , or
* we ' ll keep getting the same one over and over .
* This also maintains sync with broker modules .
*/
remove_event ( nagios_squeue , temp_event ) ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* reschedule the event if necessary */
if ( temp_event - > recurring = = TRUE )
reschedule_event ( nagios_squeue , temp_event ) ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* else free memory associated with the event */
else
my_free ( temp_event ) ;
}
2017-05-19 22:22:40 +02:00
log_debug_info ( DEBUGL_FUNCTIONS , 0 , " event_execution_loop() end \n " ) ;
return OK ;
}
/*
 * Executes a single timed event.
 *
 * The event's latency (time elapsed since its scheduled run time,
 * never negative) is computed first, then the action belonging to the
 * event type is carried out.  When built with USE_EVENT_BROKER the
 * broker is notified before (EXECUTE) and after (END) the action.
 * Unknown event types are ignored.
 *
 * Always returns OK.
 */
int handle_timed_event(timed_event *event) {
	struct timeval now;
	const struct timeval *scheduled_at;
	double latency;

	log_debug_info(DEBUGL_FUNCTIONS, 0, " handle_timed_event() start \n ");

#ifdef USE_EVENT_BROKER
	/* send event data to broker */
	broker_timed_event(NEBTYPE_TIMEDEVENT_EXECUTE, NEBFLAG_NONE, NEBATTR_NONE, event, NULL);
#endif

	log_debug_info(DEBUGL_EVENTS, 0, " ** Timed Event ** Type: EVENT_%s, Run Time: %s ", EVENT_TYPE_STR(event->event_type), ctime(&event->run_time));

	/* get event latency; events may run up to 0.005 seconds early */
	gettimeofday(&now, NULL);
	scheduled_at = squeue_event_runtime(event->sq_event);
	latency = (double)(tv_delta_f(scheduled_at, &now));
	if (latency < 0.0)
		latency = 0.0;

	/* dispatch on the event type */
	switch (event->event_type) {

		case EVENT_SERVICE_CHECK: {
			service *svc = (service *)event->event_data;

			log_debug_info(DEBUGL_EVENTS, 0, " ** Service Check Event ==> Host: '%s', Service: '%s', Options: %d, Latency: %f sec \n ", svc->host_name, svc->description, event->event_options, latency);
			run_scheduled_service_check(svc, event->event_options, latency);
			break;
		}

		case EVENT_HOST_CHECK: {
			host *hst = (host *)event->event_data;

			log_debug_info(DEBUGL_EVENTS, 0, " ** Host Check Event ==> Host: '%s', Options: %d, Latency: %f sec \n ", hst->name, event->event_options, latency);
			run_scheduled_host_check(hst, event->event_options, latency);
			break;
		}

		case EVENT_LOG_ROTATION:
			log_debug_info(DEBUGL_EVENTS, 0, " ** Log File Rotation Event. Latency: %.3fs \n ", latency);
			rotate_log_file(event->run_time);
			break;

		case EVENT_PROGRAM_SHUTDOWN:
			log_debug_info(DEBUGL_EVENTS, 0, " ** Program Shutdown Event. Latency: %.3fs \n ", latency);
			/* raise the flag, then log the shutdown */
			sigshutdown = TRUE;
			logit(NSLOG_PROCESS_INFO, TRUE, " PROGRAM_SHUTDOWN event encountered, shutting down... \n ");
			break;

		case EVENT_PROGRAM_RESTART:
			log_debug_info(DEBUGL_EVENTS, 0, " ** Program Restart Event. Latency: %.3fs \n ", latency);
			/* raise the flag, then log the restart */
			sigrestart = TRUE;
			logit(NSLOG_PROCESS_INFO, TRUE, " PROGRAM_RESTART event encountered, restarting... \n ");
			break;

		case EVENT_CHECK_REAPER:
			log_debug_info(DEBUGL_EVENTS, 0, " ** Check Result Reaper. Latency: %.3fs \n ", latency);
			reap_check_results();
			break;

		case EVENT_ORPHAN_CHECK:
			log_debug_info(DEBUGL_EVENTS, 0, " ** Orphaned Host and Service Check Event. Latency: %.3fs \n ", latency);
			if (check_orphaned_hosts == TRUE)
				check_for_orphaned_hosts();
			if (check_orphaned_services == TRUE)
				check_for_orphaned_services();
			break;

		case EVENT_RETENTION_SAVE:
			log_debug_info(DEBUGL_EVENTS, 0, " ** Retention Data Save Event. Latency: %.3fs \n ", latency);
			save_state_information(TRUE);
			break;

		case EVENT_STATUS_SAVE:
			log_debug_info(DEBUGL_EVENTS, 0, " ** Status Data Save Event. Latency: %.3fs \n ", latency);
			update_all_status_data();
			break;

		case EVENT_SCHEDULED_DOWNTIME:
			log_debug_info(DEBUGL_EVENTS, 0, " ** Scheduled Downtime Event. Latency: %.3fs \n ", latency);
			/* event_data points to the downtime id; it is freed once handled */
			if (event->event_data) {
				unsigned long *downtime_id = (unsigned long *)event->event_data;

				handle_scheduled_downtime_by_id(*downtime_id);
				free(event->event_data);
				event->event_data = NULL;
			}
			break;

		case EVENT_SFRESHNESS_CHECK:
			log_debug_info(DEBUGL_EVENTS, 0, " ** Service Result Freshness Check Event. Latency: %.3fs \n ", latency);
			check_service_result_freshness();
			break;

		case EVENT_HFRESHNESS_CHECK:
			log_debug_info(DEBUGL_EVENTS, 0, " ** Host Result Freshness Check Event. Latency: %.3fs \n ", latency);
			check_host_result_freshness();
			break;

		case EVENT_EXPIRE_DOWNTIME:
			log_debug_info(DEBUGL_EVENTS, 0, " ** Expire Downtime Event. Latency: %.3fs \n ", latency);
			check_for_expired_downtime();
			break;

		case EVENT_RESCHEDULE_CHECKS:
			log_debug_info(DEBUGL_EVENTS, 0, " ** Reschedule Checks Event. Latency: %.3fs \n ", latency);
			adjust_check_scheduling();
			break;

		case EVENT_EXPIRE_COMMENT:
			log_debug_info(DEBUGL_EVENTS, 0, " ** Expire Comment Event. Latency: %.3fs \n ", latency);
			/* here event_data carries the value itself rather than a pointer to it */
			check_for_expired_comment((unsigned long)event->event_data);
			break;

		case EVENT_CHECK_PROGRAM_UPDATE:
			log_debug_info(DEBUGL_EVENTS, 0, " ** Check For Program Update. Latency: %.3fs \n ", latency);
			check_for_nagios_updates(FALSE, TRUE);
			break;

		case EVENT_USER_FUNCTION:
			log_debug_info(DEBUGL_EVENTS, 0, " ** User Function Event. Latency: %.3fs \n ", latency);
			/* event_data is the callback, event_args its only argument */
			if (event->event_data != NULL) {
				void (*callback)(void *) = event->event_data;

				(*callback)(event->event_args);
			}
			break;

		default:
			break;
	}

#ifdef USE_EVENT_BROKER
	/* send event data to broker */
	broker_timed_event(NEBTYPE_TIMEDEVENT_END, NEBFLAG_NONE, NEBATTR_NONE, event, NULL);
#endif

	log_debug_info(DEBUGL_FUNCTIONS, 0, " handle_timed_event() end \n ");

	return OK;
}
2017-05-19 23:37:19 +02:00
/*
 * The squeue-internal event type, declared again here so that the
 * scheduling queue can be manipulated directly, without a malloc/free
 * for each add/remove.
 *
 * NOTE(review): since this is a second declaration of a type owned by
 * the squeue implementation, the two presumably have to be kept in sync
 * member for member - confirm against the squeue sources before changing
 * anything here.
 *
 * @todo: Refactor this to not depend so heavily on the event queue
 * implementation; doing so efficiently may require a different
 * scheduling queue data structure.
 */
struct squeue_event {
/* presumably the entry's position inside the priority queue - TODO confirm */
unsigned int pos ;
/* presumably the priority the queue orders entries by - TODO confirm */
pqueue_pri_t pri ;
/* presumably the time the entry is scheduled for - TODO confirm */
struct timeval when ;
/* payload pointer; adjust_check_scheduling() reads a timed_event from it */
void * data ;
} ;
/*
* Adjusts scheduling of active , non - forced host and service checks .
*/
2017-05-19 22:22:40 +02:00
void adjust_check_scheduling ( void ) {
2017-05-19 23:37:19 +02:00
pqueue_t * temp_pqueue ; /* squeue_t is a typedef of pqueue_t. */
struct squeue_event * sq_event ;
struct squeue_event * * events_to_reschedule ;
timed_event * temp_event ;
2017-05-19 22:22:40 +02:00
service * temp_service = NULL ;
host * temp_host = NULL ;
2017-05-19 23:37:19 +02:00
const double INTER_CHECK_RESCHEDULE_THRESHOLD = scheduling_info . service_inter_check_delay * 0.25 ;
2017-05-19 22:22:40 +02:00
double inter_check_delay = 0.0 ;
double new_run_time_offset = 0.0 ;
2017-05-19 23:37:19 +02:00
time_t first_window_time ;
time_t last_window_time ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
struct timeval last_check_tv = { ( time_t ) 0 , ( suseconds_t ) 0 } ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
int adjust_scheduling = FALSE ;
int total_checks = 0 ;
int i ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
log_debug_info ( DEBUGL_FUNCTIONS , 0 , " adjust_check_scheduling() start \n " ) ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* Determine our adjustment window. */
first_window_time = time ( NULL ) ;
last_window_time = first_window_time + auto_rescheduling_window ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* Nothing to do if the first event is after the reschedule window. */
sq_event = pqueue_peek ( nagios_squeue ) ;
temp_event = sq_event ? sq_event - > data : NULL ;
if ( ! temp_event | | temp_event - > run_time > last_window_time )
return ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* Get a sorted array of all check events to reschedule. First we need a
* duplicate of nagios_squeue so we can get the events in - order without
* having to remove them from the original queue . We will use
* pqueue_change_priority ( ) to move the check events in the original queue .
* @ note : This is horribly dependent on implementation details of squeue
* and pqueue , but we don ' t have much choice to avoid a free / malloc of each
* squeue_event from the head to last_window_time , or avoid paying the full
* O ( n lg n ) penalty twice to drain and rebuild the queue . */
temp_pqueue = malloc ( sizeof ( * temp_pqueue ) ) ;
if ( ! temp_pqueue ) {
logit ( NSLOG_RUNTIME_ERROR , TRUE , " Failed to allocate queue needed to adjust check scheduling. \n " ) ;
return ;
}
* temp_pqueue = * nagios_squeue ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* We need a separate copy of the underlying queue array. */
temp_pqueue - > d = malloc ( temp_pqueue - > size * sizeof ( void * ) ) ;
if ( ! temp_pqueue - > d ) {
logit ( NSLOG_RUNTIME_ERROR , TRUE , " Failed to allocate queue data needed to adjust check scheduling. \n " ) ;
free ( temp_pqueue ) ;
return ;
}
memcpy ( temp_pqueue - > d , nagios_squeue - > d , temp_pqueue - > size * sizeof ( void * ) ) ;
temp_pqueue - > avail = temp_pqueue - > size ;
/* Now allocate space for a sorted array of check events. We shouldn't need
* space for all events , but we can ' t really calculate how many we ' ll need
* without looking at all events . */
events_to_reschedule = malloc ( ( temp_pqueue - > size - 1 ) * sizeof ( void * ) ) ;
if ( ! events_to_reschedule ) {
logit ( NSLOG_RUNTIME_ERROR , TRUE , " Failed to allocate memory needed to adjust check scheduling. \n " ) ;
pqueue_free ( temp_pqueue ) ; /* pqueue_free() to keep the events. */
return ;
}
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* Now we get the events to reschedule and collect some scheduling info. */
while ( ( sq_event = pqueue_pop ( temp_pqueue ) ) ) {
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* We need a timed_event and event data. */
temp_event = sq_event - > data ;
if ( ! temp_event | | ! temp_event - > event_data )
continue ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* Skip events before our current window. */
if ( temp_event - > run_time < first_window_time )
continue ;
/* We're done once past the end of the window. */
if ( temp_event - > run_time > last_window_time )
break ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
switch ( temp_event - > event_type ) {
case EVENT_HOST_CHECK :
temp_host = temp_event - > event_data ;
/* Leave forced checks. */
if ( temp_host - > check_options & CHECK_OPTION_FORCE_EXECUTION )
continue ;
break ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
case EVENT_SERVICE_CHECK :
temp_service = temp_event - > event_data ;
/* Leave forced checks. */
if ( temp_service - > check_options & CHECK_OPTION_FORCE_EXECUTION )
continue ;
break ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
default :
continue ;
2017-05-19 22:22:40 +02:00
}
2017-05-19 23:37:19 +02:00
/* Reschedule if the last check overlap into this one. */
if ( last_check_tv . tv_sec > 0 & & tv_delta_msec ( & last_check_tv , & sq_event - > when ) < INTER_CHECK_RESCHEDULE_THRESHOLD * 1000 ) {
/* log_debug_info(DEBUGL_SCHEDULING, 2, "Rescheduling event %d: %.3fs delay.\n", total_checks, tv_delta_f(&last_check_tv, &sq_event->when));
*/ adjust_scheduling = TRUE ;
}
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
last_check_tv = sq_event - > when ;
events_to_reschedule [ total_checks + + ] = sq_event ;
2017-05-19 22:22:40 +02:00
}
2017-05-19 23:37:19 +02:00
/* Removing squeue_events from temp_pqueue invalidates the positions of
* those events in nagios_squeue , so we need to fix that up before we
* return or change their priorities . Start at i = 1 since i = 0 is unused . */
for ( i = 1 ; i < ( int ) nagios_squeue - > size ; + + i ) {
if ( ( sq_event = nagios_squeue - > d [ i ] ) )
sq_event - > pos = i ;
}
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* No checks to reschedule, nothing to do... */
if ( total_checks < 2 | | ! adjust_scheduling ) {
log_debug_info ( DEBUGL_SCHEDULING , 0 , " No events need to be rescheduled (%d checks in %ds window). \n " , total_checks , auto_rescheduling_window ) ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
pqueue_free ( temp_pqueue ) ;
free ( events_to_reschedule ) ;
2017-05-19 22:22:40 +02:00
return ;
}
2017-05-19 23:37:19 +02:00
inter_check_delay = auto_rescheduling_window / ( double ) total_checks ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
log_debug_info ( DEBUGL_SCHEDULING , 0 , " Rescheduling events: %d checks in %ds window, ICD: %.3fs. \n " , total_checks , auto_rescheduling_window , inter_check_delay ) ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* Now smooth out the schedule. */
new_run_time_offset = inter_check_delay * 0.5 ;
for ( i = 0 ; i < total_checks ; + + i , new_run_time_offset + = inter_check_delay ) {
struct timeval new_run_time ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* All events_to_reschedule are valid squeue_events with data pointers
* to timed_events for non - forced host or service checks . */
sq_event = events_to_reschedule [ i ] ;
temp_event = sq_event - > data ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* Calculate and apply a new queue 'when' time. */
new_run_time . tv_sec = first_window_time + ( time_t ) floor ( new_run_time_offset ) ;
new_run_time . tv_usec = ( suseconds_t ) ( fmod ( new_run_time_offset , 1.0 ) * 1E6 ) ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
/* log_debug_info(DEBUGL_SCHEDULING, 2, "Check %d: offset %.3fs, new run time %lu.%06ld.\n", i, new_run_time_offset, (unsigned long)new_run_time.tv_sec, (long)new_run_time.tv_usec);
*/
squeue_change_priority_tv ( nagios_squeue , sq_event , & new_run_time ) ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
if ( temp_event - > run_time ! = new_run_time . tv_sec )
temp_event - > run_time = new_run_time . tv_sec ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
switch ( temp_event - > event_type ) {
case EVENT_HOST_CHECK :
temp_host = temp_event - > event_data ;
if ( temp_host - > next_check ! = new_run_time . tv_sec ) {
temp_host - > next_check = new_run_time . tv_sec ;
update_host_status ( temp_host , FALSE ) ;
}
break ;
case EVENT_SERVICE_CHECK :
temp_service = temp_event - > event_data ;
if ( temp_service - > next_check ! = new_run_time . tv_sec ) {
temp_service - > next_check = new_run_time . tv_sec ;
update_service_status ( temp_service , FALSE ) ;
}
break ;
default :
break ;
2017-05-19 22:22:40 +02:00
}
}
log_debug_info ( DEBUGL_FUNCTIONS , 0 , " adjust_check_scheduling() end \n " ) ;
2017-05-19 23:37:19 +02:00
pqueue_free ( temp_pqueue ) ;
free ( events_to_reschedule ) ;
2017-05-19 22:22:40 +02:00
return ;
}
2017-05-19 23:37:19 +02:00
/*
 * Rebuilds the scheduling queue *q after a system time change.
 *
 * Every event is popped from the old queue and re-added to a freshly created
 * one. For events flagged compensate_for_time_change, run_time is recomputed
 * first: via the event's timing_func when one is set, otherwise by adding
 * delta. Events with a non-zero priority are re-added with
 * squeue_add_usec() (using priority - 1), all others with squeue_add().
 * Finally the old queue is destroyed and *q is pointed at the new one.
 *
 * @param q     address of the queue pointer to rebuild; updated in place
 * @param delta time change in seconds; negative when the clock moved backwards
 *
 * If the replacement queue cannot be created, an error is logged and *q is
 * left completely untouched, so no scheduled event is lost.
 */
static void adjust_squeue_for_time_change(squeue_t **q, int delta) {
	timed_event *event;
	squeue_t *sq_new;

	/*
	 * this is pretty inefficient in terms of free() + malloc(),
	 * but it should be pretty rare that we have to adjust times
	 * so we go with the well-tested codepath.
	 */
	sq_new = squeue_create(squeue_size(*q));
	if (!sq_new) {
		/* Draining *q without a destination would drop every event. */
		logit(NSLOG_RUNTIME_ERROR, TRUE, "Failed to allocate scheduling queue needed to compensate for the time change; event run times were not adjusted.\n");
		return;
	}

	while ((event = squeue_pop(*q))) {
		if (event->compensate_for_time_change == TRUE) {
			if (event->timing_func) {
				/* the event knows how to compute its own next run time */
				time_t (*timingfunc)(void);
				timingfunc = event->timing_func;
				event->run_time = timingfunc();
			}
			else {
				event->run_time += delta;
			}
		}

		/* re-queue and remember the new queue entry on the event */
		if (event->priority) {
			event->sq_event = squeue_add_usec(sq_new, event->run_time, event->priority - 1, event);
		}
		else {
			event->sq_event = squeue_add(sq_new, event->run_time, event);
		}
	}

	/* the old queue has been drained by the loop above */
	squeue_destroy(*q, 0);
	*q = sq_new;
}
2017-05-19 22:22:40 +02:00
/* attempts to compensate for a change in the system time */
void compensate_for_system_time_change ( unsigned long last_time , unsigned long current_time ) {
unsigned long time_difference = 0L ;
service * temp_service = NULL ;
host * temp_host = NULL ;
int days = 0 ;
int hours = 0 ;
int minutes = 0 ;
int seconds = 0 ;
2017-05-19 23:37:19 +02:00
int delta = 0 ;
2017-05-19 22:22:40 +02:00
log_debug_info ( DEBUGL_FUNCTIONS , 0 , " compensate_for_system_time_change() start \n " ) ;
2017-05-19 23:37:19 +02:00
/*
* if current_time < last_time , delta will be negative so we can
* still use addition to all effected timestamps
*/
delta = current_time - last_time ;
2017-05-19 22:22:40 +02:00
/* we moved back in time... */
if ( last_time > current_time ) {
time_difference = last_time - current_time ;
get_time_breakdown ( time_difference , & days , & hours , & minutes , & seconds ) ;
log_debug_info ( DEBUGL_EVENTS , 0 , " Detected a backwards time change of %dd %dh %dm %ds. \n " , days , hours , minutes , seconds ) ;
}
/* we moved into the future... */
else {
time_difference = current_time - last_time ;
get_time_breakdown ( time_difference , & days , & hours , & minutes , & seconds ) ;
log_debug_info ( DEBUGL_EVENTS , 0 , " Detected a forwards time change of %dd %dh %dm %ds. \n " , days , hours , minutes , seconds ) ;
}
/* log the time change */
2017-05-19 23:37:19 +02:00
logit ( NSLOG_PROCESS_INFO | NSLOG_RUNTIME_WARNING , TRUE , " Warning: A system time change of %d seconds (%dd %dh %dm %ds %s in time) has been detected. Compensating... \n " ,
delta , days , hours , minutes , seconds ,
( last_time > current_time ) ? " backwards " : " forwards " ) ;
2017-05-19 22:22:40 +02:00
2017-05-19 23:37:19 +02:00
adjust_squeue_for_time_change ( & nagios_squeue , delta ) ;
2017-05-19 22:22:40 +02:00
/* adjust service timestamps */
for ( temp_service = service_list ; temp_service ! = NULL ; temp_service = temp_service - > next ) {
adjust_timestamp_for_time_change ( last_time , current_time , time_difference , & temp_service - > last_notification ) ;
adjust_timestamp_for_time_change ( last_time , current_time , time_difference , & temp_service - > last_check ) ;
adjust_timestamp_for_time_change ( last_time , current_time , time_difference , & temp_service - > next_check ) ;
adjust_timestamp_for_time_change ( last_time , current_time , time_difference , & temp_service - > last_state_change ) ;
adjust_timestamp_for_time_change ( last_time , current_time , time_difference , & temp_service - > last_hard_state_change ) ;
/* recalculate next re-notification time */
temp_service - > next_notification = get_next_service_notification_time ( temp_service , temp_service - > last_notification ) ;
/* update the status data */
update_service_status ( temp_service , FALSE ) ;
}
/* adjust host timestamps */
for ( temp_host = host_list ; temp_host ! = NULL ; temp_host = temp_host - > next ) {
2017-05-19 23:37:19 +02:00
adjust_timestamp_for_time_change ( last_time , current_time , time_difference , & temp_host - > last_notification ) ;
2017-05-19 22:22:40 +02:00
adjust_timestamp_for_time_change ( last_time , current_time , time_difference , & temp_host - > last_check ) ;
adjust_timestamp_for_time_change ( last_time , current_time , time_difference , & temp_host - > next_check ) ;
adjust_timestamp_for_time_change ( last_time , current_time , time_difference , & temp_host - > last_state_change ) ;
adjust_timestamp_for_time_change ( last_time , current_time , time_difference , & temp_host - > last_hard_state_change ) ;
adjust_timestamp_for_time_change ( last_time , current_time , time_difference , & temp_host - > last_state_history_update ) ;
/* recalculate next re-notification time */
2017-05-19 23:37:19 +02:00
temp_host - > next_notification = get_next_host_notification_time ( temp_host , temp_host - > last_notification ) ;
2017-05-19 22:22:40 +02:00
/* update the status data */
update_host_status ( temp_host , FALSE ) ;
}
/* adjust program timestamps */
adjust_timestamp_for_time_change ( last_time , current_time , time_difference , & program_start ) ;
adjust_timestamp_for_time_change ( last_time , current_time , time_difference , & event_start ) ;
/* update the status data */
update_program_status ( FALSE ) ;
return ;
}
/*
 * Adjusts a timestamp variable in accordance with a system time change.
 *
 * last_time       - time before the change
 * current_time    - time after the change
 * time_difference - absolute size of the change, in seconds
 * ts              - timestamp to shift in place
 *
 * A timestamp of 0 is treated as "unset" and never modified. When the clock
 * moved backwards the timestamp is lowered, clamping at 0 instead of going
 * below it; otherwise it is raised by time_difference.
 */
void adjust_timestamp_for_time_change(time_t last_time, time_t current_time, unsigned long time_difference, time_t *ts) {

	log_debug_info(DEBUGL_FUNCTIONS, 0, " adjust_timestamp_for_time_change() \n ");

	/* epoch values are left alone */
	if (*ts == (time_t)0)
		return;

	/* clock moved forwards (or not at all): push the timestamp ahead */
	if (last_time <= current_time) {
		*ts = (time_t)(*ts + time_difference);
		return;
	}

	/* clock moved backwards: pull the timestamp back, never before the epoch */
	if ((unsigned long)*ts < time_difference)
		*ts = (time_t)0;
	else
		*ts = (time_t)(*ts - time_difference);

	return;
}