1848 lines
66 KiB
C
1848 lines
66 KiB
C
|
/*****************************************************************************
|
||
|
*
|
||
|
* EVENTS.C - Timed event functions for Nagios
|
||
|
*
|
||
|
* Copyright (c) 1999-2010 Ethan Galstad (egalstad@nagios.org)
|
||
|
* Last Modified: 08-28-2010
|
||
|
*
|
||
|
* License:
|
||
|
*
|
||
|
* This program is free software; you can redistribute it and/or modify
|
||
|
* it under the terms of the GNU General Public License version 2 as
|
||
|
* published by the Free Software Foundation.
|
||
|
*
|
||
|
* This program is distributed in the hope that it will be useful,
|
||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||
|
* GNU General Public License for more details.
|
||
|
*
|
||
|
* You should have received a copy of the GNU General Public License
|
||
|
* along with this program; if not, write to the Free Software
|
||
|
* Foundation, Inc., 675 Mass Ave, Cambridge, MA 02139, USA.
|
||
|
*
|
||
|
*****************************************************************************/
|
||
|
|
||
|
#include "../include/config.h"
|
||
|
#include "../include/common.h"
|
||
|
#include "../include/downtime.h"
|
||
|
#include "../include/comments.h"
|
||
|
#include "../include/statusdata.h"
|
||
|
#include "../include/nagios.h"
|
||
|
#include "../include/broker.h"
|
||
|
#include "../include/sretention.h"
|
||
|
|
||
|
|
||
|
extern char *config_file;
|
||
|
|
||
|
extern int test_scheduling;
|
||
|
|
||
|
extern time_t program_start;
|
||
|
extern time_t event_start;
|
||
|
extern time_t last_command_check;
|
||
|
|
||
|
extern int sigshutdown;
|
||
|
extern int sigrestart;
|
||
|
|
||
|
extern double sleep_time;
|
||
|
extern int interval_length;
|
||
|
extern int service_inter_check_delay_method;
|
||
|
extern int host_inter_check_delay_method;
|
||
|
extern int service_interleave_factor_method;
|
||
|
extern int max_host_check_spread;
|
||
|
extern int max_service_check_spread;
|
||
|
|
||
|
extern int command_check_interval;
|
||
|
extern int check_reaper_interval;
|
||
|
extern int service_freshness_check_interval;
|
||
|
extern int host_freshness_check_interval;
|
||
|
extern int auto_rescheduling_interval;
|
||
|
extern int auto_rescheduling_window;
|
||
|
|
||
|
extern int check_external_commands;
|
||
|
extern int check_orphaned_services;
|
||
|
extern int check_orphaned_hosts;
|
||
|
extern int check_service_freshness;
|
||
|
extern int check_host_freshness;
|
||
|
extern int auto_reschedule_checks;
|
||
|
|
||
|
extern int retain_state_information;
|
||
|
extern int retention_update_interval;
|
||
|
|
||
|
extern int max_parallel_service_checks;
|
||
|
extern int currently_running_service_checks;
|
||
|
|
||
|
extern int aggregate_status_updates;
|
||
|
extern int status_update_interval;
|
||
|
|
||
|
extern int log_rotation_method;
|
||
|
|
||
|
extern int service_check_timeout;
|
||
|
|
||
|
extern int execute_service_checks;
|
||
|
extern int execute_host_checks;
|
||
|
|
||
|
extern int child_processes_fork_twice;
|
||
|
|
||
|
extern int time_change_threshold;
|
||
|
|
||
|
timed_event *event_list_low = NULL;
|
||
|
timed_event *event_list_low_tail = NULL;
|
||
|
timed_event *event_list_high = NULL;
|
||
|
timed_event *event_list_high_tail = NULL;
|
||
|
|
||
|
extern host *host_list;
|
||
|
extern service *service_list;
|
||
|
|
||
|
sched_info scheduling_info;
|
||
|
|
||
|
|
||
|
|
||
|
/******************************************************************/
|
||
|
/************ EVENT SCHEDULING/HANDLING FUNCTIONS *****************/
|
||
|
/******************************************************************/
|
||
|
|
||
|
|
||
|
/* initialize the event timing loop before we start monitoring */
|
||
|
void init_timing_loop(void) {
|
||
|
host *temp_host = NULL;
|
||
|
service *temp_service = NULL;
|
||
|
time_t current_time = 0L;
|
||
|
unsigned long interval_to_use = 0L;
|
||
|
int total_interleave_blocks = 0;
|
||
|
int current_interleave_block = 1;
|
||
|
int interleave_block_index = 0;
|
||
|
int mult_factor = 0;
|
||
|
int is_valid_time = 0;
|
||
|
time_t next_valid_time = 0L;
|
||
|
int schedule_check = 0;
|
||
|
double max_inter_check_delay = 0.0;
|
||
|
struct timeval tv[9];
|
||
|
double runtime[9];
|
||
|
|
||
|
|
||
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "init_timing_loop() start\n");
|
||
|
|
||
|
/* get the time right now */
|
||
|
time(¤t_time);
|
||
|
|
||
|
|
||
|
/******** GET BASIC HOST/SERVICE INFO ********/
|
||
|
|
||
|
scheduling_info.total_services = 0;
|
||
|
scheduling_info.total_scheduled_services = 0;
|
||
|
scheduling_info.total_hosts = 0;
|
||
|
scheduling_info.total_scheduled_hosts = 0;
|
||
|
scheduling_info.average_services_per_host = 0.0;
|
||
|
scheduling_info.average_scheduled_services_per_host = 0.0;
|
||
|
scheduling_info.average_service_execution_time = 0.0;
|
||
|
scheduling_info.service_check_interval_total = 0;
|
||
|
scheduling_info.average_service_inter_check_delay = 0.0;
|
||
|
scheduling_info.host_check_interval_total = 0;
|
||
|
scheduling_info.average_host_inter_check_delay = 0.0;
|
||
|
|
||
|
if(test_scheduling == TRUE)
|
||
|
gettimeofday(&tv[0], NULL);
|
||
|
|
||
|
/* get info on service checks to be scheduled */
|
||
|
for(temp_service = service_list; temp_service != NULL; temp_service = temp_service->next) {
|
||
|
|
||
|
schedule_check = TRUE;
|
||
|
|
||
|
/* service has no check interval */
|
||
|
if(temp_service->check_interval == 0)
|
||
|
schedule_check = FALSE;
|
||
|
|
||
|
/* active checks are disabled */
|
||
|
if(temp_service->checks_enabled == FALSE)
|
||
|
schedule_check = FALSE;
|
||
|
|
||
|
/* are there any valid times this service can be checked? */
|
||
|
is_valid_time = check_time_against_period(current_time, temp_service->check_period_ptr);
|
||
|
if(is_valid_time == ERROR) {
|
||
|
get_next_valid_time(current_time, &next_valid_time, temp_service->check_period_ptr);
|
||
|
if(current_time == next_valid_time)
|
||
|
schedule_check = FALSE;
|
||
|
}
|
||
|
|
||
|
if(schedule_check == TRUE) {
|
||
|
|
||
|
scheduling_info.total_scheduled_services++;
|
||
|
|
||
|
/* used later in inter-check delay calculations */
|
||
|
scheduling_info.service_check_interval_total += temp_service->check_interval;
|
||
|
|
||
|
/* calculate rolling average execution time (available from retained state information) */
|
||
|
scheduling_info.average_service_execution_time = (double)(((scheduling_info.average_service_execution_time * (scheduling_info.total_scheduled_services - 1)) + temp_service->execution_time) / (double)scheduling_info.total_scheduled_services);
|
||
|
}
|
||
|
else {
|
||
|
temp_service->should_be_scheduled = FALSE;
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 1, "Service '%s' on host '%s' should not be scheduled.\n", temp_service->description, temp_service->host_name);
|
||
|
}
|
||
|
|
||
|
scheduling_info.total_services++;
|
||
|
}
|
||
|
|
||
|
if(test_scheduling == TRUE)
|
||
|
gettimeofday(&tv[1], NULL);
|
||
|
|
||
|
/* get info on host checks to be scheduled */
|
||
|
for(temp_host = host_list; temp_host != NULL; temp_host = temp_host->next) {
|
||
|
|
||
|
schedule_check = TRUE;
|
||
|
|
||
|
/* host has no check interval */
|
||
|
if(temp_host->check_interval == 0)
|
||
|
schedule_check = FALSE;
|
||
|
|
||
|
/* active checks are disabled */
|
||
|
if(temp_host->checks_enabled == FALSE)
|
||
|
schedule_check = FALSE;
|
||
|
|
||
|
/* are there any valid times this host can be checked? */
|
||
|
is_valid_time = check_time_against_period(current_time, temp_host->check_period_ptr);
|
||
|
if(is_valid_time == ERROR) {
|
||
|
get_next_valid_time(current_time, &next_valid_time, temp_host->check_period_ptr);
|
||
|
if(current_time == next_valid_time)
|
||
|
schedule_check = FALSE;
|
||
|
}
|
||
|
|
||
|
if(schedule_check == TRUE) {
|
||
|
|
||
|
scheduling_info.total_scheduled_hosts++;
|
||
|
|
||
|
/* this is used later in inter-check delay calculations */
|
||
|
scheduling_info.host_check_interval_total += temp_host->check_interval;
|
||
|
}
|
||
|
else {
|
||
|
temp_host->should_be_scheduled = FALSE;
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 1, "Host '%s' should not be scheduled.\n", temp_host->name);
|
||
|
}
|
||
|
|
||
|
scheduling_info.total_hosts++;
|
||
|
}
|
||
|
|
||
|
if(test_scheduling == TRUE)
|
||
|
gettimeofday(&tv[2], NULL);
|
||
|
|
||
|
scheduling_info.average_services_per_host = (double)((double)scheduling_info.total_services / (double)scheduling_info.total_hosts);
|
||
|
scheduling_info.average_scheduled_services_per_host = (double)((double)scheduling_info.total_scheduled_services / (double)scheduling_info.total_hosts);
|
||
|
|
||
|
/* adjust the check interval total to correspond to the interval length */
|
||
|
scheduling_info.service_check_interval_total = (scheduling_info.service_check_interval_total * interval_length);
|
||
|
|
||
|
/* calculate the average check interval for services */
|
||
|
scheduling_info.average_service_check_interval = (double)((double)scheduling_info.service_check_interval_total / (double)scheduling_info.total_scheduled_services);
|
||
|
|
||
|
|
||
|
/******** DETERMINE SERVICE SCHEDULING PARAMS ********/
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "Determining service scheduling parameters...");
|
||
|
|
||
|
/* default max service check spread (in minutes) */
|
||
|
scheduling_info.max_service_check_spread = max_service_check_spread;
|
||
|
|
||
|
/* how should we determine the service inter-check delay to use? */
|
||
|
switch(service_inter_check_delay_method) {
|
||
|
|
||
|
case ICD_NONE:
|
||
|
|
||
|
/* don't spread checks out - useful for testing parallelization code */
|
||
|
scheduling_info.service_inter_check_delay = 0.0;
|
||
|
break;
|
||
|
|
||
|
case ICD_DUMB:
|
||
|
|
||
|
/* be dumb and just schedule checks 1 second apart */
|
||
|
scheduling_info.service_inter_check_delay = 1.0;
|
||
|
break;
|
||
|
|
||
|
case ICD_USER:
|
||
|
|
||
|
/* the user specified a delay, so don't try to calculate one */
|
||
|
break;
|
||
|
|
||
|
case ICD_SMART:
|
||
|
default:
|
||
|
|
||
|
/* be smart and calculate the best delay to use to minimize local load... */
|
||
|
if(scheduling_info.total_scheduled_services > 0 && scheduling_info.service_check_interval_total > 0) {
|
||
|
|
||
|
/* calculate the average inter check delay (in seconds) needed to evenly space the service checks out */
|
||
|
scheduling_info.average_service_inter_check_delay = (double)(scheduling_info.average_service_check_interval / (double)scheduling_info.total_scheduled_services);
|
||
|
|
||
|
/* set the global inter check delay value */
|
||
|
scheduling_info.service_inter_check_delay = scheduling_info.average_service_inter_check_delay;
|
||
|
|
||
|
/* calculate max inter check delay and see if we should use that instead */
|
||
|
max_inter_check_delay = (double)((scheduling_info.max_service_check_spread * 60.0) / (double)scheduling_info.total_scheduled_services);
|
||
|
if(scheduling_info.service_inter_check_delay > max_inter_check_delay)
|
||
|
scheduling_info.service_inter_check_delay = max_inter_check_delay;
|
||
|
}
|
||
|
else
|
||
|
scheduling_info.service_inter_check_delay = 0.0;
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 1, "Total scheduled service checks: %d\n", scheduling_info.total_scheduled_services);
|
||
|
log_debug_info(DEBUGL_EVENTS, 1, "Average service check interval: %0.2f sec\n", scheduling_info.average_service_check_interval);
|
||
|
log_debug_info(DEBUGL_EVENTS, 1, "Service inter-check delay: %0.2f sec\n", scheduling_info.service_inter_check_delay);
|
||
|
}
|
||
|
|
||
|
/* how should we determine the service interleave factor? */
|
||
|
switch(service_interleave_factor_method) {
|
||
|
|
||
|
case ILF_USER:
|
||
|
|
||
|
/* the user supplied a value, so don't do any calculation */
|
||
|
break;
|
||
|
|
||
|
case ILF_SMART:
|
||
|
default:
|
||
|
|
||
|
/* protect against a divide by zero problem - shouldn't happen, but just in case... */
|
||
|
if(scheduling_info.total_hosts == 0)
|
||
|
scheduling_info.total_hosts = 1;
|
||
|
|
||
|
scheduling_info.service_interleave_factor = (int)(ceil(scheduling_info.average_scheduled_services_per_host));
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 1, "Total scheduled service checks: %d\n", scheduling_info.total_scheduled_services);
|
||
|
log_debug_info(DEBUGL_EVENTS, 1, "Total hosts: %d\n", scheduling_info.total_hosts);
|
||
|
log_debug_info(DEBUGL_EVENTS, 1, "Service Interleave factor: %d\n", scheduling_info.service_interleave_factor);
|
||
|
}
|
||
|
|
||
|
/* calculate number of service interleave blocks */
|
||
|
if(scheduling_info.service_interleave_factor == 0)
|
||
|
total_interleave_blocks = scheduling_info.total_scheduled_services;
|
||
|
else
|
||
|
total_interleave_blocks = (int)ceil((double)scheduling_info.total_scheduled_services / (double)scheduling_info.service_interleave_factor);
|
||
|
|
||
|
scheduling_info.first_service_check = (time_t)0L;
|
||
|
scheduling_info.last_service_check = (time_t)0L;
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 1, "Total scheduled services: %d\n", scheduling_info.total_scheduled_services);
|
||
|
log_debug_info(DEBUGL_EVENTS, 1, "Service Interleave factor: %d\n", scheduling_info.service_interleave_factor);
|
||
|
log_debug_info(DEBUGL_EVENTS, 1, "Total service interleave blocks: %d\n", total_interleave_blocks);
|
||
|
log_debug_info(DEBUGL_EVENTS, 1, "Service inter-check delay: %2.1f\n", scheduling_info.service_inter_check_delay);
|
||
|
|
||
|
|
||
|
if(test_scheduling == TRUE)
|
||
|
gettimeofday(&tv[3], NULL);
|
||
|
|
||
|
/******** SCHEDULE SERVICE CHECKS ********/
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "Scheduling service checks...");
|
||
|
|
||
|
/* determine check times for service checks (with interleaving to minimize remote load) */
|
||
|
current_interleave_block = 0;
|
||
|
for(temp_service = service_list; temp_service != NULL && scheduling_info.service_interleave_factor > 0;) {
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "Current Interleave Block: %d\n", current_interleave_block);
|
||
|
|
||
|
for(interleave_block_index = 0; interleave_block_index < scheduling_info.service_interleave_factor && temp_service != NULL; temp_service = temp_service->next) {
|
||
|
int check_delay = 0;
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "Service '%s' on host '%s'\n", temp_service->description, temp_service->host_name);
|
||
|
/* skip this service if it shouldn't be scheduled */
|
||
|
if(temp_service->should_be_scheduled == FALSE) {
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "Service check should not be scheduled.\n");
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
/* skip services whose checks are currently executing */
|
||
|
if(temp_service->is_executing) {
|
||
|
log_debug_info(DEBUGL_EVENTS, 2,
|
||
|
"Service check '%s' on host '%s' is already executing.\n",
|
||
|
temp_service->description, temp_service->host_name);
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* skip services that are already scheduled for the (near)
|
||
|
* future from retention data, but reschedule ones that
|
||
|
* were supposed to happen while we weren't running...
|
||
|
* We check to make sure the check isn't scheduled to run
|
||
|
* far in the future to make sure checks who've hade their
|
||
|
* timeperiods changed during the restart aren't left
|
||
|
* hanging too long without being run.
|
||
|
*/
|
||
|
check_delay = temp_service->next_check - current_time;
|
||
|
if(check_delay > 0 && check_delay < (temp_service->check_interval * interval_length)) {
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "Service is already scheduled to be checked in the future: %s\n", ctime(&temp_service->next_check));
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
/* interleave block index should only be increased when we find a schedulable service */
|
||
|
/* moved from for() loop 11/05/05 EG */
|
||
|
interleave_block_index++;
|
||
|
|
||
|
mult_factor = current_interleave_block + (interleave_block_index * total_interleave_blocks);
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "CIB: %d, IBI: %d, TIB: %d, SIF: %d\n", current_interleave_block, interleave_block_index, total_interleave_blocks, scheduling_info.service_interleave_factor);
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "Mult factor: %d\n", mult_factor);
|
||
|
|
||
|
/* set the preferred next check time for the service */
|
||
|
temp_service->next_check = (time_t)(current_time + (mult_factor * scheduling_info.service_inter_check_delay));
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "Preferred Check Time: %lu --> %s", (unsigned long)temp_service->next_check, ctime(&temp_service->next_check));
|
||
|
|
||
|
|
||
|
/* make sure the service can actually be scheduled when we want */
|
||
|
is_valid_time = check_time_against_period(temp_service->next_check, temp_service->check_period_ptr);
|
||
|
if(is_valid_time == ERROR) {
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "Preferred Time is Invalid In Timeperiod '%s': %lu --> %s", temp_service->check_period_ptr->name, (unsigned long)temp_service->next_check, ctime(&temp_service->next_check));
|
||
|
get_next_valid_time(temp_service->next_check, &next_valid_time, temp_service->check_period_ptr);
|
||
|
temp_service->next_check = next_valid_time;
|
||
|
}
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "Actual Check Time: %lu --> %s", (unsigned long)temp_service->next_check, ctime(&temp_service->next_check));
|
||
|
|
||
|
if(scheduling_info.first_service_check == (time_t)0 || (temp_service->next_check < scheduling_info.first_service_check))
|
||
|
scheduling_info.first_service_check = temp_service->next_check;
|
||
|
if(temp_service->next_check > scheduling_info.last_service_check)
|
||
|
scheduling_info.last_service_check = temp_service->next_check;
|
||
|
}
|
||
|
|
||
|
current_interleave_block++;
|
||
|
}
|
||
|
|
||
|
if(test_scheduling == TRUE)
|
||
|
gettimeofday(&tv[4], NULL);
|
||
|
|
||
|
/* add scheduled service checks to event queue */
|
||
|
for(temp_service = service_list; temp_service != NULL; temp_service = temp_service->next) {
|
||
|
|
||
|
/* Nagios XI/NDOUtils MOD */
|
||
|
/* update status of all services (scheduled or not) */
|
||
|
update_service_status(temp_service, FALSE);
|
||
|
|
||
|
/* skip services whose checks are currently executing */
|
||
|
if(temp_service->is_executing)
|
||
|
continue;
|
||
|
|
||
|
/* skip most services that shouldn't be scheduled */
|
||
|
if(temp_service->should_be_scheduled == FALSE) {
|
||
|
|
||
|
/* passive checks are an exception if a forced check was scheduled before Nagios was restarted */
|
||
|
if(!(temp_service->checks_enabled == FALSE && temp_service->next_check != (time_t)0L && (temp_service->check_options & CHECK_OPTION_FORCE_EXECUTION)))
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
/* create a new service check event */
|
||
|
log_debug_info(DEBUGL_EVENTS, 2,
|
||
|
"Scheduling check for service '%s' on host '%s'.\n",
|
||
|
temp_service->description, temp_service->host_name);
|
||
|
schedule_new_event(EVENT_SERVICE_CHECK, FALSE, temp_service->next_check, FALSE, 0, NULL, TRUE, (void *)temp_service, NULL, temp_service->check_options);
|
||
|
}
|
||
|
|
||
|
|
||
|
if(test_scheduling == TRUE)
|
||
|
gettimeofday(&tv[5], NULL);
|
||
|
|
||
|
/******** DETERMINE HOST SCHEDULING PARAMS ********/
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "Determining host scheduling parameters...");
|
||
|
|
||
|
scheduling_info.first_host_check = (time_t)0L;
|
||
|
scheduling_info.last_host_check = (time_t)0L;
|
||
|
|
||
|
/* default max host check spread (in minutes) */
|
||
|
scheduling_info.max_host_check_spread = max_host_check_spread;
|
||
|
|
||
|
/* how should we determine the host inter-check delay to use? */
|
||
|
switch(host_inter_check_delay_method) {
|
||
|
|
||
|
case ICD_NONE:
|
||
|
|
||
|
/* don't spread checks out */
|
||
|
scheduling_info.host_inter_check_delay = 0.0;
|
||
|
break;
|
||
|
|
||
|
case ICD_DUMB:
|
||
|
|
||
|
/* be dumb and just schedule checks 1 second apart */
|
||
|
scheduling_info.host_inter_check_delay = 1.0;
|
||
|
break;
|
||
|
|
||
|
case ICD_USER:
|
||
|
|
||
|
/* the user specified a delay, so don't try to calculate one */
|
||
|
break;
|
||
|
|
||
|
case ICD_SMART:
|
||
|
default:
|
||
|
|
||
|
/* be smart and calculate the best delay to use to minimize local load... */
|
||
|
if(scheduling_info.total_scheduled_hosts > 0 && scheduling_info.host_check_interval_total > 0) {
|
||
|
|
||
|
/* adjust the check interval total to correspond to the interval length */
|
||
|
scheduling_info.host_check_interval_total = (scheduling_info.host_check_interval_total * interval_length);
|
||
|
|
||
|
/* calculate the average check interval for hosts */
|
||
|
scheduling_info.average_host_check_interval = (double)((double)scheduling_info.host_check_interval_total / (double)scheduling_info.total_scheduled_hosts);
|
||
|
|
||
|
/* calculate the average inter check delay (in seconds) needed to evenly space the host checks out */
|
||
|
scheduling_info.average_host_inter_check_delay = (double)(scheduling_info.average_host_check_interval / (double)scheduling_info.total_scheduled_hosts);
|
||
|
|
||
|
/* set the global inter check delay value */
|
||
|
scheduling_info.host_inter_check_delay = scheduling_info.average_host_inter_check_delay;
|
||
|
|
||
|
/* calculate max inter check delay and see if we should use that instead */
|
||
|
max_inter_check_delay = (double)((scheduling_info.max_host_check_spread * 60.0) / (double)scheduling_info.total_scheduled_hosts);
|
||
|
if(scheduling_info.host_inter_check_delay > max_inter_check_delay)
|
||
|
scheduling_info.host_inter_check_delay = max_inter_check_delay;
|
||
|
}
|
||
|
else
|
||
|
scheduling_info.host_inter_check_delay = 0.0;
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "Total scheduled host checks: %d\n", scheduling_info.total_scheduled_hosts);
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "Host check interval total: %lu\n", scheduling_info.host_check_interval_total);
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "Average host check interval: %0.2f sec\n", scheduling_info.average_host_check_interval);
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "Host inter-check delay: %0.2f sec\n", scheduling_info.host_inter_check_delay);
|
||
|
}
|
||
|
|
||
|
if(test_scheduling == TRUE)
|
||
|
gettimeofday(&tv[6], NULL);
|
||
|
|
||
|
|
||
|
/******** SCHEDULE HOST CHECKS ********/
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "Scheduling host checks...");
|
||
|
|
||
|
/* determine check times for host checks */
|
||
|
mult_factor = 0;
|
||
|
for(temp_host = host_list; temp_host != NULL; temp_host = temp_host->next) {
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "Host '%s'\n", temp_host->name);
|
||
|
|
||
|
/* skip hosts that shouldn't be scheduled */
|
||
|
if(temp_host->should_be_scheduled == FALSE) {
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "Host check should not be scheduled.\n");
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
/* skip hosts whose checks are currently executing */
|
||
|
if(temp_host->is_executing) {
|
||
|
log_debug_info(DEBUGL_EVENTS, 2,
|
||
|
"Host check %s is already executing.\n", temp_host->name);
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
/* skip hosts that are already scheduled for the future (from retention data), but reschedule ones that were supposed to be checked before we started */
|
||
|
if(temp_host->next_check > current_time) {
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "Host is already scheduled to be checked in the future: %s\n", ctime(&temp_host->next_check));
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
/* calculate preferred host check time */
|
||
|
temp_host->next_check = (time_t)(current_time + (mult_factor * scheduling_info.host_inter_check_delay));
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "Preferred Check Time: %lu --> %s", (unsigned long)temp_host->next_check, ctime(&temp_host->next_check));
|
||
|
|
||
|
/* make sure the host can actually be scheduled at this time */
|
||
|
is_valid_time = check_time_against_period(temp_host->next_check, temp_host->check_period_ptr);
|
||
|
if(is_valid_time == ERROR) {
|
||
|
get_next_valid_time(temp_host->next_check, &next_valid_time, temp_host->check_period_ptr);
|
||
|
temp_host->next_check = next_valid_time;
|
||
|
}
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "Actual Check Time: %lu --> %s", (unsigned long)temp_host->next_check, ctime(&temp_host->next_check));
|
||
|
|
||
|
if(scheduling_info.first_host_check == (time_t)0 || (temp_host->next_check < scheduling_info.first_host_check))
|
||
|
scheduling_info.first_host_check = temp_host->next_check;
|
||
|
if(temp_host->next_check > scheduling_info.last_host_check)
|
||
|
scheduling_info.last_host_check = temp_host->next_check;
|
||
|
|
||
|
mult_factor++;
|
||
|
}
|
||
|
|
||
|
if(test_scheduling == TRUE)
|
||
|
gettimeofday(&tv[7], NULL);
|
||
|
|
||
|
/* add scheduled host checks to event queue */
|
||
|
for(temp_host = host_list; temp_host != NULL; temp_host = temp_host->next) {
|
||
|
|
||
|
/* Nagios XI/NDOUtils Mod */
|
||
|
/* update status of all hosts (scheduled or not) */
|
||
|
update_host_status(temp_host, FALSE);
|
||
|
|
||
|
/* skip hosts whose checks are currently executing */
|
||
|
if(temp_host->is_executing)
|
||
|
continue;
|
||
|
|
||
|
/* skip most hosts that shouldn't be scheduled */
|
||
|
if(temp_host->should_be_scheduled == FALSE) {
|
||
|
|
||
|
/* passive checks are an exception if a forced check was scheduled before Nagios was restarted */
|
||
|
if(!(temp_host->checks_enabled == FALSE && temp_host->next_check != (time_t)0L && (temp_host->check_options & CHECK_OPTION_FORCE_EXECUTION)))
|
||
|
continue;
|
||
|
}
|
||
|
|
||
|
/* schedule a new host check event */
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "Scheduling check for host '%s'.\n",
|
||
|
temp_host->name);
|
||
|
schedule_new_event(EVENT_HOST_CHECK, FALSE, temp_host->next_check, FALSE, 0, NULL, TRUE, (void *)temp_host, NULL, temp_host->check_options);
|
||
|
}
|
||
|
|
||
|
if(test_scheduling == TRUE)
|
||
|
gettimeofday(&tv[8], NULL);
|
||
|
|
||
|
|
||
|
/******** SCHEDULE MISC EVENTS ********/
|
||
|
|
||
|
/* add a host and service check rescheduling event */
|
||
|
if(auto_reschedule_checks == TRUE)
|
||
|
schedule_new_event(EVENT_RESCHEDULE_CHECKS, TRUE, current_time + auto_rescheduling_interval, TRUE, auto_rescheduling_interval, NULL, TRUE, NULL, NULL, 0);
|
||
|
|
||
|
/* add a check result reaper event */
|
||
|
schedule_new_event(EVENT_CHECK_REAPER, TRUE, current_time + check_reaper_interval, TRUE, check_reaper_interval, NULL, TRUE, NULL, NULL, 0);
|
||
|
|
||
|
/* add an orphaned check event */
|
||
|
if(check_orphaned_services == TRUE || check_orphaned_hosts == TRUE)
|
||
|
schedule_new_event(EVENT_ORPHAN_CHECK, TRUE, current_time + DEFAULT_ORPHAN_CHECK_INTERVAL, TRUE, DEFAULT_ORPHAN_CHECK_INTERVAL, NULL, TRUE, NULL, NULL, 0);
|
||
|
|
||
|
/* add a service result "freshness" check event */
|
||
|
if(check_service_freshness == TRUE)
|
||
|
schedule_new_event(EVENT_SFRESHNESS_CHECK, TRUE, current_time + service_freshness_check_interval, TRUE, service_freshness_check_interval, NULL, TRUE, NULL, NULL, 0);
|
||
|
|
||
|
/* add a host result "freshness" check event */
|
||
|
if(check_host_freshness == TRUE)
|
||
|
schedule_new_event(EVENT_HFRESHNESS_CHECK, TRUE, current_time + host_freshness_check_interval, TRUE, host_freshness_check_interval, NULL, TRUE, NULL, NULL, 0);
|
||
|
|
||
|
/* add a status save event */
|
||
|
if(aggregate_status_updates == TRUE)
|
||
|
schedule_new_event(EVENT_STATUS_SAVE, TRUE, current_time + status_update_interval, TRUE, status_update_interval, NULL, TRUE, NULL, NULL, 0);
|
||
|
|
||
|
/* add an external command check event if needed */
|
||
|
if(check_external_commands == TRUE) {
|
||
|
if(command_check_interval == -1)
|
||
|
interval_to_use = (unsigned long)5;
|
||
|
else
|
||
|
interval_to_use = (unsigned long)command_check_interval;
|
||
|
schedule_new_event(EVENT_COMMAND_CHECK, TRUE, current_time + interval_to_use, TRUE, interval_to_use, NULL, TRUE, NULL, NULL, 0);
|
||
|
}
|
||
|
|
||
|
/* add a log rotation event if necessary */
|
||
|
if(log_rotation_method != LOG_ROTATION_NONE)
|
||
|
schedule_new_event(EVENT_LOG_ROTATION, TRUE, get_next_log_rotation_time(), TRUE, 0, (void *)get_next_log_rotation_time, TRUE, NULL, NULL, 0);
|
||
|
|
||
|
/* add a retention data save event if needed */
|
||
|
if(retain_state_information == TRUE && retention_update_interval > 0)
|
||
|
schedule_new_event(EVENT_RETENTION_SAVE, TRUE, current_time + (retention_update_interval * 60), TRUE, (retention_update_interval * 60), NULL, TRUE, NULL, NULL, 0);
|
||
|
|
||
|
if(test_scheduling == TRUE) {
|
||
|
|
||
|
runtime[0] = (double)((double)(tv[1].tv_sec - tv[0].tv_sec) + (double)((tv[1].tv_usec - tv[0].tv_usec) / 1000.0) / 1000.0);
|
||
|
runtime[1] = (double)((double)(tv[2].tv_sec - tv[1].tv_sec) + (double)((tv[2].tv_usec - tv[1].tv_usec) / 1000.0) / 1000.0);
|
||
|
runtime[2] = (double)((double)(tv[3].tv_sec - tv[2].tv_sec) + (double)((tv[3].tv_usec - tv[2].tv_usec) / 1000.0) / 1000.0);
|
||
|
runtime[3] = (double)((double)(tv[4].tv_sec - tv[3].tv_sec) + (double)((tv[4].tv_usec - tv[3].tv_usec) / 1000.0) / 1000.0);
|
||
|
runtime[4] = (double)((double)(tv[5].tv_sec - tv[4].tv_sec) + (double)((tv[5].tv_usec - tv[4].tv_usec) / 1000.0) / 1000.0);
|
||
|
runtime[5] = (double)((double)(tv[6].tv_sec - tv[5].tv_sec) + (double)((tv[6].tv_usec - tv[5].tv_usec) / 1000.0) / 1000.0);
|
||
|
runtime[6] = (double)((double)(tv[7].tv_sec - tv[6].tv_sec) + (double)((tv[7].tv_usec - tv[6].tv_usec) / 1000.0) / 1000.0);
|
||
|
runtime[7] = (double)((double)(tv[8].tv_sec - tv[7].tv_sec) + (double)((tv[8].tv_usec - tv[7].tv_usec) / 1000.0) / 1000.0);
|
||
|
|
||
|
runtime[8] = (double)((double)(tv[8].tv_sec - tv[0].tv_sec) + (double)((tv[8].tv_usec - tv[0].tv_usec) / 1000.0) / 1000.0);
|
||
|
|
||
|
printf("EVENT SCHEDULING TIMES\n");
|
||
|
printf("-------------------------------------\n");
|
||
|
printf("Get service info: %.6lf sec\n", runtime[0]);
|
||
|
printf("Get host info info: %.6lf sec\n", runtime[1]);
|
||
|
printf("Get service params: %.6lf sec\n", runtime[2]);
|
||
|
printf("Schedule service times: %.6lf sec\n", runtime[3]);
|
||
|
printf("Schedule service events: %.6lf sec\n", runtime[4]);
|
||
|
printf("Get host params: %.6lf sec\n", runtime[5]);
|
||
|
printf("Schedule host times: %.6lf sec\n", runtime[6]);
|
||
|
printf("Schedule host events: %.6lf sec\n", runtime[7]);
|
||
|
printf(" ============\n");
|
||
|
printf("TOTAL: %.6lf sec\n", runtime[8]);
|
||
|
printf("\n\n");
|
||
|
}
|
||
|
|
||
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "init_timing_loop() end\n");
|
||
|
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
/* displays service check scheduling information */
|
||
|
void display_scheduling_info(void) {
|
||
|
float minimum_concurrent_checks1 = 0.0;
|
||
|
float minimum_concurrent_checks2 = 0.0;
|
||
|
float minimum_concurrent_checks = 0.0;
|
||
|
float max_reaper_interval = 0.0;
|
||
|
int suggestions = 0;
|
||
|
|
||
|
printf("Projected scheduling information for host and service checks\n");
|
||
|
printf("is listed below. This information assumes that you are going\n");
|
||
|
printf("to start running Nagios with your current config files.\n\n");
|
||
|
|
||
|
printf("HOST SCHEDULING INFORMATION\n");
|
||
|
printf("---------------------------\n");
|
||
|
printf("Total hosts: %d\n", scheduling_info.total_hosts);
|
||
|
printf("Total scheduled hosts: %d\n", scheduling_info.total_scheduled_hosts);
|
||
|
|
||
|
printf("Host inter-check delay method: ");
|
||
|
if(host_inter_check_delay_method == ICD_NONE)
|
||
|
printf("NONE\n");
|
||
|
else if(host_inter_check_delay_method == ICD_DUMB)
|
||
|
printf("DUMB\n");
|
||
|
else if(host_inter_check_delay_method == ICD_SMART) {
|
||
|
printf("SMART\n");
|
||
|
printf("Average host check interval: %.2f sec\n", scheduling_info.average_host_check_interval);
|
||
|
}
|
||
|
else
|
||
|
printf("USER-SUPPLIED VALUE\n");
|
||
|
printf("Host inter-check delay: %.2f sec\n", scheduling_info.host_inter_check_delay);
|
||
|
printf("Max host check spread: %d min\n", scheduling_info.max_host_check_spread);
|
||
|
printf("First scheduled check: %s", (scheduling_info.total_scheduled_hosts == 0) ? "N/A\n" : ctime(&scheduling_info.first_host_check));
|
||
|
printf("Last scheduled check: %s", (scheduling_info.total_scheduled_hosts == 0) ? "N/A\n" : ctime(&scheduling_info.last_host_check));
|
||
|
printf("\n\n");
|
||
|
|
||
|
printf("SERVICE SCHEDULING INFORMATION\n");
|
||
|
printf("-------------------------------\n");
|
||
|
printf("Total services: %d\n", scheduling_info.total_services);
|
||
|
printf("Total scheduled services: %d\n", scheduling_info.total_scheduled_services);
|
||
|
|
||
|
printf("Service inter-check delay method: ");
|
||
|
if(service_inter_check_delay_method == ICD_NONE)
|
||
|
printf("NONE\n");
|
||
|
else if(service_inter_check_delay_method == ICD_DUMB)
|
||
|
printf("DUMB\n");
|
||
|
else if(service_inter_check_delay_method == ICD_SMART) {
|
||
|
printf("SMART\n");
|
||
|
printf("Average service check interval: %.2f sec\n", scheduling_info.average_service_check_interval);
|
||
|
}
|
||
|
else
|
||
|
printf("USER-SUPPLIED VALUE\n");
|
||
|
printf("Inter-check delay: %.2f sec\n", scheduling_info.service_inter_check_delay);
|
||
|
|
||
|
printf("Interleave factor method: %s\n", (service_interleave_factor_method == ILF_USER) ? "USER-SUPPLIED VALUE" : "SMART");
|
||
|
if(service_interleave_factor_method == ILF_SMART)
|
||
|
printf("Average services per host: %.2f\n", scheduling_info.average_services_per_host);
|
||
|
printf("Service interleave factor: %d\n", scheduling_info.service_interleave_factor);
|
||
|
|
||
|
printf("Max service check spread: %d min\n", scheduling_info.max_service_check_spread);
|
||
|
printf("First scheduled check: %s", ctime(&scheduling_info.first_service_check));
|
||
|
printf("Last scheduled check: %s", ctime(&scheduling_info.last_service_check));
|
||
|
printf("\n\n");
|
||
|
|
||
|
printf("CHECK PROCESSING INFORMATION\n");
|
||
|
printf("----------------------------\n");
|
||
|
printf("Check result reaper interval: %d sec\n", check_reaper_interval);
|
||
|
printf("Max concurrent service checks: ");
|
||
|
if(max_parallel_service_checks == 0)
|
||
|
printf("Unlimited\n");
|
||
|
else
|
||
|
printf("%d\n", max_parallel_service_checks);
|
||
|
printf("\n\n");
|
||
|
|
||
|
printf("PERFORMANCE SUGGESTIONS\n");
|
||
|
printf("-----------------------\n");
|
||
|
|
||
|
|
||
|
/***** MAX REAPER INTERVAL RECOMMENDATION *****/
|
||
|
|
||
|
/* assume a 100% (2x) check burst for check reaper */
|
||
|
/* assume we want a max of 2k files in the result queue at any given time */
|
||
|
max_reaper_interval = floor(2000 * scheduling_info.service_inter_check_delay);
|
||
|
if(max_reaper_interval < 2.0)
|
||
|
max_reaper_interval = 2.0;
|
||
|
if(max_reaper_interval > 30.0)
|
||
|
max_reaper_interval = 30.0;
|
||
|
if((int)max_reaper_interval < check_reaper_interval) {
|
||
|
printf("* Value for 'check_result_reaper_frequency' should be <= %d seconds\n", (int)max_reaper_interval);
|
||
|
suggestions++;
|
||
|
}
|
||
|
if(check_reaper_interval < 2) {
|
||
|
printf("* Value for 'check_result_reaper_frequency' should be >= 2 seconds\n");
|
||
|
suggestions++;
|
||
|
}
|
||
|
|
||
|
/***** MINIMUM CONCURRENT CHECKS RECOMMENDATION *****/
|
||
|
|
||
|
/* first method (old) - assume a 100% (2x) service check burst for max concurrent checks */
|
||
|
if(scheduling_info.service_inter_check_delay == 0.0)
|
||
|
minimum_concurrent_checks1 = ceil(check_reaper_interval * 2.0);
|
||
|
else
|
||
|
minimum_concurrent_checks1 = ceil((check_reaper_interval * 2.0) / scheduling_info.service_inter_check_delay);
|
||
|
|
||
|
/* second method (new) - assume a 25% (1.25x) service check burst for max concurrent checks */
|
||
|
minimum_concurrent_checks2 = ceil((((double)scheduling_info.total_scheduled_services) / scheduling_info.average_service_check_interval) * 1.25 * check_reaper_interval * scheduling_info.average_service_execution_time);
|
||
|
|
||
|
/* use max of computed values */
|
||
|
if(minimum_concurrent_checks1 > minimum_concurrent_checks2)
|
||
|
minimum_concurrent_checks = minimum_concurrent_checks1;
|
||
|
else
|
||
|
minimum_concurrent_checks = minimum_concurrent_checks2;
|
||
|
|
||
|
/* compare with configured value */
|
||
|
if(((int)minimum_concurrent_checks > max_parallel_service_checks) && max_parallel_service_checks != 0) {
|
||
|
printf("* Value for 'max_concurrent_checks' option should be >= %d\n", (int)minimum_concurrent_checks);
|
||
|
suggestions++;
|
||
|
}
|
||
|
|
||
|
if(suggestions == 0)
|
||
|
printf("I have no suggestions - things look okay.\n");
|
||
|
|
||
|
printf("\n");
|
||
|
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* schedule a new timed event */
|
||
|
int schedule_new_event(int event_type, int high_priority, time_t run_time, int recurring, unsigned long event_interval, void *timing_func, int compensate_for_time_change, void *event_data, void *event_args, int event_options) {
|
||
|
timed_event **event_list = NULL;
|
||
|
timed_event **event_list_tail = NULL;
|
||
|
timed_event *new_event = NULL;
|
||
|
|
||
|
char run_time_string[MAX_DATETIME_LENGTH] = "";
|
||
|
|
||
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "schedule_new_event()\n");
|
||
|
|
||
|
get_datetime_string(&run_time, run_time_string, MAX_DATETIME_LENGTH,
|
||
|
SHORT_DATE_TIME);
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "New Event Details:\n");
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, " Type: %s\n",
|
||
|
EVENT_TYPE_STR(event_type));
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, " High Priority: %s\n",
|
||
|
(high_priority ? "Yes" : "No"));
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, " Run Time: %s\n",
|
||
|
run_time_string);
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, " Recurring: %s\n",
|
||
|
(recurring ? "Yes" : "No"));
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, " Event Interval: %lu\n",
|
||
|
event_interval);
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, " Compensate for Time Change: %s\n",
|
||
|
(compensate_for_time_change ? "Yes" : "No"));
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, " Event Options: %d\n",
|
||
|
event_options);
|
||
|
|
||
|
if(high_priority == TRUE) {
|
||
|
event_list = &event_list_high;
|
||
|
event_list_tail = &event_list_high_tail;
|
||
|
}
|
||
|
else {
|
||
|
event_list = &event_list_low;
|
||
|
event_list_tail = &event_list_low_tail;
|
||
|
}
|
||
|
|
||
|
new_event = (timed_event *)malloc(sizeof(timed_event));
|
||
|
if(new_event != NULL) {
|
||
|
new_event->event_type = event_type;
|
||
|
new_event->event_data = event_data;
|
||
|
new_event->event_args = event_args;
|
||
|
new_event->event_options = event_options;
|
||
|
new_event->run_time = run_time;
|
||
|
new_event->recurring = recurring;
|
||
|
new_event->event_interval = event_interval;
|
||
|
new_event->timing_func = timing_func;
|
||
|
new_event->compensate_for_time_change = compensate_for_time_change;
|
||
|
}
|
||
|
else
|
||
|
return ERROR;
|
||
|
|
||
|
/* add the event to the event list */
|
||
|
add_event(new_event, event_list, event_list_tail);
|
||
|
|
||
|
return OK;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* reschedule an event in order of execution time */
|
||
|
void reschedule_event(timed_event *event, timed_event **event_list, timed_event **event_list_tail) {
|
||
|
time_t current_time = 0L;
|
||
|
time_t (*timingfunc)(void);
|
||
|
|
||
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "reschedule_event()\n");
|
||
|
|
||
|
/* reschedule recurring events... */
|
||
|
if(event->recurring == TRUE) {
|
||
|
|
||
|
/* use custom timing function */
|
||
|
if(event->timing_func != NULL) {
|
||
|
timingfunc = event->timing_func;
|
||
|
event->run_time = (*timingfunc)();
|
||
|
}
|
||
|
|
||
|
/* normal recurring events */
|
||
|
else {
|
||
|
event->run_time = event->run_time + event->event_interval;
|
||
|
time(¤t_time);
|
||
|
if(event->run_time < current_time)
|
||
|
event->run_time = current_time;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* add the event to the event list */
|
||
|
add_event(event, event_list, event_list_tail);
|
||
|
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* add an event to list ordered by execution time */
|
||
|
void add_event(timed_event *event, timed_event **event_list, timed_event **event_list_tail) {
|
||
|
timed_event *temp_event = NULL;
|
||
|
timed_event *first_event = NULL;
|
||
|
|
||
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "add_event()\n");
|
||
|
|
||
|
event->next = NULL;
|
||
|
event->prev = NULL;
|
||
|
|
||
|
first_event = *event_list;
|
||
|
|
||
|
/* add the event to the head of the list if there are no other events */
|
||
|
if(*event_list == NULL) {
|
||
|
*event_list = event;
|
||
|
*event_list_tail = event;
|
||
|
}
|
||
|
|
||
|
/* add event to head of the list if it should be executed first */
|
||
|
else if(event->run_time < first_event->run_time) {
|
||
|
event->prev = NULL;
|
||
|
(*event_list)->prev = event;
|
||
|
event->next = *event_list;
|
||
|
*event_list = event;
|
||
|
}
|
||
|
|
||
|
/* else place the event according to next execution time */
|
||
|
else {
|
||
|
|
||
|
/* start from the end of the list, as new events are likely to be executed in the future, rather than now... */
|
||
|
for(temp_event = *event_list_tail; temp_event != NULL; temp_event = temp_event->prev) {
|
||
|
if(event->run_time >= temp_event->run_time) {
|
||
|
event->next = temp_event->next;
|
||
|
event->prev = temp_event;
|
||
|
temp_event->next = event;
|
||
|
if(event->next == NULL)
|
||
|
*event_list_tail = event;
|
||
|
else
|
||
|
event->next->prev = event;
|
||
|
break;
|
||
|
}
|
||
|
else if(temp_event->prev == NULL) {
|
||
|
temp_event->prev = event;
|
||
|
event->next = temp_event;
|
||
|
*event_list = event;
|
||
|
break;
|
||
|
}
|
||
|
}
|
||
|
}
|
||
|
|
||
|
#ifdef USE_EVENT_BROKER
|
||
|
/* send event data to broker */
|
||
|
broker_timed_event(NEBTYPE_TIMEDEVENT_ADD, NEBFLAG_NONE, NEBATTR_NONE, event, NULL);
|
||
|
#endif
|
||
|
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
/* remove an event from the queue */
|
||
|
void remove_event(timed_event *event, timed_event **event_list, timed_event **event_list_tail) {
|
||
|
timed_event *prev_event, *next_event;
|
||
|
|
||
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "remove_event()\n");
|
||
|
|
||
|
if(!event)
|
||
|
return;
|
||
|
|
||
|
#ifdef USE_EVENT_BROKER
|
||
|
/* send event data to broker */
|
||
|
broker_timed_event(NEBTYPE_TIMEDEVENT_REMOVE, NEBFLAG_NONE, NEBATTR_NONE, event, NULL);
|
||
|
#endif
|
||
|
|
||
|
if(*event_list == NULL)
|
||
|
return;
|
||
|
|
||
|
prev_event = event->prev;
|
||
|
next_event = event->next;
|
||
|
if(prev_event) {
|
||
|
prev_event->next = next_event;
|
||
|
}
|
||
|
if(next_event) {
|
||
|
next_event->prev = prev_event;
|
||
|
}
|
||
|
|
||
|
if(!prev_event) {
|
||
|
/* no previous event, so "next" is now first in list */
|
||
|
*event_list = next_event;
|
||
|
}
|
||
|
if(!next_event) {
|
||
|
/* no following event, so "prev" is now last in list */
|
||
|
*event_list_tail = prev_event;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
* If there was only one event in the list, we're already
|
||
|
* done, just as if there were events before and efter the
|
||
|
* deleted event
|
||
|
*/
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
/* this is the main event handler loop */
|
||
|
int event_execution_loop(void) {
|
||
|
timed_event *temp_event = NULL;
|
||
|
timed_event sleep_event;
|
||
|
time_t last_time = 0L;
|
||
|
time_t current_time = 0L;
|
||
|
time_t last_status_update = 0L;
|
||
|
int run_event = TRUE;
|
||
|
int nudge_seconds;
|
||
|
host *temp_host = NULL;
|
||
|
service *temp_service = NULL;
|
||
|
struct timespec delay;
|
||
|
pid_t wait_result;
|
||
|
|
||
|
|
||
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "event_execution_loop() start\n");
|
||
|
|
||
|
time(&last_time);
|
||
|
|
||
|
/* initialize fake "sleep" event */
|
||
|
sleep_event.event_type = EVENT_SLEEP;
|
||
|
sleep_event.run_time = last_time;
|
||
|
sleep_event.recurring = FALSE;
|
||
|
sleep_event.event_interval = 0L;
|
||
|
sleep_event.compensate_for_time_change = FALSE;
|
||
|
sleep_event.timing_func = NULL;
|
||
|
sleep_event.event_data = NULL;
|
||
|
sleep_event.event_args = NULL;
|
||
|
sleep_event.event_options = 0;
|
||
|
sleep_event.next = NULL;
|
||
|
sleep_event.prev = NULL;
|
||
|
|
||
|
while(1) {
|
||
|
|
||
|
/* see if we should exit or restart (a signal was encountered) */
|
||
|
if(sigshutdown == TRUE || sigrestart == TRUE)
|
||
|
break;
|
||
|
|
||
|
/* if we don't have any events to handle, exit */
|
||
|
if(event_list_high == NULL && event_list_low == NULL) {
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "There aren't any events that need to be handled! Exiting...\n");
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
/* get the current time */
|
||
|
time(¤t_time);
|
||
|
|
||
|
/* hey, wait a second... we traveled back in time! */
|
||
|
if(current_time < last_time)
|
||
|
compensate_for_system_time_change((unsigned long)last_time, (unsigned long)current_time);
|
||
|
|
||
|
/* else if the time advanced over the specified threshold, try and compensate... */
|
||
|
else if((current_time - last_time) >= time_change_threshold)
|
||
|
compensate_for_system_time_change((unsigned long)last_time, (unsigned long)current_time);
|
||
|
|
||
|
/* keep track of the last time */
|
||
|
last_time = current_time;
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 1, "** Event Check Loop\n");
|
||
|
if(event_list_high != NULL)
|
||
|
log_debug_info(DEBUGL_EVENTS, 1, "Next High Priority Event Time: %s", ctime(&event_list_high->run_time));
|
||
|
else
|
||
|
log_debug_info(DEBUGL_EVENTS, 1, "No high priority events are scheduled...\n");
|
||
|
if(event_list_low != NULL)
|
||
|
log_debug_info(DEBUGL_EVENTS, 1, "Next Low Priority Event Time: %s", ctime(&event_list_low->run_time));
|
||
|
else
|
||
|
log_debug_info(DEBUGL_EVENTS, 1, "No low priority events are scheduled...\n");
|
||
|
log_debug_info(DEBUGL_EVENTS, 1, "Current/Max Service Checks: %d/%d\n", currently_running_service_checks, max_parallel_service_checks);
|
||
|
|
||
|
/* get rid of terminated child processes (zombies) */
|
||
|
if(child_processes_fork_twice == FALSE) {
|
||
|
while((wait_result = waitpid(-1, NULL, WNOHANG)) > 0);
|
||
|
}
|
||
|
|
||
|
/* handle high priority events */
|
||
|
if(event_list_high != NULL && (current_time >= event_list_high->run_time)) {
|
||
|
|
||
|
/* remove the first event from the timing loop */
|
||
|
temp_event = event_list_high;
|
||
|
event_list_high = event_list_high->next;
|
||
|
event_list_high->prev = NULL;
|
||
|
|
||
|
/* handle the event */
|
||
|
handle_timed_event(temp_event);
|
||
|
|
||
|
/* reschedule the event if necessary */
|
||
|
if(temp_event->recurring == TRUE)
|
||
|
reschedule_event(temp_event, &event_list_high, &event_list_high_tail);
|
||
|
|
||
|
/* else free memory associated with the event */
|
||
|
else
|
||
|
my_free(temp_event);
|
||
|
}
|
||
|
|
||
|
/* handle low priority events */
|
||
|
else if(event_list_low != NULL && (current_time >= event_list_low->run_time)) {
|
||
|
|
||
|
/* default action is to execute the event */
|
||
|
run_event = TRUE;
|
||
|
nudge_seconds = 0;
|
||
|
|
||
|
/* run a few checks before executing a service check... */
|
||
|
if(event_list_low->event_type == EVENT_SERVICE_CHECK) {
|
||
|
|
||
|
temp_service = (service *)event_list_low->event_data;
|
||
|
|
||
|
/* don't run a service check if we're already maxed out on the number of parallel service checks... */
|
||
|
if(max_parallel_service_checks != 0 && (currently_running_service_checks >= max_parallel_service_checks)) {
|
||
|
|
||
|
/* Move it at least 5 seconds (to overcome the current peak), with a random 10 seconds (to spread the load) */
|
||
|
nudge_seconds = 5 + (rand() % 10);
|
||
|
log_debug_info(DEBUGL_EVENTS | DEBUGL_CHECKS, 0, "**WARNING** Max concurrent service checks (%d) has been reached! Nudging %s:%s by %d seconds...\n", max_parallel_service_checks, temp_service->host_name, temp_service->description, nudge_seconds);
|
||
|
|
||
|
logit(NSLOG_RUNTIME_WARNING, TRUE, "\tMax concurrent service checks (%d) has been reached. Nudging %s:%s by %d seconds...\n", max_parallel_service_checks, temp_service->host_name, temp_service->description, nudge_seconds);
|
||
|
run_event = FALSE;
|
||
|
}
|
||
|
|
||
|
/* don't run a service check if active checks are disabled */
|
||
|
if(execute_service_checks == FALSE) {
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS | DEBUGL_CHECKS, 1, "We're not executing service checks right now, so we'll skip this event.\n");
|
||
|
|
||
|
run_event = FALSE;
|
||
|
}
|
||
|
|
||
|
/* forced checks override normal check logic */
|
||
|
if((temp_service->check_options & CHECK_OPTION_FORCE_EXECUTION))
|
||
|
run_event = TRUE;
|
||
|
|
||
|
/* reschedule the check if we can't run it now */
|
||
|
if(run_event == FALSE) {
|
||
|
|
||
|
/* remove the service check from the event queue and reschedule it for a later time */
|
||
|
/* 12/20/05 since event was not executed, it needs to be remove()'ed to maintain sync with event broker modules */
|
||
|
temp_event = event_list_low;
|
||
|
remove_event(temp_event, &event_list_low, &event_list_low_tail);
|
||
|
/*
|
||
|
event_list_low=event_list_low->next;
|
||
|
*/
|
||
|
if(nudge_seconds) {
|
||
|
/* We nudge the next check time when it is due to too many concurrent service checks */
|
||
|
temp_service->next_check = (time_t)(temp_service->next_check + nudge_seconds);
|
||
|
}
|
||
|
else {
|
||
|
/* Otherwise reschedule (TODO: This should be smarter as it doesn't consider its timeperiod) */
|
||
|
if(temp_service->state_type == SOFT_STATE && temp_service->current_state != STATE_OK)
|
||
|
temp_service->next_check = (time_t)(temp_service->next_check + (temp_service->retry_interval * interval_length));
|
||
|
else
|
||
|
temp_service->next_check = (time_t)(temp_service->next_check + (temp_service->check_interval * interval_length));
|
||
|
}
|
||
|
|
||
|
temp_event->run_time = temp_service->next_check;
|
||
|
reschedule_event(temp_event, &event_list_low, &event_list_low_tail);
|
||
|
update_service_status(temp_service, FALSE);
|
||
|
|
||
|
run_event = FALSE;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* run a few checks before executing a host check... */
|
||
|
else if(event_list_low->event_type == EVENT_HOST_CHECK) {
|
||
|
|
||
|
/* default action is to execute the event */
|
||
|
run_event = TRUE;
|
||
|
|
||
|
temp_host = (host *)event_list_low->event_data;
|
||
|
|
||
|
/* don't run a host check if active checks are disabled */
|
||
|
if(execute_host_checks == FALSE) {
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS | DEBUGL_CHECKS, 1, "We're not executing host checks right now, so we'll skip this event.\n");
|
||
|
|
||
|
run_event = FALSE;
|
||
|
}
|
||
|
|
||
|
/* forced checks override normal check logic */
|
||
|
if((temp_host->check_options & CHECK_OPTION_FORCE_EXECUTION))
|
||
|
run_event = TRUE;
|
||
|
|
||
|
/* reschedule the host check if we can't run it right now */
|
||
|
if(run_event == FALSE) {
|
||
|
|
||
|
/* remove the host check from the event queue and reschedule it for a later time */
|
||
|
/* 12/20/05 since event was not executed, it needs to be remove()'ed to maintain sync with event broker modules */
|
||
|
temp_event = event_list_low;
|
||
|
remove_event(temp_event, &event_list_low, &event_list_low_tail);
|
||
|
/*
|
||
|
event_list_low=event_list_low->next;
|
||
|
*/
|
||
|
if(temp_host->state_type == SOFT_STATE && temp_host->current_state != STATE_OK)
|
||
|
temp_host->next_check = (time_t)(temp_host->next_check + (temp_host->retry_interval * interval_length));
|
||
|
else
|
||
|
temp_host->next_check = (time_t)(temp_host->next_check + (temp_host->check_interval * interval_length));
|
||
|
|
||
|
temp_event->run_time = temp_host->next_check;
|
||
|
reschedule_event(temp_event, &event_list_low, &event_list_low_tail);
|
||
|
update_host_status(temp_host, FALSE);
|
||
|
|
||
|
run_event = FALSE;
|
||
|
}
|
||
|
}
|
||
|
|
||
|
/* run the event */
|
||
|
if(run_event == TRUE) {
|
||
|
|
||
|
/* remove the first event from the timing loop */
|
||
|
temp_event = event_list_low;
|
||
|
event_list_low = event_list_low->next;
|
||
|
/* we may have just removed the only item from the list */
|
||
|
if(event_list_low != NULL)
|
||
|
event_list_low->prev = NULL;
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 1, "Running event...\n");
|
||
|
|
||
|
# /* handle the event */
|
||
|
handle_timed_event(temp_event);
|
||
|
|
||
|
/* reschedule the event if necessary */
|
||
|
if(temp_event->recurring == TRUE)
|
||
|
reschedule_event(temp_event, &event_list_low, &event_list_low_tail);
|
||
|
|
||
|
/* else free memory associated with the event */
|
||
|
else
|
||
|
my_free(temp_event);
|
||
|
}
|
||
|
|
||
|
}
|
||
|
|
||
|
/* we don't have anything to do at this moment in time... */
|
||
|
else if((event_list_high == NULL || (current_time < event_list_high->run_time)) && (event_list_low == NULL || (current_time < event_list_low->run_time))) {
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 2, "No events to execute at the moment. Idling for a bit...\n");
|
||
|
|
||
|
/* check for external commands if we're supposed to check as often as possible */
|
||
|
if(command_check_interval == -1)
|
||
|
check_for_external_commands();
|
||
|
|
||
|
/* set time to sleep so we don't hog the CPU... */
|
||
|
#ifdef USE_NANOSLEEP
|
||
|
delay.tv_sec = (time_t)sleep_time;
|
||
|
delay.tv_nsec = (long)((sleep_time - (double)delay.tv_sec) * 1000000000);
|
||
|
#else
|
||
|
delay.tv_sec = (time_t)sleep_time;
|
||
|
if(delay.tv_sec == 0L)
|
||
|
delay.tv_sec = 1;
|
||
|
delay.tv_nsec = 0L;
|
||
|
#endif
|
||
|
|
||
|
#ifdef USE_EVENT_BROKER
|
||
|
/* populate fake "sleep" event */
|
||
|
sleep_event.run_time = current_time;
|
||
|
sleep_event.event_data = (void *)&delay;
|
||
|
|
||
|
/* send event data to broker */
|
||
|
broker_timed_event(NEBTYPE_TIMEDEVENT_SLEEP, NEBFLAG_NONE, NEBATTR_NONE, &sleep_event, NULL);
|
||
|
#endif
|
||
|
|
||
|
/* wait a while so we don't hog the CPU... */
|
||
|
#ifdef USE_NANOSLEEP
|
||
|
nanosleep(&delay, NULL);
|
||
|
#else
|
||
|
sleep((unsigned int)delay.tv_sec);
|
||
|
#endif
|
||
|
}
|
||
|
|
||
|
/* update status information occassionally - NagVis watches the NDOUtils DB to see if Nagios is alive */
|
||
|
if((unsigned long)(current_time - last_status_update) > 5) {
|
||
|
last_status_update = current_time;
|
||
|
update_program_status(FALSE);
|
||
|
}
|
||
|
}
|
||
|
|
||
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "event_execution_loop() end\n");
|
||
|
|
||
|
return OK;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
/* handles a timed event */
|
||
|
int handle_timed_event(timed_event *event) {
|
||
|
host *temp_host = NULL;
|
||
|
service *temp_service = NULL;
|
||
|
void (*userfunc)(void *);
|
||
|
struct timeval tv;
|
||
|
double latency = 0.0;
|
||
|
|
||
|
|
||
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "handle_timed_event() start\n");
|
||
|
|
||
|
#ifdef USE_EVENT_BROKER
|
||
|
/* send event data to broker */
|
||
|
broker_timed_event(NEBTYPE_TIMEDEVENT_EXECUTE, NEBFLAG_NONE, NEBATTR_NONE, event, NULL);
|
||
|
#endif
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "** Timed Event ** Type: %s, Run Time: %s", EVENT_TYPE_STR(event->event_type), ctime(&event->run_time));
|
||
|
|
||
|
/* how should we handle the event? */
|
||
|
switch(event->event_type) {
|
||
|
|
||
|
case EVENT_SERVICE_CHECK:
|
||
|
|
||
|
temp_service = (service *)event->event_data;
|
||
|
|
||
|
/* get check latency */
|
||
|
gettimeofday(&tv, NULL);
|
||
|
latency = (double)((double)(tv.tv_sec - event->run_time) + (double)(tv.tv_usec / 1000) / 1000.0);
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "** Service Check Event ==> Host: '%s', Service: '%s', Options: %d, Latency: %f sec\n", temp_service->host_name, temp_service->description, event->event_options, latency);
|
||
|
|
||
|
/* run the service check */
|
||
|
temp_service = (service *)event->event_data;
|
||
|
run_scheduled_service_check(temp_service, event->event_options, latency);
|
||
|
break;
|
||
|
|
||
|
case EVENT_HOST_CHECK:
|
||
|
|
||
|
temp_host = (host *)event->event_data;
|
||
|
|
||
|
/* get check latency */
|
||
|
gettimeofday(&tv, NULL);
|
||
|
latency = (double)((double)(tv.tv_sec - event->run_time) + (double)(tv.tv_usec / 1000) / 1000.0);
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "** Host Check Event ==> Host: '%s', Options: %d, Latency: %f sec\n", temp_host->name, event->event_options, latency);
|
||
|
|
||
|
/* run the host check */
|
||
|
temp_host = (host *)event->event_data;
|
||
|
perform_scheduled_host_check(temp_host, event->event_options, latency);
|
||
|
break;
|
||
|
|
||
|
case EVENT_COMMAND_CHECK:
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "** External Command Check Event\n");
|
||
|
|
||
|
/* check for external commands */
|
||
|
check_for_external_commands();
|
||
|
break;
|
||
|
|
||
|
case EVENT_LOG_ROTATION:
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "** Log File Rotation Event\n");
|
||
|
|
||
|
/* rotate the log file */
|
||
|
rotate_log_file(event->run_time);
|
||
|
break;
|
||
|
|
||
|
case EVENT_PROGRAM_SHUTDOWN:
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "** Program Shutdown Event\n");
|
||
|
|
||
|
/* set the shutdown flag */
|
||
|
sigshutdown = TRUE;
|
||
|
|
||
|
/* log the shutdown */
|
||
|
logit(NSLOG_PROCESS_INFO, TRUE, "PROGRAM_SHUTDOWN event encountered, shutting down...\n");
|
||
|
break;
|
||
|
|
||
|
case EVENT_PROGRAM_RESTART:
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "** Program Restart Event\n");
|
||
|
|
||
|
/* set the restart flag */
|
||
|
sigrestart = TRUE;
|
||
|
|
||
|
/* log the restart */
|
||
|
logit(NSLOG_PROCESS_INFO, TRUE, "PROGRAM_RESTART event encountered, restarting...\n");
|
||
|
break;
|
||
|
|
||
|
case EVENT_CHECK_REAPER:
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "** Check Result Reaper\n");
|
||
|
|
||
|
/* reap host and service check results */
|
||
|
reap_check_results();
|
||
|
break;
|
||
|
|
||
|
case EVENT_ORPHAN_CHECK:
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "** Orphaned Host and Service Check Event\n");
|
||
|
|
||
|
/* check for orphaned hosts and services */
|
||
|
if(check_orphaned_hosts == TRUE)
|
||
|
check_for_orphaned_hosts();
|
||
|
if(check_orphaned_services == TRUE)
|
||
|
check_for_orphaned_services();
|
||
|
break;
|
||
|
|
||
|
case EVENT_RETENTION_SAVE:
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "** Retention Data Save Event\n");
|
||
|
|
||
|
/* save state retention data */
|
||
|
save_state_information(TRUE);
|
||
|
break;
|
||
|
|
||
|
case EVENT_STATUS_SAVE:
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "** Status Data Save Event\n");
|
||
|
|
||
|
/* save all status data (program, host, and service) */
|
||
|
update_all_status_data();
|
||
|
break;
|
||
|
|
||
|
case EVENT_SCHEDULED_DOWNTIME:
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "** Scheduled Downtime Event\n");
|
||
|
|
||
|
/* process scheduled downtime info */
|
||
|
if(event->event_data) {
|
||
|
handle_scheduled_downtime_by_id(*(unsigned long *)event->event_data);
|
||
|
free(event->event_data);
|
||
|
event->event_data = NULL;
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
case EVENT_SFRESHNESS_CHECK:
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "** Service Result Freshness Check Event\n");
|
||
|
|
||
|
/* check service result freshness */
|
||
|
check_service_result_freshness();
|
||
|
break;
|
||
|
|
||
|
case EVENT_HFRESHNESS_CHECK:
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "** Host Result Freshness Check Event\n");
|
||
|
|
||
|
/* check host result freshness */
|
||
|
check_host_result_freshness();
|
||
|
break;
|
||
|
|
||
|
case EVENT_EXPIRE_DOWNTIME:
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "** Expire Downtime Event\n");
|
||
|
|
||
|
/* check for expired scheduled downtime entries */
|
||
|
check_for_expired_downtime();
|
||
|
break;
|
||
|
|
||
|
case EVENT_RESCHEDULE_CHECKS:
|
||
|
|
||
|
/* adjust scheduling of host and service checks */
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "** Reschedule Checks Event\n");
|
||
|
|
||
|
adjust_check_scheduling();
|
||
|
break;
|
||
|
|
||
|
case EVENT_EXPIRE_COMMENT:
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "** Expire Comment Event\n");
|
||
|
|
||
|
/* check for expired comment */
|
||
|
check_for_expired_comment((unsigned long)event->event_data);
|
||
|
break;
|
||
|
|
||
|
case EVENT_CHECK_PROGRAM_UPDATE:
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "** Check For Program Update\n");
|
||
|
|
||
|
/* check for new versions of Nagios */
|
||
|
check_for_nagios_updates(FALSE, TRUE);
|
||
|
break;
|
||
|
|
||
|
case EVENT_USER_FUNCTION:
|
||
|
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "** User Function Event\n");
|
||
|
|
||
|
/* run a user-defined function */
|
||
|
if(event->event_data != NULL) {
|
||
|
userfunc = event->event_data;
|
||
|
(*userfunc)(event->event_args);
|
||
|
}
|
||
|
break;
|
||
|
|
||
|
default:
|
||
|
|
||
|
break;
|
||
|
}
|
||
|
|
||
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "handle_timed_event() end\n");
|
||
|
|
||
|
return OK;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
/* adjusts scheduling of host and service checks */
|
||
|
void adjust_check_scheduling(void) {
|
||
|
timed_event *temp_event = NULL;
|
||
|
service *temp_service = NULL;
|
||
|
host *temp_host = NULL;
|
||
|
double projected_host_check_overhead = 0.1;
|
||
|
double projected_service_check_overhead = 0.1;
|
||
|
time_t current_time = 0L;
|
||
|
time_t first_window_time = 0L;
|
||
|
time_t last_window_time = 0L;
|
||
|
time_t last_check_time = 0L;
|
||
|
time_t new_run_time = 0L;
|
||
|
int total_checks = 0;
|
||
|
int current_check = 0;
|
||
|
double inter_check_delay = 0.0;
|
||
|
double current_icd_offset = 0.0;
|
||
|
double total_check_exec_time = 0.0;
|
||
|
double last_check_exec_time = 0.0;
|
||
|
int adjust_scheduling = FALSE;
|
||
|
double exec_time_factor = 0.0;
|
||
|
double current_exec_time = 0.0;
|
||
|
double current_exec_time_offset = 0.0;
|
||
|
double new_run_time_offset = 0.0;
|
||
|
|
||
|
|
||
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "adjust_check_scheduling() start\n");
|
||
|
|
||
|
/* TODO:
|
||
|
- Track host check overhead on a per-host basis
|
||
|
- Figure out how to calculate service check overhead
|
||
|
*/
|
||
|
|
||
|
/* determine our adjustment window */
|
||
|
time(¤t_time);
|
||
|
first_window_time = current_time;
|
||
|
last_window_time = first_window_time + auto_rescheduling_window;
|
||
|
|
||
|
/* get current scheduling data */
|
||
|
for(temp_event = event_list_low; temp_event != NULL; temp_event = temp_event->next) {
|
||
|
|
||
|
/* skip events outside of our current window */
|
||
|
if(temp_event->run_time <= first_window_time)
|
||
|
continue;
|
||
|
if(temp_event->run_time > last_window_time)
|
||
|
break;
|
||
|
|
||
|
if(temp_event->event_type == EVENT_HOST_CHECK) {
|
||
|
|
||
|
if((temp_host = (host *)temp_event->event_data) == NULL)
|
||
|
continue;
|
||
|
|
||
|
/* ignore forced checks */
|
||
|
if(temp_host->check_options & CHECK_OPTION_FORCE_EXECUTION)
|
||
|
continue;
|
||
|
|
||
|
/* does the last check "bump" into this one? */
|
||
|
if((unsigned long)(last_check_time + last_check_exec_time) > temp_event->run_time)
|
||
|
adjust_scheduling = TRUE;
|
||
|
|
||
|
last_check_time = temp_event->run_time;
|
||
|
|
||
|
/* calculate time needed to perform check */
|
||
|
/* NOTE: host check execution time is not taken into account, as scheduled host checks are run in parallel */
|
||
|
last_check_exec_time = projected_host_check_overhead;
|
||
|
total_check_exec_time += last_check_exec_time;
|
||
|
}
|
||
|
|
||
|
else if(temp_event->event_type == EVENT_SERVICE_CHECK) {
|
||
|
|
||
|
if((temp_service = (service *)temp_event->event_data) == NULL)
|
||
|
continue;
|
||
|
|
||
|
/* ignore forced checks */
|
||
|
if(temp_service->check_options & CHECK_OPTION_FORCE_EXECUTION)
|
||
|
continue;
|
||
|
|
||
|
/* does the last check "bump" into this one? */
|
||
|
if((unsigned long)(last_check_time + last_check_exec_time) > temp_event->run_time)
|
||
|
adjust_scheduling = TRUE;
|
||
|
|
||
|
last_check_time = temp_event->run_time;
|
||
|
|
||
|
/* calculate time needed to perform check */
|
||
|
/* NOTE: service check execution time is not taken into account, as service checks are run in parallel */
|
||
|
last_check_exec_time = projected_service_check_overhead;
|
||
|
total_check_exec_time += last_check_exec_time;
|
||
|
}
|
||
|
|
||
|
else
|
||
|
continue;
|
||
|
|
||
|
total_checks++;
|
||
|
}
|
||
|
|
||
|
|
||
|
/* nothing to do... */
|
||
|
if(total_checks == 0 || adjust_scheduling == FALSE) {
|
||
|
|
||
|
/*
|
||
|
printf("\n\n");
|
||
|
printf("NOTHING TO DO!\n");
|
||
|
printf("# CHECKS: %d\n",total_checks);
|
||
|
printf("WINDOW TIME: %d\n",auto_rescheduling_window);
|
||
|
printf("EXEC TIME: %.3f\n",total_check_exec_time);
|
||
|
*/
|
||
|
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
if((unsigned long)total_check_exec_time > auto_rescheduling_window) {
|
||
|
inter_check_delay = 0.0;
|
||
|
exec_time_factor = (double)((double)auto_rescheduling_window / total_check_exec_time);
|
||
|
}
|
||
|
else {
|
||
|
inter_check_delay = (double)((((double)auto_rescheduling_window) - total_check_exec_time) / (double)(total_checks * 1.0));
|
||
|
exec_time_factor = 1.0;
|
||
|
}
|
||
|
|
||
|
/*
|
||
|
printf("\n\n");
|
||
|
printf("TOTAL CHECKS: %d\n",total_checks);
|
||
|
printf("WINDOW TIME: %d\n",auto_rescheduling_window);
|
||
|
printf("EXEC TIME: %.3f\n",total_check_exec_time);
|
||
|
printf("ICD: %.3f\n",inter_check_delay);
|
||
|
printf("EXEC FACTOR: %.3f\n",exec_time_factor);
|
||
|
*/
|
||
|
|
||
|
/* adjust check scheduling */
|
||
|
current_icd_offset = (inter_check_delay / 2.0);
|
||
|
for(temp_event = event_list_low; temp_event != NULL; temp_event = temp_event->next) {
|
||
|
|
||
|
/* skip events outside of our current window */
|
||
|
if(temp_event->run_time <= first_window_time)
|
||
|
continue;
|
||
|
if(temp_event->run_time > last_window_time)
|
||
|
break;
|
||
|
|
||
|
if(temp_event->event_type == EVENT_HOST_CHECK) {
|
||
|
|
||
|
if((temp_host = (host *)temp_event->event_data) == NULL)
|
||
|
continue;
|
||
|
|
||
|
/* ignore forced checks */
|
||
|
if(temp_host->check_options & CHECK_OPTION_FORCE_EXECUTION)
|
||
|
continue;
|
||
|
|
||
|
current_exec_time = ((temp_host->execution_time + projected_host_check_overhead) * exec_time_factor);
|
||
|
}
|
||
|
|
||
|
else if(temp_event->event_type == EVENT_SERVICE_CHECK) {
|
||
|
|
||
|
if((temp_service = (service *)temp_event->event_data) == NULL)
|
||
|
continue;
|
||
|
|
||
|
/* ignore forced checks */
|
||
|
if(temp_service->check_options & CHECK_OPTION_FORCE_EXECUTION)
|
||
|
continue;
|
||
|
|
||
|
/* NOTE: service check execution time is not taken into account, as service checks are run in parallel */
|
||
|
current_exec_time = (projected_service_check_overhead * exec_time_factor);
|
||
|
}
|
||
|
|
||
|
else
|
||
|
continue;
|
||
|
|
||
|
current_check++;
|
||
|
new_run_time_offset = current_exec_time_offset + current_icd_offset;
|
||
|
new_run_time = (time_t)(first_window_time + (unsigned long)new_run_time_offset);
|
||
|
|
||
|
/*
|
||
|
printf(" CURRENT CHECK #: %d\n",current_check);
|
||
|
printf(" CURRENT ICD OFFSET: %.3f\n",current_icd_offset);
|
||
|
printf(" CURRENT EXEC TIME: %.3f\n",current_exec_time);
|
||
|
printf(" CURRENT EXEC OFFSET: %.3f\n",current_exec_time_offset);
|
||
|
printf(" NEW RUN TIME: %lu\n",new_run_time);
|
||
|
*/
|
||
|
|
||
|
if(temp_event->event_type == EVENT_HOST_CHECK) {
|
||
|
temp_event->run_time = new_run_time;
|
||
|
temp_host->next_check = new_run_time;
|
||
|
update_host_status(temp_host, FALSE);
|
||
|
}
|
||
|
else {
|
||
|
temp_event->run_time = new_run_time;
|
||
|
temp_service->next_check = new_run_time;
|
||
|
update_service_status(temp_service, FALSE);
|
||
|
}
|
||
|
|
||
|
current_icd_offset += inter_check_delay;
|
||
|
current_exec_time_offset += current_exec_time;
|
||
|
}
|
||
|
|
||
|
/* resort event list (some events may be out of order at this point) */
|
||
|
resort_event_list(&event_list_low, &event_list_low_tail);
|
||
|
|
||
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "adjust_check_scheduling() end\n");
|
||
|
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
/* attempts to compensate for a change in the system time */
|
||
|
void compensate_for_system_time_change(unsigned long last_time, unsigned long current_time) {
|
||
|
unsigned long time_difference = 0L;
|
||
|
timed_event *temp_event = NULL;
|
||
|
service *temp_service = NULL;
|
||
|
host *temp_host = NULL;
|
||
|
int days = 0;
|
||
|
int hours = 0;
|
||
|
int minutes = 0;
|
||
|
int seconds = 0;
|
||
|
time_t (*timingfunc)(void);
|
||
|
|
||
|
|
||
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "compensate_for_system_time_change() start\n");
|
||
|
|
||
|
/* we moved back in time... */
|
||
|
if(last_time > current_time) {
|
||
|
time_difference = last_time - current_time;
|
||
|
get_time_breakdown(time_difference, &days, &hours, &minutes, &seconds);
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "Detected a backwards time change of %dd %dh %dm %ds.\n", days, hours, minutes, seconds);
|
||
|
}
|
||
|
|
||
|
/* we moved into the future... */
|
||
|
else {
|
||
|
time_difference = current_time - last_time;
|
||
|
get_time_breakdown(time_difference, &days, &hours, &minutes, &seconds);
|
||
|
log_debug_info(DEBUGL_EVENTS, 0, "Detected a forwards time change of %dd %dh %dm %ds.\n", days, hours, minutes, seconds);
|
||
|
}
|
||
|
|
||
|
/* log the time change */
|
||
|
logit(NSLOG_PROCESS_INFO | NSLOG_RUNTIME_WARNING, TRUE, "Warning: A system time change of %dd %dh %dm %ds (%s in time) has been detected. Compensating...\n", days, hours, minutes, seconds, (last_time > current_time) ? "backwards" : "forwards");
|
||
|
|
||
|
/* adjust the next run time for all high priority timed events */
|
||
|
for(temp_event = event_list_high; temp_event != NULL; temp_event = temp_event->next) {
|
||
|
|
||
|
/* skip special events that occur at specific times... */
|
||
|
if(temp_event->compensate_for_time_change == FALSE)
|
||
|
continue;
|
||
|
|
||
|
/* use custom timing function */
|
||
|
if(temp_event->timing_func != NULL) {
|
||
|
timingfunc = temp_event->timing_func;
|
||
|
temp_event->run_time = (*timingfunc)();
|
||
|
}
|
||
|
|
||
|
/* else use standard adjustment */
|
||
|
else
|
||
|
adjust_timestamp_for_time_change(last_time, current_time, time_difference, &temp_event->run_time);
|
||
|
}
|
||
|
|
||
|
/* resort event list (some events may be out of order at this point) */
|
||
|
resort_event_list(&event_list_high, &event_list_high_tail);
|
||
|
|
||
|
/* adjust the next run time for all low priority timed events */
|
||
|
for(temp_event = event_list_low; temp_event != NULL; temp_event = temp_event->next) {
|
||
|
|
||
|
/* skip special events that occur at specific times... */
|
||
|
if(temp_event->compensate_for_time_change == FALSE)
|
||
|
continue;
|
||
|
|
||
|
/* use custom timing function */
|
||
|
if(temp_event->timing_func != NULL) {
|
||
|
timingfunc = temp_event->timing_func;
|
||
|
temp_event->run_time = (*timingfunc)();
|
||
|
}
|
||
|
|
||
|
/* else use standard adjustment */
|
||
|
else
|
||
|
adjust_timestamp_for_time_change(last_time, current_time, time_difference, &temp_event->run_time);
|
||
|
}
|
||
|
|
||
|
/* resort event list (some events may be out of order at this point) */
|
||
|
resort_event_list(&event_list_low, &event_list_low_tail);
|
||
|
|
||
|
/* adjust service timestamps */
|
||
|
for(temp_service = service_list; temp_service != NULL; temp_service = temp_service->next) {
|
||
|
|
||
|
adjust_timestamp_for_time_change(last_time, current_time, time_difference, &temp_service->last_notification);
|
||
|
adjust_timestamp_for_time_change(last_time, current_time, time_difference, &temp_service->last_check);
|
||
|
adjust_timestamp_for_time_change(last_time, current_time, time_difference, &temp_service->next_check);
|
||
|
adjust_timestamp_for_time_change(last_time, current_time, time_difference, &temp_service->last_state_change);
|
||
|
adjust_timestamp_for_time_change(last_time, current_time, time_difference, &temp_service->last_hard_state_change);
|
||
|
|
||
|
/* recalculate next re-notification time */
|
||
|
temp_service->next_notification = get_next_service_notification_time(temp_service, temp_service->last_notification);
|
||
|
|
||
|
/* update the status data */
|
||
|
update_service_status(temp_service, FALSE);
|
||
|
}
|
||
|
|
||
|
/* adjust host timestamps */
|
||
|
for(temp_host = host_list; temp_host != NULL; temp_host = temp_host->next) {
|
||
|
|
||
|
adjust_timestamp_for_time_change(last_time, current_time, time_difference, &temp_host->last_host_notification);
|
||
|
adjust_timestamp_for_time_change(last_time, current_time, time_difference, &temp_host->last_check);
|
||
|
adjust_timestamp_for_time_change(last_time, current_time, time_difference, &temp_host->next_check);
|
||
|
adjust_timestamp_for_time_change(last_time, current_time, time_difference, &temp_host->last_state_change);
|
||
|
adjust_timestamp_for_time_change(last_time, current_time, time_difference, &temp_host->last_hard_state_change);
|
||
|
adjust_timestamp_for_time_change(last_time, current_time, time_difference, &temp_host->last_state_history_update);
|
||
|
|
||
|
/* recalculate next re-notification time */
|
||
|
temp_host->next_host_notification = get_next_host_notification_time(temp_host, temp_host->last_host_notification);
|
||
|
|
||
|
/* update the status data */
|
||
|
update_host_status(temp_host, FALSE);
|
||
|
}
|
||
|
|
||
|
/* adjust program timestamps */
|
||
|
adjust_timestamp_for_time_change(last_time, current_time, time_difference, &program_start);
|
||
|
adjust_timestamp_for_time_change(last_time, current_time, time_difference, &event_start);
|
||
|
adjust_timestamp_for_time_change(last_time, current_time, time_difference, &last_command_check);
|
||
|
|
||
|
/* update the status data */
|
||
|
update_program_status(FALSE);
|
||
|
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
/* resorts an event list by event execution time - needed when compensating for system time changes */
|
||
|
void resort_event_list(timed_event **event_list, timed_event **event_list_tail) {
|
||
|
timed_event *temp_event_list = NULL;
|
||
|
timed_event *temp_event = NULL;
|
||
|
timed_event *next_event = NULL;
|
||
|
|
||
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "resort_event_list()\n");
|
||
|
|
||
|
/* move current event list to temp list */
|
||
|
temp_event_list = *event_list;
|
||
|
*event_list = NULL;
|
||
|
|
||
|
/* move all events to the new event list */
|
||
|
for(temp_event = temp_event_list; temp_event != NULL; temp_event = next_event) {
|
||
|
next_event = temp_event->next;
|
||
|
|
||
|
/* add the event to the newly created event list so it will be resorted */
|
||
|
temp_event->next = NULL;
|
||
|
temp_event->prev = NULL;
|
||
|
add_event(temp_event, event_list, event_list_tail);
|
||
|
}
|
||
|
|
||
|
return;
|
||
|
}
|
||
|
|
||
|
|
||
|
|
||
|
/* adjusts a timestamp variable in accordance with a system time change */
|
||
|
void adjust_timestamp_for_time_change(time_t last_time, time_t current_time, unsigned long time_difference, time_t *ts) {
|
||
|
|
||
|
log_debug_info(DEBUGL_FUNCTIONS, 0, "adjust_timestamp_for_time_change()\n");
|
||
|
|
||
|
/* we shouldn't do anything with epoch values */
|
||
|
if(*ts == (time_t)0)
|
||
|
return;
|
||
|
|
||
|
/* we moved back in time... */
|
||
|
if(last_time > current_time) {
|
||
|
|
||
|
/* we can't precede the UNIX epoch */
|
||
|
if(time_difference > (unsigned long)*ts)
|
||
|
*ts = (time_t)0;
|
||
|
else
|
||
|
*ts = (time_t)(*ts - time_difference);
|
||
|
}
|
||
|
|
||
|
/* we moved into the future... */
|
||
|
else
|
||
|
*ts = (time_t)(*ts + time_difference);
|
||
|
|
||
|
return;
|
||
|
}
|