Implement a reaction system based on how long a host has been offline

This commit is contained in:
Sergey Popov 2011-12-19 17:02:42 +04:00
parent bc36207f1f
commit c2aa0ac0ab
2 changed files with 23 additions and 3 deletions

25
yasnd.c
View File

@ -20,7 +20,9 @@ char* recipient_number=NULL; // recipient of sms alerts
bool lpt_enable=false; // control usage of LPT port to reset target devices
int lpt_port=0x378; // LPT port in hex(0x378 is usually LPT1)
bool loop_locked=false; // flag for locking main loop while config re-reading
long failures_first=3; // count of failures while hosts checking, that triggers first SMS sending
long failures_first=3; // count of failures while hosts checking, that triggers SMS sending
long failures_second=6; // count of failures while hosts checking, that triggers host's reset
long failures_third=9; // count of failures while checking hosts that clears the failure statistics
pid_t mainloop_clone_pid; // pid of the clone process that contains the main loop
void log_debug(const char *message,int verbosity)
@ -157,6 +159,7 @@ void init()
hosts[i].lpt_pin=cfg_getint(host,"lpt_pin");
hosts[i].fail_count=0;
hosts[i].alert_sent=false;
hosts[i].reaction_obtained=false;
}
// initialize gammu structures
if (sms_send_enable)
@ -207,7 +210,7 @@ void reset_pin(int pin_num)
if (ioperm (lpt_port, 3, 1))
{
log_event("Error: LPT port access error");
exit(EXIT_FAILURE);
return;
}
// Reset host
outb (pins, lpt_port);
@ -242,6 +245,7 @@ int loop_function()
{
hosts[i].fail_count=0;
hosts[i].alert_sent=false;
hosts[i].reaction_obtained=false;
}
else
hosts[i].fail_count++;
@ -250,16 +254,31 @@ int loop_function()
}
for (int i=0;i<hosts_count;i++)
{
if (hosts[i].fail_count>failures_second && hosts[i].alert_sent && !hosts[i].reaction_obtained)
{
if (lpt_enable)
{
char message[150];
sprintf(message,"Host %s does not answer and no reaction on this. Trying to reset it(LPT pin %d)",hosts[i].hostname,hosts[i].lpt_pin);
log_debug(message,DEBUG_BASE);
if (sms_send_enable)
gammu_send_sms(message);
reset_pin(hosts[i].lpt_pin);
hosts[i].reaction_obtained=true;
}
continue;
}
if (hosts[i].fail_count>failures_first && !hosts[i].alert_sent)
{
char message[100];
sprintf(message,"Host %s does not answer",hosts[i].hostname);
sprintf(message,"Host %s does not answer(LPT pin %d)",hosts[i].hostname,hosts[i].lpt_pin);
log_debug(message,DEBUG_BASE);
if (sms_send_enable)
gammu_send_sms(message);
// set alert flag to prevent sending more than 1 message
// for unreachable host
hosts[i].alert_sent=true;
continue;
}
}
}

View File

@ -22,6 +22,7 @@ typedef struct {
pid_t helper_pid; // pid of helper('pinger') child process
int fail_count; // how many times in a row host was unreachable
bool alert_sent; // variable, that changed when host goes online/offline
bool reaction_obtained; // flag that is set once a reaction to the host's behaviour has been obtained
} host_decl;
// Structure, that described single IPC message