downtime_sched 1.5.1 Halfing & Duplication Issue?


#1

Has anyone got a clue on this one?

Nagios 2.x installation, I installed downtime_sched add-on. The pl script powering downtime_sched 1.5.1 looks like this:
downtime_job.pl

[code]#!/usr/bin/perl

vim:ts=4

this should be run regularly from your crontabs, to schedule any outages

for the forthcoming 24 hours.

Daily:

crontabs: 01 07 * * * downtime_job.pl > /dev/null 2>&1

Hourly:

crontabs: 01 * * * * downtime_job.pl > /dev/null 2>&1

WARNING! Only minor verification is made on the config file. If you give

incorrect hostnames, service descriptions, or hostgrouname then it

will not be noticed until Nagios tries to parse the command!

See companion file for example of structure of schedule.cfg file.

Version 1.1 : fixed for nagios 1.2

Version 1.2 : trim trailing spaces from parameters, allow smaller increments

Version 1.3 : allow wildcards in service name, check for already sched

Version 1.4 : fix parsing of files to use ,\s]+ instead of ,

Version 1.5 : Nagios 2.0 needs extra parameter (triggerID=0) in commands

Version 1.5.1 : Nagios 2.x file format added (status.dat, downtime.dat)

use strict;
use Time::Local;

my($NAGDIR) = “/usr/local/nagios” ; # Nagios root directory

my($NAGVER) = 2.4; # Nagios version - 1.2 or 2.0
my($CMDFILE) = “$NAGDIR/var/rw/nagios.cmd”; # Nagios CMD file
my($STATUSLOG) = “$NAGDIR/var/status.dat”; # Nagios status log file
my($CFGFILE) = “$NAGDIR/etc/schedule.cfg”; # my configuration file
my($HGCFG) = “$NAGDIR/etc/hostgroups.cfg”;# needed only if ver=1.x
my($DOWNLOG) = “$NAGDIR/var/downtime.dat”; # existing sched downtime log
my($FREQUENCY) = 1440; # how many minutes between runs. DO NOT CHANGE
my($MINDUR) = 5; # shortest outage allowed in minutes
my($DEBUG) = 0;
my($SVCALSO) = 0; # schedule outages for all services as well as for host?
my(%hostgroups) = ();
my($rv);

############################################################################
my(%hostsvc);
sub readstatuslog {

if(! -r $STATUSLOG) {
print “ERROR: status file $STATUSLOG not readable\n”;
return;
}

%hostsvc = ();

my $line;
my @services = ();
my %newservice;
my $v;
my $k;

open SL, "<$STATUSLOG" or return;

while( <SL> ) {
  # support for 2.4 status file format
  if($NAGVER >= 2) {
     $line = $_;
     $line =~ s/#.*$//;
     next if(!$line);
     if( $line =~ /^\s*service\s*{/i ) { %newservice = (); next; }
     if( $line =~ /^\s*}/ ) {
        push @services, { %newservice }
           if(%newservice);
           %newservice = ();
        next;
     }
     if( $line =~ /^\s*(\S+)\s*=\s*(\S.*)/ ) {
        ($k,$v)=($1,$2);
        $v =~ s/\s*$//; # trim trailing spaces
        $newservice{$k} = $v;
     }
  }
  # support for 1.4
  else {
       if( /^\\d+\]\s+SERVICE;(^;]+);(^;]+);/ ) {
           $hostsvc{$1}{$2} = 1;
       }
  }
}
close SL;

support for 2.4

if($NAGVER >= 2) {
foreach (@services) {
if(defined $->{“service_description”}) {
$hostsvc{$
->{“host_name”}}{$->{“service_description”}} = 1;
#print $
->{“host_name”} . “->” . $_->{“service_description”} . “\n”;
}
}
}

}

############################################################################
my(%downtime);
sub readdowntime {

if(! -r $DOWNLOG) {
print “ERROR: downtime file $DOWNLOG not readable\n”;
return;
}

my $line;
my @downtimes = ();
my %newdt;
my $v;
my $k;

open DL, "<$DOWNLOG" or return;

while( <DL> ) {
  # support for 2.4 status file format
  if($NAGVER >= 2) {
     $line = $_;
     $line =~ s/#.*$//;
     next if(!$line);
     if( $line =~ /^\s*hostdowntime\s*{/i ) { %newdt = (); next; }
     if( $line =~ /^\s*servicedowntime\s*{/i ) { %newdt = (); next; }
     if( $line =~ /^\s*}/ ) {
        push @downtimes, { %newdt }
           if(%newdt);
           %newdt = ();
        next;
     }
     if( $line =~ /^\s*(\S+)\s*=\s*(\S.*)/ ) {
        ($k,$v)=($1,$2);
        $v =~ s/\s*$//; # trim trailing spaces
        $newdt{$k} = $v;
     }
  }
  else {
       if( /^\\d+\]\s+SERVICE_DOWNTIME;\d+;(^;]+);(^;]+);(\d+);/ ) {
           $downtime{"$1:$2:$3"} = 1;
       } elsif( /^\\d+\]\s+HOST_DOWNTIME;\d+;(^;]+);(\d+);/ ) {
           $downtime{"$1:$2"} = 1;
       } elsif( /^\\d+\]\s+HOSTGROUP_DOWNTIME;\d+;(^;]+);(\d+);/ ) {
           $downtime{"HG!$1:$2"} = 1;
       }
  }
}
close DL;

support for 2.4

if($NAGVER >= 2) {
foreach (@downtimes) {
if(!defined $->{“service_description”}) {
my $h = $
->{“host_name”};
my $s = $->{“start_time”};
$downtime{"$h:$s"} = 1;
#print "HOST DOWNTIME: " . $h . “\n”;
}
else {
my $h = $
->{“host_name”};
my $svc = $->{“service_description”};
my $s = $
->{“start_time”};
$downtime{"$h:$svc:$s"} = 1;
#print "SERVICE DOWNTIME: " . $h . “->” . $svc . “\n”;
}
}
}
}

############################################################################
sub sendcmd($) {
my($msg) = $_[0];
my($t) = time;
if(!$DEBUG) {
open CMD,">$CMDFILE" or return “Error: $!”;
print CMD “$t] $msg\n”;
close CMD;
}
print “$msg\n”;
return 0;
}

############################################################################
sub schedule_host($$$$$) {
my($h,$s,$d,$u,$c) = @_;
my($rv);
$u = “Automatic” if(!$u);
$c = “AUTO: $c” if($c);
$c = “AUTO: Automatically scheduled for host” if(!$c);
return “Invalid host $h!” if(!$h or !defined $hostsvc{$h});
return “Invalid time $s!” if(!$s);
return “Invalid duration $d!” if(!$DEBUG and ($d < $MINDUR));
if( !defined $downtime{"$h:$s"} ) {
$rv = sendcmd “SCHEDULE_HOST_DOWNTIME;$h;$s;”.($s+($d60)).";1"
.(($NAGVER>=2)?";0":"").";".($d
60).";$u;$c";
if($SVCALSO) {
$rv = sendcmd “SCHEDULE_HOST_SVC_DOWNTIME;$h;$s;”
.($s+($d60)).";1"
.(($NAGVER>=2)?";0":"").";".($d
60).";$u;$c"
if(!$rv);
}
} else { print “Downtime for host $h already scheduled\n”; return 0; }
return $rv;
}

############################################################################
sub schedule_service($$$$$$) {
my($h,$svc,$s,$d,$u,$c) = @;
my($rv);
$u = “Automatic” if(!$u);
$c = “AUTO: $c” if($c);
$c = “AUTO: Automatically scheduled for service” if(!$c);
return “Invalid host $h!” if(!$h or !defined $hostsvc{$h});
return “Invalid service!” if(!$svc);
return “Invalid time $s!” if(!$s);
return “Invalid duration $d!” if(!$DEBUG and ($d < $MINDUR));
$rv = 0;
if( $svc =~ /*/ ) { # wildcarded?
$svc =~ s/*/.*/g; # change to regexp
foreach ( keys %{$hostsvc{$h}} ) {
if( /^$svc$/ ) {
if(!defined $downtime{"$h:$
:$s"}) {
$rv = sendcmd “SCHEDULE_SVC_DOWNTIME;$h;$_;$s;”.($s+($d60)).";1"
.(($NAGVER>=2)?";0":"").";".($d
60).";$u;$c";
} else { print “Downtime for service ‘$svc’ on $h already scheduled!\n”; }
}
last if($rv);
}
} else {
return “Invalid service ‘$svc’ on host $h!” if(!defined $hostsvc{$h}{$svc});
if(!defined $downtime{"$h:$svc:$s"}) {
$rv = sendcmd “SCHEDULE_SVC_DOWNTIME;$h;$svc;$s;”.($s+($d60)).";1"
.(($NAGVER>=2)?";0":"").";".($d
60).";$u;$c";
} else { print “Downtime for service ‘$svc’ on $h already scheduled!\n”; }
}
return $rv;
}

############################################################################
sub schedule_hostgroup($$$$$) {
my($hg,$s,$d,$u,$c) = @_;
my($rv,$h);
$u = “Automatic” if(!$u);
$c = “AUTO: $c” if($c);
$c = “AUTO: Automatically scheduled for hostgroup” if(!$c);
return “Invalid hostgroup $hg!” if(!$hg);
return “Invalid time $s!” if(!$s);
return “Invalid duration $d!” if(!$DEBUG and ($d < $MINDUR));
$rv = 0;
if( $NAGVER >= 2 ) {
if(!defined $downtime{“HG!$hg:$s”}) {
$rv = sendcmd “SCHEDULE_HOSTGROUP_HOST_DOWNTIME;$hg;$s;”.($s+($d60))
.";1".(($NAGVER>=2)?";0":"").";".($d
60).";$u;$c";
if($SVCALSO) {
$rv = sendcmd “SCHEDULE_HOSTGROUP_SERVICE_DOWNTIME;$hg;$s;”
.($s+($d60)).";1"
.(($NAGVER>=2)?";0":"").";".($d
60).";$u;$c"
if(!$rv);
}
}
} else {
return “Hostgroup $hg not recognised!” if(!defined $hostgroups{$hg}) ;
foreach $h ( @{$hostgroups{$hg}} ) {
if( !defined $downtime{"$h:$s"} ) {
$rv = sendcmd “SCHEDULE_HOST_DOWNTIME;$h;$s;”
.($s+($d60)).";1"
.(($NAGVER>=2)?";0":"").";".($d
60).";$u;$c";
if($SVCALSO) {
$rv = sendcmd “SCHEDULE_HOST_SVC_DOWNTIME;$h;$s;”
.($s+($d60)).";1"
.(($NAGVER>=2)?";0":"").";".($d
60).";$u;$c"
if(!$rv);
}
} else { print “Already scheduled!\n”; }
last if($rv);
}
}
return $rv;
}

############################################################################
sub readhgcfg {
my($name,@members);
return if( $NAGVER >= 2 ); # not needed
%hostgroups = ();
open HG, “<$HGCFG”;
while ( ) {
if( /^\sdefine / ) { $name = “”; next; }
if( /^\s
hostgroup_name\s+(\S+)/ ) { $name = $1; next; }
if( $name and /^\smembers\s+(.)$/ ) {
@members = split /,\s]+/,$1;
$hostgroups{$name} = @members ];
$name = “”;
}
}
close HG;
}

############################################################################
my( @schedules ) = ();

sub readcfg {
my(%newsched);
my($line,$k,$v);
open CFG, “<$CFGFILE” or return “Error: $CFGFILE: $!”;
while( $line= ) {
chomp $line;
$line =~ s/#.$//;
next if(!$line);
if( $line =~ /^\s
define\s+schedule\s*{/i ) { %newsched = (); next; }
if( $line =~ /^\s*}/ ) {
push @schedules, { %newsched }
if(%newsched);
%newsched = ();
next;
}
if( $line =~ /^\s*(\S+)\s*(\S.)/ ) {
($k,$v)=($1,$2);
$v =~ s/\s
$//; # trim trailing spaces
$newsched{$k} = $v;
}
}
close CFG;
return 0;
}

############################################################################
sub numerically { $a<=>$b; }

############################################################################
my %dow = ( mon=>1, tue=>2, wed=>3, thu=>4, fri=>5, sat=>6, sun=>0 );
sub parse_days($) {
my(@rv);

foreach my $dn ( split /,\s]+/,$_[0] ) {
$dn = lc( substr($dn,0,3) );
push @rv,$dow{$dn} if(defined $dow{$dn});
push @rv,($1+0) if($dn=~/(\d+)/);
}
return ( sort numerically @rv );
}

############################################################################
sub parse_dates($) {
my(@rv);
foreach ( split /,\s]+/,$[0] ) { push @rv,($+0); }
return ( sort numerically @rv );
}

############################################################################
sub checkscheds {
my($sref);
my($T) = time();
my($dow,$h,$min,$d,$m,$y,$next,$nh,$nmin,$nd,$nm,$ny,$rv);
my(@lt,@nlt,@lst,$f,$t);

@lt = localtime($T);
($dow,$h,$min,$d,$m,$y) = ($lt[6],$lt[2],$lt[1],$lt[3],$lt[4],$lt[5]);

foreach $sref ( @schedules ) {
if($DEBUG) {
if(defined $sref->{comment}) {
print $sref->{comment} .": ";
} else {
print "Next schedule: ";
}
print " ".$sref->{host_name} if(defined $sref->{host_name});
print " ".$sref->{service_description} if(defined $sref->{service_description});
print “\n”;
}
$t = $sref->{‘time’};
next if($t !~ /^(\d\d?):(\d\d)/);
($nh,$nmin)=($1,$2);
($nd,$nm,$ny)=($d,$m,$y);
if(($h>$nh) or ($h==$nh and $min>$nmin) ) {
$nd+=1;if($nd>31) {$nm+=1;$nd=1;}
if($nm>11) { $ny+=1; $nm=0; }
}
if( $sref->{days_of_month} ) {
@lst = parse_dates($sref->{days_of_month});
if($#lst>=0) {
$f = 0;
foreach ( @lst ) { if( $_ >= $nd ) { $nd=$; $f = 1; last; } }
if(!$f) { $nd = $lst[0]; $nm+=1; if($nm>11){$nm=0;$ny+=1; } }
}
}
$next = timelocal( 0,$nmin,$nh,$nd,$nm,$ny );
@nlt = localtime($next); # to get day of week
if( $sref->{days_of_week} ) {
@lst = parse_days($sref->{days_of_week});
if($#lst>=0) {
print “Checking days of week: “.(join “,”,@lst).”\n” if($DEBUG);
$f = 0;
foreach ( @lst ) {
if( $
>= $nlt[6] ) {
print “Using $_ (today is $dow, looking at “.$nlt[6].”)\n” if($DEBUG);
$nd+=($_-$nlt[6]); $f = 1; last; }
}
if(!$f) { $nd +=(7-$dow+$lst[0]);
print “Advancing a week\n” if($DEBUG); }
if($nd>31) {$nm+=1;$nd=1;}
if($nm>11){$nm=0;$ny+=1; }
}
}
$next = timelocal( 0,$nmin,$nh,$nd,$nm,$ny );
# now we know when its next due to run!

  if( $next < $T ) { print "ERROR!  Going back in time?\n"; next; }
  if( ($next-$T) <= ($FREQUENCY*60) ) {
     # Schedule it!
     $rv = "";
     if( $sref->{schedule_type} =~ /hostgroup|hg/i ) {

$rv = schedule_hostgroup($sref->{hostgroup_name} ,$next,$sref->{duration},$sref->{user},$sref->{comment});
} elsif( $sref->{schedule_type} =~ /host/i ) {
$rv = schedule_host($sref->{host_name} ,$next,$sref->{duration},$sref->{user},$sref->{comment});
} elsif( $sref->{schedule_type} =~ /service|svc/i ) {
$rv = schedule_service($sref->{host_name},$sref->{service_description} ,$next,$sref->{duration},$sref->{user},$sref->{comment});
} else {
$rv = "Unknown schedule type : ".$sref->{schedule_type};
}
if($rv) {
print “ERROR: $rv\n”;
}
} else {
print "Not yet time for this one (wait ".($next-$T).“sec)\n” if($DEBUG);
}
}
}

############################################################################

print “Reading in configuration\n”;
$rv = readcfg;
if($rv) {
print “ERROR: $rv\n”;
exit 1;
}
print “Reading hostgroups if necessary\n”;
readhgcfg;
print “Reading in status log to get list of services\n”;
readstatuslog;
print “Reading in list of already scheduled downtime\n”;
readdowntime;
print “Checking for downtime due in next $FREQUENCY minutes\n”;
checkscheds;
exit 0;[/code]

I’m getting a weird problem: If I schedule recurring server outages for the same time/date, it only picks up half of those that I schedule. On the “Recurring Downtime” page it shows them all fine, but then on the “Downtime” page it only shows the half.

For example:
schedule.cfg (generated by the Recurring Downtime page)

define schedule { schedule_type host host_name bcnc01 user Sean Feeney comment Recurring schedule time 02:55 duration 30 } define schedule { schedule_type host host_name bcnc02 user Sean Feeney comment Recurring schedule time 02:55 duration 30 } define schedule { schedule_type host host_name bcnc03 user Sean Feeney comment Recurring schedule time 02:55 duration 30 } define schedule { schedule_type host host_name bcnc04 user Sean Feeney comment Recurring schedule time 02:55 duration 30 } define schedule { schedule_type host host_name sdath204 user Sean Feeney comment Recurring schedule time 20:00 duration 60 } define schedule { schedule_type host host_name thi1800dv1 user Sean Feeney comment Recurring schedule time 20:00 duration 60 } define schedule { schedule_type host host_name thi1800mt1 user Sean Feeney comment Recurring schedule time 01:26 duration 30 }

Resulting downtime.dat after running downtime_job.pl:

[code]########################################

NAGIOS DOWNTIME FILE

THIS FILE IS AUTOMATICALLY GENERATED

BY NAGIOS. DO NOT MODIFY THIS FILE!

########################################

info {
created=1163624461
version=2.5
}

hostdowntime {
host_name=thi1800dv1
downtime_id=12
entry_time=1163620862
start_time=1163638800
end_time=1163642400
triggered_by=0
fixed=1
duration=3600
author=Sean Feeney
comment=AUTO: Recurring schedule
}

hostdowntime {
host_name=thi1800dv1
downtime_id=16
entry_time=1163624461
start_time=1163638800
end_time=1163642400
triggered_by=0
fixed=1
duration=3600
author=Sean Feeney
comment=AUTO: Recurring schedule
}

hostdowntime {
host_name=thi1800mt1
downtime_id=13
entry_time=1163620862
start_time=1163658360
end_time=1163660160
triggered_by=0
fixed=1
duration=1800
author=Sean Feeney
comment=AUTO: Recurring schedule
}

hostdowntime {
host_name=thi1800mt1
downtime_id=17
entry_time=1163624461
start_time=1163658360
end_time=1163660160
triggered_by=0
fixed=1
duration=1800
author=Sean Feeney
comment=AUTO: Recurring schedule
}

hostdowntime {
host_name=bcnc01
downtime_id=10
entry_time=1163620862
start_time=1163663700
end_time=1163665500
triggered_by=0
fixed=1
duration=1800
author=Sean Feeney
comment=AUTO: Recurring schedule
}

hostdowntime {
host_name=bcnc02
downtime_id=11
entry_time=1163620862
start_time=1163663700
end_time=1163665500
triggered_by=0
fixed=1
duration=1800
author=Sean Feeney
comment=AUTO: Recurring schedule
}

hostdowntime {
host_name=bcnc01
downtime_id=14
entry_time=1163624461
start_time=1163663700
end_time=1163665500
triggered_by=0
fixed=1
duration=1800
author=Sean Feeney
comment=AUTO: Recurring schedule
}

hostdowntime {
host_name=bcnc02
downtime_id=15
entry_time=1163624461
start_time=1163663700
end_time=1163665500
triggered_by=0
fixed=1
duration=1800
author=Sean Feeney
comment=AUTO: Recurring schedule
}[/code]

As you can see, it only picks up half of the servers listed with 02:55 outages (bcnc01, bcnc02 - 2 out of 4) and half of the 20:00 outages (thi1800dv1 - 1 out of 2).

Additionally, it is generating copies of these outages every hour instead of noticing that it has already created them for such and such date and time.

Any ideas on why it would be doing this?