Has anyone got a clue on this one?
Nagios 2.x installation, I installed downtime_sched add-on. The pl script powering downtime_sched 1.5.1 looks like this:
downtime_job.pl
[code]#!/usr/bin/perl
vim:ts=4
this should be run regularly from your crontabs, to schedule any outages
for the forthcoming 24 hours.
Daily:
crontabs: 01 07 * * * downtime_job.pl > /dev/null 2>&1
Hourly:
crontabs: 01 * * * * downtime_job.pl > /dev/null 2>&1
WARNING! Only minor verification is made on the config file. If you give
incorrect hostnames, service descriptions, or hostgrouname then it
will not be noticed until Nagios tries to parse the command!
See companion file for example of structure of schedule.cfg file.
Version 1.1 : fixed for nagios 1.2
Version 1.2 : trim trailing spaces from parameters, allow smaller increments
Version 1.3 : allow wildcards in service name, check for already sched
Version 1.4 : fix parsing of files to use ,\s]+ instead of ,
Version 1.5 : Nagios 2.0 needs extra parameter (triggerID=0) in commands
Version 1.5.1 : Nagios 2.x file format added (status.dat, downtime.dat)
use strict;
use Time::Local;
my($NAGDIR) = “/usr/local/nagios” ; # Nagios root directory
my($NAGVER) = 2.4; # Nagios version - 1.2 or 2.0
my($CMDFILE) = “$NAGDIR/var/rw/nagios.cmd”; # Nagios CMD file
my($STATUSLOG) = “$NAGDIR/var/status.dat”; # Nagios status log file
my($CFGFILE) = “$NAGDIR/etc/schedule.cfg”; # my configuration file
my($HGCFG) = “$NAGDIR/etc/hostgroups.cfg”;# needed only if ver=1.x
my($DOWNLOG) = “$NAGDIR/var/downtime.dat”; # existing sched downtime log
my($FREQUENCY) = 1440; # how many minutes between runs. DO NOT CHANGE
my($MINDUR) = 5; # shortest outage allowed in minutes
my($DEBUG) = 0;
my($SVCALSO) = 0; # schedule outages for all services as well as for host?
my(%hostgroups) = ();
my($rv);
############################################################################
my(%hostsvc);
sub readstatuslog {
if(! -r $STATUSLOG) {
print “ERROR: status file $STATUSLOG not readable\n”;
return;
}
%hostsvc = ();
my $line;
my @services = ();
my %newservice;
my $v;
my $k;
open SL, "<$STATUSLOG" or return;
while( <SL> ) {
# support for 2.4 status file format
if($NAGVER >= 2) {
$line = $_;
$line =~ s/#.*$//;
next if(!$line);
if( $line =~ /^\s*service\s*{/i ) { %newservice = (); next; }
if( $line =~ /^\s*}/ ) {
push @services, { %newservice }
if(%newservice);
%newservice = ();
next;
}
if( $line =~ /^\s*(\S+)\s*=\s*(\S.*)/ ) {
($k,$v)=($1,$2);
$v =~ s/\s*$//; # trim trailing spaces
$newservice{$k} = $v;
}
}
# support for 1.4
else {
if( /^\\d+\]\s+SERVICE;(^;]+);(^;]+);/ ) {
$hostsvc{$1}{$2} = 1;
}
}
}
close SL;
support for 2.4
if($NAGVER >= 2) {
foreach (@services) {
if(defined $->{“service_description”}) {
$hostsvc{$->{“host_name”}}{$->{“service_description”}} = 1;
#print $->{“host_name”} . “->” . $_->{“service_description”} . “\n”;
}
}
}
}
############################################################################
my(%downtime);
sub readdowntime {
if(! -r $DOWNLOG) {
print “ERROR: downtime file $DOWNLOG not readable\n”;
return;
}
my $line;
my @downtimes = ();
my %newdt;
my $v;
my $k;
open DL, "<$DOWNLOG" or return;
while( <DL> ) {
# support for 2.4 status file format
if($NAGVER >= 2) {
$line = $_;
$line =~ s/#.*$//;
next if(!$line);
if( $line =~ /^\s*hostdowntime\s*{/i ) { %newdt = (); next; }
if( $line =~ /^\s*servicedowntime\s*{/i ) { %newdt = (); next; }
if( $line =~ /^\s*}/ ) {
push @downtimes, { %newdt }
if(%newdt);
%newdt = ();
next;
}
if( $line =~ /^\s*(\S+)\s*=\s*(\S.*)/ ) {
($k,$v)=($1,$2);
$v =~ s/\s*$//; # trim trailing spaces
$newdt{$k} = $v;
}
}
else {
if( /^\\d+\]\s+SERVICE_DOWNTIME;\d+;(^;]+);(^;]+);(\d+);/ ) {
$downtime{"$1:$2:$3"} = 1;
} elsif( /^\\d+\]\s+HOST_DOWNTIME;\d+;(^;]+);(\d+);/ ) {
$downtime{"$1:$2"} = 1;
} elsif( /^\\d+\]\s+HOSTGROUP_DOWNTIME;\d+;(^;]+);(\d+);/ ) {
$downtime{"HG!$1:$2"} = 1;
}
}
}
close DL;
support for 2.4
if($NAGVER >= 2) {
foreach (@downtimes) {
if(!defined $->{“service_description”}) {
my $h = $->{“host_name”};
my $s = $->{“start_time”};
$downtime{"$h:$s"} = 1;
#print "HOST DOWNTIME: " . $h . “\n”;
}
else {
my $h = $->{“host_name”};
my $svc = $->{“service_description”};
my $s = $->{“start_time”};
$downtime{"$h:$svc:$s"} = 1;
#print "SERVICE DOWNTIME: " . $h . “->” . $svc . “\n”;
}
}
}
}
############################################################################
sub sendcmd($) {
my($msg) = $_[0];
my($t) = time;
if(!$DEBUG) {
open CMD,">$CMDFILE" or return “Error: $!”;
print CMD “$t] $msg\n”;
close CMD;
}
print “$msg\n”;
return 0;
}
############################################################################
sub schedule_host($$$$$) {
my($h,$s,$d,$u,$c) = @_;
my($rv);
$u = “Automatic” if(!$u);
$c = “AUTO: $c” if($c);
$c = “AUTO: Automatically scheduled for host” if(!$c);
return “Invalid host $h!” if(!$h or !defined $hostsvc{$h});
return “Invalid time $s!” if(!$s);
return “Invalid duration $d!” if(!$DEBUG and ($d < $MINDUR));
if( !defined $downtime{"$h:$s"} ) {
$rv = sendcmd “SCHEDULE_HOST_DOWNTIME;$h;$s;”.($s+($d60)).";1"
.(($NAGVER>=2)?";0":"").";".($d60).";$u;$c";
if($SVCALSO) {
$rv = sendcmd “SCHEDULE_HOST_SVC_DOWNTIME;$h;$s;”
.($s+($d60)).";1"
.(($NAGVER>=2)?";0":"").";".($d60).";$u;$c"
if(!$rv);
}
} else { print “Downtime for host $h already scheduled\n”; return 0; }
return $rv;
}
############################################################################
sub schedule_service($$$$$$) {
my($h,$svc,$s,$d,$u,$c) = @;
my($rv);
$u = “Automatic” if(!$u);
$c = “AUTO: $c” if($c);
$c = “AUTO: Automatically scheduled for service” if(!$c);
return “Invalid host $h!” if(!$h or !defined $hostsvc{$h});
return “Invalid service!” if(!$svc);
return “Invalid time $s!” if(!$s);
return “Invalid duration $d!” if(!$DEBUG and ($d < $MINDUR));
$rv = 0;
if( $svc =~ /*/ ) { # wildcarded?
$svc =~ s/*/.*/g; # change to regexp
foreach ( keys %{$hostsvc{$h}} ) {
if( /^$svc$/ ) {
if(!defined $downtime{"$h:$:$s"}) {
$rv = sendcmd “SCHEDULE_SVC_DOWNTIME;$h;$_;$s;”.($s+($d60)).";1"
.(($NAGVER>=2)?";0":"").";".($d60).";$u;$c";
} else { print “Downtime for service ‘$svc’ on $h already scheduled!\n”; }
}
last if($rv);
}
} else {
return “Invalid service ‘$svc’ on host $h!” if(!defined $hostsvc{$h}{$svc});
if(!defined $downtime{"$h:$svc:$s"}) {
$rv = sendcmd “SCHEDULE_SVC_DOWNTIME;$h;$svc;$s;”.($s+($d60)).";1"
.(($NAGVER>=2)?";0":"").";".($d60).";$u;$c";
} else { print “Downtime for service ‘$svc’ on $h already scheduled!\n”; }
}
return $rv;
}
############################################################################
sub schedule_hostgroup($$$$$) {
my($hg,$s,$d,$u,$c) = @_;
my($rv,$h);
$u = “Automatic” if(!$u);
$c = “AUTO: $c” if($c);
$c = “AUTO: Automatically scheduled for hostgroup” if(!$c);
return “Invalid hostgroup $hg!” if(!$hg);
return “Invalid time $s!” if(!$s);
return “Invalid duration $d!” if(!$DEBUG and ($d < $MINDUR));
$rv = 0;
if( $NAGVER >= 2 ) {
if(!defined $downtime{“HG!$hg:$s”}) {
$rv = sendcmd “SCHEDULE_HOSTGROUP_HOST_DOWNTIME;$hg;$s;”.($s+($d60))
.";1".(($NAGVER>=2)?";0":"").";".($d60).";$u;$c";
if($SVCALSO) {
$rv = sendcmd “SCHEDULE_HOSTGROUP_SERVICE_DOWNTIME;$hg;$s;”
.($s+($d60)).";1"
.(($NAGVER>=2)?";0":"").";".($d60).";$u;$c"
if(!$rv);
}
}
} else {
return “Hostgroup $hg not recognised!” if(!defined $hostgroups{$hg}) ;
foreach $h ( @{$hostgroups{$hg}} ) {
if( !defined $downtime{"$h:$s"} ) {
$rv = sendcmd “SCHEDULE_HOST_DOWNTIME;$h;$s;”
.($s+($d60)).";1"
.(($NAGVER>=2)?";0":"").";".($d60).";$u;$c";
if($SVCALSO) {
$rv = sendcmd “SCHEDULE_HOST_SVC_DOWNTIME;$h;$s;”
.($s+($d60)).";1"
.(($NAGVER>=2)?";0":"").";".($d60).";$u;$c"
if(!$rv);
}
} else { print “Already scheduled!\n”; }
last if($rv);
}
}
return $rv;
}
############################################################################
sub readhgcfg {
my($name,@members);
return if( $NAGVER >= 2 ); # not needed
%hostgroups = ();
open HG, “<$HGCFG”;
while ( ) {
if( /^\sdefine / ) { $name = “”; next; }
if( /^\shostgroup_name\s+(\S+)/ ) { $name = $1; next; }
if( $name and /^\smembers\s+(.)$/ ) {
@members = split /,\s]+/,$1;
$hostgroups{$name} = @members ];
$name = “”;
}
}
close HG;
}
############################################################################
my( @schedules ) = ();
sub readcfg {
my(%newsched);
my($line,$k,$v);
open CFG, “<$CFGFILE” or return “Error: $CFGFILE: $!”;
while( $line= ) {
chomp $line;
$line =~ s/#.$//;
next if(!$line);
if( $line =~ /^\sdefine\s+schedule\s*{/i ) { %newsched = (); next; }
if( $line =~ /^\s*}/ ) {
push @schedules, { %newsched }
if(%newsched);
%newsched = ();
next;
}
if( $line =~ /^\s*(\S+)\s*(\S.)/ ) {
($k,$v)=($1,$2);
$v =~ s/\s$//; # trim trailing spaces
$newsched{$k} = $v;
}
}
close CFG;
return 0;
}
############################################################################
sub numerically { $a<=>$b; }
############################################################################
my %dow = ( mon=>1, tue=>2, wed=>3, thu=>4, fri=>5, sat=>6, sun=>0 );
sub parse_days($) {
my(@rv);
foreach my $dn ( split /,\s]+/,$_[0] ) {
$dn = lc( substr($dn,0,3) );
push @rv,$dow{$dn} if(defined $dow{$dn});
push @rv,($1+0) if($dn=~/(\d+)/);
}
return ( sort numerically @rv );
}
############################################################################
sub parse_dates($) {
my(@rv);
foreach ( split /,\s]+/,$[0] ) { push @rv,($+0); }
return ( sort numerically @rv );
}
############################################################################
sub checkscheds {
my($sref);
my($T) = time();
my($dow,$h,$min,$d,$m,$y,$next,$nh,$nmin,$nd,$nm,$ny,$rv);
my(@lt,@nlt,@lst,$f,$t);
@lt = localtime($T);
($dow,$h,$min,$d,$m,$y) = ($lt[6],$lt[2],$lt[1],$lt[3],$lt[4],$lt[5]);
foreach $sref ( @schedules ) {
if($DEBUG) {
if(defined $sref->{comment}) {
print $sref->{comment} .": ";
} else {
print "Next schedule: ";
}
print " ".$sref->{host_name} if(defined $sref->{host_name});
print " ".$sref->{service_description} if(defined $sref->{service_description});
print “\n”;
}
$t = $sref->{‘time’};
next if($t !~ /^(\d\d?):(\d\d)/);
($nh,$nmin)=($1,$2);
($nd,$nm,$ny)=($d,$m,$y);
if(($h>$nh) or ($h==$nh and $min>$nmin) ) {
$nd+=1;if($nd>31) {$nm+=1;$nd=1;}
if($nm>11) { $ny+=1; $nm=0; }
}
if( $sref->{days_of_month} ) {
@lst = parse_dates($sref->{days_of_month});
if($#lst>=0) {
$f = 0;
foreach ( @lst ) { if( $_ >= $nd ) { $nd=$; $f = 1; last; } }
if(!$f) { $nd = $lst[0]; $nm+=1; if($nm>11){$nm=0;$ny+=1; } }
}
}
$next = timelocal( 0,$nmin,$nh,$nd,$nm,$ny );
@nlt = localtime($next); # to get day of week
if( $sref->{days_of_week} ) {
@lst = parse_days($sref->{days_of_week});
if($#lst>=0) {
print “Checking days of week: “.(join “,”,@lst).”\n” if($DEBUG);
$f = 0;
foreach ( @lst ) {
if( $ >= $nlt[6] ) {
print “Using $_ (today is $dow, looking at “.$nlt[6].”)\n” if($DEBUG);
$nd+=($_-$nlt[6]); $f = 1; last; }
}
if(!$f) { $nd +=(7-$dow+$lst[0]);
print “Advancing a week\n” if($DEBUG); }
if($nd>31) {$nm+=1;$nd=1;}
if($nm>11){$nm=0;$ny+=1; }
}
}
$next = timelocal( 0,$nmin,$nh,$nd,$nm,$ny );
# now we know when its next due to run!
if( $next < $T ) { print "ERROR! Going back in time?\n"; next; }
if( ($next-$T) <= ($FREQUENCY*60) ) {
# Schedule it!
$rv = "";
if( $sref->{schedule_type} =~ /hostgroup|hg/i ) {
$rv = schedule_hostgroup($sref->{hostgroup_name} ,$next,$sref->{duration},$sref->{user},$sref->{comment});
} elsif( $sref->{schedule_type} =~ /host/i ) {
$rv = schedule_host($sref->{host_name} ,$next,$sref->{duration},$sref->{user},$sref->{comment});
} elsif( $sref->{schedule_type} =~ /service|svc/i ) {
$rv = schedule_service($sref->{host_name},$sref->{service_description} ,$next,$sref->{duration},$sref->{user},$sref->{comment});
} else {
$rv = "Unknown schedule type : ".$sref->{schedule_type};
}
if($rv) {
print “ERROR: $rv\n”;
}
} else {
print "Not yet time for this one (wait ".($next-$T).“sec)\n” if($DEBUG);
}
}
}
############################################################################
print “Reading in configuration\n”;
$rv = readcfg;
if($rv) {
print “ERROR: $rv\n”;
exit 1;
}
print “Reading hostgroups if necessary\n”;
readhgcfg;
print “Reading in status log to get list of services\n”;
readstatuslog;
print “Reading in list of already scheduled downtime\n”;
readdowntime;
print “Checking for downtime due in next $FREQUENCY minutes\n”;
checkscheds;
exit 0;[/code]
I’m getting a weird problem: If I schedule recurring server outages for the same time/date, it only picks up half of those that I schedule. On the “Recurring Downtime” page it shows them all fine, but then on the “Downtime” page it only shows the half.
For example:
schedule.cfg (generated by the Recurring Downtime page)
define schedule {
schedule_type host
host_name bcnc01
user Sean Feeney
comment Recurring schedule
time 02:55
duration 30
}
define schedule {
schedule_type host
host_name bcnc02
user Sean Feeney
comment Recurring schedule
time 02:55
duration 30
}
define schedule {
schedule_type host
host_name bcnc03
user Sean Feeney
comment Recurring schedule
time 02:55
duration 30
}
define schedule {
schedule_type host
host_name bcnc04
user Sean Feeney
comment Recurring schedule
time 02:55
duration 30
}
define schedule {
schedule_type host
host_name sdath204
user Sean Feeney
comment Recurring schedule
time 20:00
duration 60
}
define schedule {
schedule_type host
host_name thi1800dv1
user Sean Feeney
comment Recurring schedule
time 20:00
duration 60
}
define schedule {
schedule_type host
host_name thi1800mt1
user Sean Feeney
comment Recurring schedule
time 01:26
duration 30
}
Resulting downtime.dat after running downtime_job.pl:
[code]########################################
NAGIOS DOWNTIME FILE
THIS FILE IS AUTOMATICALLY GENERATED
BY NAGIOS. DO NOT MODIFY THIS FILE!
########################################
info {
created=1163624461
version=2.5
}
hostdowntime {
host_name=thi1800dv1
downtime_id=12
entry_time=1163620862
start_time=1163638800
end_time=1163642400
triggered_by=0
fixed=1
duration=3600
author=Sean Feeney
comment=AUTO: Recurring schedule
}
hostdowntime {
host_name=thi1800dv1
downtime_id=16
entry_time=1163624461
start_time=1163638800
end_time=1163642400
triggered_by=0
fixed=1
duration=3600
author=Sean Feeney
comment=AUTO: Recurring schedule
}
hostdowntime {
host_name=thi1800mt1
downtime_id=13
entry_time=1163620862
start_time=1163658360
end_time=1163660160
triggered_by=0
fixed=1
duration=1800
author=Sean Feeney
comment=AUTO: Recurring schedule
}
hostdowntime {
host_name=thi1800mt1
downtime_id=17
entry_time=1163624461
start_time=1163658360
end_time=1163660160
triggered_by=0
fixed=1
duration=1800
author=Sean Feeney
comment=AUTO: Recurring schedule
}
hostdowntime {
host_name=bcnc01
downtime_id=10
entry_time=1163620862
start_time=1163663700
end_time=1163665500
triggered_by=0
fixed=1
duration=1800
author=Sean Feeney
comment=AUTO: Recurring schedule
}
hostdowntime {
host_name=bcnc02
downtime_id=11
entry_time=1163620862
start_time=1163663700
end_time=1163665500
triggered_by=0
fixed=1
duration=1800
author=Sean Feeney
comment=AUTO: Recurring schedule
}
hostdowntime {
host_name=bcnc01
downtime_id=14
entry_time=1163624461
start_time=1163663700
end_time=1163665500
triggered_by=0
fixed=1
duration=1800
author=Sean Feeney
comment=AUTO: Recurring schedule
}
hostdowntime {
host_name=bcnc02
downtime_id=15
entry_time=1163624461
start_time=1163663700
end_time=1163665500
triggered_by=0
fixed=1
duration=1800
author=Sean Feeney
comment=AUTO: Recurring schedule
}[/code]
As you can see, it only picks up half of the servers listed with 02:55 outages (bcnc01, bcnc02 - 2 out of 4) and half of the 20:00 outages (thi1800dv1 - 1 out of 2).
Additionally, it is generating copies of these outages every hour instead of noticing that it has already created them for such and such date and time.
Any ideas on why it would be doing this?