Talk:Monitor your fax queue with Nagios

This check doesn't include a timeout, which means that if a check runs for too long, old checks can pile up and consume a lot of CPU. The problem cascades: the more CPU the old checks use, the more likely new checks are to time out...

At least, this is the situation I encountered when running the check on a remote system via check_by_ssh.

I've made the following change to add a timeout. The code was mostly taken from check_mailq. I've only done basic testing, although the change is trivial.

--- check_hylafax      2009-08-11 08:38:27.000000000 +1000 +++ check_hylafax.new  2009-08-11 08:38:39.000000000 +1000 @@ -2,7 +2,7 @@

use strict; use Getopt::Long; -use vars qw($opt_V $opt_h $opt_H $opt_P $opt_w $opt_c $opt_l $opt_p $PROGNAME); +use vars qw($opt_V $opt_h $opt_H $opt_P $opt_w $opt_c $opt_l $opt_p $opt_t $PROGNAME); use lib "/usr/lib/nagios/plugins" ; use utils qw(%ERRORS &print_revision &support &usage); use Net::FTP; @@ -26,6 +26,7 @@        "P=i" => \$opt_P, "port=i" => \$opt_P, "l=s" => \$opt_l, "login=s" => \$opt_l, "p=s" => \$opt_p, "password=s" => \$opt_p, +        "t=i" => \$opt_t, "timeout=i"  => \$opt_t );

if ($opt_V) { @@ -41,6 +42,10 @@ my $port = ($opt_P ? $opt_P : "4559"); usage("Invalid port: $opt_P\n") unless ($port > 1 and $port < 32767);

+unless (defined $opt_t) { +       $opt_t = $utils::TIMEOUT ;      # default timeout +} + usage("Warning threshold not specified\n") if not $opt_w; my $warning = $opt_w; usage("Invalid warning threshold: $opt_w\n") unless $warning =~ /^\d+$/; @@ -53,6 +58,12 @@
 * 1) my $critical = $1 if ($opt_c =~ /([0-9]{1,2}|100)/);
 * 2) ($critical) || usage("Invalid critical threshold: $opt_c\n");

+$SIG{'ALRM'} = sub { +       print ("ERROR: timed out waiting for HylaFAX queue \n"); +       exit $ERRORS{"WARNING"}; +}; +alarm($opt_t); + my $sig=0; my $ftp = Net::FTP->new($host, Port => $port); if (not $ftp) { @@ -67,6 +78,9 @@ $ftp->quit; $sig = scalar(@jobs);

+## shut off the alarm +alarm(0); + my $err = "OK"; if ($sig >= $critical) { $err = "CRITICAL"; @@ -91,6 +105,8 @@       print "     connect to this port. Default is 4559.\n"; print " -l, --login=STRING\n"; print "    connect using this login name.\n"; +      print "  -t, --timeout=INTEGER\n"; +      print "     Plugin timeout in seconds (default = $utils::TIMEOUT)\n"; print " -p, --password=STRING\n"; print "    connect using this password.\n"; print "\n";