#!/usr/bin/perl
use strict;
use warnings;
use Getopt::Std;
use Sys::Hostname;

# Look for packet loss to a specified host, using "mtr --report".
#
#  - the loss figure is taken from the last line of the report (the
#    destination hop) and compared against the -m threshold
#  - if loss is detected, confirm by 2 more attempts 10 seconds apart
#  - do not alert more than once while loss is detected
#  - if loss is not confirmed, do not alert about it
#
# State is kept in /var/tmp/packetloss.<host>, which holds "<date>|<loss>"
# for as long as an alert is active.
#
# Output / exit status:
#   "OK"              exit 0   no (confirmed) loss
#   "WARNING - ..."   exit 1   results flipped between ok and loss 5 times
#   "CRITICAL - ..."  exit 2   loss confirmed, or an alert is already active

my $rcpt = 'user@example.com';

my %opt;
getopts('DXh:m:', \%opt);
#D Debug
#X extra debug (print report)
#h host
#m maximum packet loss

$ENV{PATH} = "/usr/sbin";

die "must specify -h \n" unless $opt{h};

# The host ends up in a command line and in a file name under /var/tmp, so
# only accept characters that can appear in a host name or IP address.  This
# also rules out a leading "-" (which mtr would read as an option) and "/".
die "invalid host for -h: $opt{h}\n"
    unless $opt{h} =~ /\A[A-Za-z0-9_:][A-Za-z0-9_.:%-]*\z/;

if ($opt{m}) {
    # Numify once, quietly: a non-numeric argument becomes 0 and is rejected
    # by the check below instead of producing a warning on every comparison.
    no warnings 'numeric';
    $opt{m} += 0;
    die "argument to -m must be a positive number\n" unless ($opt{m} > 0);
}
else {
    $opt{m} = 1;
}

my $state_file  = "/var/tmp/packetloss.$opt{h}";
my $retry_delay = 10;    # seconds between confirmation attempts

my %trace;               # mtr report lines, keyed by the value of $tries when taken
my $date   = localtime();
my $tries  = 1;
my $mode   = '';         # 'ok' or 'loss': result of the previous attempt
my $resets = 0;          # how many times $mode has switched

while ($tries) {
    $tries--;

    # List-form pipe open: mtr is executed directly, no shell is involved.
    open(my $tr, '-|', 'mtr', '--report', '-c', '10', $opt{h})
        || die "cannot fork traceroute: $!\n";
    my @report = <$tr>;
    chomp @report;
    close $tr;
    $trace{$tries} = \@report;

    die "problem running mtr\n" unless @report;

    if ($opt{X}) {
        verbose($_) for @report;
    }

    # Last report line is the destination hop; expected to look like
    # "<name> <loss>[%] <count> ...", with the loss written as a decimal.
    my ($loss) = $report[-1] =~ /^\S+\s+(\d+\.\d+)%?\s+\d+/;
    die "malformed response from mtr: $report[-1]\n" unless defined $loss;

    if ($loss > $opt{m}) {
        debug("see packet loss on destination: $loss");
        if ($mode eq 'ok') {
            debug("inconsitent results. resetting tries.");
            $tries = 3;
            count_reset();
        }
        $mode = 'loss';

        # An existing state file means an alert was already sent for this
        # outage: report CRITICAL (which exits) without mailing again.
        if (open(my $fh, '<', $state_file)) {
            my $line = <$fh>;
            close $fh;
            $line = '' unless defined $line;
            chomp $line;
            my ($time, $origloss) = split /\|/, $line;
            $time     = 'unknown' unless defined $time     && length $time;
            $origloss = 'unknown' unless defined $origloss && length $origloss;
            debug("already in alert mode, not resending alert");
            critical("PACKET LOSS: $loss percent on $opt{h} (started $time at ${origloss}%)");
        }

        if ($tries == 1) {
            # Third consecutive lossy result: record the outage and alert.
            debug("alert: packet loss on destination host");
            open(my $fh, '>', $state_file)
                || die "cannot write to $state_file: $!\n";
            print $fh "${date}|${loss}\n";
            # Only warn here: dying would leave a state file behind without
            # the alert ever having been sent.
            close($fh) || warn "error closing $state_file: $!\n";
            alert("packetloss: ${loss}% loss to $opt{h}");
            critical("PACKET LOSS");
        }
        else {
            # First lossy result of a run: schedule the confirmation attempts.
            $tries = 3 unless $tries;
        }
    }
    else {
        debug("no/insignifigant packet loss: ${loss}%");
        if ($mode eq 'loss') {
            debug("inconsistent results. resetting tries.");
            $tries = 3;
            count_reset();
        }
        $mode = 'ok';

        # An alert is active: require further clean results before the
        # state file is removed.
        if (-f $state_file) {
            debug("in alert mode, waiting for more tries: $tries");
            if ($tries == 1) {
                unlink($state_file)
                    || die "cannot unlink $state_file: $!\n";
            }
            else {
                $tries = 2 unless $tries;
            }
        }
    }

    if ($tries) {
        debug("sleeping 10");
        sleep $retry_delay;
    }
    else {
        print "OK\n";
    }
}

# Count one switch of $mode between 'ok' and 'loss'.  On the fifth switch,
# print a WARNING line, mail an alert and exit with status 1.
#
# Deliberately not called "reset": a sub of that name defined in this file
# does not override Perl's builtin reset, so a plain "reset()" call would
# run the builtin and this counter would never advance.
sub count_reset {
    $resets++;
    if ($resets == 5) {
        print "WARNING - consistent sporadic packet loss\n";
        alert("packetloss: consistent sporadic packet loss");
        exit 1;
    }
}

# Mail the collected mtr reports to $rcpt through sendmail.
#   $subject - text for the Subject: header
# Dies if sendmail cannot be started.
sub alert {
    my $subject  = shift;
    my $hostname = hostname();

    open(my $sm, '|-', '/usr/lib/sendmail', '-t')
        || die "cannot fork sendmail: $!\n";
    print $sm "To: $rcpt\n",
        "From: root\@$hostname\n",
        "Subject: $subject\n",
        "Date: $date\n",
        "\n";

    # %trace is keyed by the $tries value at the time of each run.  For an
    # uninterrupted loss sequence that is 0, then 2, then 1.  Slots that
    # were never filled are printed as empty sections.
    my @order = (0, 2, 1);
    for my $i (0 .. $#order) {
        print $sm "\n" if $i;
        print $sm "*** trace", $i + 1, ":\n";
        for my $line (@{ $trace{ $order[$i] } || [] }) {
            print $sm "$line\n";
        }
    }

    close($sm) || warn "sendmail did not exit cleanly (status $?)\n";
}

# Print the concatenated arguments to STDERR, followed by a newline.
sub verbose {
    my ($msg) = join('', @_);
    warn "$msg\n";
}

# Like verbose(), prefixed with "debug: "; silent unless -D was given.
sub debug {
    verbose('debug: ', @_) if $opt{D};
}

# Print "CRITICAL - <message>" on STDOUT and exit with status 2.
sub critical {
    my ($msg) = join('', @_);
    print "CRITICAL - $msg\n";
    exit 2;
}