#!/usr/local/bin/perl # Kister IPMP daemon # use active probes on a redundant target to determine interface # Copyright 2009 Jeremy Kister - http://jeremy.kister.net # ## kipmp may be copied and distributed under the terms found in ## the Perl "Artistic License", found in the standard Perl distribution. # # v1.04 . 2009.10.13.03 # # cat </etc/kipmp.cfg # #key #value # r=required, d=has a default, o=optional # master bge0 # [r] primary interface # slave bge1 # [r] backup interface # address 10.0.0.23 # [r] address to flip between interfaces # netmask 255.255.255.0 # [r] netmask of address # target 10.0.0.1 # [r] address to test # frequency 2 # [d] how often (seconds) to test target # retries 1 # [d] how many retries before we flip # timeout 1 # [d] how long to wait # tcp_fallback 1 # [o] if icmp fails, try tcp connect # tcp_portnum 23 # [o] # curr_file /var/run/kipmp # [o] file to write the current interface name # test_layer1 1 # [o] use ndd or mii-tool, if available # notify user@example.com # [o] address to send important things to # post_flip sleep 1; svcadm restart network/pfil # [o] command to run after flipping # EOM use strict; use Net::Ping 2.36; use Getopt::Std; use Sys::Syslog; use Sys::Hostname; use POSIX 'setsid'; $ENV{PATH} = "/usr/sbin:/usr/lib:/sbin:/usr/local/sbin"; chdir('/'); my (%opt,%cfg); my %health = map { $_ => 0 } qw/icmp_ok icmp_fail tcp_ok tcp_fail flips/; my $failed = 0; $SIG{USR1} = sub { log_msg( "icmp_ok: $health{icmp_ok} - icmp_fail: $health{icmp_fail} ", "tcp_ok: $health{tcp_ok} - tcp_fail: $health{tcp_fail} ", "flips: $health{flips}\n" ); }; $SIG{USR2} = sub { log_msg( "received SIGUSR2: flipping interface" ); $failed=0; flip(); }; $SIG{ALRM} = sub { debug( "received SIGALRM: checking standby interface." ); my $res = chk_next(); debug( "standby interface ok" ) if($res); alarm(600) if($cfg{test_layer1}); }; getopts('Ds:fc:', \%opt); # Debug, syslog facility, foreground, config file $opt{s} ||= 'daemon'; $opt{c} ||= '/etc/kipmp.cfg'; openlog( 'kipmp', 'pid', $opt{s} ); slowdie( "won't start - /var/run/kipmp.pid exists." ) if(-f "/var/run/kipmp.pid"); my $childpid; daemonize(2, 'kipmp') unless($opt{f}); # make sure permissions are sane on kipmp.cfg my ($mode,$uid) = ( stat($opt{c}) )[2,4]; slowdie( "owner of $opt{c} must be root" ) if($uid); my $permissions = sprintf "%04o", $mode & 0777; slowdie( "mode of $opt{c} must be 0400" ) unless($permissions eq '0400'); open(CONFIG, $opt{c}) || slowdie( "cannot open $opt{c}: $!" ); while(){ chomp; s/#.*//g; next if(/^\s*$/); my($k,$v) = split(/\s+/, $_, 2); $cfg{$k} = $v; } close CONFIG; $cfg{retries} = 1 unless(defined($cfg{retries})); $cfg{frequency} ||= 2; $cfg{timeout} ||= 2; foreach my $key (qw/master slave address netmask target/){ slowdie( "config error: no $key key in cfg file" ) unless($cfg{$key}); } my %keys = map { $_ => 1 } qw/master slave address netmask target frequency retries timeout tcp_fallback tcp_portnum curr_file test_layer1 post_flip notify/; foreach my $key (keys %cfg){ slowdie( "config error: invalid key: $key" ) unless($keys{$key}); } if($cfg{test_layer1}){ foreach my $dir (split /:/, $ENV{PATH}){ foreach my $tool (qw/ndd ethtool mii-tool/){ if(-x "$dir/$tool"){ $cfg{l1tool} = $tool; last; } } last if($cfg{l1tool}); } slowdie( "no layer1 tool found in PATH. remove test_layer1 from $opt{c}?" ) unless($cfg{l1tool}); } log_msg( "starting." ); mail ( "starting." ); my ($current,$next); foreach my $int (qw/master slave/){ if( grep /\s$cfg{address}\s/, split(/\n/, `ifconfig $cfg{$int}`) ){ chcurr( $cfg{$int} ); debug( "found current active interface is $cfg{$int}" ); last; } } unless($current){ log_msg( "running: ifconfig $cfg{master} $cfg{address} up netmask $cfg{netmask}" ); system( "ifconfig $cfg{master} $cfg{address} up netmask $cfg{netmask}" ); chcurr( $cfg{master} ); } debug( "running: ifconfig $next plumb up" ); system( "ifconfig $next plumb up" ); if( $cfg{post_flip} ){ debug( "running: $cfg{post_flip}" ); system( $cfg{post_flip} ); } my $ping = Net::Ping->new('icmp'); $ping->bind($cfg{address}); # causes older versions of Net::Ping to fail alarm(600) if($cfg{test_layer1}); while(1){ debug( "attempting ping of $cfg{target}" ); my($icmpok,$tcpok); my $result = eval { $ping->ping($cfg{target}, $cfg{timeout}); }; if($result){ debug( "icmp ping ok" ); $health{icmp_ok}++; $icmpok=1; }elsif($@){ debug( "eval error: $@" ); mail ( "eval error: $@" ); }else{ debug( "icmp ping failed" ); $health{icmp_fail}++; if($cfg{tcp_fallback}){ # icmp ping failed, try tcp ping my $tcp = Net::Ping->new('tcp'); $tcp->bind($cfg{address}); $tcp->port_number($cfg{tcp_portnum}) if($cfg{tcp_portnum}); my $result = eval { $tcp->ping($cfg{target}, $cfg{timeout}); }; if($result){ debug( "tcp ping ok." ); log_msg( "icmp ping failed, but tcp ping ok." ); $health{tcp_ok}++; $tcpok=1; }elsif($@){ debug( "eval error: $@" ); mail ( "eval error: $@" ); }else{ debug( "tcp ping failed" ); $health{tcp_fail}++; $failed++; } }else{ $failed++; } if($failed >= $cfg{retries}){ flip(); $failed=0; }elsif( $tcpok ){ $failed=0; } } sleep $cfg{frequency} if( $icmpok || $tcpok ); } sub chk_next { return 1 unless($cfg{l1tool}); my $res; if($cfg{l1tool} eq 'ndd'){ chomp($res = `ndd -get /dev/$next link_status`); }elsif($cfg{l1tool} eq 'ethtool'){ my $tmp = (split /\n/, `ethtool -t $next`)[0]; $res = 1 if($tmp =~ /PASS/); }elsif($cfg{l1tool} eq 'mii-tool'){ my $tmp = `mii-tool $next`; $res = 1 if($tmp =~ /link ok/); } unless( $res ){ my $msg = "interface $next is not up!"; debug ( $msg ); mail ( $msg ); } return($res); } sub flip { return unless( chk_next() ); $health{flips}++; system( "ifconfig $current 0" ); system( "ifconfig $next $cfg{address} up netmask $cfg{netmask}" ); system( $cfg{post_flip} ) if( $cfg{post_flip} ); for my $i (1..6){ log_msg( "sleep 10 [$i of 6]" ); sleep 10; # to prevent rapid interface flipping } log_msg( "ran: ifconfig $current 0\n", "ran: ifconfig $next $cfg{address} up netmask $cfg{netmask}" ); log_msg( "ran: $cfg{post_flip}" ) if( $cfg{post_flip} ); mail( "changed interface from $current to $next" ); chcurr( $next ); } sub chcurr { my $new = shift; if($cfg{curr_file}){ if( open(CURR_FILE, ">", $cfg{curr_file}) ){ print CURR_FILE "$new\n"; close CURR_FILE; }else{ warn "cannot write to $cfg{curr_file}: $!\n"; } } $current = $new; $next = ($current eq $cfg{master}) ? $cfg{slave} : $cfg{master}; } sub daemonize { # daemonize code stolen from jeff weisberg my $to = shift; my $name = shift; fork && exit; close STDIN; open( STDIN, "/dev/null" ); close STDOUT; open( STDOUT, ">", "/dev/null" ); close STDERR; open( STDERR, ">", "/dev/null" ); setsid(); $SIG{HUP} = $SIG{QUIT} = $SIG{INT} = $SIG{TERM} = sub { sighandler($name, @_) }; if( $name ){ # save pid file open(PIF, "> /var/run/$name.pid") || slowdie( "cannot write /var/run/$name.pid: $!" ); print PIF "$$\n"; close PIF; } # run as 2 processes while(1){ if( $childpid = fork ){ # parent wait; $childpid = undef; sleep $to; }else{ # child return; } } } sub sighandler { my $name = shift; if( $childpid > 1 ){ unlink( "/var/run/$name.pid" ) if $name; kill "TERM", $childpid; wait; } log_msg( "caught signal SIG$_[0] - exiting" ); exit; } sub debug { if($opt{D}){ my $msg = join('', @_); log_msg( $msg ); } } sub log_msg { my $msg = join('', @_); syslog( 'info', $msg ); warn "$msg\n" if($opt{f}); } sub slowdie { my $msg = join('', @_); log_msg( $msg ); sleep 10; exit 1; } sub mail { if( $cfg{notify} ){ my $msg = join('', @_); my $hostname = hostname(); if( open(SENDMAIL, "| sendmail -t -f root\@${hostname}") ){ foreach my $rcpt (split /[,\s]+/, $cfg{notify}){ print SENDMAIL "To: ${rcpt}\n"; } my $date = localtime(); print SENDMAIL "From: root\@${hostname}\n", "Date: $date\n", "Subject: kipmp notice\n\n", "$hostname: \n", $msg; close SENDMAIL; }else{ log_msg( "could not fork sendmail: $!" ); } } }