One of the biggest issues in maintaining a huge production plant is that you need a number of administrators to manage it, and each administrator comes on board with his or her own naming conventions, tastes, preferences and past experience. This is not good for the overall maintenance of the production plant. We typically saw this with the IPMP configuration on each of our boxes: all of the configurations worked, but one administrator would name the IPs IP-1, IP-2, IP3 while another would use IP-A, IP-B, IP-C.
During outages, figuring this out became a daunting task and we used to lose precious time. So we decided that the problem needed to be fixed at the root.
- We came up with a clean naming convention system around it.
- Next, we created a script that reads a file called net.txt (generated for our plant by the network administrators) to set up IPMP on individual boxes.
- We ensured that all the administrators only use this script to configure networks on the system and provide necessary artifacts.
Here is the script ipmp.sh and net.txt -
$ cat ipmp.sh
#!/usr/bin/ksh
# IPMP Auto Configuration
# net.txt is Generated by Network Engineers
# usage - print a summary of the command-line options and terminate.
# Outputs: the usage text on stdout.
# Exits:   always, with status 1 (same status as before).
usage() {
  cat <<EOF
usage: $0 [-h | -n | -nocheck | -f | -force | -clean_net [host] | -check_net]
  (no option)        review the input data and link speeds, then reconfigure
  -h                 show this help and exit
  -n, -nocheck       skip the input-data and link-speed review
  -f, -force         as -nocheck, and never ask for confirmation
  -clean_net [host]  only write net.txt.local.<host> (default: this host)
  -check_net         only validate ./net.txt
EOF
  exit 1
}
# cont - ask the operator whether the run should carry on.
# Globals: MODE (read) - the value "force" suppresses the prompt entirely.
# Input:   one line from stdin; only the exact answer "yes" continues.
# Returns: 0 when execution should continue.
# Exits:   the whole script with status 0 on any other answer (or on EOF).
cont() {
  # Quoted and defaulted so that an unset MODE cannot break the test.
  [ "${MODE:-}" = force ] && return 0
  # printf keeps the cursor on the prompt line without relying on the
  # shell-specific "\c" escape of echo.
  printf '%s' 'Continue? yes/[no] '
  answer=
  read -r answer
  [ "$answer" = yes ] || exit 0
  return 0
}
# netmasks - print the netmask conversion table on stdout.
# Every row is "# <prefix-length> <dotted-quad> <hex>", optionally followed
# by a class label; callers filter the rows they need.  Arguments are ignored.
netmasks() {
  printf '%s\n' \
    '# table of netmasks' \
    '# 32 255.255.255.255 ffffffff' \
    '# 31 255.255.255.254 fffffffe' \
    '# 30 255.255.255.252 fffffffc' \
    '# 29 255.255.255.248 fffffff8' \
    '# 28 255.255.255.240 fffffff0' \
    '# 27 255.255.255.224 ffffffe0' \
    '# 26 255.255.255.192 ffffffc0' \
    '# 25 255.255.255.128 ffffff80' \
    '# 24 255.255.255.0 ffffff00 "CLASS C"' \
    '# 23 255.255.254.0 fffffe00' \
    '# 22 255.255.252.0 fffffc00' \
    '# 21 255.255.248.0 fffff800' \
    '# 20 255.255.240.0 fffff000' \
    '# 19 255.255.224.0 ffffe000' \
    '# 18 255.255.192.0 ffffc000' \
    '# 17 255.255.128.0 ffff8000' \
    '# 16 255.255.0.0 ffff0000 "CLASS B"'
}
# clean_net [host]
# Write a host-specific extract of $NET to $NET.local.<host> and repoint NET
# at that extract.
#
# Arguments: $1 - host whose lines are wanted (default: `uname -n`)
# Globals:   NET (read, then rewritten to the extract's name); myhost (written)
#
# The extract holds
#   1. every line whose first field is exactly <host>, with "non_tcp/ip" and
#      "N/A" normalised to "n/a" and lines mentioning NET_MGT dropped;
#   2. one copy of every line whose first field equals a VLAN id found in the
#      third field of the lines kept in step 1.
clean_net() {
  myhost=${1:-$(uname -n)}
  cn_src=$NET
  cn_dst=$NET.local.$myhost

  # Compare whole fields instead of grepping for a "^host" prefix, so that
  # e.g. unix-server-1 does not also select unix-server-10.  Appending "" to
  # the awk variable forces a string (not numeric) comparison.
  awk '$1 == (host "")' host="$myhost" "$cn_src" |
    sed -e 's#non_tcp/ip#n/a#g' -e 's#N/A#n/a#g' |
    grep -v NET_MGT >"$cn_dst"

  # Field matching works whether the columns are separated by tabs or by
  # spaces; sort -u stops a VLAN shared by several interfaces being appended
  # more than once.
  for cn_vlan in $(awk '{print $3}' "$cn_dst" | sort -u); do
    awk '$1 == (id "")' id="$cn_vlan" "$cn_src" >>"$cn_dst"
  done

  NET=$cn_dst
}
# check_net_txt - sanity-check $NET and extract this host's identity from it.
#
# Globals read:    NET, IBMHOST
# Globals written: nettxterror, ibmhost, telhost, primaryip, defrout
#                  (plus the per-line scratch variables filled while parsing)
# Exits 1 when $NET does not exist, when any line fails validation, when a
# VLAN referenced by this host has no VLAN line, or when no line starting
# with $IBMHOST is found.  Prompts on stdin for a primary IP address and/or
# default router if the extracted values do not look like dotted quads.
check_net_txt() {
# a few basic checks of the net.txt file
# check that net.txt exists
if [ ! -a $NET ]; then
echo "ERROR: can't find $NET"
exit 1
fi
# check net.txt for valid data
nettxterror=0
# NOTE: nettxterror is assigned inside this piped while loop and tested after
# it, and "echo ... | read" below must set variables that are used on the
# following lines.  Both only work in a shell that runs the last stage of a
# pipeline in the current shell, as the ksh named in the shebang does.
cat $NET |
while read line
do
# a line is either a 14-word host/interface line or a 4-word VLAN line
cols=`echo $line |wc -w`
if [ $cols -ne 14 -a $cols -ne 4 ]; then
echo "WARNING: the following line of $NET has `echo $line |wc -w` columns:"
echo " $line"
nettxterror=1
elif [ $cols -eq 14 ]; then
echo $line |read ibmhostdiscard telhostdiscard vlanid vip if1name if1addr if1mode if1port if2name if2addr if2mode if2port primaryipdiscard defroutdiscard
syntaxerror=0
# The combination of VIP / interface 1 address / interface 2 address decides
# which kind of line this is.  Each failed check overwrites syntaxerror, so
# only the last failing check of a line is reported.
case $vip-$if1addr-$if2addr in
# error codes:
# 1 - interface wrong
# 21 - mode wrong (VCS)
# 22 - mode wrong (no_IPMP)
# 23 - mode wrong (IPMP)
# 3 - IP address wrong
# 4 - switch port wrong
# 10 - can't interpret, VCS, IPMP or non-IPMP.
n/a-n/a-n/a )
# VCS vlan: one interface, no addresses at all; every interface-2 field
# must be n/a
echo $if1name |egrep "^(ce|e1000g|nxge)[0-9][0-9]*$" >/dev/null || syntaxerror=1
[ "$if1mode" = n/a -o "$if1mode" = active ] || syntaxerror=21
echo $if1port |grep "^CPOD[0-9][0-9]*_Access_*[0-9][0-9]*,_[0-9][0-9]*/[0-9][0-9]*$" >/dev/null || syntaxerror=4
[ "$if2name" = n/a ] || syntaxerror=10
[ "$if2addr" = n/a ] || syntaxerror=10
[ "$if2mode" = n/a ] || syntaxerror=10
[ "$if2port" = n/a ] || syntaxerror=10
;;
n/a-* )
# no-ipmp group: no VIP, one addressed interface; every interface-2 field
# must be n/a
echo $if1name |egrep "^(ce|e1000g|nxge)[0-9][0-9]*$" >/dev/null || syntaxerror=1
echo $if1addr |grep "^[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*$" >/dev/null || syntaxerror=3
[ "$if1mode" = active -o "$if1mode" = passive ] || syntaxerror=22
echo $if1port |grep "^CPOD[0-9][0-9]*_Access_*[0-9][0-9]*,_[0-9][0-9]*/[0-9][0-9]*$" >/dev/null || syntaxerror=4
[ "$if2name" = n/a ] || syntaxerror=10
[ "$if2addr" = n/a ] || syntaxerror=10
[ "$if2mode" = n/a ] || syntaxerror=10
[ "$if2port" = n/a ] || syntaxerror=10
;;
* )
# ipmp group: both interfaces must be fully specified and exactly one of
# them must be active, the other passive
echo $if1name |egrep "^(ce|e1000g|nxge)[0-9][0-9]*$" >/dev/null || syntaxerror=1
echo $if1addr |grep "^[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*$" >/dev/null || syntaxerror=3
[ "$if1mode" = active -o "$if1mode" = passive ] || syntaxerror=23
echo $if1port |grep "^CPOD[0-9][0-9]*_Access_*[0-9][0-9]*,_[0-9][0-9]*/[0-9][0-9]*$" >/dev/null || syntaxerror=4
echo $if2name |egrep "^(ce|e1000g|nxge)[0-9][0-9]*$" >/dev/null || syntaxerror=1
echo $if2addr |grep "^[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*$" >/dev/null || syntaxerror=3
[ "$if2mode" = active -o "$if2mode" = passive ] || syntaxerror=23
echo $if2port |grep "^CPOD[0-9][0-9]*_Access_*[0-9][0-9]*,_[0-9][0-9]*/[0-9][0-9]*$" >/dev/null || syntaxerror=4
[ "${if1mode}/${if2mode}" = "active/passive" -o "${if1mode}/${if2mode}" = "passive/active" ] || syntaxerror=23
;;
esac
# translate the numeric code into a message and flag the file as bad
if [ $syntaxerror -ne 0 ]; then
echo "WARNING: the following line of net.txt contains syntax errors:"
case $syntaxerror in
1 ) message="cannot decode an interface name" ;;
21 ) message="cannot interpret active/passive mode of an interface (VCS group)" ;;
22 ) message="cannot interpret active/passive mode of an interface (no-IPMP group)" ;;
23 ) message="cannot interpret active/passive mode of an interface (IPMP group)" ;;
3 ) message="cannot interpret an IP number" ;;
4 ) message="cannot interpret a switch port" ;;
10 ) message="does not fit into category of VCS, IPMP or non-IPMP group" ;;
esac
echo " $line <-- $message"
nettxterror=1
fi
fi
done
# check that all the VLAN lines are present
# (cut -f3 splits on tabs, so this step assumes tab-separated columns)
for i in $(grep "^`uname -n`" $NET |cut -f3 |sort -u); do
# NOTE(review): the pattern ends with the column delimiter; if net.txt is
# tab-separated this must be a literal tab rather than a space - confirm.
if ! grep "^$i " $NET >/dev/null; then
echo "WARNING: cannot find a VLAN line for VLAN $i"
nettxterror=1
fi
done
# any problem found above is fatal
if [ $nettxterror -eq 1 ]; then
echo "ERROR: the $NET file is corrupt"
exit 1
fi
# extract expected UMI hostname, Telstra hostname, default gateway & primary IP address
# (taken from the first line that starts with $IBMHOST: fields 1, 2, 13 and 14)
if grep ^$IBMHOST $NET >/dev/null 2>&1; then
set -- `grep ^$IBMHOST $NET |head -1`
ibmhost=$1
telhost=$2
telhost=`echo $telhost |tr "[:upper:]" "[:lower:]"`
primaryip=${13}
defrout=${14}
else
echo "WARNING: cannot find local hostname in $NET"
exit 1
fi
# check that a primary IP address & default gateway was extracted
# (keep asking the operator until the value has the shape of a dotted quad;
# the individual octets are not range-checked)
while ! echo $primaryip |egrep "^[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*$" >/dev/null; do
echo "WARNING: the primary IP address \"$primaryip\" is invalid"
echo "Please enter a valid primary IP address for this host: \c"
read primaryip
done
while ! echo $defrout |egrep "^[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*\.[0-9][0-9]*$" >/dev/null; do
echo "WARNING: the default router address \"$defrout\" is invalid"
echo "Please enter a valid default router address for this host: \c"
read defrout
done
}
# _mask_to_dotted MASK
# Print the dotted-quad form of MASK, which may be given as a prefix length,
# a dotted quad or a hex mask, by looking it up in the netmasks table.
# Prints nothing when MASK is not in the table.  Whole columns are compared,
# so "24" can no longer match inside 255.255.255.240.
_mask_to_dotted() {
  netmasks |
  while read m2d_hash m2d_bits m2d_dotted m2d_hex m2d_rest; do
    case $1 in
    "$m2d_bits" | "$m2d_dotted" | "$m2d_hex" )
      echo "$m2d_dotted"
      break
      ;;
    esac
  done
}

# check_input_data - show the operator what is about to be configured.
#
# Globals read: IBMHOST, NET, KSTAT, ibmhost, telhost, defrout, primaryip
# Side effects: snapshots `kstat -p` into $KSTAT (read by later steps), clears
#               the screen, pages a summary of every $IBMHOST line of $NET
#               through less, then asks for confirmation via cont, which
#               exits the script unless the operator answers "yes".
check_input_data() {
  kstat -p 2>/dev/null >$KSTAT
  # present the data & summarise proposed changes
  clear
  (
    echo "Please check the following input data against the latest release of the buildsheet:\n"
    count=1
    hostinfoprinted=0
    grep "^$IBMHOST" "$NET" |
    while read ibmhostdiscard telhostdiscard vlanid vip \
               if1name if1addr if1mode if1port \
               if2name if2addr if2mode if2port \
               primaryipdiscard defroutdiscard
    do
      # switch ports and descriptions use "_" in place of blanks in net.txt
      if1port=`echo $if1port |sed -e 's/_/ /g'`
      if2port=`echo $if2port |sed -e 's/_/ /g'`
      # VLAN line: <vlanid> <network> <netmask> <description>.  Matching on
      # the first field works for tab- and space-separated files alike.
      set -- $(awk '$1 == (id "")' id="$vlanid" "$NET")
      ntwid=$2
      ntmsk=$(_mask_to_dotted "$3")
      [ -z "$ntmsk" ] && ntmsk=n/a
      descr=`echo $4 |sed -e 's/_/ /g'`
      if [ $hostinfoprinted -eq 0 ]; then
        # these global vars are set by check_net_txt
        cat <<EOF
UMI Hostname = $ibmhost
Telstra Hostname = $telhost
Default Router = $defrout
Primary IP Address = $primaryip
EOF
        hostinfoprinted=1
      fi
      # rows whose value is n/a are not shown
      cat <<EOF |grep -v n/a
$count.
VIP address = $vip
Interface 1 name = $if1name
Interface 1 address = $if1addr
Interface 1 mode = $if1mode
Interface 1 port = $if1port
Interface 2 name = $if2name
Interface 2 address = $if2addr
Interface 2 mode = $if2mode
Interface 2 port = $if2port
Network information for this interface or IPMP group:
VLAN ID = $vlanid
Network Address = $ntwid
Netmask = $ntmsk
Network Description = $descr
EOF
      (( count = count + 1 ))
    done
    echo "\nPress 'q' to continue."
  ) |less
  cont
}
# _report_link IFNAME VLAN
# Print the link_speed / link_duplex rows of the kstat snapshot ($KSTAT) that
# belong to IFNAME.  Snapshot rows start with "module:instance:", so the
# pattern is anchored on both parts: e1000g1 must not also report e1000g10 or
# e1000g11, and a multi-digit instance must be matched in full.
_report_link() {
  echo "Checking $1 (VLAN $2)"
  rl_type=`echo "$1" |sed -e 's/[0-9][0-9]*$//'`   # driver, e.g. e1000g
  rl_inst=${1#"$rl_type"}                          # instance, e.g. 10
  egrep "^${rl_type}:${rl_inst}:.*link_(speed|duplex)" $KSTAT
}

# check_link_speeds - show speed/duplex for every interface listed for this
# host in $NET, then ask for confirmation.
#
# Globals read: IBMHOST, NET, KSTAT (snapshot written by check_input_data)
# Lines without a VIP (single-interface lines) report interface 1 only; IPMP
# lines report both interfaces.  cont exits the script unless the operator
# answers "yes".
check_link_speeds() {
  echo "\nChecking link speeds and duplex settings:\n"
  grep "^$IBMHOST" "$NET" |
  while read ibmhostdiscard telhostdiscard vlanid vip \
             if1name if1addr if1mode if1port \
             if2name if2addr if2mode if2port \
             primaryipdiscard defroutdiscard
  do
    _report_link "$if1name" "$vlanid"
    [ "$vip" = n/a ] || _report_link "$if2name" "$vlanid"
  done
  cont
}
# unplumb_all_interfaces - take down every ce/e1000g/nxge interface found in
# the kstat snapshot, except those whose /etc/hostname.<if> sets "mtu 9000"
# (reported as EBR interfaces and left untouched).
#
# Globals read: KSTAT, MODE, DATE
# Unless MODE is "force" the list is shown first and cont asks for
# confirmation.  For each remaining interface an existing /etc/hostname.<if>
# is copied to /var/tmp/hostname.<if>.$DATE and removed, then the interface
# is unplumbed.
unplumb_all_interfaces() {
  # module + instance of every matching snapshot row, e.g. "ce" "0" -> ce0
  ifs=`egrep "^(ce|e1000g|nxge):" $KSTAT |awk -F: '{print $1$2}' |uniq`

  if [ $MODE != force ]; then
    echo "\nAbout to unplumb the following interfaces:"
    echo "$ifs"
    echo "\nWARNING: this is your LAST chance to abort without making changes!"
    cont
  fi

  for i in $ifs; do
    if grep "mtu *9000" /etc/hostname.$i >/dev/null 2>&1; then
      echo "Skipping EBR interface /etc/hostname.$i..."
      continue
    fi
    if [ -a /etc/hostname.$i ]; then
      cp -ip /etc/hostname.$i /var/tmp/hostname.$i.$DATE
      rm /etc/hostname.$i
    fi
    ifconfig $i unplumb 2>/dev/null
  done
}
# _mask_to_prefix MASK
# Print the prefix length (e.g. 24) for MASK, which may be given as a prefix
# length, a dotted quad or a hex mask, by looking it up in the netmasks table.
# Prints nothing when MASK is not in the table.  Whole columns are compared,
# so "24" can no longer match inside 255.255.255.240.
_mask_to_prefix() {
  netmasks |
  while read m2p_hash m2p_bits m2p_dotted m2p_hex m2p_rest; do
    case $1 in
    "$m2p_bits" | "$m2p_dotted" | "$m2p_hex" )
      echo "$m2p_bits"
      break
      ;;
    esac
  done
}

# configure_interfaces - configure every interface listed for this host.
#
# Globals read: IBMHOST, NET, ibmhost
# For each $IBMHOST line of $NET:
#   - no VIP and no addresses : VCS interface, reported and skipped
#   - no VIP                  : no_ipmp <if> <addr> <prefix>
#   - otherwise               : ipmp <if1> <addr1> <mode1> <if2> <addr2>
#                                    <mode2> <vip> <group> <prefix>
# The IPMP group name is <abbreviated host name>-<vlan id>.  A line whose
# VLAN netmask cannot be resolved is skipped with a warning instead of being
# configured with an empty netmask.
configure_interfaces() {
  echo "\nConfiguring ethernet interfaces ..."
  # abbreviated host name: characters 3-10 of a 15-character name; sed leaves
  # names of any other length unchanged
  shost=`echo $ibmhost |sed 's/..\(........\)...../\1/'`
  grep "^$IBMHOST" "$NET" |
  while read ibmhostdiscard telhostdiscard vlanid vip \
             if1name if1addr if1mode if1port \
             if2name if2addr if2mode if2port \
             primaryipdiscard defroutdiscard
  do
    group=${shost}-${vlanid}

    case $vip-$if1addr-$if2addr in
    n/a-n/a-n/a )
      echo "Skipping VCS interface $if1name ..."
      continue
      ;;
    esac

    # VLAN line: <vlanid> <network> <netmask> <description>.  Matching on
    # the first field works for tab- and space-separated files alike.
    set -- $(awk '$1 == (id "")' id="$vlanid" "$NET")
    ntmsk=$(_mask_to_prefix "$3")
    if [ -z "$ntmsk" ]; then
      echo "WARNING: cannot determine the netmask for VLAN $vlanid, skipping $if1name ..."
      continue
    fi

    case $vip-$if1addr-$if2addr in
    n/a-* )
      echo "Configuring $if1name with $if1addr/$ntmsk ..."
      no_ipmp "$if1name" "$if1addr" "$ntmsk"
      ;;
    * )
      echo "Configuring $if1name with $if1addr/$ntmsk [$if1mode] and $if2name with $if2addr/$ntmsk [$if2mode] and assigning VIP address $vip/$ntmsk [group name is $group] ..."
      ipmp "$if1name" "$if1addr" "$if1mode" "$if2name" "$if2addr" "$if2mode" "$vip" "$group" "$ntmsk"
      ;;
    esac
  done
}
# no_ipmp IFNAME ADDRESS NETMASK
# Configure a stand-alone (non-IPMP) interface: write /etc/hostname.IFNAME,
# plumb the interface and bring it up with ADDRESS/NETMASK at MTU 1500.
# An interface whose existing /etc/hostname.IFNAME sets "mtu 9000" is treated
# as an EBR interface and left untouched, with a warning.
no_ipmp() {
  nic=$1
  ip=$2
  ntmsk=$3

  if grep "mtu *9000" "/etc/hostname.$nic" >/dev/null 2>&1; then
    # Name the interface that was passed in; the messages used to print $i,
    # a leftover loop variable from another function.
    echo "WARNING: interface /etc/hostname.$nic appears to be configured for EBR."
    echo "Skipping interface /etc/hostname.$nic..."
    return 0
  fi

  # same options in the persistent file and on the live interface
  printf '%s\n' "$ip/$ntmsk broadcast + mtu 1500 up" >"/etc/hostname.$nic"
  ifconfig "$nic" plumb
  ifconfig "$nic" "$ip/$ntmsk" broadcast + mtu 1500 up
}
# ipmp IF1 ADDR1 MODE1 IF2 ADDR2 MODE2 VIP GROUP NETMASK
# Configure a two-interface IPMP group.  Both interfaces get their own
# address (deprecated, -failover) in GROUP; the VIP is added as a failover
# address on IF1 when MODE1 is "active" and on IF2 when MODE1 is "passive".
# Any other MODE1 configures nothing.  If either /etc/hostname file sets
# "mtu 9000" the pair is treated as EBR and skipped with a warning.
ipmp() {
  if1name=$1; if1addr=$2; if1mode=$3
  if2name=$4; if2addr=$5; if2mode=$6
  vipip=$7; group=$8; ntmsk=$9

  if egrep "mtu *9000" /etc/hostname.$if1name >/dev/null 2>&1 ||
     egrep "mtu *9000" /etc/hostname.$if2name >/dev/null 2>&1; then
    echo "WARNING: one of /etc/hostname.$if1name or /etc/hostname.$if2name appears to be configured for EBR."
    echo "Skipping IPMP group /etc/hostname.$if1name and /etc/hostname.$if2name..."
    return 0
  fi

  # Pick which interface carries the VIP; the other one is the standby.
  case $if1mode in
  active )
    vip_if=$if1name; vip_addr=$if1addr
    sby_if=$if2name; sby_addr=$if2addr
    ;;
  passive )
    vip_if=$if2name; vip_addr=$if2addr
    sby_if=$if1name; sby_addr=$if1addr
    ;;
  * )
    return 0
    ;;
  esac

  test_opts="broadcast + group $group deprecated -failover mtu 1500 up"
  vip_opts="addif $vipip/$ntmsk broadcast + failover mtu 1500 up"

  # persistent configuration: VIP-carrying interface first, then the standby
  cat <<EOF >/etc/hostname.$vip_if
$vip_addr/$ntmsk $test_opts
$vip_opts
EOF
  cat <<EOF >/etc/hostname.$sby_if
$sby_addr/$ntmsk $test_opts
EOF

  # live configuration, in the same order; the option strings are left
  # unquoted on purpose so that they split into separate ifconfig arguments
  ifconfig $vip_if plumb
  ifconfig $vip_if $vip_addr/$ntmsk $test_opts $vip_opts
  ifconfig $sby_if plumb
  ifconfig $sby_if $sby_addr/$ntmsk $test_opts
}
# hup_mpath - send SIGHUP to every running in.mpathd process, or print a
# warning when none is running.
hup_mpath() {
  # pgrep -x matches the process name exactly and never lists itself, so the
  # "grep -v grep" step is no longer needed.
  mpath_pids=`pgrep -x in.mpathd`
  # Quoted test: the list may hold more than one PID.
  if [ -n "$mpath_pids" ]; then
    echo "Sending HUP signal to /usr/lib/inet/in.mpathd ..."
    # unquoted on purpose: one argument per PID
    kill -HUP $mpath_pids
  else
    echo "WARNING: in.mpathd does NOT appear to be running, skipping ..."
  fi
}
# recreate_hosts_file - rebuild /etc/inet/hosts from $NET.
#
# Globals read: IBMHOST, NET, DATE, ibmhost, telhost, primaryip
# The current file is first copied to /var/tmp/hosts.$DATE.  The new file
# consists of
#   - a header with localhost and the primary address of this host,
#   - one "<address> <abbreviated host>-<vlan id>" entry per addressed line
#     (the VIP for IPMP groups, the interface address otherwise),
#   - one comment per interface naming its switch port and VLAN,
#   - the fixed list of administration servers.
recreate_hosts_file() {
  # build the hosts file
  echo "Reconfiguring /etc/inet/hosts ..."
  cp -p /etc/inet/hosts /var/tmp/hosts.$DATE

  tab=$(printf '\t')
  # abbreviated IBM hostname: characters 3-10 of a 15-character name; sed
  # leaves names of any other length unchanged
  shost=`echo $ibmhost |sed 's/..\(........\)...../\1/'`

  # The interface comments are collected here and appended after the address
  # entries.  Start from an empty file so that leftovers from an interrupted
  # earlier run cannot end up in the new hosts file.
  notes=/var/tmp/hosts.tmp
  : >$notes

  header=0
  grep "^$IBMHOST" "$NET" |
  while read ibmhostdiscard telhostdiscard vlanid vip \
             if1name if1addr if1mode if1port \
             if2name if2addr if2mode if2port \
             primaryipdiscard defroutdiscard
  do
    # switch ports and descriptions use "_" in place of blanks in net.txt
    if1port=`echo $if1port |sed -e 's/_/ /g'`
    if2port=`echo $if2port |sed -e 's/_/ /g'`
    # VLAN line: <vlanid> <network> <netmask> <description>.  Matching on
    # the first field works for tab- and space-separated files alike.  Only
    # the description is needed here.
    set -- $(awk '$1 == (id "")' id="$vlanid" "$NET")
    descr=`echo $4 |sed -e 's/_/ /g'`

    if [ $header -eq 0 ]; then
      # the first host line truncates the file and writes the header
      cat <<EOF >/etc/inet/hosts
#
# Internet host table
#
127.0.0.1${tab}localhost
$primaryip${tab}$ibmhost ${ibmhost}.in.telstra.com.au $telhost $shost loghost
EOF
      header=1
    fi

    case $vip-$if1addr-$if2addr in
    n/a-n/a-n/a )
      # VCS interface: no address entry, comment only
      echo "# $if1name is connected to $if1port on VLAN $vlanid ($descr)" >>$notes
      ;;
    n/a-* )
      printf '%s\n' "$if1addr${tab}$shost-$vlanid" >>/etc/inet/hosts
      echo "# $if1name is connected to $if1port on VLAN $vlanid ($descr)" >>$notes
      ;;
    * )
      printf '%s\n' "$vip${tab}$shost-$vlanid" >>/etc/inet/hosts
      echo "# $if1name is connected to $if1port on VLAN $vlanid ($descr)" >>$notes
      echo "# $if2name is connected to $if2port on VLAN $vlanid ($descr)" >>$notes
      ;;
    esac
  done

  cat $notes >>/etc/inet/hosts
  rm -f $notes

  cat <<EOF >>/etc/inet/hosts
# administration servers
192.74.189.172${tab}nus808.in.telstra.com.au nus808
146.132.8.23${tab}nus721.in.telstra.com.au nus721
172.15.12.5${tab}nus022.telecom.com.au nus022
EOF
}
# recreate_netmasks_file - replace /etc/inet/netmasks with the stock comment
# header (no network entries are written).  The previous file is first
# copied to /var/tmp/netmasks.$DATE.
recreate_netmasks_file() {
  echo "Reconfiguring /etc/inet/netmasks ..."
  cp -ip /etc/inet/netmasks /var/tmp/netmasks.$DATE
  # configure netmasks file
  printf '%s\n' \
    '#' \
    '# The netmasks file associates Internet Protocol (IP) address' \
    '# masks with IP network numbers.' \
    '#' \
    '# network-number netmask' \
    '#' \
    '# The term network-number refers to a number obtained from the Internet Network' \
    '# Information Center.' \
    '#' \
    '# Both the network-number and the netmasks are specified in' \
    '# "decimal dot" notation, e.g:' \
    '#' \
    '# 128.32.0.0 255.255.255.0' \
    '#' \
    >/etc/inet/netmasks
}
# configure_defaultrouter - make $defrout the only default route.
#
# Globals read: defrout, DATE
# Does nothing (apart from a warning) when $defrout is empty.  Otherwise
# every default route currently shown by netstat is deleted, the old
# /etc/defaultrouter is copied to /var/tmp/defaultrouter.$DATE, the file is
# rewritten with $defrout and the route is added to the running system.
configure_defaultrouter() {
  [ -n "$defrout" ] || {
    echo "WARNING: no default route specified in input data, skipping ..."
    return
  }

  echo "Reconfiguring /etc/defaultrouter ..."

  # remove whatever default routes are active right now
  if netstat -rn |grep ^default >/dev/null; then
    for gateway in $(netstat -rn |awk '$1 == "default" {print $2}'); do
      echo "Deleting default route $gateway ..."
      route delete default -gateway $gateway >/dev/null 2>&1
    done
  fi

  # persistent setting first, then the live routing table
  cp -ip /etc/defaultrouter /var/tmp/defaultrouter.$DATE
  echo $defrout >/etc/defaultrouter
  echo "Adding $defrout as the default route ..."
  route add default -gateway $defrout >/dev/null 2>&1
}
# check_nodename - compare /etc/nodename with the expected host name
# ($ibmhost) and tell the operator to fix it by hand when they differ.
# Nothing is changed on the system.
check_nodename() {
  echo "Checking /etc/nodename ..."
  current_nodename=$(cat /etc/nodename)
  [ "$current_nodename" = "$ibmhost" ] ||
    echo "/etc/nodename not correct, set this manually ..."
}
# update_permissions - make the network configuration files read-only
# (mode 444) and owned by root:root.
# Covers /etc/inet/hosts, /etc/inet/netmasks, /etc/defaultrouter and every
# /etc/hostname.* file.  Files that do not exist are skipped: this includes
# the literal pattern "/etc/hostname.*", which the shell passes through
# unchanged when no interface file matches.
update_permissions() {
  echo "Resetting permissions on"
  for i in /etc/inet/hosts /etc/inet/netmasks /etc/defaultrouter /etc/hostname.*
  do
    [ -f "$i" ] || continue
    echo " $i ..."
    chmod 444 "$i"
    chown root:root "$i"
  done
}
###
###
### MAIN
###
###
# Option handling.  MODE selects what the run does:
#   normal    - review input data and link speeds, then reconfigure
#   nocheck   - reconfigure without the two review steps
#   force     - as nocheck, and cont never prompts
#   clean_net - only write the host-specific extract of net.txt
#   check_net - only validate net.txt
# Anything unrecognised is rejected, so that a mistyped option can never
# start a full reconfiguration.
MODE=normal
case $1 in
"" ) ;;
-h ) usage ;;
-n | -nocheck ) MODE=nocheck ;;
-f | -force ) MODE=force ;;
-clean_net ) MODE=clean_net ;;
-check_net ) MODE=check_net ;;
* )
  echo "ERROR: unknown option '$1'"
  usage
  ;;
esac

# The input file must be in the current directory.
[ -f ./net.txt ] || {
  echo "ERROR: can't find 'net.txt'. Please save the file 'net.txt'"
  echo " in the current directory and re-run $0."
  echo "Goodbye."
  exit 1
}

IBMHOST=`uname -n`
NET=net.txt
KSTAT=/tmp/kstat.txt
# timestamp used as the suffix of every backup copy in /var/tmp
DATE=`date +%Y%m%d%H%M%S`

case $MODE in
clean_net )
  clean_net "$2"
  # clean_net repoints NET at the file it wrote, so this names the real file
  # even when no host argument was given
  echo "Saved $NET ..."
  echo "Goodbye."
  exit 0
  ;;
check_net )
  check_net_txt
  exit 0
  ;;
* )
  clean_net
  check_net_txt
  [ $MODE = normal ] && check_input_data
  [ $MODE = normal ] && check_link_speeds
  unplumb_all_interfaces
  configure_interfaces
  hup_mpath
  configure_defaultrouter
  recreate_hosts_file
  recreate_netmasks_file
  check_nodename
  update_permissions
  echo "\nNOTE: Original files were saved as /var/tmp/*.$DATE files."
  echo "All done.\n"
  ;;
esac
# end of script
$ cat net.txt
unix-server-1 unix-server-1 2300_9/10 n/a e1000g1 130.103.248.110 active CPOD1_Access9,_6/15 n/a n/a n/a n/a n/a -
unix-server-1 unix-server-1 3994_7/8 n/a NET_MGT 10.0.3.177 active CPOD1_Access7,_12/11 n/a n/a n/a n/a n/a -
unix-server-2 unix-server-2 2300_9/10 n/a e1000g1 130.103.248.111 active CPOD1_Access9,_6/16 n/a n/a n/a n/a n/a -
unix-server-2 unix-server-2 3994_7/8 n/a NET_MGT 10.0.3.178 active CPOD1_Access7,_12/12 n/a n/a n/a n/a n/a -
...
I am sure you can set up something similar for your plant.




