#!/bin/ksh
# $Id: netlint,v 1.66 2012/03/19 15:54:29 ksb Exp $
# Run some checks & report some information on the local host.
#
#****h* /netlint
# NAME
#	netlint
# DESCRIPTION
#	Netlint is designed to report subtle errors in the configuration of
#	many networked hosts.  These might relate to:
#	* Interface duplex/speed
#	* IP address errors
#	* Route errors
#	* Resolver errors (dead nameservers)
#	* Ntp errors (poor network time protocol picks)
#	* Documentation errors (resolved with peg's interface.cl)
#***
#****a* netlint/STDERR_program
# NAME
#	STDERR_program
# FORMAT
#	STDERR_program: text
# DESCRIPTION
#	Any program netlint calls might output errors to stderr.  Some of these
#	are trapped, if such output has been useful in the past to narrow down
#	hard-to-find issues, and reported to the reporter.
# IMPACT
#	These messages just help trace down other issues.
# REMEDIATION
#	Remediate this as best you can.
#***
#****p* netlint/core
# NAME
#	core
# DESCRIPTION
#	The netlint core.
#***
#****p* netlint/peg
# NAME
#	peg
# DESCRIPTION
#	Every host should report to the performance and engineering graphics
#	system.  Those that don't are marked in the netlint report.  A
#	pseudo-attribute is created by the Mason _dhandler for the "/report/"
#	directory that explains which peg host the client reports to, if any.
# IMPACT
#	Lost performance data.
#***

# Search the usual system directories first, then whatever the caller had.
PATH=/usr/local/bin:/usr/bin:/bin:/usr/local/sbin:/usr/sbin:/sbin:/usr/local/etc:$PATH:/etc:/usr/local/libexec
# CARP is the command used to emit each "ATTRIBUTE: value" report line.
: ${CARP:=echo}
progname=`basename $0`
cd /tmp

# save common code for plugins (in ksh at least) -- ksb
# The name is predictable, so claim it with an exclusive (noclobber) create
# rather than a "does it exist yet" test: that closes the window in which
# somebody else could plant a file or symbolic link under the same name.
NETLINT_FUNCS=/tmp/nlfunc$$.$((RANDOM%100))
nlTries=0
until (set -C; : >$NETLINT_FUNCS) 2>/dev/null
do
	nlTries=$((nlTries+1))
	if [ $nlTries -gt 32 ] ; then
		# nothing was created by us, so there is nothing to clean up
		echo "$progname: cannot create a function file under /tmp" 1>&2
		exit 73
	fi
	NETLINT_FUNCS=$NETLINT_FUNCS,$((RANDOM%100))
done
typeset rCache
trap 'rm -rf $NETLINT_FUNCS $rCache' EXIT
# The quoted delimiter keeps the shell from expanding anything below: the
# text is written verbatim and only interpreted when the file is sourced.
cat >$NETLINT_FUNCS <<\!kshsux
# AndIt ip netmask
# AndIt ip prefix-length
# Output the network address of "ip": each octet ANDed with the matching
# octet of the mask.  A second argument of 0..32 is taken as a prefix
# length and expanded to a dotted-quad mask first.  Any other all-digit
# value is reported on stderr as out of range and "ip" is output unchanged.
function AndIt {
	typeset addr=${1?"IP-address"} mask=${2?"netmask"}
	typeset left octet quad

	case $mask in
	[0-9]|[12][0-9]|3[012])
		# build the mask 8 bits at a time, no eval required
		left=$mask
		mask=""
		for quad in 1 2 3 4
		do
			if [ $left -ge 8 ] ; then
				octet=255
				left=$((left-8))
			else
				# keep the top "left" bits of the octet
				octet=$(( (255 << (8-left)) & 255 ))
				left=0
			fi
			mask=${mask:+$mask.}$octet
		done
		;;
	""|*[!0-9]*)
		# not a bare number: use it as a dotted-quad mask
		;;
	*)
		echo "$progname: $mask: CIDR out of range" 1>&2
		echo "$addr"
		return 0
		;;
	esac
	# split both dotted quads into $1..$4 (address) and $5..$8 (mask)
	set _ `echo $addr $mask | tr '.' ' '`
	shift
	echo "$(($1&$5)).$(($2&$6)).$(($3&$7)).$(($4&$8))"
}

# Return FQDN given a host. (petef,ksb)
# Output is always one name; when nothing better is found the name given
# is output unchanged.  Scratch files are /tmp/fqdn.$$ plus .a and .b.
function FQDN {
	# if we got localhost, return our hostname
	case $1 in
	localhost|localhost.*)
		set _ `hostname`
		shift
		;;
	esac
	typeset H=${1-'provide host'} FTMP=/tmp/fqdn.$$
	# keep the scratch names out of the caller's scope
	typeset RE N IP

	# get an IP address/name from nslookup for $H (resolv.conf helps us here)
	nslookup $H >$FTMP 2>&1
	if egrep "can't find" $FTMP >/dev/null ; then
		# $(...) is required here: inside backquotes the shell reduces
		# \\ to \ first, and sed would then replace "." with ".".
		RE=$(echo $H | sed -e 's/[.]/\\./g')
		# Is it in /etc/hosts, assume "ip fqdn [alias]" lines
		# NOTE(review): the redirection and filter on the next line were
		# missing from the source (text between "<" and ">" was lost);
		# this match on a whole name field is a reconstruction -- confirm.
		if tr -s '\t ' ' ' </etc/hosts | egrep "^[^#]* $RE( |\$)" >$FTMP ; then
			sed -n -e '2,$d' -e 's/^[^ ]* \([^ ]*\).*/\1/p' $FTMP | sed -e 's/[.]*$//'
		else
			echo $H
		fi
		rm $FTMP*
		return 0
	fi

	# if we got more than one IP back (RR), just return the whole name we got back
	N=`tr -s '\t ' ' ' <$FTMP |sed -ne 's/.*[Nn][Aa][Mm][Ee] *= *//p' |sed -e 's/[.]*$//'`
	: ${N:=$H}
	sed -n -e 's,^Addr[es:]* \(.*\),\1,p' <$FTMP | tr ',' '\012' | tr -d ' ' >$FTMP.a
	[ -s $FTMP.a ] || {
		echo $N
		rm $FTMP*
		return 0
	}
	# reverse every address (lines with a "#" are dropped) and collect
	# the distinct names
	for IP in `sed -e '/#/d' $FTMP.a`
	do
		nslookup $IP >$FTMP 2>&1
		egrep "can't find" $FTMP >/dev/null && continue
		sed -ne 's/.*[Nn][Aa][Mm][Ee] *= *//p' $FTMP
	done | sed -e 's/[.]*$//' |sort -u >$FTMP.b
	[ `wc -l <$FTMP.b` -eq 1 ] && {
		cat $FTMP.b
		rm $FTMP*
		return 0
	}

	# IPs don't reverse to something consistent; return the name
	# we were given.
	echo $H
	rm $FTMP*
	return 0
}
!kshsux
# Load the functions into this shell as well.  The operand of "." is the
# first word of the line that follows, hence the continuation.
. \
$NETLINT_FUNCS	# operand of the "." command that ends the previous line

# parse our options
usage="$progname: usage [-t timezone]
$progname: usage -R host [-p port] [-t timezone]
$progname: usage -h | -F | -V"

# how to slide a single letter option off the beginning of a bundle
# -barf -> -arf
slide='P=$1; shift; set _ -`expr _"$P" : _'\''-.\(.*\)'\''` ${1+"$@"}; shift'
# eval'd by options that take a value: complain and exit when it is missing
param='if [ $# -lt 2 ]; then echo "$progname: missing value for $1" 1>&2 ; exit 1; fi'

# default values for all the flags, or leave unset for a ${flag-value} form
GOODTZ="C[SD]T"
typeset rPORT rREPO

# NOTE(review): in the source everything from the redirection after
# "rPORT=`tr -s '\t ' ' '" up to the "2>/dev/null | sed" of the -V listing
# was lost, and so was the text from "cat <" (-h) up to ">&2" (bad option).
# The code from here to "done" is rebuilt from the usage message and the
# surviving fragments; confirm each piece against the master source.
#
# Presumably the default port came from a service entry; "netlint" is a
# guess at its name.  When no entry is found rPORT stays empty and rsync
# uses its own default port.
rPORT=""
if [ -r /etc/services ] ; then
	rPORT=`tr -s '\t ' ' ' </etc/services | sed -n -e 's,^netlint \([0-9][0-9]*\)/tcp.*,\1,p' | sed -e 1q`
fi

while [ $# -gt 0 ]
do
	case "$1" in
	-F|-F*)
		# NOTE(review): presumably shows the functions given to plugins
		cat "$NETLINT_FUNCS"
		exit 0
		;;
	-p)
		eval "$param"
		rPORT=$2
		shift ; shift
		;;
	-p*)
		rPORT=`expr _"$1" : _'-.\(.*\)'`
		shift
		;;
	-R)
		eval "$param"
		rREPO=$2
		shift ; shift
		;;
	-R*)
		rREPO=`expr _"$1" : _'-.\(.*\)'`
		shift
		;;
	-t)
		eval "$param"
		GOODTZ=$2
		shift ; shift
		;;
	-t*)
		GOODTZ=`expr _"$1" : _'-.\(.*\)'`
		shift
		;;
	-V|-V*)
		echo "$progname: "'$Id: netlint,v 1.66 2012/03/19 15:54:29 ksb Exp $'
		# NOTE(review): the directory listed is assumed to be the local
		# plugin directory used at the end of the script
		ls /usr/local/libexec/netlint-plugins 2>/dev/null | sed -e "s,^,$progname: plugin ,"
		exit 0
		;;
	--)
		shift
		break
		;;
	-h|-h*)
		cat <<HERE
$usage
F           output the shell functions provided to plugins
h           print this help message
p port      the port of the remote module repository
R host      fetch the modules from this rsync repository
t timezone  a regular expression matching the acceptable timezones
V           show version information
HERE
		exit 0
		;;
	-*)
		echo "$usage" 1>&2
		exit 1
		;;
	*)
		# process and continue for intermixed options & args
		break
		;;
	esac
done

#****a* netlint/REPO
# NAME
#	REPO
# FORMAT
#	REPO: host:port
# DESCRIPTION
#	When using a remote module directory, explain where it came from.
# REMEDIATION
#	None.
#***
# ksb uses mktemp here, but fears it is still not portable -- nor is rsync
if [ ! -z "$rREPO" ] ; then
	export rREPO rPORT
	# the cache directory is removed by the EXIT trap (it names $rCache)
	rCache=$(mktemp -d ${TMPDIR:=/tmp}/nrCc$$XXXXXX) || exit 70
	cd $rCache || exit 75
	rsync -q ${rPORT:+"--port=$rPORT"} $rREPO::netlint/ . || {
		$CARP "REPO_FAILED: $rREPO:$rPORT with $?"
		exit 75
	}
	$CARP "REPO: $rREPO:$rPORT"
fi

#****a* core/OS
# NAME
#	OS
# FORMAT
#	OS: uname
# DESCRIPTION
#	The operating system of the host as determined by uname.
#	The netlint attribute list includes this to allow site policy to
#	make recommendations based on the node's OS.
# REMEDIATION
#	There should be no remediation needed for an OS attribute, unless
#	site policy marks it as "unsupported".  Remediate by supporting
#	the new OS, or cross-grading the node to another, supported one.
#***
OS=`uname -s`
$CARP "OS: $OS"

#****a* core/VERSION
# NAME
#	VERSION
# FORMAT
#	VERSION: cvs_version
# DESCRIPTION
#	The version of the netlint script that produced the report.
#	This should be the same as the version of netlint installed on
#	the reporter host.
# REMEDIATION
#	When it is out-of-sync, the reporter expects you to install a
#	common version on every host.  Update those that are out of date.
#***
# The expr pattern pulls the revision (the word after the file name) out
# of the RCS Id keyword.
$CARP "VERSION: `expr '$Id: netlint,v 1.66 2012/03/19 15:54:29 ksb Exp $' : '.Id: [^ ]* \([^ ]*\) .*'`"

#****a* core/TIME_TZ
# NAME
#	TIME_TZ
# FORMAT
#	TIME_TZ: timezone
# DESCRIPTION
#	The timezone of the host is reported if it didn't match the squelch
#	value.  This might not be an error if local policy allows the host to
#	be in a different timezone.
# IMPACT
#	Having the wrong timezone on a node is as bad as having the clock off
#	by hours.  Log files and transaction logs won't align with other hosts.
# REMEDIATION
#	Install the correct squelch value from the master-source.  The command
#	line option -t specifies a regular expression to match good timezones
#	at run-time.
#***
# check the timezone
# Ask date for the zone name directly: the position of the zone in the
# default output depends on the locale, so picking the fifth word is fragile.
TESTTZ=`date +%Z | egrep -v "$GOODTZ"`
if [ -n "$TESTTZ" ] ; then
	$CARP "TIME_TZ: $TESTTZ"
fi

# Build a list of network interfaces we want to look at; we look at all the
# interfaces from netstat -ni, and ignore ones we don't want - then go
# platform specific and get the IP & broadcast for each interface, and
# record the subnet for each interface.
IFACE_LIST=''
IP_LIST=''
# first column of netstat, less the header words, loopback/tunnel style
# names, starred entries and anything with a colon; joined on one line
INTS=`(netstat -niW 2>/dev/null || netstat -ni) | cut -d" " -f1 | egrep -v '(^(Name|Kernel|Iface)|^lo|^tun|^lan0:|^faith|^vlan|^lp|\*)' | sort -u | grep -v ':' | paste -d" " -s -`

# the '-c 1' syntax isn't valid on older Solaris
PING='ping -c 1'
ARP='arp -a -n'

# Make sure we get all of the interfaces (especially aliases).
# HexToMask hexmask
# Output the dotted-quad form of an 8 digit hexadecimal netmask, with or
# without a leading "0x".  Anything else outputs nothing and returns 1.
function HexToMask {
	typeset hex=${1#0x} rest a b c d

	case $hex in
	[0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F][0-9a-fA-F])
		;;
	*)
		return 1
		;;
	esac
	# peel the digits off two at a time
	a=${hex%??????}
	rest=${hex#??}
	b=${rest%????}
	rest=${rest#??}
	c=${rest%??}
	d=${rest#??}
	echo "$((16#$a)).$((16#$b)).$((16#$c)).$((16#$d))"
}

# MaskToCIDR netmask
# Output the prefix length of a dotted-quad netmask.  A mask that is not
# contiguous, or that holds an octet no netmask can have, is output
# unchanged; an empty mask outputs an empty line.
function MaskToCIDR {
	typeset bits=0 short=0 add octet

	if [ -z "$1" ] ; then
		echo ""
		return 0
	fi
	for octet in `echo "$1" | tr '.' ' '`
	do
		case $octet in
		255)	add=8 ;;
		254)	add=7 ;;
		252)	add=6 ;;
		248)	add=5 ;;
		240)	add=4 ;;
		224)	add=3 ;;
		192)	add=2 ;;
		128)	add=1 ;;
		0)	add=0 ;;
		*)
			echo "$1"
			return 0
			;;
		esac
		# after the first octet short of 255 only zeros may follow
		if [ $short -eq 1 -a $add -ne 0 ] ; then
			echo "$1"
			return 0
		fi
		[ $add -lt 8 ] && short=1
		bits=$((bits+add))
	done
	echo $bits
}

# Solaris names its aliases "if:N"; netstat's list above drops every name
# with a colon, so add the ones that are up back in.
# NOTE(review): the bracket expressions written here as blank-plus-tab (and
# the tr '\t' below) show only a blank in the source; a tab was presumably
# lost when the file was flattened -- confirm.
case $OS in
SunOS)
	INTS="$INTS `ifconfig -a | sed -n -e 's/[ 	].*UP.*//p' | egrep -v '(^lo)' | sed -n -e 's,:$,,p' | fgrep :`"
	;;
esac

for int in $INTS; do
	BROADCAST=""; IP=""; HEXMASK=""; MAC=""; NETMASK=""
	# first guess at the media from the interface name
	case $int in
	lo[0-9]*)
		IF_TYPE="loopback"
		;;
	fddi[0-9]*)
		IF_TYPE="fddi"
		;;
	tun[0-9]*|slip[0-9]*|ppp[0-9]*)
		IF_TYPE="ppp"
		;;
	pflog[0-9]*)
		IF_TYPE="PF"
		;;
	*)
		IF_TYPE="ether"
		;;
	esac

	# Each platform fills in the address, the mask (dotted or hex), the
	# broadcast address, the MAC and the error count, and picks the ping
	# and arp spellings that work there.
	case $OS in
	SunOS)
		BROADCAST=`ifconfig $int | grep 'inet ' | cut -d" " -f6`
		IP=`ifconfig $int | grep 'inet ' | cut -d" " -f2`
		HEXMASK=`ifconfig $int | grep 'inet ' | cut -d" " -f4`
		MAC=`/usr/sbin/arp $IP | cut -d" " -f4`
		ERRORS=`netstat -ni | grep "^$int" | awk '{print $6}' | head -1`
		PING='ping'
		BPING='ping -s %1 64 9'
		ARP='arp -a'
		case $int in
		nf[0-9]*|fddi[0-9]*)
			IF_TYPE="fddi"
			;;
		tr[0-9])
			# Because we can't see the MAC w/o ifconfig as root,
			# we usually get unknown here -- ksb
			# (ask about this interface, not always tr0)
			MAC=`ifconfig $int | sed -n -e 's/^[ 	]*ether *//p'`
			PING=":"
			: ${MAC:="unknown"}
			IF_TYPE="token"
			;;
		esac
		;;
	Linux)
		BROADCAST=`ifconfig $int | awk '/inet / { sub("Bcast:","",$3); print $3 }'`
		IP=`ifconfig $int | awk '/inet / { sub("addr:","",$2); print $2 }'`
		NETMASK=`ifconfig $int | awk '/inet / { sub("Mask:","",$4); print $4 }'`
		MAC=`ifconfig $int | awk '/HWaddr/ { print $5 }'`
		ERRORS=`ifconfig $int | awk '/(R|T)X packets/ {sub("errors:","",$3); TOTERR+=$3;} END {print TOTERR}'`
		BPING='ping -b -c 3'
		ARP='arp -a -n'
		;;
	AIX)
		BROADCAST=`ifconfig $int | grep 'inet ' | head -1 | cut -d" " -f6`
		IP=`ifconfig $int | grep 'inet ' | head -1 | cut -d" " -f2`
		HEXMASK=`ifconfig $int | grep 'inet ' | head -1 | cut -d" " -f4`
		MAC=`netstat -ni | grep "^$int" | grep link | awk '{print $4}'`
		ERRORS=`netstat -ni | grep "^$int" | awk '{print $6}' | head -1`
		BPING='ping -c 3'
		ARP='arp -an'
		;;
	FreeBSD)
		BROADCAST=`ifconfig $int | grep 'inet ' | head -1 | cut -d" " -f6`
		IP=`ifconfig $int | grep 'inet ' | head -1 | cut -d" " -f2`
		HEXMASK=`ifconfig $int | grep 'inet ' | head -1 | cut -d" " -f4`
		MAC=`ifconfig $int | grep 'ether ' | cut -d" " -f2`
		ERRORS=`netstat -ni | grep "^$int" | awk '{print $6}' | head -1`
		BPING='ping -c 3'
		;;
	OpenBSD|NetBSD)
		BROADCAST=`ifconfig $int | grep 'inet ' | head -1 | cut -d" " -f6`
		IP=`ifconfig $int | grep 'inet ' | head -1 | cut -d" " -f2`
		HEXMASK=`ifconfig $int | grep 'inet ' | head -1 | cut -d" " -f4`
		MAC=`ifconfig $int | sed -n -e 's/^[ 	]*address[: ]*//p'`
		ERRORS=`netstat -ni | grep "^$int" | awk '{print $6}' | head -1`
		BPING='ping -c 3'
		;;
	HP-UX)
		BROADCAST=`ifconfig $int | awk '/inet / { print $6 }'`
		IP=`ifconfig $int | awk '/inet / { print $2 }'`
		HEXMASK=`ifconfig $int | awk '/inet / { print $4 }'`
		MAC=`lanscan | grep $int | head -1 | awk '{ print $2 }'`
		ERRORS=0
		PING='ping'
		BPING='ping -s %1 64 9'
		ARP='arp -a'
		if lanscan | tr '\t' ' ' | grep " $int[ 0-9]*FDDI " >/dev/null ; then
			IF_TYPE="fddi"
		fi
		;;
	*)
		echo "netlint doesn't yet support $OS" 1>&2
		exit 1
		;;
	esac

	# sun interface plumbed, but not active.
	if [ _"0" = _"$HEXMASK" -o _"0" = _"$NETMASK" ]; then
		continue
	fi

	# hex -> netmask
	if [ -z "$NETMASK" -a ! -z "$HEXMASK" ]; then
		NETMASK=`HexToMask "$HEXMASK"`
	fi
	if [ -z "$NETMASK" -a -z "$BROADCAST" ] ; then
		continue
	fi

	# netmask -> /N CIDR notation, if contiguous
	CIDR=`MaskToCIDR "$NETMASK"`

	# record uniq subnets, IPs and interfaces
	SUBNET=`AndIt $IP $NETMASK`
	if [ x"127.0.0.1" != x"$IP" ] ; then
		if [ _"$IF_TYPE" = _token ] ; then
			: 'do not put in IP list'
		else
			# The list is blank separated, so look for the entry
			# between blanks; the old test looked for ", subnet"
			# and never matched, letting duplicates through.
			# NOTE(review): IP_SUBNETS below strips a leading ", "
			# that this list never has; the separator is left alone
			# here so the report format does not change.
			case " $SUBNET_LIST " in
			*" $SUBNET/$CIDR "*)
				;;
			*)
				SUBNET_LIST="$SUBNET_LIST $SUBNET/$CIDR"
				;;
			esac
		fi
	fi
	IP_LIST="$IP_LIST $IP"
	IFACE_LIST="$IFACE_LIST, $int($IP $MAC $SUBNET/$CIDR $IF_TYPE)"

	#****a* core/ERRORS
	# NAME
	#	ERRORS
	# FORMAT
	#	ERRORS_interface: integer
	# DESCRIPTION
	#	The number of errors on an
	#	interface if that number is available, zero otherwise.
	#
	#	A host's network interface may accumulate random errors from time
	#	to time.  Transceivers are not perfect, cables may be run close to
	#	sources of noise, or the link partner might reboot or reset the
	#	peer interface.  For small increments, this is normally not a
	#	cause for alarm.
	# IMPACT
	#	High error counts on an interface that have an explanation have no
	#	impact on the system.  Increasing error counts indicate delayed
	#	network traffic and might slow all network applications.  High
	#	error counts might indicate a duplex issue; check those carefully.
	#	Low error counts usually come from host reboots and/or switch port
	#	resets (viz. when the switch is reconfigured).
	# REMEDIATION
	#	* Assure that the switch and the host are connected as documented
	#	  (e.g. PEG's interface.cl).
	#	* Assure that the switch port and the host are set to the same
	#	  duplex and speed.
	#	* Check for hardware errors, e.g. bad network interface controller
	#	  (NIC) on the host, bad port on the switch, bad cable.
	#	* Schedule a time to stress test the interface, and/or reboot the
	#	  host.
	#	* Schedule a time to swap the interface on the host or switch side
	#	  to isolate the cause.
	#***
	# Ignore errors on Solaris colon interfaces, else output them.
	if expr "$OS" : "SunOS.*" >/dev/null && expr "$int" : ".*:.*" >/dev/null; then
		:
	else
		$CARP "ERRORS_$int: $ERRORS"
	fi

	# Check for a peer on the network, unless it is a token ring interface
	# or a /32 alias interface
	[ x"$int" = x"tr0" ] && continue
	# compare as text: CIDR is a dotted mask when it is not contiguous
	[ x"$CIDR" = x"32" ] && continue

	# ping the broadcast address so we have some arp entries for this
	# network if it's alive
	xapply "$BPING" $BROADCAST 2>/dev/null >/dev/null

	# Look in the arp table and see how many IPs on $int's subnet we see.
	# If we're on Solaris, only check for the interfaces.
	if expr "$OS" : "SunOS.*" >/dev/null; then
		COUNT=`$ARP | grep -c "^${int%%:*}[ 	]"`
	else
		ARPADDRS=`$ARP | sed -n -e 's,.*[^0-9]\([0-9]*\.[0-9]*\.[0-9]*\.[0-9]*\).*,\1,p'`
		COUNT=0
		for addr in $ARPADDRS
		do
			NET=`AndIt $addr $NETMASK`
			[ x"$NET" = x"$SUBNET" ] && COUNT=$((COUNT+1));
		done
	fi

	#****a* core/IP_PEERS
	# NAME
	#	IP_PEERS
	# FORMAT
	#	IP_PEERS_interface: ip/prefix: integer
	# DESCRIPTION
	#	This is an estimate of the number of peer interfaces that could be
	#	reached via ping on the attached network using the broadcast
	#	address advertised on the interface.
	# IMPACT
	#	Sometimes we might guess that the out-of-sync host is wrong, when
	#	in fact the other hosts on the subnet are misconfigured.  Don't
	#	always believe that the configuration with the most "votes" is
	#	correct.  Always follow the local site policy.
	# REMEDIATION
	#	Site policy in ~netlint/lib/peers (and peers.map) implements a
	#	filter which complains about interfaces that have too few peers.
	#	This might indicate that either:
	#	* The interface is configured with the wrong netmask, broadcast,
	#	  or IP.
	#	* The interface is plugged into the wrong port or switch.
	#	* The switch port is on the wrong VLAN.
	#	* The other hosts are not configured correctly.
	#
	#	Login to the host and debug all these issues to remediate the
	#	error or change site policy to grant the exception.
	# NOTES
	#	The IP configuration for a host is kept in different files based
	#	on the OS of the node and the purpose of the data.  Remediate that
	#	configuration from the appropriate controlled source.
	#***
	$CARP "IP_PEERS_$int: $IP/$CIDR: $COUNT"
done

#****a* core/IP_SUBNETS
# NAME
#	IP_SUBNETS
# FORMAT
#	IP_SUBNETS: list
# DESCRIPTION
#	List of all subnets the host is connected to.
#
#	This is not an error, but is used by the reporter to send data to the
#	modules.
#
#	This might be presented in the report as an anomalous result.  For
#	example, if two hosts have different default routes on the same
#	subnet.  Such results are in the scope of site policy, not hard errors.
#***
$CARP "IP_SUBNETS: ${SUBNET_LIST#, }"

#****a* core/IP_IFS
# NAME
#	IP_IFS
# FORMAT
#	IP_IFS: list
# DESCRIPTION
#	List of all interfaces on the host.
#
#	This is not an error, but is used by the reporter to send data to the
#	modules.
#***
$CARP "IP_IFS: ${IFACE_LIST#, }"

#****a* core/IP_ROUTES
# NAME
#	IP_ROUTES
# FORMAT
#	IP_ROUTES: list
# DESCRIPTION
#	List of all routes from the host.
#
#	This is not an error, but is used by the reporter to send data to the
#	modules.
#***
# record the default route and multicast route
# (one entry per line from the inner group, joined with ", " by tr and sed)
$CARP "IP_ROUTES: "`( (
	netstat -rn | egrep '^(22[4-9]|2[345][0-9])\.[0-9]*\.[0-9]*\.[0-9]*' | awk '{print "multicast("$1,$2")"}'
	netstat -rn | egrep '^(default|0\.0\.0\.0)' | awk '{print "default("$2")"}' | sort -u
	) | tr '\012' ','; echo "") | sed -e 's|,$||' -e 's|,|, |g'`

# Run any plugins if we've got them, pass useful environment variables.
# Everything a plugin may want goes through the environment;
# NETLINT_FUNCS names the file of shared shell functions it can source.
export OS INTS IFACE_LIST IP_LIST SUBNET_LIST PATH CARP NETLINT_FUNCS
# The command template handed to xapply runs a name only when it is a
# plain file, which also covers a pattern that matched nothing.
if [ -z "$rREPO" ] ; then
	# no repository named: use the locally installed plugins
	xapply "[ -f '%1' ] && exec %1" /usr/local/libexec/netlint-plugins/*
else
	# a repository was named: run what is in the current directory
	xapply "[ -f '%1' ] && exec %1" ./*
fi
exit 0