Support Support Downloads Knowledge Base Case Manager My Juniper Community

Knowledge Base

Search our Knowledge Base sites to find answers to your questions.

Ask All Knowledge Base Sites: JunosE Defect (KA) | Knowledge Base | Security Advisories | Technical Bulletins | Technotes. Sign in to display secure content and recently viewed articles.

Standalone IDP reboots after IDP service is stopped

0

0

Article ID: KB28945 | KB Last Updated: 15 May 2014 | Version: 1.0
Summary:

The IDP series appliance reboots after the sh idp.sh stop command is issued, the service idp stop command is issued, or the restart equivalent is issued.

Symptoms:

The problem is that the IDP reboots after the IDP service is stopped on any version of IDP code, including the latest, which, as of this article, is 5.1R3.

Cause:

The reboot occurs because of a faulty script located at /usr/idp/device/bin/nicBypass.

Solution:

The solution is to replace the faulty nicBypass script with an updated script.

When replacing the script, make sure to use a proper text editor so that no control or formatting characters interfere with the script.

The script is listed below.

Be sure to log onto the IDP as root, and remember that the file permissions and file owner should be as follows:

-r-x------ 1 idp idp 24785 Jan 13 2012 nicBypass  (disregard the file size)

After the script is replaced, start IDP with the service idp start command, then stop it again with the service idp stop command. The IDP series appliance should not reboot.

Replacement Script

#! /bin/bash

# Absolute paths of the process utilities that programExit uses to
# locate and signal the idpinit process.
KILL=/bin/kill
PIDOF=/sbin/pidof

# nicBypass
# Print the current date as YYYYMMDD by running the date command held in
# $DATE. The script uses the result as the log file name suffix.
datenow() {
  $DATE "+%Y%m%d"
}

# getslavenic MASTERNIC
#
# Print (without a trailing newline) the name of the slave NIC that is paired
# with MASTERNIC. Lookup order:
#   1. /proc/net/bypass/bypass_<MASTERNIC>/bypass_slave, when it is readable.
#   2. A fixed table selected by $IDPPLATFORM.
#   3. Platforms 75, 250, 800 and 8200 with no table hit: search $NICBUSMAP
#      (entries are colon-separated, "name:f2:f3:f4") for the NIC whose
#      second field equals the master's and whose fourth field is 1. The
#      search is only attempted when the master's own fourth field is 0;
#      the literal "placeholder" is printed when nothing is found.
# Nothing is printed when no rule applies.
#
# Globals read:    IDPPLATFORM, NICBUSMAP, CAT, PRINTF, SPEW, DEBUG
# Globals written: SLAVENICFILE
getslavenic() {
  log $SPEW 'Entered getslavenic()'

  local masternic=$1
  local slavenic
  local token nicname OIFS
  # Start empty on every call so a failed search cannot reuse stale values.
  local MASTERNICBUS='' MASTERNICBUSFUNC=''

  # Build the path from the argument, not from the caller's global $nic:
  # callers such as `getslavenic $DEVICE` do not keep $nic in step with it.
  SLAVENICFILE="/proc/net/bypass/bypass_$masternic/bypass_slave"
  if [[ -r $SLAVENICFILE ]] ; then
    log $DEBUG "$SLAVENICFILE exists"
    slavenic=`$CAT $SLAVENICFILE`
  else
    log $DEBUG "$SLAVENICFILE does not exist"
    case $IDPPLATFORM in
      50)
        if [[ $masternic == eth1 ]] ; then
          slavenic=eth2
        fi
        ;;
      200)
        case "$masternic" in
          eth2)
            slavenic=eth3
            ;;
          eth4)
            slavenic=eth5
            ;;
          eth6)
            slavenic=eth7
            ;;
          eth8)
            slavenic=eth9
            ;;
        esac
        ;;
      600C | 1100C)
        case "$masternic" in
          eth2)
            slavenic=eth3
            ;;
          eth4)
            slavenic=eth5
            ;;
          eth6)
            slavenic=eth7
            ;;
          eth8)
            slavenic=eth9
            ;;
          eth10)
            slavenic=eth11
            ;;
        esac
        ;;
      600F | 1100F)
        if [[ $masternic == eth2 ]] ; then
          slavenic=eth3
        fi
        ;;
      75 | 250 | 800 | 8200)
        # Fixed pairs that exist only on the 75 and 800
        case "$IDPPLATFORM" in
          75 | 800)
            case "$masternic" in
              eth2)
                slavenic=eth1
                ;;
              eth3)
                slavenic=eth2
                ;;
            esac
            ;;
        esac

        if [[ -z $slavenic ]] ; then
          slavenic=placeholder

          # Find master NIC in NICBUSMAP
          for token in $NICBUSMAP ; do
            OIFS=$IFS
            IFS=$OIFS:
            set -- $token
            IFS=$OIFS
            nicname=$1
            if [[ $nicname == $masternic ]] ; then
              MASTERNICBUS=$2
              MASTERNICBUSFUNC=$4
              break
            fi
          done

          if [[ $MASTERNICBUSFUNC -eq 0 ]] ; then
            # Look for matching NIC in NICBUSMAP
            for token in $NICBUSMAP ; do
              OIFS=$IFS
              IFS=$OIFS:
              set -- $token
              IFS=$OIFS
              # Slave nic has same bus ID as master NIC and bus function 1
              if [[ "$MASTERNICBUS:1" == "$2:$4" ]] ; then
                slavenic=$1
                break
              fi
            done
          fi
        fi
        ;;
    esac
  fi

  log $DEBUG "Slave NIC for $masternic is $slavenic"
  # Pass the name as data so it is never interpreted as a printf format.
  $PRINTF '%s' "$slavenic"

  log $SPEW 'Exiting getslavenic()'
}

# log LEVEL MESSAGE
#
# Emit MESSAGE when the configured $LOGLEVEL is at least LEVEL.
# Levels: NONE=0 CRITICAL=1 ERROR=2 WARN=3 INFO=4 DEBUG=5 SPEW=6 SPEW2=7.
# Each entry is formatted as "<now>:[<level name>]:<message>".
#
# Destination is chosen by $SO_PRINTER_WHERE:
#   screen - stdout only
#   both   - stdout and $LOGFILE
#   file   - $LOGFILE only (also used for any other or unset value)
#
# Globals read: LOGLEVEL, SO_PRINTER_WHERE, LOGFILE, ECHO, TEE
log() {
  if [[ $LOGLEVEL -ge $1 ]] ; then
    local LOGENTRY="`now`:[`resolveLogLevel $1`]:$2"
    case "$SO_PRINTER_WHERE" in
      screen)
        # log to screen
        $ECHO "$LOGENTRY"
        ;;
      both)
        # log to file and screen; -a appends, plain tee would truncate the
        # log file on every message
        $ECHO "$LOGENTRY" | $TEE -a "$LOGFILE"
        ;;
      *)
        # log to file ("file" and the default)
        $ECHO "$LOGENTRY" >> "$LOGFILE"
        ;;
    esac
  fi
}

# Print the current timestamp as YYYYMMDDHHMMSS by running the date command
# held in $DATE. log() uses it as the prefix of every entry.
now() {
  $DATE "+%Y%m%d%H%M%S"
}

# programExit [CODE]
#
# Shutdown path, also installed as the SIGINT/SIGTERM handler: restore or
# park the NICs, remove the pid file, signal idpinit and exit.
# CODE is the exit status; it defaults to 0 when missing or empty.
#
# Globals read:    HWPLATFORM, RM, PIDFILE, PIDOF, KILL, SPEW, WARN
# Globals written: IDPINITPID
programExit() {
  log $SPEW 'Entered programExit()'

  # Every platform except Supermicro2 puts its NICs back into normal state
  # first; "nosendlog" makes setnormal skip its scio log events.
  [[ $HWPLATFORM == Supermicro2 ]] || setnormal nosendlog

  # Apply the IDP-inactive state to NICs that have one configured
  setidpinactive

  # # Kill smr procs
  # for pid in $SMRPIDS ; do
  # $PS p $pid > /dev/null
  # if [ $? -eq 0 ] ; then
  # /bin/kill $pid 2> /dev/null
  # fi
  # done

  # Drop our pid file
  $RM -f $PIDFILE

  # Tell idpinit (via SIGUSR2) to stop monitoring network outage
  IDPINITPID=$($PIDOF idpinit)
  $KILL -SIGUSR2 $IDPINITPID 2> /dev/null

  log $SPEW 'Exiting programExit()'

  local status=${1:-0}
  log $WARN "Exit code $status"
  exit $status
}

# SIGHUP handler: the signal is deliberately ignored; only log entries are
# written so the event is visible at SPEW/DEBUG verbosity.
receiveHup() {
  log $SPEW "Entered receiveHup()"
  log $DEBUG "Ignoring HUP signal"
}

# resolveLogLevel LEVEL
#
# Print the symbolic name of a numeric log level:
#   0 NONE, 1 CRITICAL, 2 ERROR, 3 WARN, 4 INFO, 5 DEBUG, 6 SPEW, 7 SPEW2
# Any other argument prints nothing.
resolveLogLevel() {
  # The array index is the numeric level
  local -a level_names=(NONE CRITICAL ERROR WARN INFO DEBUG SPEW SPEW2)

  case "$1" in
    [0-7])
      echo "${level_names[$1]}"
      ;;
  esac
}

# setidpinactive
#
# Put every NIC listed in $BYPASSNICS_IDPINACTIVE into its configured
# IDP-inactive state. Entries are "nic:setting":
#   setting 1     -> "bypass" state  (set_dis_bypass off, log type nic_bypass)
#   anything else -> "off" state     (set_dis_bypass on,  log type nic_off)
# eth0 and eth1 are always skipped, and so is eth2 on platform 800.
#
# For each NIC the bypass and system-timer watchdogs are switched off,
# bypass is requested up to MAX_ATTEMPTS times, the power-up relay is
# enabled, and on success a scio log event is raised in the background.
#
# Globals read:    BYPASSNICS_IDPINACTIVE, IDPPLATFORM, HWPLATFORM, BPCTL2,
#                  GREP, ETHTOOL, LSPCI, SED, IDPDIR, LOGFILE
# Globals written: nic, slavenic, bypasssetting, OUTPUT, ATTEMPTS,
#                  MAX_ATTEMPTS, STATE, DIS_BYPASS_RELAY, BYPASS_LOG_TYPE,
#                  RETCODE
setidpinactive() {
  # Set IDP-inactive state for NICs
  for token in $BYPASSNICS_IDPINACTIVE ; do
    # Split "nic:setting" into positional parameters
    OIFS=$IFS
    IFS=$IFS:
    set -- $token
    IFS=$OIFS
    nic=$1

    case "$nic" in
      eth0 | eth1)
        continue
        ;;
      eth2)
        # Skip eth2 on IDP 800
        if [[ $IDPPLATFORM == 800 ]] ; then
          continue
        fi
        ;;
    esac

    slavenic=`getslavenic $nic`
    bypasssetting=$2
    log $SPEW "nic: $nic, slavenic: $slavenic, bypasssetting: $bypasssetting"

    log $DEBUG "Running $BPCTL2 $nic set_bypass_wd off"
    $BPCTL2 $nic set_bypass_wd off > /dev/null 2>&1

    log $DEBUG "Running $BPCTL2 $nic set_sys_timer_wd off"
    $BPCTL2 $nic set_sys_timer_wd off > /dev/null 2>&1 &

    OUTPUT=''
    ATTEMPTS=0
    MAX_ATTEMPTS=10

    case "$bypasssetting" in
      1)
        STATE=bypass
        DIS_BYPASS_RELAY=off
        BYPASS_LOG_TYPE=nic_bypass
        ;;
      *)
        STATE=off
        DIS_BYPASS_RELAY=on
        BYPASS_LOG_TYPE=nic_off
        ;;
    esac

    # Flip dis_bypass relay
    log $DEBUG "Running $BPCTL2 $nic set_dis_bypass $DIS_BYPASS_RELAY ; $BPCTL2 $nic set_bypass on ; $BPCTL2 $nic get_bypass | $GREP -q 'in the Bypass mode'"
    $BPCTL2 $nic set_dis_bypass $DIS_BYPASS_RELAY > /dev/null 2>&1

    # Put NICs into bypass state immediately
    # NOTE(review): RETCODE is get_bypass's exit status, not whether the
    # expected text was seen, so exhausting all attempts can still leave
    # RETCODE at 0 — confirm this is intended.
    while [[ $ATTEMPTS -lt $MAX_ATTEMPTS ]] ; do
      $BPCTL2 $nic set_bypass on > /dev/null 2>&1
      OUTPUT=`$BPCTL2 $nic get_bypass 2>&1`
      RETCODE=$?
      if [[ $OUTPUT =~ 'in the Bypass mode' ]] ; then
        break
      fi
      ((++ATTEMPTS))
    done
    if [[ $RETCODE -ne 0 ]] ; then
      log $DEBUG "ERROR: Could not put $nic into bypass state"
    fi

    # If 8086:10a9 1304:0900 and switching NICs to NICs off state, skip return code check
    if [[ $bypasssetting -ne 1 ]] ; then
      local BUSINFO=`$ETHTOOL -i $nic | $SED '/^bus-info/!d ; s/^bus-info: //g'`
      local DEVICEINFO=''
      for token in `$LSPCI -nvs $BUSINFO | $SED '/0200:\|Subsystem/!d ; s/.* 0200: //g ; s/.*Subsystem: //g ; s/ .*//g'` ; do
        DEVICEINFO="$DEVICEINFO $token"
      done
      if [[ $DEVICEINFO == ' 8086:10a9 1304:0900' ]] ; then
        log $DEBUG "Skip return code check on $nic"
        RETCODE=0
      fi
    fi

    # Set power-up relay setting
    # NOTE(review): on hardware other than Supermicro the "Failed" branch
    # is logged even when the command reported success — confirm intent.
    $BPCTL2 $nic set_bypass_pwup on | $GREP -q success
    if [[ $? -eq 0 && $HWPLATFORM == Supermicro ]] ; then
      log $WARN "Enabled bypass_pwup for $nic"
    else
      log $WARN "ERROR: Failed to enable bypass_pwup for $nic"
    fi

    # Report new state up to NSM or error out
    if [[ $RETCODE -eq 0 ]] ; then
      log $WARN "$nic in $STATE state"
      # The redirect must precede '&'; after it, it would apply to an
      # empty command and scio's stderr would not be silenced.
      $IDPDIR/device/bin/scio loggen $BYPASS_LOG_TYPE $nic,$slavenic 2> /dev/null &
    else
      log $WARN "ERROR: Failed to put $nic into $STATE state"
    fi

    log $DEBUG "SK: Running $BPCTL2 $nic set_bypass_wd off"
    $BPCTL2 $nic set_bypass_wd off >> $LOGFILE 2>&1
    log $DEBUG "return value = $?" >> $LOGFILE

  done
}

# setnormal [nosendlog]
#
# Return every bypass-capable NIC that belongs to a virtual router to the
# normal (non-bypass) state with its bypass watchdog disabled.
#
# Phase 1 walks $ALLNICS and builds $BPNICS: NICs whose PCI vendor/device
# and subsystem IDs match a known bypass card and that appear in the
# "vrnics" line of $IDPDIR/device/bin/system_funcs. eth0 and eth1 are never
# included, nor is eth2 on platform 800.
# Phase 2 switches each NIC in $BPNICS to normal state and, unless the
# first argument is "nosendlog", raises a nic_normal scio event for it.
#
# Globals read:    ALLNICS, NICBUSMAP, HWPLATFORM, IDPPLATFORM, IDPDIR,
#                  BPCTL2, GREP, SED, ETHTOOL, UNAME, CUT, LSPCI, PRINTF, ECHO
# Globals written: BPNICS and ACTIVENICS (both read again by the main
#                  script), BP_PIDS, COMMAND, BUSINFO, VENDOR, DEVICE,
#                  SVENDOR, SDEVICE, SDTRUNC, BYPASSCAPABLE, ACTIVENIC,
#                  KERNELMINORVER, nicname, nicbus, nicfunc, slavenic
setnormal() {
  if [[ $1 = nosendlog ]] ; then
    local sendlog=n
  else
    local sendlog=y
  fi

  # Set normal state on all NICs supporting bypass feature in VRs
  log $DEBUG "ALLNICS: $ALLNICS"
  BPNICS=''

  ACTIVENICS=`$SED "/^vrnics/!d ; s/.*=//g ; s/'//g" $IDPDIR/device/bin/system_funcs`
  # Make NIC-businfo map
  for nic in $ALLNICS ; do
    # Look up this NIC's colon-separated entry in NICBUSMAP
    for token in $NICBUSMAP ; do
      OIFS=$IFS
      IFS=$IFS:
      set -- $token
      IFS=$OIFS
      nicname=$1
      nicbus=$2
      nicfunc=$4
      if [[ $nicname == $nic ]] ; then
        break
      fi
    done

    # Skip slave NICs
    log $DEBUG "nicname, nicbus, nicfunc: $nicname, $nicbus, $nicfunc"
    if [[ $HWPLATFORM == Supermicro2 && $nicfunc != 0 ]] ; then
      continue
    elif [[ $HWPLATFORM == Supermicro ]] ; then
      $BPCTL2 $nicname is_bypass | $GREP -q control
      if [[ $? -ne 0 ]] ; then
        continue
      fi
    fi

    case "$nic" in
      eth0 | eth1)
        continue
        ;;
      *)
        # Skip eth2 on IDP 800
        if [[ $IDPPLATFORM == 800 && $nic == eth2 ]] ; then
          continue
        fi

        BUSINFO=`$ETHTOOL -i $nic | $SED '/^bus-info/!d ; s/^bus-info: 0000://g'`

        # Third character of the kernel release selects the lspci parser
        KERNELMINORVER=`$UNAME -r | $CUT -c3`
        case "$KERNELMINORVER" in
          4)
            # 2.4 system
            VENDOR=`$LSPCI -nvs "$BUSINFO" | $GREP ' 0200:' | $SED -e "s/.* 0200: *//g" -e "s/:.*//g"`
            DEVICE=`$LSPCI -nvs "$BUSINFO" | $GREP ' 0200:' | $SED -e "s/.* 0200: *//g" -e "s/.*:\([0-9a-f]*\).*/\1/g"`
            SVENDOR=`$LSPCI -nvs "$BUSINFO" | $GREP 'Subsystem:' | $SED -e "s/.*Subsystem: *//g" -e "s/:.*//g"`
            # "Subsystem:" with one colon, matching the SVENDOR line above
            SDEVICE=`$LSPCI -nvs "$BUSINFO" | $GREP 'Subsystem:' | $SED -e "s/.*Subsystem: *//g" -e "s/.*:\([0-9a-f]*\).*/\1/g"`
            ;;
          6)
            # 2.6 system
            VENDOR=`$LSPCI -nvms "$BUSINFO" | $SED -e '/^Vendor/!d ; s/.*:\t//g'`
            DEVICE=`$LSPCI -nvms "$BUSINFO" | $SED -e '1 d ; /^Device/!d ; s/.*:\t//g'`
            SVENDOR=`$LSPCI -nvms "$BUSINFO" | $SED -e '/^SVendor/!d ; s/.*:\t//g'`
            SDEVICE=`$LSPCI -nvms "$BUSINFO" | $SED -e '/^SDevice/!d ; s/.*:\t//g'`
            ;;
        esac
        # Only the first two characters of the subsystem device ID are matched
        SDTRUNC=`$PRINTF "$SDEVICE" | $CUT -c1-2`

        BYPASSCAPABLE=n
        case "$VENDOR:$DEVICE $SVENDOR:$SDTRUNC" in
          '1374:0028 1374:00' | '1374:0029 1374:00')
            # Copper bypass on Malta devices
            BYPASSCAPABLE=y
            ;;
          '8086:109a 1304:00')
            # Built-in bypass NIC (8086:109a 1304:0001)
            BYPASSCAPABLE=y
            ;;
          '8086:10a7 1304:03')
            # 4 port copper 1GbE with bypass (8086:10a7 1304:0303)
            BYPASSCAPABLE=y
            ;;
          '8086:10c6 1304:08')
            # 2 port fiber 10GbE with bypass (8086:10c6 1304:0800)
            BYPASSCAPABLE=y
            ;;
          '8086:10a9 1304:09')
            # 4 port LC fiber 1GbE with bypass (8086:10a9 1304:0900, 8086:10a9 1304:0901)
            BYPASSCAPABLE=y
            ;;
        esac
        log $DEBUG "nic: $nic, VENDOR: $VENDOR, DEVICE: $DEVICE, SVENDOR: $SVENDOR, SDEVICE: $SDEVICE, BYPASSCAPABLE: $BYPASSCAPABLE"

        # Only set normal state on NICs in VRs
        ACTIVENIC=n
        $ECHO "$ACTIVENICS" | $GREP -qw $nic
        if [[ $? -eq 0 ]] ; then
          ACTIVENIC=y
        fi

        if [[ $BYPASSCAPABLE == y && $ACTIVENIC == y ]] ; then
          BPNICS="$BPNICS $nic"
        fi
        ;;
    esac
  done
  log $DEBUG "BPNICS:$BPNICS"

  BP_PIDS=''
  for nic in $BPNICS ; do
    slavenic=`getslavenic $nic`
    # Setting NIC in normal state and disabling watchdog timer
    log $SPEW "Disabling watchdog timer and putting $nic into normal state"

    case "$HWPLATFORM" in
      Supermicro)
        $BPCTL2 $nic set_bypass_wd off > /dev/null 2>&1
        $BPCTL2 $nic set_bypass off > /dev/null 2>&1
        ;;
      Supermicro2)
        $BPCTL2 $nic set_bypass_wd off > /dev/null 2>&1
        $BPCTL2 $nic set_bypass off > /dev/null 2>&1
        $BPCTL2 $nic set_bypass_pwup off > /dev/null 2>&1
        ;;
    esac
    # COMMAND is kept only as the text for the log entry below
    COMMAND="$BPCTL2 $nic get_bypass | $GREP -q 'non-Bypass'"

    log $DEBUG "Running $COMMAND"
    # Run the real pipeline: expanding $COMMAND would hand '|', the grep
    # path and its options to bpctl2 as literal arguments.
    { $BPCTL2 $nic get_bypass | $GREP -q 'non-Bypass' ; } > /dev/null 2>&1 &
    BP_PIDS="$BP_PIDS $!"

    if [[ $sendlog = y ]] ; then
      # The redirect must precede '&' to apply to scio
      $IDPDIR/device/bin/scio loggen nic_normal $nic,$slavenic 2> /dev/null &
    fi
  done

  # Wait for the background state queries started above
  if [[ -n $BP_PIDS ]] ; then
    wait $BP_PIDS
  fi
}

#sleepresident() {
# while [ 1 ] ; do
# # 1 hour
# $SLEEP 3600
# done
#}

# Load the shared shell library from the IDP install tree, then call its
# setbins helper.
# NOTE(review): setbins is defined in sh.lib (not visible here); it
# presumably sets the command-path variables ($DATE, $GREP, $SED, ...) that
# the rest of this script expands — confirm against sh.lib.
source $IDPDIR/device/lib/sh.lib
setbins

# Verbosity requested for this run, as a level name
SO_PRINTER_LEVEL=spew2

# Numeric value of each log level, from silent to most verbose
NONE=0 CRITICAL=1 ERROR=2 WARN=3
INFO=4 DEBUG=5 SPEW=6 SPEW2=7

# Translate the level name into the numeric threshold consulted by log();
# an unrecognised name falls back to WARN.
case "$SO_PRINTER_LEVEL" in
  none)     LOGLEVEL=$NONE ;;
  critical) LOGLEVEL=$CRITICAL ;;
  error)    LOGLEVEL=$ERROR ;;
  warn)     LOGLEVEL=$WARN ;;
  info)     LOGLEVEL=$INFO ;;
  debug)    LOGLEVEL=$DEBUG ;;
  spew)     LOGLEVEL=$SPEW ;;
  spew2)    LOGLEVEL=$SPEW2 ;;
  *)        LOGLEVEL=$WARN ;;
esac

# "-v" as the first argument: print the script version and stop
case "$1" in
  -v)
    version=5.0.136712
    $PRINTF "$version\n"
    exit 0
    ;;
esac

# Defaults kept for backward compatibility with older user_funcs files,
# which may not define these two settings
max_intf_recv_failed_cnt_nicbypass=18
jnet_outage_monitor_delay=10

# Start up: identify ourselves and work out paths and platform details
BINNAME=$0
BASEBINNAME=$($BASENAME $BINNAME)
LOGDIR="$IDPDIR/device/var/sysinfo/logs"
# One log file per script name and day
LOGFILE="$LOGDIR/$BASEBINNAME.$(datenow)"
HWPLATFORM=$(getidpinfo | $GREP "^HWPLATFORM:" | $CUT -f2 -d':')
IDPPLATFORM=$($IDPDIR/device/utils/getplatform | $CUT -f1 -d',' | $CUT -f2 -d'-')
IDPCONF="$IDPDIR/device/cfg/idpconf.cfg"
DPCORES=$(getidpinfo | $GREP "^DPCORES" | $CUT -f2 -d":")
IDPDPSH=$IDPDIR/device/bin/idpengine.sh

# Make sure the log file exists with mode 644 before the first log() call
$TOUCH $LOGFILE
$CHMOD 644 $LOGFILE

# watchdogInterval: second field of the idp.cfg entry, otherwise 3
if $GREP -q 'nicBypass.watchdogInterval' $IDPDIR/device/cfg/idp.cfg ; then
  WATCHDOGVAL=$($GREP 'nicBypass.watchdogInterval' $IDPDIR/device/cfg/idp.cfg | $AWK '{print $2}')
else
  WATCHDOGVAL=3
  log $DEBUG 'Using default value for watchdogInterval'
fi
log $DEBUG "watchdogInterval: $WATCHDOGVAL"

# loopInterval: second field of the idp.cfg entry, otherwise 50000
if $GREP -q 'nicBypass.loopInterval' $IDPDIR/device/cfg/idp.cfg ; then
  LOOPWAIT=$($GREP 'nicBypass.loopInterval' $IDPDIR/device/cfg/idp.cfg | $AWK '{print $2}')
else
  LOOPWAIT=50000
  log $DEBUG 'Using default value for loopInterval'
fi
log $DEBUG "loopInterval (sec): $LOOPWAIT"
log $WARN "Starting up $BASEBINNAME"

# Decide whether the bpctl2 driver is needed: it is loaded only when lspci
# lists an Ethernet controller (class 0200) with one of the known bypass
# vendor:device IDs.
LOADBPCTL=n
if $LSPCI -nv | $GREP ' 0200:' | $GREP -qE "8086:10a7|8086:10a9|8086:10c6|8086:109a|1374:0028|1374:0029" ; then
  LOADBPCTL=y
fi

if [[ $LOADBPCTL == y ]] ; then
  # Prefer the .ko module file when it is readable, else fall back to .o
  BPMODULE=$IDPDIR/device/lib/bpctl2.o
  if [[ -r $IDPDIR/device/lib/bpctl2.ko ]] ; then
    BPMODULE=$IDPDIR/device/lib/bpctl2.ko
  fi

  # Insert the module unless lsmod already reports bpctl_mod
  OUTPUT=$($LSMOD 2>&1)
  if [[ ! $OUTPUT =~ bpctl_mod ]] ; then
    log $DEBUG "$($INSMOD $BPMODULE 2>&1)"
  fi

  # Ensure /dev/bpctl0 exists and carries the major number the driver
  # registered in /proc/devices
  BPDEV='/dev/bpctl0'
  BPMAJOR=$($GREP bpctl /proc/devices | $AWK '{print $1}')
  if [[ -r $BPDEV ]] ; then
    EXISTINGMAJOR=$($FILE $BPDEV | $SED "s/.*(\([0-9]*\)\/[0-9]*).*/\1/g")
    # Recreate the node only when its major number is out of date
    [[ $BPMAJOR == $EXISTINGMAJOR ]] || {
      $RM -f $BPDEV
      $MKNOD $BPDEV c $BPMAJOR 0
    }
  else
    $MKNOD $BPDEV c $BPMAJOR 0
  fi
fi

# Set exit trap
# SIGINT and SIGTERM run programExit (NIC cleanup, then exit); SIGHUP runs
# receiveHup, which only logs and otherwise ignores the signal.
log $SPEW2 'Setting trap for SIGINT SIGTERM'
trap programExit SIGINT SIGTERM
trap receiveHup SIGHUP

# Write pid file
# programExit removes this file again on shutdown.
PIDFILE="$IDPDIR/device/var/sysinfo/pids/$BASEBINNAME.pid"
$PRINTF "$$" > $PIDFILE

# Set normal state on all NICs
# ALLNICS: interface names from ifconfig lines that carry both
# 'Link encap' and 'HWaddr', excluding names containing a dot.
ALLNICS=`$IFCONFIG -a | $GREP 'Link encap' | $GREP HWaddr | $AWK '{print $1}' | $GREP -v "\."`
# NICBUSMAP: space-separated "nic:businfo" entries. The sed strips the
# "bus-info: 0000:" prefix and turns every '.' into ':', so each entry can
# later be split on ':' by getslavenic and setnormal.
for nic in $ALLNICS ; do
BUSINFO=`$ETHTOOL -i $nic | $SED '/bus-info/!d ; s/^bus-info: 0000://g ; s/\./:/g'`
NICBUSMAP="$NICBUSMAP $nic:$BUSINFO"
done
log $DEBUG "NICBUSMAP: $NICBUSMAP"
# Called without "nosendlog", so setnormal also raises its scio log events
setnormal

# Decide whether the bypass feature is configured at all: idpconf.cfg must
# contain a 'bypass_idpactive' or 'bypass_idpinactive' line that does not
# contain "undef" and does contain " 1" or " 2".
log $DEBUG 'Checking if bypass is enabled for any NICs'
BYPASSENABLED=n
if [[ -r $IDPCONF ]] ; then
  $GREP -v undef $IDPCONF | $EGREP "'bypass_idpactive'|'bypass_idpinactive'" | $EGREP " 1| 2" > /dev/null
  if [[ $? -eq 0 ]] ; then
    BYPASSENABLED=y
  fi
fi
if [[ $BYPASSENABLED == y ]] ; then
  log $DEBUG 'Bypass is enabled for one or more NICs'

  # One "device:idpactive:idpinactive" token per NIC
  BYPASSSETTINGS=`$IDPDIR/device/bin/getBypassSettings.pl`
  log $SPEW "BYPASSSETTINGS: $BYPASSSETTINGS"

  IFCFGPIDS=''
  for token in $BYPASSSETTINGS ; do
    OIFS=$IFS
    IFS=$IFS:
    set -- $token
    IFS=$OIFS
    DEVICE=$1
    IDPACTIVE=$2
    IDPINACTIVE=$3
    log $SPEW "DEVICE: $DEVICE"
    log $SPEW "IDPACTIVE: $IDPACTIVE"
    log $SPEW "IDPINACTIVE: $IDPINACTIVE"

    # Skip NICs not configured in a virtual router
    # (ACTIVENICS was filled in by the earlier setnormal call)
    $ECHO "$ACTIVENICS" | $GREP -qw $DEVICE
    if [[ $? != 0 ]] ; then
      # If NIC is control interface, put NIC into NICs off state since it's unused
      $BPCTL2 $DEVICE is_bypass | $GREP -q 'is a control interface'
      if [[ $? == 0 ]] ; then
        $BPCTL2 $DEVICE set_bypass_pwup on > /dev/null 2>&1
        $BPCTL2 $DEVICE set_dis_bypass on > /dev/null 2>&1
        $BPCTL2 $DEVICE set_bypass on > /dev/null 2>&1
      fi
      continue
    fi

    if [[ $IDPACTIVE == 1 || $IDPACTIVE == 2 ]] ; then
      # Populate BYPASSNICS_IDPACTIVE with NICs marked with bypass_idpactive
      BYPASSNICS_IDPACTIVE="$BYPASSNICS_IDPACTIVE $DEVICE:$IDPACTIVE"

      # Bring the link up on fiber NICs (master and slave), in the background
      $ETHTOOL $DEVICE | $GREP -q FIBRE
      if [[ $? -eq 0 ]] ; then
        slavenic=`getslavenic $DEVICE`
        # For Pktloss during start up # $IFCONFIG $DEVICE down && $IFCONFIG $DEVICE up &
        $IFCONFIG $DEVICE up &
        IFCFGPIDS="$IFCFGPIDS $!"
        #For Pktloss during start up # $IFCONFIG $slavenic down && $IFCONFIG $slavenic up &
        $IFCONFIG $slavenic up &
        IFCFGPIDS="$IFCFGPIDS $!"
      fi
    fi
    if [[ $IDPINACTIVE == 1 || $IDPINACTIVE == 2 ]] ; then
      # Populate BYPASSNICS_IDPINACTIVE with NICs marked with bypass_idpinactive
      BYPASSNICS_IDPINACTIVE="$BYPASSNICS_IDPINACTIVE $DEVICE:$IDPINACTIVE"
    fi
  done
  # Wait for the background ifconfig calls
  if [[ -n $IFCFGPIDS ]] ; then
    wait $IFCFGPIDS
  fi
  log $DEBUG "BYPASSNICS_IDPACTIVE:$BYPASSNICS_IDPACTIVE"
  log $DEBUG "BYPASSNICS_IDPINACTIVE:$BYPASSNICS_IDPINACTIVE"

  # Enable bypass on specified NICs that will do bypass
  BP_PIDS=''
  for token in $BYPASSNICS_IDPACTIVE ; do
    OIFS=$IFS
    IFS=$IFS:
    set -- $token
    IFS=$OIFS
    nic=$1
    bypasssetting=$2

    case "$bypasssetting" in
      1)
        BYPASS_UTIL_MODE=2
        DIS_BYPASS_SETTING=off
        ;;
      2)
        BYPASS_UTIL_MODE=3
        DIS_BYPASS_SETTING=on
        ;;
    esac

    case "$bypasssetting" in
      1 | 2)
        slavenic=`getslavenic $nic`
        # The label is the literal word "slavenic"; it used to expand the
        # variable, printing the value twice.
        log $SPEW "Setting bypass watchdog timer, nic: $nic, slavenic: $slavenic, WATCHDOGVAL: $WATCHDOGVAL, DIS_BYPASS_SETTING: $DIS_BYPASS_SETTING"
        # The watchdog timeout is passed as WATCHDOGVAL * 1000
        $BPCTL2 $nic set_bypass_wd $((WATCHDOGVAL * 1000)) > /dev/null 2>&1 && \
          $BPCTL2 $nic set_dis_bypass $DIS_BYPASS_SETTING > /dev/null 2>&1 && \
          $BPCTL2 $nic get_bypass | $GREP -q 'in the non-Bypass mode' > /dev/null 2>&1

        if [[ $HWPLATFORM == Supermicro ]] ; then
          log $SPEW "Setting powerup state for $nic to Bypass to support NIC bypass"
          log $SPEW "Running $BPCTL2 $nic set_bypass_pwup on"
          $BPCTL2 $nic set_bypass_pwup on | $GREP -q success
          if [[ $? -eq 0 ]] ; then
            log $WARN "Set pwup state for $nic to Bypass"
          else
            log $WARN "ERROR: Failed to set pwup state for $nic to Bypass"
          fi
        fi
        ;;
    esac
  done

  # NOTE(review): nothing in this section appends to BP_PIDS, so this wait
  # never runs as written.
  if [[ -n $BP_PIDS ]] ; then
    wait $BP_PIDS
  fi
  #$SLEEP 3

  log $DEBUG 'Enabling system watchdog for built-in NICs if needed'
  # Enable system watchdog
  if [[ $HWPLATFORM == Supermicro2 ]] ; then
    # BPNICS was filled in by the earlier setnormal call
    for nic in $BPNICS ; do
      log $SPEW "Enabling system watchdog on $nic"
      $BPCTL2 $nic set_bypass off > /dev/null 2>&1
      $BPCTL2 $nic reset_bypass_wd > /dev/null 2>&1
    done
  fi
else
  log $DEBUG 'Bypass is NOT enabled for any NICs'

  # Feature is not enabled, wait a minute, then exit cleanly
  $SLEEP 60
  programExit
fi

# Reduce the "nic:setting" entries of BYPASSNICS_IDPACTIVE to bare NIC
# names; the main loop services the watchdog of each of them.
for token in $BYPASSNICS_IDPACTIVE ; do
  OIFS=$IFS
  IFS=$IFS:
  set -- $token
  IFS=$OIFS
  nicsidpactive="$nicsidpactive $1"
done

#sleepresident &
#SMRPIDS=$!
#if [ "$LOADBPCTL" = 'y' ] ; then
# $BPCTL2 smr 2> /dev/null &
# SMRPIDS="$SMRPIDS $!"
#fi

# Read user_funcs
# (sourced after the defaults for max_intf_recv_failed_cnt_nicbypass and
# jnet_outage_monitor_delay were set, so any assignments it makes to those
# names replace the defaults)
source $IDPDIR/device/bin/user_funcs

#Make a list of NICS whose counters need to be monitored.
#This is to avoid network outages in customer site
#if the system stops receiving the traffic due to
#Some error in the JNET driver
# eth0 is never monitored; eth1 is excluded too, except on platform 75.
FILTERETH1="eth1"
if [ "$IDPPLATFORM" == "75" ] ; then
  FILTERETH1=""
fi
MONITOREDNICS=`$IFCONFIG -a | $GREP 'Link encap' | $GREP HWaddr | $AWK '{print $1}' | $GREP -v "\." | $GREP -vw "eth0"`
# Apply the second filter only when there is a name to filter out: with an
# empty FILTERETH1 the former unquoted "$GREP -vw $FILTERETH1" ran grep
# with no pattern, which fails and left MONITOREDNICS empty.
if [[ -n $FILTERETH1 ]] ; then
  MONITOREDNICS=`$ECHO "$MONITOREDNICS" | $GREP -vw "$FILTERETH1"`
fi

# Per-NIC state is stored in dynamically named variables (suffix = NIC name):
# baseline rx counters, a consecutive-stall counter, and two ring buffers of
# recent samples with their shared write index.
for nicmon in $MONITOREDNICS ; do
  eval rx_packet1_$nicmon=`$ETHTOOL -S $nicmon | grep "rx_packets" | $AWK '{print $2}'`
  eval rx_missed_error1_$nicmon=`$ETHTOOL -S $nicmon | grep "rx_missed_errors" | $AWK '{print $2}'`
  eval nicmonitoringcount_$nicmon=0
  declare -a DEBUG_RX_PACKET_ARRAY_$nicmon
  declare -a DEBUG_RX_MISSED_ERRORS_ARRAY_$nicmon
  eval DEBUG_CIRCULAR_RING_COUNTER_$nicmon=0
done

# To maintain the time difference
# time1 is the epoch second of the last counter sample, time2 the current one
time1=0
time2=0
time1=`$DATE +'%s'`

RESET_COUNTER=0

# Main loop. Each pass:
#   1. services the bypass watchdog of every IDP-active NIC,
#   2. checks that enough idpengine processes are still running,
#   3. every $jnet_outage_monitor_delay seconds samples the rx counters of
#      the monitored NICs and triggers bypass when an interface has stalled,
#   4. sleeps $LOOPWAIT via $USLEEP unless a watchdog reset was just done.
while [[ $BYPASSENABLED == y ]] ; do
  log $SPEW "Top of main loop, LOOPWAIT: $LOOPWAIT"
  DOSLEEP=1
  for nic in $nicsidpactive ; do
    log $SPEW "$nic: Checking watchdog status"

    # The first numeric argument is 1 on Supermicro hardware and 0
    # elsewhere; the second is WATCHDOGVAL * 1000.
    if [[ $HWPLATFORM == Supermicro ]] ; then
      OUTPUT=`$BPCTL2 $nic check_n_reset_bypass 1 $((WATCHDOGVAL * 1000))`
    else
      OUTPUT=`$BPCTL2 $nic check_n_reset_bypass 0 $((WATCHDOGVAL * 1000))`
    fi

    # The expected text ends with a space
    if [[ $OUTPUT == 'WDT Reset Done. ' ]] ; then
      #$ECHO $OUTPUT
      slavenic=`getslavenic $nic`
      # The redirect must precede '&'; after it, it would apply to an
      # empty command and scio's stderr would not be silenced.
      $IDPDIR/device/bin/scio loggen nic_normal $nic,$slavenic 2> /dev/null &
      DOSLEEP=0
      ((++RESET_COUNTER))
      log $WARN "Bypass Reset done for $nic. total_count=$RESET_COUNTER "
      # Wrap the counter at 1000
      if [[ $RESET_COUNTER -eq 1000 ]] ; then
        RESET_COUNTER=0
      fi
    fi
  done

  # Unless a "recover" or "hasignal" process is running, fewer idpengine
  # processes than DPCORES means an engine died: exit via programExit.
  $PS --no-headers -U 0 nuc | $GREP "recover" > /dev/null
  if [ $? -ne 0 ] ; then
    $PS --no-headers -U 0 nuc | $GREP "hasignal" > /dev/null
    if [ $? -ne 0 ] ; then
      num_running_engines=`$PS --no-headers -U 0 nuc | $GREP -wc "idpengine$"`

      if [[ $num_running_engines -lt $DPCORES ]] ; then
        log $WARN "One or more IDP instance got terminated, triggering nicBypass"
        programExit
      fi
    fi
  fi

  # Start monitoring the nic counters using ethtool.
  # Check for rx_packet and rx_missed_errors count.
  # Do this once at least $jnet_outage_monitor_delay seconds have passed
  # since the previous sample.
  time2=`$DATE +'%s'`
  delta_time=`$EXPR $time2 - $time1`
  if [ $delta_time -ge $jnet_outage_monitor_delay ] ; then
    for nicmon in $MONITOREDNICS ; do
      eval rx_packet2_$nicmon=`$ETHTOOL -S $nicmon | grep "rx_packets" | $AWK '{print $2}'`
      eval rx_missed_error2_$nicmon=`$ETHTOOL -S $nicmon | grep "rx_missed_errors" | $AWK '{print $2}'`
      eval tmp_rx_packet1=\$rx_packet1_$nicmon
      eval tmp_rx_packet2=\$rx_packet2_$nicmon
      eval tmp_rx_missed_error1=\$rx_missed_error1_$nicmon
      eval tmp_rx_missed_error2=\$rx_missed_error2_$nicmon
      if [ $tmp_rx_packet1 -eq $tmp_rx_packet2 -a $tmp_rx_missed_error1 -ne $tmp_rx_missed_error2 ] ; then
        # Stalled sample: rx_packets unchanged while rx_missed_errors moved.
        # Count it and record the counters in the ring buffers.
        eval tmp_nicmonitoringcount=\$nicmonitoringcount_$nicmon
        tmp_nicmonitoringcount=`$EXPR $tmp_nicmonitoringcount \+ 1`
        eval nicmonitoringcount_$nicmon=$tmp_nicmonitoringcount
        eval DEBUG_RX_PACKET_ARRAY_$nicmon[\$DEBUG_CIRCULAR_RING_COUNTER_$nicmon]=$tmp_rx_packet2
        eval DEBUG_RX_MISSED_ERRORS_ARRAY_$nicmon[\$DEBUG_CIRCULAR_RING_COUNTER_$nicmon]=$tmp_rx_missed_error2
        eval tmp_debug_circular_ring_counter=\$DEBUG_CIRCULAR_RING_COUNTER_$nicmon
        tmp_debug_circular_ring_counter=`$EXPR $tmp_debug_circular_ring_counter \+ 1`
        tmp_debug_circular_ring_counter=`$EXPR $tmp_debug_circular_ring_counter \% $max_intf_recv_failed_cnt_nicbypass`
        eval DEBUG_CIRCULAR_RING_COUNTER_$nicmon=$tmp_debug_circular_ring_counter
      elif [ $tmp_rx_packet1 -ne $tmp_rx_packet2 ] ; then
        # Packets are flowing again: clear the stall count and ring buffers
        eval nicmonitoringcount_$nicmon=0
        eval DEBUG_CIRCULAR_RING_COUNTER_$nicmon=0
        for (( i = 0 ; i < $max_intf_recv_failed_cnt_nicbypass ; i++ ))
        do
          eval DEBUG_RX_PACKET_ARRAY_$nicmon[$i]=0
          eval DEBUG_RX_MISSED_ERRORS_ARRAY_$nicmon[$i]=0
        done
      fi
      # The current sample becomes the baseline for the next interval
      eval rx_packet1_$nicmon=\$rx_packet2_$nicmon
      eval rx_missed_error1_$nicmon=\$rx_missed_error2_$nicmon
      eval tmp_nicmonitoringcount=\$nicmonitoringcount_$nicmon
      if [ $tmp_nicmonitoringcount -eq $max_intf_recv_failed_cnt_nicbypass ] ; then
        log $WARN "Interface $nicmon not receiving any packets, will trigger nicBypass"
        log $WARN "System not able to receive any packets, triggering nicBypass"
        # Dump the ring buffer, starting at the current write index
        eval tmp_debug_circular_ring_counter=\$DEBUG_CIRCULAR_RING_COUNTER_$nicmon
        for (( i = 0 ; i < $max_intf_recv_failed_cnt_nicbypass ; i++ ))
        do
          eval tmp_rx_packet_print=\${DEBUG_RX_PACKET_ARRAY_$nicmon[$tmp_debug_circular_ring_counter]}
          eval tmp_rx_missed_errors_print=\${DEBUG_RX_MISSED_ERRORS_ARRAY_$nicmon[$tmp_debug_circular_ring_counter]}
          log $WARN "rx packets received $tmp_rx_packet_print with missed error count $tmp_rx_missed_errors_print"
          tmp_debug_circular_ring_counter=`$EXPR $tmp_debug_circular_ring_counter \+ 1`
          tmp_debug_circular_ring_counter=`$EXPR $tmp_debug_circular_ring_counter \% $max_intf_recv_failed_cnt_nicbypass`
        done
        # print the jnet stats
        log $WARN "The jnet statistics are:"
        $IDPDIR/device/utils/jnetStats > $IDPDIR/device/var/tmp/jnetStatsDump
        log $WARN "`$CAT $IDPDIR/device/var/tmp/jnetStatsDump`"
        programExit
      fi
    done
    # Update the time stamp
    time1=$time2
  fi

  #
  # Sleep unless a watchdog reset was performed in this pass
  # (a reset sets DOSLEEP to 0)
  #
  if [[ $DOSLEEP == 1 ]] ; then
    $USLEEP $LOOPWAIT
  fi
done

Related Links

Comment on this article > Affected Products Browse the Knowledge Base for more articles related to these product categories. Select a category to begin.

Getting Up and Running with Junos

Getting Up and Running with Junos Security Alerts and Vulnerabilities Product Alerts and Software Release Notices Problem Report (PR) Search Tool EOL Notices and Bulletins JTAC User Guide Customer Care User Guide Pathfinder SRX High Availability Configurator SRX VPN Configurator Training Courses and Videos End User Licence Agreement Global Search