#!/bin/bash

# Directory that holds the isgw logs, the core file and the generated
# debug files; run_isgw also changes into it before starting isgw.
LOG_DIR=/var/log

# Path of the beroconf tool.
# NOTE(review): not referenced in the visible part of this file; the code
# below calls `beroconf` through PATH or by its absolute path instead.
BEROCONF='/usr/fallback/beroconf'

# Only referenced from a commented-out line in safe_debug_data.
DUMPDROP=${LOG_DIR}
# Configuration directory handed to isgw via `-e`.
CONF=/usr/conf
# isgw executable that is started and supervised.
ISGW_BIN=/usr/local/bin/isgw
# Size threshold (bytes) at which run_tasks rotates ${LOG_DIR}/isgw.err.
MAX_ISGW_ERR_SIZE=30000
# File that receives the PID of the running isgw.
PIDFILE=/var/run/isgw.pid

# Development override: when the marker file exists and /tmp/isgw is
# executable, supervise that binary instead of the installed one.
if [[ -f "/tmp/isgw.devmode" && -x /tmp/isgw ]]; then
	ISGW_BIN=/tmp/isgw
fi

# NOTE(review): LOG_SIZE is not referenced in the visible part of this file.
LOG_SIZE=500000
# Flag consulted by generate_debug; safe_debug_data clears it while packing.
GENERATE_DEBUG=1

# Make sure the system tool directories are searched as well.
export PATH=$PATH:/bin:/sbin/:/usr/bin:/usr/sbin

#
# Enable core dumps: lift the core size limit and set the kernel core
# file name pattern.
# NOTE(review): per core(5) a single trailing '%' is dropped, so the dump is
# presumably written as plain "core" -- the name safe_debug_data looks for.
#
ulimit -c unlimited
echo 'core%' > /proc/sys/kernel/core_pattern

# Default dmalloc settings exported to child processes; the dmalloc log is
# written to /var/log/log.dmalloc.
export DMALLOC_OPTIONS=debug=0x34f47d83,inter=100,log=/var/log/log.dmalloc
# Shell front end for the external dmalloc utility: runs the real program
# with `-b` plus the caller's arguments and evaluates the shell commands it
# prints in the current shell. `command` bypasses this function so the
# executable on PATH is invoked. The expansions are deliberately left
# unquoted so the printed commands are joined onto a single line for eval.
dmalloc() {
	eval $(command dmalloc -b $*)
}


#
# Preserve post-mortem data after isgw died.
#
# - If ${LOG_DIR}/core exists, a hard link ${LOG_DIR}/corelink is created so
#   the dump survives when a later crash writes a new core file.
# - Unless ${LOG_DIR}/debug-info.tar.gz already exists, a fresh info.txt is
#   generated and packed together with the configuration and the log files;
#   an existing archive is never overwritten.
#
# GENERATE_DEBUG is set to 0 while packing and back to 1 afterwards.
# NOTE(review): generate_debug is called from run_tasks, which run_isgw
# starts as a background job, so it runs in another process and never sees
# this variable change.
#
safe_debug_data()
{
	local archive=${LOG_DIR}/debug-info.tar.gz
	local info=${LOG_DIR}/info.txt

	# pause generate_debug until packing is finished
	GENERATE_DEBUG=0

	# hard link protects the dump from being replaced by the next core file
	[[ -f ${LOG_DIR}/core ]] && ln ${LOG_DIR}/core ${LOG_DIR}/corelink

	if [[ ! -f $archive ]]; then
		/usr/local/sbin/generate_info.sh $info
		sync
		tar czf $archive /usr/conf ${LOG_DIR}/isgw.log.* $info ${LOG_DIR}/isgw.err ${LOG_DIR}/sofia.log ${LOG_DIR}/debug.txt ${LOG_DIR}/dmesg.log ${LOG_DIR}/dmesg.1.log /usr/local/repo_info.txt
		rm $info
	fi

	GENERATE_DEBUG=1
}


#
# Rotate a log file once it has reached a size limit.
#
# Arguments:
#   $1 - path of the file to check
#   $2 - size limit in bytes
#
# When the file is at least $2 bytes large it is copied to "$1.1"
# (replacing any previous copy) and then cut down in place to a single
# newline. Cutting in place rather than moving keeps the file a writer
# already has open. A missing file is silently ignored.
#
# Returns: 0 when nothing had to be done, otherwise the status of the
#          final truncation.
#
check_and_cut_file()
{
    local file=$1
    local limit=$2
    local size

    # Nothing to rotate if the file does not exist (yet).
    [ -f "$file" ] || return 0

    # Take the byte count from wc instead of parsing `ls -l` output, whose
    # column layout depends on the ls implementation and the file name.
    size=$(wc -c < "$file") || return 0

    if [ "$size" -ge "$limit" ]; then
        cp "$file" "$file.1"
        sync
        echo > "$file"
    fi
}

# System-state log written on every collect_system_state run, and the file
# it is moved to once it exceeds LINES_RECENT lines.
SSLOG_RECENT=/var/log/sslog.recent.log
SSLOG_RECENT_ROT=/var/log/sslog.recent.log.1

# Thinned-out long-term log (one sample per 15 recent entries), and the file
# it is moved to once it exceeds LINES_HISTORY lines.
SSLOG_HISTORY=/var/log/sslog.history.log
SSLOG_HISTORY_ROT=/var/log/sslog.history.log.1

#
# Rebuild ${LOG_DIR}/debug.txt from scratch.
#
# Does nothing unless GENERATE_DEBUG is 1. Otherwise the old report is
# removed, generate_info.sh writes a new one, and the rotated plus current
# "recent" and "history" system-state logs are appended, each below its own
# banner line.
#
generate_debug()
{
	local report=${LOG_DIR}/debug.txt

	[ $GENERATE_DEBUG == 1 ] || return 0

	rm -rf $report
	/usr/local/sbin/generate_info.sh $report
	{
		echo "########################SSLOG RECENT##################"
		cat $SSLOG_RECENT_ROT $SSLOG_RECENT
		echo "########################SSLOG HISTORY##################"
		cat $SSLOG_HISTORY_ROT $SSLOG_HISTORY
	} >> $report
}

# Maximum number of lines kept in the recent / history state logs before
# the file is moved to its ".1" name.
LINES_RECENT=90
LINES_HISTORY=96

#
# Record one sample of the system state.
#
# The sample line contains a timestamp, the RSS of isgw ("000" when it
# cannot be determined) over MemTotal, the numbers of the "Mem:" row of
# `free`, the Cached value from /proc/meminfo, the load average, the size of
# /tmp (recent log and log server only) and -- unless it is isgw itself --
# the process line that sorts last in the `ps` listing.
#
# The line is appended to $SSLOG_RECENT and, when a log server is configured
# in beroconf (ari:key_val logserver, "host:port"), also sent there via UDP.
# Every 15th recent entry is copied to $SSLOG_HISTORY; run_tasks calls this
# every 120 seconds, which makes that one history sample per half hour.
# Both logs are moved to their ".1" name once they exceed LINES_RECENT /
# LINES_HISTORY lines.
#
# Globals read: ISGW_BIN, SSLOG_RECENT, SSLOG_RECENT_ROT, SSLOG_HISTORY,
#               SSLOG_HISTORY_ROT, LINES_RECENT, LINES_HISTORY
#
collect_system_state() {
	local stamp isgw_pid isgw_rss memtotal memcached load mem_sys
	local maxmemproc logserver tmpsize lines sample recent_line history_line

	stamp=$(date +%Y%m%d_%H%M%S)

	# NOTE(review): takes the first ps column as the PID (BusyBox-style ps
	# layout assumed -- confirm on the target). "grep -v time" skips the
	# /usr/bin/time wrapper isgw is started under.
	isgw_pid=$(ps aux | grep "$ISGW_BIN" | grep -v time | grep -v grep | head -1 | awk '{print $1}')
	isgw_rss=
	# only touch /proc when a pid was actually found
	if [[ -n "$isgw_pid" && -r /proc/$isgw_pid/status ]]; then
		isgw_rss=$(awk '/RSS/ {print $2}' "/proc/$isgw_pid/status")
	fi
	[[ -z "$isgw_rss" ]] && isgw_rss=000

	memtotal=$(awk '/MemTotal/ {print $2}' /proc/meminfo)
	memcached=$(awk '/^Cached/ {print $2}' /proc/meminfo)
	load=$(cat /proc/loadavg)

	# Squeeze runs of whitespace to one blank, then drop the "Mem:" label.
	# The run must be at least one character long: a pattern that can match
	# the empty string would insert a blank between every character.
	mem_sys=$(free | grep Mem | sed 's/[[:space:]][[:space:]]*/ /g;s/.*Mem: *//')

	# process line that sorts last numerically once the first two ps columns
	# are blanked; dropped when it is isgw itself
	maxmemproc=$(ps | awk '{$1="";$2="";print $0}' | sort -n | tail -1 | grep -v "$ISGW_BIN")

	# "host:port" -> "host port"
	logserver=$(beroconf get ari:key_val logserver | sed "s/:/ /")

	tmpsize=$(du -s /tmp/ | awk '{print $1}')

	sample="$stamp: MEMISGW: $isgw_rss/$memtotal MEMSYS: $mem_sys $memcached LOAD: $load"
	recent_line="$sample TMP: $tmpsize"
	history_line=$sample
	if [[ -n "$maxmemproc" ]]; then
		recent_line+=" MMPROC: $maxmemproc"
		history_line+=" MMPROC: $maxmemproc"
	fi

	printf '%s\n' "$recent_line" >> "$SSLOG_RECENT"
	if [[ -n "$logserver" ]]; then
		# $logserver is left unquoted on purpose: it holds "host port"
		printf '%s\n' "$recent_line" | nc -q1 -w1 -u $logserver
	fi

	lines=$(wc -l < "$SSLOG_RECENT")
	if [[ $lines -gt $LINES_RECENT ]]; then
		\mv "$SSLOG_RECENT" "$SSLOG_RECENT_ROT"
	fi

	# every 15th recent entry (every half an hour) goes into the history
	if (( (lines - 1) % 15 == 0 )); then
		printf '%s\n' "$history_line" >> "$SSLOG_HISTORY"
	fi

	# the history log does not exist between a rotation and the next sample
	lines=0
	if [[ -f "$SSLOG_HISTORY" ]]; then
		lines=$(wc -l < "$SSLOG_HISTORY")
	fi
	if [[ $lines -gt $LINES_HISTORY ]]; then
		\mv "$SSLOG_HISTORY" "$SSLOG_HISTORY_ROT"
	fi
}


# Run the provisioning tool when a provisioning request is pending.
# The request is signalled by the marker /tmp/provisioning.triggered, which
# is removed before `provisioningTool.php config` is started.
check_if_config_update_triggered() {
	if [[ -f /tmp/provisioning.triggered ]]; then
		\rm -rf /tmp/provisioning.triggered
		/usr/php/provisioningTool.php config
	fi
}


# Safeguard settings (identifier spelling "SAVEGUARD" kept as is).
# Timer value written before isgw is started ...
# NOTE(review): unit is presumably seconds -- confirm against the driver.
SAVEGUARD_INITIAL_TIMEOUT=300
# ... and the value written right after isgw has ended.
SAVEGUARD_AFTER_TIMEOUT=180
# sysfs files used to switch the safeguard on/off and to set its timer.
SAVEGUARD_SYS_ENABLE_FILE=/sys/class/beronet/safeguard/enabled
SAVEGUARD_SYS_TIMER_FILE=/sys/class/beronet/safeguard/timer


# Switch the safeguard on by writing 1 to its sysfs enable file.
safeguard_enable() {
	printf '1\n' > "$SAVEGUARD_SYS_ENABLE_FILE"
}

# Switch the safeguard off by writing 0 to its sysfs enable file.
safeguard_disable() {
	printf '0\n' > "$SAVEGUARD_SYS_ENABLE_FILE"
}

# Write the given timer value to the safeguard's sysfs timer file.
#   $1 - new timer value
safeguard_set_interval() {
	local interval=$1
	echo "$interval" > "$SAVEGUARD_SYS_TIMER_FILE"
}

#
# Terminate every running isgw instance.
#
# First asks killall (plain, then with -9). Because that does not always
# catch everything (e.g. threads stuck in system calls), the ps listing is
# then searched for "isgw": every listed process whose /proc/<pid>/exe
# resolves to $ISGW_BIN gets a SIGKILL, and the kill is noted in
# /tmp/dmesg.log or, if that file does not exist, in /var/log/dmesg.log
# (nothing is logged when neither exists). The exe comparison keeps other
# processes that merely mention "isgw" -- such as this script -- untouched.
#
safe_isgw_kill_really_every_isgw() {
	local candidate exe_path kill_log

	killall isgw
	killall -9 isgw

	for candidate in $(ps aux | grep "isgw" | grep -v grep | awk '{print $1}'); do
		# skip anything that is not (or no longer) a process
		[ -d /proc/"$candidate" ] || continue

		# only the real isgw executable qualifies
		exe_path=$(readlink -f /proc/"$candidate"/exe)
		[ "$exe_path" = "$ISGW_BIN" ] || continue

		kill -9 "$candidate"

		if [ -f /tmp/dmesg.log ]; then
			kill_log=/tmp/dmesg.log
		elif [ -f /var/log/dmesg.log ]; then
			kill_log=/var/log/dmesg.log
		else
			continue
		fi
		echo "########## safe_isgw: ISGWKILL: " $(date) " killed isgw with PID " $candidate >> $kill_log
	done
}


#
# Periodic housekeeping loop (never returns; meant to run in the background).
#
# Every 60 seconds: check for a pending provisioning request.
# Every 120 seconds: rotate isgw.err, record the system state, optionally
# react to ISDN layer errors and refresh debug.txt.
#
# The ISDN layer check is active when the experimental_options line of
# /usr/conf/isgw.conf contains isdn_layer_fix=2 at start-up: if the kernel
# log then shows "frame wrong EA0", isgw is killed and a marker file asks
# run_isgw to reload the drivers before the next start.
# NOTE(review): the kernel log is not cleared here, so the same message may
# presumably match again on the following rounds -- confirm whether the
# driver reload takes care of that.
#
run_tasks() {
	local tick=0

	CHECK_ISDN_LAYER_ERROR=0
	if cat /usr/conf/isgw.conf | grep '^experimental_options' | grep 'isdn_layer_fix=2' &> /dev/null; then
		CHECK_ISDN_LAYER_ERROR=1
	fi

	echo CHECK_ISDN_LAYER_ERROR $CHECK_ISDN_LAYER_ERROR

	while true; do
		sleep 60
		tick=$((tick + 1))

		# once a minute
		check_if_config_update_triggered

		# everything else only on every second round
		if (( tick % 2 == 0 )); then
			check_and_cut_file ${LOG_DIR}/isgw.err $MAX_ISGW_ERR_SIZE
			collect_system_state

			if [ $CHECK_ISDN_LAYER_ERROR = 1 ] && dmesg | grep "frame wrong EA0" &> /dev/null; then
				echo "=========================================" >> ${LOG_DIR}/isgw.err
				echo "killing isgw and reloading drivers because of isdn_layer_fix=2" >> ${LOG_DIR}/isgw.err
				safe_isgw_kill_really_every_isgw
				touch ${LOG_DIR}/isdn_layer_fix_reload_drivers
			fi

			generate_debug
		fi
	done
}


# Files used by the "syscall mode 5" helper: the generated worker script,
# its pid file and the directory it watches for *.do job files.
SYSCMD5_SH="/tmp/isgw.syscallmode5.sh"
SYSCMD5_PIDFILE="/tmp/isgw.syscallmode5.pid"
SYSCMD5_DIR="/tmp/isgw.syscallmode5/"

#
# (Re)start the syscall-mode-5 worker.
#
# Writes a small bash script to $SYSCMD5_SH, kills a previous worker whose
# pid is recorded in $SYSCMD5_PIDFILE, empties and recreates $SYSCMD5_DIR
# and finally launches the script in the background.
#
# The generated script records its own pid, changes into the job directory
# and then, once per second, sources every *.do file it finds (deleting each
# one afterwards). It leaves the loop as soon as a file named "stop" exists
# and then removes its pid file and the job directory.
# NOTE(review): presumably isgw drops the *.do files when it runs with
# syscall_mode=5 -- not visible from this file.
#
function run_syscall_mode5_loop {

# The worker script is emitted verbatim (single quotes: nothing is expanded
# here); it therefore repeats the three path definitions from above.
echo '#!/bin/bash

SYSCMD5_SH="/tmp/isgw.syscallmode5.sh"
SYSCMD5_PIDFILE="/tmp/isgw.syscallmode5.pid"
SYSCMD5_DIR="/tmp/isgw.syscallmode5/"

SECONDS=0

mkdir -p "$SYSCMD5_DIR"
echo $$ > "$SYSCMD5_PIDFILE"
cd /tmp/isgw.syscallmode5

while true; do
# 	echo NOW $SECONDS $$
	if [ -e stop ]; then
		echo exiting
		break
	fi
	
	for file in *.do; do
		if [ $file = "*.do" ]; then
			break;
		fi
		echo "RUNNING: " $file
		cat $file
		source "$file" &> /dev/null
		rm -f "$file"
	done;
	sleep 1
done

rm -f "$SYSCMD5_PIDFILE"
rm -rf "$SYSCMD5_DIR"
' > $SYSCMD5_SH

	# Try to kill any old instance. The recorded pid is only killed when its
	# command line still mentions "syscallmode5", so an unrelated process
	# that reused the pid is left alone.
	if [ -f "$SYSCMD5_PIDFILE" ]; then
		pid=`cat "$SYSCMD5_PIDFILE"`
		cat /proc/"$pid"/cmdline|grep syscallmode5 &> /dev/null
		if [ $? = 0 ]; then
			kill -9 `cat "$SYSCMD5_PIDFILE"`
		fi
		rm -f "$SYSCMD5_PIDFILE"
	fi
	# Clean up: drop pending jobs (and a stale "stop" marker along with the
	# directory), then recreate the directory.
	rm -f "$SYSCMD5_DIR"/*.do
	rm -rf "$SYSCMD5_DIR"
	mkdir -p "$SYSCMD5_DIR"


	# Make the generated script executable and start it in the background.
	chmod u+x $SYSCMD5_SH
	$SYSCMD5_SH&
}


# Give isgw five seconds to come up, then look it up in the ps listing
# (ignoring the /usr/bin/time wrapper and the grep itself) and store the
# first column of the first match in $PIDFILE. The old pid file is removed
# first, so no file is left behind when no isgw process is found.
create_isgw_pid_file () {
	local found_pid

	sleep 5
	\rm $PIDFILE

	found_pid=$(ps aux | grep "$ISGW_BIN" | grep -v time | grep -v grep | head -1 | awk '{print $1}')
	[ -z "$found_pid" ] || echo $found_pid > $PIDFILE
}


#
# Supervisor loop for the isgw daemon.
#
# Starts run_tasks as a background job and then loops forever:
#   1. prepare the run: remove the pid file, change into ${LOG_DIR}, bring
#      up eth1, program the safeguard timer, enable/disable the safeguard
#      according to beroconf, (re)start the syscall-mode-5 worker, kill
#      left-over isgw instances, reload the drivers if run_tasks asked for
#      it, and set or clear the SIP debug environment variables;
#   2. run isgw in the foreground under /usr/bin/time, with stderr appended
#      to ${LOG_DIR}/isgw.err;
#   3. evaluate the exit status:
#        0     - clean shutdown: disable the safeguard and exit 0
#        1     - wait 20 seconds, then restart
#        >128  - ended by a signal: collect debug data, then restart
#        other - collect debug data, then restart
#   4. when the safeguard is enabled and the system has been up for more
#      than 300 seconds, run `isgw -A`; exit code 116 or 110 of that test
#      triggers a reboot.
#
# Globals read: PIDFILE, LOG_DIR, CONF, ISGW_BIN, SYSCMD5_DIR,
#               SAVEGUARD_INITIAL_TIMEOUT, SAVEGUARD_AFTER_TIMEOUT
# Returns: never; `exit 0` ends the calling (sub)shell after a clean isgw
#          shutdown. The background run_tasks job is not stopped by that.
#
run_isgw()
{
	run_tasks &

	while :; do

		\rm -f $PIDFILE
		# isgw is started from the log directory
		cd ${LOG_DIR}

		echo "=========================================" >> ${LOG_DIR}/isgw.err
		date >> ${LOG_DIR}/isgw.err
		/sbin/ifconfig eth1 up

		safeguard_set_interval $SAVEGUARD_INITIAL_TIMEOUT

		if [ "$(/usr/fallback/beroconf get root safeguard)" == 1 ]; then
			safeguard_enabled=1
			safeguard_enable
		else
			safeguard_enabled=0
			safeguard_disable
		fi

		# Syscall mode 5 is the default: the worker is only left out when
		# the experimental_options line names a syscall_mode other than 5.
		syscall_mode5=1
		if grep '^experimental_options' /usr/conf/isgw.conf | grep -q 'syscall_mode='; then
			if ! grep '^experimental_options' /usr/conf/isgw.conf | grep -q 'syscall_mode=5'; then
				syscall_mode5=0
			fi
		fi

		if [ "$syscall_mode5" = 1 ]; then
			run_syscall_mode5_loop&
		fi

		# we kill here any already existing instance of isgw
		safe_isgw_kill_really_every_isgw

		echo Starting ISGW \($(cat /usr/local/FILENAME)\) >> ${LOG_DIR}/isgw.err

		# driver reload requested by run_tasks (isdn_layer_fix=2 handling)
		if [ -f ${LOG_DIR}/isdn_layer_fix_reload_drivers ]; then
			rm -f ${LOG_DIR}/isdn_layer_fix_reload_drivers
			/usr/local/sbin/li-stop
			/usr/local/sbin/li-start
		fi

		# Advanced SIP debugging: decided by the first experimental_options
		# line. Every variable that is exported when the option is on is
		# removed again when it is off, so a debug level from an earlier
		# loop iteration cannot leak into a later isgw start.
		if grep '^experimental_options' /usr/conf/isgw.conf | head -1 | grep -q 'sip_debug=1'; then
			export SOFIA_DEBUG=9
			export NUA_DEBUG=9
			export SOA_DEBUG=9
			export NEA_DEBUG=9
			export IPTSEC_DEBUG=9
			export NTA_DEBUG=9
			export TPORT_DEBUG=9
			export TPORT_LOG=9
			export SU_DEBUG=9
		else
			unset SOFIA_DEBUG
			unset NUA_DEBUG
			unset SOA_DEBUG
			unset NEA_DEBUG
			unset IPTSEC_DEBUG
			unset NTA_DEBUG
			unset TPORT_DEBUG
			unset TPORT_LOG
			unset SU_DEBUG
		fi

		# records the isgw pid a few seconds after the start below
		create_isgw_pid_file&

		# if full-trace is on, we start isgw with full logging on
		ARGS="-e $CONF"
		if beroconf get ari:key_val trace | grep -q '^1$'; then
			ARGS="-e $CONF -l9 -w2"
		fi
		# $ARGS is left unquoted on purpose: it holds several arguments
		/usr/bin/time "$ISGW_BIN" $ARGS > /dev/null 2>> ${LOG_DIR}/isgw.err
		EXITSTATUS=$?

		\rm -f $PIDFILE

		date >> ${LOG_DIR}/isgw.err
		echo "ISGW ended with exit status $EXITSTATUS" >> ${LOG_DIR}/isgw.err

		echo "ISGW ended with exit status $EXITSTATUS"
		safeguard_set_interval $SAVEGUARD_AFTER_TIMEOUT

		# we kill here any remaining instance of isgw
		safe_isgw_kill_really_every_isgw
		if [ "$syscall_mode5" = 1 ]; then
			# tells the syscall-mode-5 worker to leave its loop
			touch $SYSCMD5_DIR/stop
		fi

		if [ "$EXITSTATUS" = "0" ]; then
			# Properly shutdown....
			echo "ISGW shutdown normally."
			safeguard_disable
			exit 0
		elif [ "$EXITSTATUS" = "1" ]; then
			echo "Waiting 20 second until next start" >> ${LOG_DIR}/isgw.err
			sleep 20
		elif [ "$EXITSTATUS" -gt 128 ]; then
			EXITSIGNAL=$((EXITSTATUS - 128))
			echo "ISGW exited on signal $EXITSIGNAL."
			sleep 1
			safe_debug_data
		else
			echo "ISGW died with code $EXITSTATUS."
			safe_debug_data
			sleep 1
		fi

		safeguard_disable

		# uptime in whole seconds
		ut=$(sed 's/\..*//' /proc/uptime)
		echo uptime $ut -- exit $EXITSTATUS -- mspcheck $safeguard_enabled >> ${LOG_DIR}/isgw.err

		# Additional safeguard: msp test, only after 300 seconds of uptime.
		# NOTE(review): this calls `isgw` through PATH rather than
		# "$ISGW_BIN" -- confirm that this is intended in dev mode.
		if [[ "$safeguard_enabled" = 1 && "$ut" -gt 300 ]]; then
			isgw -e $CONF -A
			ret=$?
			if [[ $ret = 116 || $ret = 110 ]]; then
				echo mspcheck '=>' rebooting isgw because of exit cause $ret on msp test >> ${LOG_DIR}/isgw.err
				reboot
			else
				echo mspcheck '=>' not rebooting isgw: exit cause $ret on msp test >> ${LOG_DIR}/isgw.err
			fi
		fi

		echo "Automatically restarting ISGW."
		sleep 4
	done
}

# Launch the isgw supervisor loop as a background job.
run_isgw &
