@@ -146,6 +146,33 @@ function wait_for_model_stable() {
146146 echo " === Timeout $wait_time_secs secs. Not all models stable."
147147}
148148
#######################################
# Best-effort collection of gdb diagnostics for the server process.
#
# Two independent cases are handled:
#   1. Hang:     if SERVER_PID refers to a live process, dump an all-threads
#                backtrace to gdb_bt.<pid>.log and write a core via `gcore`.
#   2. Segfault: for every core.* file already in the current directory,
#                load it with the SERVER binary and dump an all-threads
#                backtrace to <corefile>.log.
#
# Globals:
#   SERVER_PID (read)    - pid of the server; empty or 0 means "no server".
#   SERVER     (read)    - path to the server executable, used to load cores.
#   GDB_LOG    (written) - name of the most recently written log file.
# Outputs:
#   Warnings and backtraces on stdout; log and core files in $PWD.
# Returns:
#   Always 0. Every gdb step is allowed to fail.
#######################################
function gdb_helper () {
    # `command -v` prints the resolved path; silence it so only our own
    # messages reach the test log.
    if ! command -v gdb > /dev/null 2>&1; then
        echo "=== WARNING: gdb not installed"
        return 0
    fi

    # Core file produced by gcore below (if any), so that the segfault scan
    # does not mistake it for a crash dump.
    local hang_core=""

    ### Server Hang ###
    # Require a positive numeric pid: `kill -0 0` targets the caller's own
    # process group and therefore always succeeds, which would attach gdb
    # to pid 0 when SERVER_PID holds the "error" value 0.
    if [[ "${SERVER_PID:-}" =~ ^[1-9][0-9]*$ ]] \
            && kill -0 "${SERVER_PID}" 2> /dev/null; then
        # If server process is still alive, try to get backtrace and core
        # dump from it.
        GDB_LOG="gdb_bt.${SERVER_PID}.log"
        echo -e "=== WARNING: SERVER HANG DETECTED, DUMPING GDB BACKTRACE TO [${PWD}/${GDB_LOG}] ==="
        # Dump backtrace log for quick analysis. Allow these commands to fail.
        gdb -batch -ex "thread apply all bt" -p "${SERVER_PID}" 2>&1 \
            | tee "${GDB_LOG}" || true

        # Generate core dump for deeper analysis. Default filename is
        # "core.${PID}".
        gdb -batch -ex "gcore" -p "${SERVER_PID}" || true
        hang_core="core.${SERVER_PID}"
    fi

    ### Server Segfaulted ###
    # If there are any core dumps locally from a segfault, load them and get
    # a backtrace. Glob directly instead of parsing `ls` so that names are
    # not word-split and a missing match is not an error.
    local corefile
    for corefile in core.*; do
        # An unmatched glob is left as the literal pattern; also ignore
        # anything that is not a regular file.
        if [[ ! -f "${corefile}" ]]; then
            continue
        fi
        # core.<pid>.log files are backtrace logs written by an earlier
        # call of this function, not core dumps.
        if [[ "${corefile}" == *.log ]]; then
            continue
        fi
        # The core written by gcore above belongs to a hang, not a segfault,
        # and its backtrace is already in gdb_bt.<pid>.log.
        if [[ -n "${hang_core}" && "${corefile}" == "${hang_core}" ]]; then
            continue
        fi

        GDB_LOG="${corefile}.log"
        echo -e "=== WARNING: SEGFAULT DETECTED, DUMPING GDB BACKTRACE TO [${PWD}/${GDB_LOG}] ==="
        gdb -batch "${SERVER}" "${corefile}" -ex "thread apply all bt" \
            | tee "${GDB_LOG}" || true
    done

    return 0
}
175+
149176# Run inference server. Return once server's health endpoint shows
150177# ready or timeout expires. Sets SERVER_PID to pid of SERVER, or 0 if
151178# error (including expired timeout)
@@ -173,17 +200,8 @@ function run_server () {
173200
174201 wait_for_server_ready $SERVER_PID $SERVER_TIMEOUT
175202 if [ " $WAIT_RET " != " 0" ]; then
176- # If gdb is installed, collect a backtrace from the hanging process
177- if command -v gdb; then
178- GDB_LOG=" gdb_bt.${SERVER_PID} .log"
179- echo -e " === WARNING: SERVER FAILED TO START, DUMPING GDB BACKTRACE TO [${PWD} /${GDB_LOG} ] ==="
180- # Dump backtrace log for quick analysis. Allow these commands to fail.
181- gdb -batch -ex " thread apply all bt" -p " ${SERVER_PID} " 2>&1 >> " ${GDB_LOG} " || true
182- # Generate core dump for deeper analysis. Default filename is "core.${PID}"
183- gdb -batch -ex " gcore" -p " ${SERVER_PID} " || true
184- else
185- echo -e " === ERROR: SERVER FAILED TO START, BUT GDB NOT FOUND ==="
186- fi
203+ # Get further debug information about server startup failure
204+ gdb_helper || true
187205
188206 # Cleanup
189207 kill $SERVER_PID || true
0 commit comments