Skip to content

Commit dfa151e

Browse files
authored
Improve GDB usage in CI (#5429)
* Don't spam the gdb log with errors if the process has already exited * Try to get a backtrace from any dumped core files
1 parent 71243fb commit dfa151e

File tree

1 file changed

+29
-11
lines changed

1 file changed

+29
-11
lines changed

qa/common/util.sh

Lines changed: 29 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -146,6 +146,33 @@ function wait_for_model_stable() {
146146
echo "=== Timeout $wait_time_secs secs. Not all models stable."
147147
}
148148

149+
# Collect gdb debug information about the inference server (best effort).
#
# Two situations are handled:
#   * Hang:     the process ${SERVER_PID} is still alive -> dump a backtrace of
#               all threads to gdb_bt.<pid>.log and generate a core dump.
#   * Segfault: core.* files exist in the current directory -> load each one
#               with ${SERVER} and dump a backtrace to <corefile>.log.
#
# Globals:  SERVER_PID (read), SERVER (read), GDB_LOG (written)
# Outputs:  warnings and gdb output on stdout; log files in ${PWD}
# Returns:  0 always; every gdb invocation is allowed to fail.
function gdb_helper () {
    # Silence 'command -v' so the gdb path is not printed into the CI log.
    if ! command -v gdb > /dev/null 2>&1; then
        echo "=== WARNING: gdb not installed"
        return
    fi

    local server_pid="${SERVER_PID:-0}"
    local hang_core=""
    local corefile

    ### Server Hang ###
    # Only probe real PIDs: SERVER_PID is 0 on error, and 'kill -0 0' would
    # signal-check our own process group and always succeed. stderr is dropped
    # so an already-exited server does not spam "No such process".
    if [[ "${server_pid}" =~ ^[1-9][0-9]*$ ]] && kill -0 "${server_pid}" 2> /dev/null; then
        # If server process is still alive, try to get backtrace and core dump from it
        GDB_LOG="gdb_bt.${server_pid}.log"
        echo -e "=== WARNING: SERVER HANG DETECTED, DUMPING GDB BACKTRACE TO [${PWD}/${GDB_LOG}] ==="
        # Dump backtrace log for quick analysis. Allow these commands to fail.
        gdb -batch -ex "thread apply all bt" -p "${server_pid}" 2>&1 | tee "${GDB_LOG}" || true

        # Generate core dump for deeper analysis. Default filename is "core.${PID}"
        gdb -batch -ex "gcore" -p "${server_pid}" || true
        # Remember the dump we just produced so it is not reported as a segfault below.
        hang_core="core.${server_pid}"
    fi

    ### Server Segfaulted ###
    # If there are any core dumps locally from a segfault, load them and get a backtrace
    for corefile in core.*; do
        # An unmatched glob stays literal ("core.*"); skip it and any non-regular file.
        if [[ ! -f "${corefile}" ]]; then
            continue
        fi
        # Skip the backtrace logs written by a previous invocation of this loop.
        if [[ "${corefile}" == *.log ]]; then
            continue
        fi
        # Skip the core generated by the hang branch above: it is not a crash.
        if [[ -n "${hang_core}" && "${corefile}" == "${hang_core}" ]]; then
            continue
        fi

        GDB_LOG="${corefile}.log"
        echo -e "=== WARNING: SEGFAULT DETECTED, DUMPING GDB BACKTRACE TO [${PWD}/${GDB_LOG}] ==="
        # Pass the executable only when SERVER is set; '-c' names the core file explicitly.
        gdb -batch ${SERVER:+"${SERVER}"} -c "${corefile}" -ex "thread apply all bt" | tee "${GDB_LOG}" || true
    done

    return 0
}
175+
149176
# Run inference server. Return once server's health endpoint shows
150177
# ready or timeout expires. Sets SERVER_PID to pid of SERVER, or 0 if
151178
# error (including expired timeout)
@@ -173,17 +200,8 @@ function run_server () {
173200

174201
wait_for_server_ready $SERVER_PID $SERVER_TIMEOUT
175202
if [ "$WAIT_RET" != "0" ]; then
176-
# If gdb is installed, collect a backtrace from the hanging process
177-
if command -v gdb; then
178-
GDB_LOG="gdb_bt.${SERVER_PID}.log"
179-
echo -e "=== WARNING: SERVER FAILED TO START, DUMPING GDB BACKTRACE TO [${PWD}/${GDB_LOG}] ==="
180-
# Dump backtrace log for quick analysis. Allow these commands to fail.
181-
gdb -batch -ex "thread apply all bt" -p "${SERVER_PID}" 2>&1 >> "${GDB_LOG}" || true
182-
# Generate core dump for deeper analysis. Default filename is "core.${PID}"
183-
gdb -batch -ex "gcore" -p "${SERVER_PID}" || true
184-
else
185-
echo -e "=== ERROR: SERVER FAILED TO START, BUT GDB NOT FOUND ==="
186-
fi
203+
# Get further debug information about server startup failure
204+
gdb_helper || true
187205

188206
# Cleanup
189207
kill $SERVER_PID || true

0 commit comments

Comments
 (0)