Skip to content

Commit 68d7228

Browse files
[0.6.0-UT] Adding abort support to run_multi_gpu (#529)
1 parent f697dde commit 68d7228

File tree

1 file changed

+22
-3
lines changed

1 file changed

+22
-3
lines changed

build/rocm/run_multi_gpu.sh

Lines changed: 22 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -28,7 +28,7 @@ detect_amd_gpus() {
2828
echo "Error: lspci command not found. Aborting."
2929
exit 1
3030
fi
31-
# Count AMD GPUs.
31+
# Count AMD/ATI GPU controllers.
3232
local count
3333
count=$(rocm-smi | grep -E '^Device' -A 1000 | awk '$1 ~ /^[0-9]+$/ {count++} END {print count}')
3434
echo "$count"
@@ -73,17 +73,36 @@ run_tests() {
7373
echo "Running multi-GPU test: $test_file"
7474

7575
# Define file paths for abort detection (files created by conftest.py)
76+
last_running_file="${LOG_DIR}/${test_name}_last_running.json"
7677
json_log_file="${LOG_DIR}/multi_gpu_${test_name}_log.json"
7778
html_log_file="${LOG_DIR}/multi_gpu_${test_name}_log.html"
7879

79-
# Run the test
80+
# Run the test (conftest.py will create the last_running_file automatically)
8081
python3 -m pytest \
8182
--html="$html_log_file" \
8283
--json-report \
8384
--json-report-file="$json_log_file" \
8485
--reruns 3 \
8586
"$test_file"
86-
87+
88+
# Check for aborted test and handle it
89+
if [[ -f "$last_running_file" ]]; then
90+
echo "Abort detected for test: $test_name"
91+
# Get the absolute path of the script directory
92+
script_dir="$(cd "$(dirname "$0")" && pwd)"
93+
# Convert relative paths to absolute paths
94+
abs_json_log_file="$(realpath "$json_log_file")"
95+
abs_html_log_file="$(realpath "$html_log_file")"
96+
abs_last_running_file="$(realpath "$last_running_file")"
97+
98+
cd "$script_dir"
99+
python3 -c "
100+
from run_single_gpu import handle_abort
101+
import sys
102+
success = handle_abort('$abs_json_log_file', '$abs_html_log_file', '$abs_last_running_file', 'multi_gpu_$test_name')
103+
sys.exit(0 if success else 1)
104+
"
105+
fi
87106
done
88107

89108
# Merge individual HTML reports into one.

0 commit comments

Comments
 (0)