ci: Partially fix L0_additional_outputs_vllm--sbsa (#111)

yinggeh · web-flow · commit b2414c5665b3 · 2025-11-17T14:38:56.000-08:00
diff --git a/ci/L0_additional_outputs_vllm/test.sh b/ci/L0_additional_outputs_vllm/test.sh
@@ -32,7 +32,7 @@ pip3 install pytest==8.1.1
 pip3 install tritonclient[grpc]
 
 # Prepare Model
-rm -rf models vllm_baseline_output.pkl && mkdir -p models
+rm -rf models && mkdir -p models
 SAMPLE_MODELS_REPO="../../samples/model_repository"
 cp -r $SAMPLE_MODELS_REPO/vllm_model models/vllm_opt
 sed -i 's/"gpu_memory_utilization": 0.5/"gpu_memory_utilization": 0.3/' models/vllm_opt/1/model.json
@@ -42,7 +42,11 @@ RET=0
 # Test
 SERVER_LOG="additional_outputs_test.server.log"
 SERVER_ARGS="--model-repository=models"
+# Cold start on SBSA devices can take longer than the default 120 seconds
+PREV_SERVER_TIMEOUT=$SERVER_TIMEOUT
+SERVER_TIMEOUT=240
 run_server
+SERVER_TIMEOUT=$PREV_SERVER_TIMEOUT
 if [ "$SERVER_PID" == "0" ]; then
     echo -e "\n***\n*** Failed to start $SERVER\n***"
     cat $SERVER_LOG
@@ -61,6 +65,7 @@ wait $SERVER_PID
 if [ $RET -eq 0 ]; then
     echo -e "\n***\n*** Test Passed\n***"
 else
+    cat $SERVER_LOG
     echo -e "\n***\n*** Test FAILED\n***"
 fi
 exit $RET
diff --git a/ci/L0_check_health_vllm/test.sh b/ci/L0_check_health_vllm/test.sh
@@ -135,6 +135,7 @@ unmock_vllm_async_llm_engine
 if [ $RET -eq 0 ]; then
     echo -e "\n***\n*** Test Passed\n***"
 else
+    cat $SERVER_LOG
     echo -e "\n***\n*** Test FAILED\n***"
 fi
 exit $RET
diff --git a/ci/L0_multi_gpu_vllm/vllm_backend/test.sh b/ci/L0_multi_gpu_vllm/vllm_backend/test.sh
@@ -1,5 +1,5 @@
 #!/bin/bash
-# Copyright 2023-2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 #
 # Redistribution and use in source and binary forms, with or without
 # modification, are permitted provided that the following conditions
@@ -138,6 +138,8 @@ done
 
 ### Results
 if [ $RET -eq 1 ]; then
+    cat $CLIENT_LOG
+    cat $SERVER_LOG
     echo -e "\n***\n*** Multi GPU Utilization test FAILED. \n***"
 else
     echo -e "\n***\n*** Multi GPU Utilization test PASSED. \n***"