From 9f458ddf0768c1e53519860f31c96c93623af439 Mon Sep 17 00:00:00 2001 From: Yingge He Date: Fri, 24 Oct 2025 14:56:41 -0700 Subject: [PATCH 1/4] Allow more time for cold start on SBSA --- ci/L0_check_health_vllm/test.sh | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/ci/L0_check_health_vllm/test.sh b/ci/L0_check_health_vllm/test.sh index 81bf4489..a311e3a1 100755 --- a/ci/L0_check_health_vllm/test.sh +++ b/ci/L0_check_health_vllm/test.sh @@ -35,8 +35,7 @@ RET=0 function setup_model_repository { local sample_model_repo_path=${1:-"../../samples/model_repository"} - rm -rf models vllm_baseline_output.pkl && mkdir -p models - cp -r $sample_model_repo_path/vllm_model models/vllm_opt + mkdir -p models && cp -r $sample_model_repo_path/vllm_model models/vllm_opt } function enable_health_check { @@ -93,17 +92,20 @@ function test_check_health { wait $SERVER_PID } -# Test health check unspecified setup_model_repository + +# Test health check unspecified +# Cold start on SBSA device can take longer than default 120 seconds +PREV_SERVER_TIMEOUT=$SERVER_TIMEOUT +SERVER_TIMEOUT=240 test_check_health "health_check_unspecified" "test_vllm_is_healthy" +SERVER_TIMEOUT=$PREV_SERVER_TIMEOUT # Test health check disabled -setup_model_repository enable_health_check "false" test_check_health "health_check_disabled" "test_vllm_is_healthy" # Test health check enabled -setup_model_repository enable_health_check "true" test_check_health "health_check_enabled" "test_vllm_is_healthy" @@ -111,16 +113,13 @@ test_check_health "health_check_enabled" "test_vllm_is_healthy" mock_vllm_async_llm_engine # Test health check unspecified with mocked vLLM check_health() failure -setup_model_repository test_check_health "health_check_unspecified_mocked_failure" "test_vllm_is_healthy" # Test health check disabled with mocked vLLM check_health() failure -setup_model_repository enable_health_check "false" test_check_health "health_check_disabled_mocked_failure" 
"test_vllm_is_healthy" # Test health check enabled with mocked vLLM check_health() failure -setup_model_repository enable_health_check "true" test_check_health "health_check_enabled_mocked_failure" "test_vllm_not_healthy" From 67e1697d7da7b0f8b0cc7ce2b4ade5d08f47d8f8 Mon Sep 17 00:00:00 2001 From: Yingge He Date: Fri, 24 Oct 2025 19:40:46 -0700 Subject: [PATCH 2/4] Set up model repository before each health check test --- ci/L0_check_health_vllm/test.sh | 10 +++++++--- 1 file changed, 7 insertions(+), 3 deletions(-) diff --git a/ci/L0_check_health_vllm/test.sh b/ci/L0_check_health_vllm/test.sh index a311e3a1..e0345fd1 100755 --- a/ci/L0_check_health_vllm/test.sh +++ b/ci/L0_check_health_vllm/test.sh @@ -34,7 +34,7 @@ pip3 install tritonclient[grpc] RET=0 function setup_model_repository { - local sample_model_repo_path=${1:-"../../samples/model_repository"} + local sample_model_repo_path="../../samples/model_repository" mkdir -p models && cp -r $sample_model_repo_path/vllm_model models/vllm_opt } @@ -92,20 +92,21 @@ function test_check_health { wait $SERVER_PID } -setup_model_repository - # Test health check unspecified # Cold start on SBSA device can take longer than default 120 seconds PREV_SERVER_TIMEOUT=$SERVER_TIMEOUT SERVER_TIMEOUT=240 +setup_model_repository test_check_health "health_check_unspecified" "test_vllm_is_healthy" SERVER_TIMEOUT=$PREV_SERVER_TIMEOUT # Test health check disabled +setup_model_repository enable_health_check "false" test_check_health "health_check_disabled" "test_vllm_is_healthy" # Test health check enabled +setup_model_repository enable_health_check "true" test_check_health "health_check_enabled" "test_vllm_is_healthy" @@ -113,13 +114,16 @@ test_check_health "health_check_enabled" "test_vllm_is_healthy" mock_vllm_async_llm_engine # Test health check unspecified with mocked vLLM check_health() failure +setup_model_repository test_check_health "health_check_unspecified_mocked_failure" "test_vllm_is_healthy" # Test health check disabled with mocked vLLM check_health() failure
+setup_model_repository enable_health_check "false" test_check_health "health_check_disabled_mocked_failure" "test_vllm_is_healthy" # Test health check enabled with mocked vLLM check_health() failure +setup_model_repository enable_health_check "true" test_check_health "health_check_enabled_mocked_failure" "test_vllm_not_healthy" From b7b7147de8b704e3998d78b805365c46d3399f62 Mon Sep 17 00:00:00 2001 From: Yingge He Date: Fri, 24 Oct 2025 20:53:53 -0700 Subject: [PATCH 3/4] Recreate models directory in setup_model_repository --- ci/L0_check_health_vllm/test.sh | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/ci/L0_check_health_vllm/test.sh b/ci/L0_check_health_vllm/test.sh index e0345fd1..9136b82c 100755 --- a/ci/L0_check_health_vllm/test.sh +++ b/ci/L0_check_health_vllm/test.sh @@ -35,7 +35,8 @@ RET=0 function setup_model_repository { local sample_model_repo_path="../../samples/model_repository" - mkdir -p models && cp -r $sample_model_repo_path/vllm_model models/vllm_opt + rm -rf models && mkdir -p models + cp -r $sample_model_repo_path/vllm_model models/vllm_opt } function enable_health_check { From e870f3741a05687703392a1dc85bcfc77ee0f904 Mon Sep 17 00:00:00 2001 From: Yingge He Date: Fri, 24 Oct 2025 20:56:46 -0700 Subject: [PATCH 4/4] Remove stale logs and reports before running tests --- ci/L0_check_health_vllm/test.sh | 1 + 1 file changed, 1 insertion(+) diff --git a/ci/L0_check_health_vllm/test.sh b/ci/L0_check_health_vllm/test.sh index 9136b82c..3918d3d8 100755 --- a/ci/L0_check_health_vllm/test.sh +++ b/ci/L0_check_health_vllm/test.sh @@ -31,6 +31,7 @@ source ../common/util.sh pip3 install pytest==8.1.1 pip3 install tritonclient[grpc] +rm -f *.log *.report.xml RET=0 function setup_model_repository {