diff --git a/ci/L0_multi_gpu_vllm/multi_lora/test.sh b/ci/L0_multi_gpu_vllm/multi_lora/test.sh
index 8cf2c3fa..c045c4fc 100755
--- a/ci/L0_multi_gpu_vllm/multi_lora/test.sh
+++ b/ci/L0_multi_gpu_vllm/multi_lora/test.sh
@@ -41,6 +41,13 @@ EXPECTED_NUM_TESTS=2
 GENERATE_ENDPOINT="localhost:8000/v2/models/vllm_llama_multi_lora/generate"
 CHECK_FOR_ERROR=true
 
+# Put the CUDA headers first on the C include search path and pin the ptxas
+# binary (presumably for Triton-language JIT compilation -- TODO confirm).
+# ${VAR:+...} avoids a trailing ':' when C_INCLUDE_PATH is unset or empty;
+# GCC treats an empty path element as the current directory.
+export C_INCLUDE_PATH="/usr/local/cuda/include${C_INCLUDE_PATH:+:${C_INCLUDE_PATH}}"
+export TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
+
 make_api_call() {
     local endpoint="$1"
     local data="$2"
diff --git a/ci/L0_multi_gpu_vllm/vllm_backend/test.sh b/ci/L0_multi_gpu_vllm/vllm_backend/test.sh
index 0609bebf..e4de2ad2 100755
--- a/ci/L0_multi_gpu_vllm/vllm_backend/test.sh
+++ b/ci/L0_multi_gpu_vllm/vllm_backend/test.sh
@@ -36,6 +36,13 @@ CLIENT_PY="./vllm_multi_gpu_test.py"
 SAMPLE_MODELS_REPO="../../../samples/model_repository"
 EXPECTED_NUM_TESTS=1
 
+# Put the CUDA headers first on the C include search path and pin the ptxas
+# binary (presumably for Triton-language JIT compilation -- TODO confirm).
+# ${VAR:+...} avoids a trailing ':' when C_INCLUDE_PATH is unset or empty;
+# GCC treats an empty path element as the current directory.
+export C_INCLUDE_PATH="/usr/local/cuda/include${C_INCLUDE_PATH:+:${C_INCLUDE_PATH}}"
+export TRITON_PTXAS_PATH=/usr/local/cuda/bin/ptxas
+
 ### Helpers
 function validate_file_contains() {
     local KEY="${1}"