@@ -37,7 +37,7 @@ TEST_RESULT_FILE='test_results.txt'
 CLIENT_PY="./multi_lora_test.py"
 DOWNLOAD_PY="./download.py"
 SAMPLE_MODELS_REPO="../../../samples/model_repository"
-EXPECTED_NUM_TESTS=2
+EXPECTED_NUM_TESTS=4
 
 # first we download weights
 pip install -U huggingface_hub
@@ -52,6 +52,7 @@ cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_llama_multi_lora
 
 export SERVER_ENABLE_LORA=true
 
+# Check boolean flag value for `enable_lora`
 model_json=$(cat <<EOF
 {
     "model":"./weights/backbone/gemma-2b",
@@ -110,8 +111,54 @@ set -e
 kill $SERVER_PID
 wait $SERVER_PID
 
+# Check string flag value for `enable_lora`
+model_json=$(cat <<EOF
+{
+    "model":"./weights/backbone/gemma-2b",
+    "disable_log_requests": true,
+    "gpu_memory_utilization": 0.7,
+    "tensor_parallel_size": 2,
+    "block_size": 16,
+    "enforce_eager": true,
+    "enable_lora": "true",
+    "max_lora_rank": 32,
+    "lora_extra_vocab_size": 256,
+    "distributed_executor_backend":"ray"
+}
+EOF
+)
+echo "$model_json" > models/vllm_llama_multi_lora/1/model.json
+
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    cat $SERVER_LOG
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    exit 1
+fi
+
+set +e
+python3 $CLIENT_PY -v > $CLIENT_LOG 2>&1
+
+if [ $? -ne 0 ]; then
+    cat $CLIENT_LOG
+    echo -e "\n***\n*** Running $CLIENT_PY FAILED. \n***"
+    RET=1
+else
+    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
+    if [ $? -ne 0 ]; then
+        cat $CLIENT_LOG
+        echo -e "\n***\n*** Test Result Verification FAILED.\n***"
+        RET=1
+    fi
+fi
+set -e
+
+kill $SERVER_PID
+wait $SERVER_PID
+
 # disable lora
 export SERVER_ENABLE_LORA=false
+# check bool flag value for `enable_lora`
 model_json=$(cat <<EOF
 {
     "model":"./weights/backbone/gemma-2b",
@@ -155,6 +202,52 @@ set -e
 kill $SERVER_PID
 wait $SERVER_PID
 
+# disable lora
+export SERVER_ENABLE_LORA=false
+# check string flag value for `enable_lora`
+model_json=$(cat <<EOF
+{
+    "model":"./weights/backbone/gemma-2b",
+    "disable_log_requests": true,
+    "gpu_memory_utilization": 0.8,
+    "tensor_parallel_size": 2,
+    "block_size": 16,
+    "enforce_eager": true,
+    "enable_lora": "false",
+    "lora_extra_vocab_size": 256,
+    "distributed_executor_backend":"ray"
+}
+EOF
+)
+echo "$model_json" > models/vllm_llama_multi_lora/1/model.json
+
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    cat $SERVER_LOG
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    exit 1
+fi
+
+set +e
+python3 $CLIENT_PY -v >> $CLIENT_LOG 2>&1
+
+if [ $? -ne 0 ]; then
+    cat $CLIENT_LOG
+    echo -e "\n***\n*** Running $CLIENT_PY FAILED. \n***"
+    RET=1
+else
+    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
+    if [ $? -ne 0 ]; then
+        cat $CLIENT_LOG
+        echo -e "\n***\n*** Test Result Verification FAILED.\n***"
+        RET=1
+    fi
+fi
+set -e
+
+kill $SERVER_PID
+wait $SERVER_PID
+
 rm -rf models/
 rm -rf weights/
 
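
Note (illustrative, not part of the patch): the added cases differ from the existing ones only in passing `enable_lora` as a JSON string ("true"/"false") rather than a JSON boolean, so the backend's model.json handling presumably has to accept both forms; hence EXPECTED_NUM_TESTS doubling from 2 to 4. A quick way to see the distinction for the file this script writes (path taken from the script above) is:

    python3 -c 'import json, sys; v = json.load(open(sys.argv[1]))["enable_lora"]; print(type(v).__name__, repr(v))' \
        models/vllm_llama_multi_lora/1/model.json

This prints "bool True" for the boolean variant and "str 'true'" for the string variant.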