@@ -52,15 +52,16 @@ cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_llama_multi_lora
 
 export SERVER_ENABLE_LORA=true
 
+# Check boolean flag value for `enable_lora`
 model_json=$(cat <<EOF
 {
     "model":"./weights/backbone/gemma-2b",
-    "disable_log_requests": "true",
+    "disable_log_requests": true,
     "gpu_memory_utilization": 0.7,
     "tensor_parallel_size": 2,
     "block_size": 16,
-    "enforce_eager": "true",
-    "enable_lora": "true",
+    "enforce_eager": true,
+    "enable_lora": true,
     "max_lora_rank": 32,
     "lora_extra_vocab_size": 256,
     "distributed_executor_backend":"ray"
@@ -110,16 +111,108 @@ set -e
 kill $SERVER_PID
 wait $SERVER_PID
 
+# Check string flag value for `enable_lora`
+model_json=$(cat <<EOF
+{
+    "model":"./weights/backbone/gemma-2b",
+    "disable_log_requests": true,
+    "gpu_memory_utilization": 0.7,
+    "tensor_parallel_size": 2,
+    "block_size": 16,
+    "enforce_eager": true,
+    "enable_lora": "true",
+    "max_lora_rank": 32,
+    "lora_extra_vocab_size": 256,
+    "distributed_executor_backend":"ray"
+}
+EOF
+)
+echo "$model_json" > models/vllm_llama_multi_lora/1/model.json
+
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    cat $SERVER_LOG
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    exit 1
+fi
+
+set +e
+python3 $CLIENT_PY -v > $CLIENT_LOG 2>&1
+
+if [ $? -ne 0 ]; then
+    cat $CLIENT_LOG
+    echo -e "\n***\n*** Running $CLIENT_PY FAILED. \n***"
+    RET=1
+else
+    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
+    if [ $? -ne 0 ]; then
+        cat $CLIENT_LOG
+        echo -e "\n***\n*** Test Result Verification FAILED.\n***"
+        RET=1
+    fi
+fi
+set -e
+
+kill $SERVER_PID
+wait $SERVER_PID
+
+# disable lora
+export SERVER_ENABLE_LORA=false
+# check bool flag value for `enable_lora`
+model_json=$(cat <<EOF
+{
+    "model":"./weights/backbone/gemma-2b",
+    "disable_log_requests": true,
+    "gpu_memory_utilization": 0.8,
+    "tensor_parallel_size": 2,
+    "block_size": 16,
+    "enforce_eager": true,
+    "enable_lora": false,
+    "lora_extra_vocab_size": 256,
+    "distributed_executor_backend":"ray"
+}
+EOF
+)
+echo "$model_json" > models/vllm_llama_multi_lora/1/model.json
+
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    cat $SERVER_LOG
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    exit 1
+fi
+
+set +e
+python3 $CLIENT_PY -v >> $CLIENT_LOG 2>&1
+
+if [ $? -ne 0 ]; then
+    cat $CLIENT_LOG
+    echo -e "\n***\n*** Running $CLIENT_PY FAILED. \n***"
+    RET=1
+else
+    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
+    if [ $? -ne 0 ]; then
+        cat $CLIENT_LOG
+        echo -e "\n***\n*** Test Result Verification FAILED.\n***"
+        RET=1
+    fi
+fi
+set -e
+
+kill $SERVER_PID
+wait $SERVER_PID
+
 # disable lora
 export SERVER_ENABLE_LORA=false
+# check string flag value for `enable_lora`
 model_json=$(cat <<EOF
 {
     "model":"./weights/backbone/gemma-2b",
-    "disable_log_requests": "true",
+    "disable_log_requests": true,
     "gpu_memory_utilization": 0.8,
     "tensor_parallel_size": 2,
     "block_size": 16,
-    "enforce_eager": "true",
+    "enforce_eager": true,
     "enable_lora": "false",
     "lora_extra_vocab_size": 256,
     "distributed_executor_backend":"ray"
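
Context for the change above (not part of the diff): once model.json is parsed, a JSON boolean arrives in Python as a bool, while a JSON string such as "false" is a non-empty string and therefore truthy. A naive truthiness check would silently treat "enable_lora": "false" as enabled, which is presumably why the test exercises both the boolean and the string spelling of each flag. A minimal sketch of the pitfall, assuming the field is read with json.loads:

    import json

    # A JSON boolean parses to a Python bool; a JSON string stays a string.
    for raw in ('{"enable_lora": true}', '{"enable_lora": "false"}'):
        value = json.loads(raw)["enable_lora"]
        # bool("false") is True: any non-empty string is truthy in Python,
        # so a naive "if value:" check would misread the string form.
        print(raw, "->", repr(value), "truthy:", bool(value))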