Commit b61bafd

Added extra tests
1 parent 22ea5d4 commit b61bafd

File tree

1 file changed: +94 -1 lines changed

ci/L0_multi_gpu/multi_lora/test.sh

Lines changed: 94 additions & 1 deletion
@@ -37,7 +37,7 @@ TEST_RESULT_FILE='test_results.txt'
 CLIENT_PY="./multi_lora_test.py"
 DOWNLOAD_PY="./download.py"
 SAMPLE_MODELS_REPO="../../../samples/model_repository"
-EXPECTED_NUM_TESTS=2
+EXPECTED_NUM_TESTS=4
 
 # first we download weights
 pip install -U huggingface_hub
@@ -52,6 +52,7 @@ cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_llama_multi_lora
 
 export SERVER_ENABLE_LORA=true
 
+# Check boolean flag value for `enable_lora`
 model_json=$(cat <<EOF
 {
     "model":"./weights/backbone/gemma-2b",
@@ -110,8 +111,54 @@ set -e
 kill $SERVER_PID
 wait $SERVER_PID
 
+# Check string flag value for `enable_lora`
+model_json=$(cat <<EOF
+{
+    "model":"./weights/backbone/gemma-2b",
+    "disable_log_requests": true,
+    "gpu_memory_utilization": 0.7,
+    "tensor_parallel_size": 2,
+    "block_size": 16,
+    "enforce_eager": true,
+    "enable_lora": "true",
+    "max_lora_rank": 32,
+    "lora_extra_vocab_size": 256,
+    "distributed_executor_backend":"ray"
+}
+EOF
+)
+echo "$model_json" > models/vllm_llama_multi_lora/1/model.json
+
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    cat $SERVER_LOG
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    exit 1
+fi
+
+set +e
+python3 $CLIENT_PY -v > $CLIENT_LOG 2>&1
+
+if [ $? -ne 0 ]; then
+    cat $CLIENT_LOG
+    echo -e "\n***\n*** Running $CLIENT_PY FAILED. \n***"
+    RET=1
+else
+    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
+    if [ $? -ne 0 ]; then
+        cat $CLIENT_LOG
+        echo -e "\n***\n*** Test Result Verification FAILED.\n***"
+        RET=1
+    fi
+fi
+set -e
+
+kill $SERVER_PID
+wait $SERVER_PID
+
 # disable lora
 export SERVER_ENABLE_LORA=false
+# check bool flag value for `enable_lora`
 model_json=$(cat <<EOF
 {
     "model":"./weights/backbone/gemma-2b",
@@ -155,6 +202,52 @@ set -e
 kill $SERVER_PID
 wait $SERVER_PID
 
+# disable lora
+export SERVER_ENABLE_LORA=false
+# check string flag value for `enable_lora`
+model_json=$(cat <<EOF
+{
+    "model":"./weights/backbone/gemma-2b",
+    "disable_log_requests": true,
+    "gpu_memory_utilization": 0.8,
+    "tensor_parallel_size": 2,
+    "block_size": 16,
+    "enforce_eager": true,
+    "enable_lora": "false",
+    "lora_extra_vocab_size": 256,
+    "distributed_executor_backend":"ray"
+}
+EOF
+)
+echo "$model_json" > models/vllm_llama_multi_lora/1/model.json
+
+run_server
+if [ "$SERVER_PID" == "0" ]; then
+    cat $SERVER_LOG
+    echo -e "\n***\n*** Failed to start $SERVER\n***"
+    exit 1
+fi
+
+set +e
+python3 $CLIENT_PY -v >> $CLIENT_LOG 2>&1
+
+if [ $? -ne 0 ]; then
+    cat $CLIENT_LOG
+    echo -e "\n***\n*** Running $CLIENT_PY FAILED. \n***"
+    RET=1
+else
+    check_test_results $TEST_RESULT_FILE $EXPECTED_NUM_TESTS
+    if [ $? -ne 0 ]; then
+        cat $CLIENT_LOG
+        echo -e "\n***\n*** Test Result Verification FAILED.\n***"
+        RET=1
+    fi
+fi
+set -e
+
+kill $SERVER_PID
+wait $SERVER_PID
+
 rm -rf models/
 rm -rf weights/

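Note on what the new hunks exercise: `enable_lora` now arrives in model.json either as a JSON boolean (true/false) or as a string ("true"/"false"). A naive bool() cast in Python would mishandle the string form, because every non-empty string, including "false", is truthy. The sketch below shows the kind of normalization the consuming backend needs to perform; parse_bool_flag is a hypothetical helper for illustration, not the Triton vLLM backend's actual code.

    import json

    def parse_bool_flag(value):
        # Hypothetical helper: accept a JSON boolean (true/false) or its
        # string form ("true"/"false"), since the tests above write both
        # variants into model.json.
        if isinstance(value, bool):
            return value
        if isinstance(value, str) and value.strip().lower() in ("true", "false"):
            return value.strip().lower() == "true"
        raise ValueError(f"expected a boolean or 'true'/'false', got {value!r}")

    with open("models/vllm_llama_multi_lora/1/model.json") as f:
        config = json.load(f)

    enable_lora = parse_bool_flag(config.get("enable_lora", False))
    print(enable_lora)

Without such coercion, bool("false") evaluates to True, which is the kind of failure mode the string-valued test cases can surface.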
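For reference, the keys written to model.json mirror vLLM engine arguments. A minimal sketch of that mapping, assuming a vLLM installation with LoRA support (the exact wiring inside the Triton vLLM backend may differ):

    # Constructing the dataclass loads no model weights; it only
    # illustrates that the model.json keys correspond to
    # AsyncEngineArgs fields such as enable_lora and max_lora_rank.
    from vllm.engine.arg_utils import AsyncEngineArgs

    args = AsyncEngineArgs(
        model="./weights/backbone/gemma-2b",
        enable_lora=True,  # must already be a real boolean here
        max_lora_rank=32,
        lora_extra_vocab_size=256,
    )
    print(args.enable_lora)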