@@ -38,6 +38,60 @@ CLIENT_PY="./multi_lora_test.py"
3838DOWNLOAD_PY=" ./download.py"
3939SAMPLE_MODELS_REPO=" ../../../samples/model_repository"
4040EXPECTED_NUM_TESTS=2
# URL of the model's generate endpoint; passed to make_api_call by the
# generate-endpoint tests.
41+ GENERATE_ENDPOINT=" localhost:8000/v2/models/vllm_llama_multi_lora/generate"
# Passed as the optional fourth argument of check_response when a test
# expects the server to answer with an error instead of generated text.
42+ CHECK_FOR_ERROR=true
43+
# POST a request body to an HTTP endpoint and print the server's reply
# on stdout.
#
# Arguments:
#   $1 - endpoint URL
#   $2 - request payload; fed to curl on stdin through a here-string
#        (bash appends a single trailing newline)
make_api_call () {
    local endpoint="$1"
    local data="$2"
    # Quote the expansions exactly: any padding inside the quotes would
    # become part of the URL / payload.
    curl -X POST "$endpoint" --data-binary @- <<< "$data"
}
49+
# Validate a JSON reply from the generate endpoint.
#
# Arguments:
#   $1 - raw response body returned by the server
#   $2 - expected value; compared against the field as printed by jq, so
#        it must include the surrounding double quotes (e.g. '"text"')
#   $3 - message printed in the failure banner
#   $4 - optional; "true" means the reply is expected to be an error and
#        $2 is matched against `.error`. Defaults to "false", in which
#        case $2 is matched against `.text_output`.
#
# Returns:
#   0 when the reply matches the expectation, 1 otherwise (diagnostics
#   are printed to stdout first).
check_response () {
    local response="$1"
    local expected_response="$2"
    local error_message="$3"
    local check_error="${4:-false}"

    if [[ -z "$response" ]]; then
        echo -e "Expected a non-empty response from server"
        echo -e "\n***\n*** $error_message\n***"
        return 1
    fi

    # Declare first, assign second, so `local` does not mask the exit
    # status of the command substitution. `// empty` makes jq print
    # nothing when the field is absent or null.
    local response_text response_error
    response_text=$(echo "$response" | jq '.text_output // empty')
    response_error=$(echo "$response" | jq '.error // empty')

    if [[ "$check_error" == true ]]; then
        # An error was expected: generated text means the server
        # wrongly accepted the request.
        if [[ -n "$response_text" ]]; then
            echo -e "Server didn't return an error."
            echo "$response"
            echo -e "\n***\n*** $error_message\n***"
            return 1
        elif [[ "$expected_response" != "$response_error" ]]; then
            echo -e "Expected error message doesn't match actual response."
            echo "Expected: $expected_response."
            echo "Received: $response_error"
            echo -e "\n***\n*** $error_message\n***"
            return 1
        fi
    else
        # Generated text was expected: any error field is a failure.
        if [[ -n "$response_error" ]]; then
            echo -e "Received an error from server."
            echo "$response"
            echo -e "\n***\n*** $error_message\n***"
            return 1
        elif [[ "$expected_response" != "$response_text" ]]; then
            echo "Expected response doesn't match actual"
            echo "Expected: $expected_response."
            echo "Received: $response_text"
            echo -e "\n***\n*** $error_message\n***"
            return 1
        fi
    fi

    return 0
}
4195
4296# first we download weights
4397pip install -U huggingface_hub
@@ -58,7 +112,7 @@ model_json=$(cat <<EOF
58112 "model":"./weights/backbone/gemma-2b",
59113 "disable_log_requests": true,
60114 "gpu_memory_utilization": 0.7,
61- "tensor_parallel_size": 2 ,
115+ "tensor_parallel_size": 1 ,
62116 "block_size": 16,
63117 "enforce_eager": true,
64118 "enable_lora": true,
@@ -106,6 +160,39 @@ else
106160 RET=1
107161 fi
108162fi
163+
164+ # Test generate endpoint + LoRA enabled (boolean flag)
# Positive case: request the "sheep" adapter and compare the reply's
# text_output with the expected text; RET is set to 1 on failure.
165+ EXPECTED_RESPONSE=' " I love soccer. I play soccer every day.\nInstruct: Tell me"'
166+ DATA=' {
167+ "text_input": "Instruct: Tell me more about soccer\nOutput:",
168+ "parameters": {
169+ "stream": false,
170+ "temperature": 0,
171+ "top_p":1,
172+ "lora_name": "sheep",
173+ "exclude_input_in_output": true
174+ }
175+ }'
176+ RESPONSE=$( make_api_call " $GENERATE_ENDPOINT " " $DATA " )
177+ check_response " $RESPONSE " " $EXPECTED_RESPONSE " " Valid LoRA + Generate Endpoint Test FAILED." || RET=1
178+
# Negative case: request an adapter named "unavailable"; CHECK_FOR_ERROR
# makes check_response compare the reply's error field instead.
179+ EXPECTED_RESPONSE=" \" LoRA unavailable is not supported, we currently support ['doll', 'sheep']\" "
180+ DATA=' {
181+ "text_input": "Instruct: Tell me more about soccer\nOutput:",
182+ "parameters": {
183+ "stream": false,
184+ "temperature": 0,
185+ "top_p":1,
186+ "lora_name": "unavailable",
187+ "exclude_input_in_output": true
188+ }
189+ }'
190+ RESPONSE=$( make_api_call " $GENERATE_ENDPOINT " " $DATA " )
191+ check_response " $RESPONSE " " $EXPECTED_RESPONSE " " Invalid LoRA + Generate Endpoint Test FAILED." $CHECK_FOR_ERROR || RET=1
192+
# Drop the per-test variables set above.
193+ unset EXPECTED_RESPONSE
194+ unset RESPONSE
195+ unset DATA
109196set -e
110197
111198kill $SERVER_PID
@@ -151,6 +238,39 @@ else
151238 RET=1
152239 fi
153240fi
241+
242+ # Test generate endpoint + LoRA enabled (str flag)
# Positive case: request the "doll" adapter and compare the reply's
# text_output with the expected text; RET is set to 1 on failure.
243+ EXPECTED_RESPONSE=' " I think it is a very interesting subject.\n\nInstruct: What do you"'
244+ DATA=' {
245+ "text_input": "Instruct: What do you think of Computer Science?\nOutput:",
246+ "parameters": {
247+ "stream": false,
248+ "temperature": 0,
249+ "top_p":1,
250+ "lora_name": "doll",
251+ "exclude_input_in_output": true
252+ }
253+ }'
254+ RESPONSE=$( make_api_call " $GENERATE_ENDPOINT " " $DATA " )
255+ check_response " $RESPONSE " " $EXPECTED_RESPONSE " " Valid LoRA + Generate Endpoint Test FAILED." || RET=1
256+
# Negative case: request an adapter named "unavailable"; CHECK_FOR_ERROR
# makes check_response compare the reply's error field instead.
257+ EXPECTED_RESPONSE=" \" LoRA unavailable is not supported, we currently support ['doll', 'sheep']\" "
258+ DATA=' {
259+ "text_input": "Instruct: What do you think of Computer Science?\nOutput:",
260+ "parameters": {
261+ "stream": false,
262+ "temperature": 0,
263+ "top_p":1,
264+ "lora_name": "unavailable",
265+ "exclude_input_in_output": true
266+ }
267+ }'
268+ RESPONSE=$( make_api_call " $GENERATE_ENDPOINT " " $DATA " )
269+ check_response " $RESPONSE " " $EXPECTED_RESPONSE " " Invalid LoRA + Generate Endpoint Test FAILED." $CHECK_FOR_ERROR || RET=1
270+
# Drop the per-test variables set above.
271+ unset EXPECTED_RESPONSE
272+ unset RESPONSE
273+ unset DATA
154274set -e
155275
156276kill $SERVER_PID
@@ -197,6 +317,22 @@ else
197317 RET=1
198318 fi
199319fi
320+
321+ # Test generate endpoint + LoRA disabled (boolean flag)
# A request that names a LoRA adapter is expected to be rejected with the
# error below; CHECK_FOR_ERROR makes check_response compare the reply's
# error field, and RET is set to 1 on failure.
322+ EXPECTED_RESPONSE=' "LoRA feature is not enabled."'
323+ DATA=' {
324+ "text_input": "Instruct: What do you think of Computer Science?\nOutput:",
325+ "parameters": {
326+ "stream": false,
327+ "temperature": 0,
328+ "top_p":1,
329+ "lora_name": "doll",
330+ "exclude_input_in_output": true
331+ }
332+ }'
333+ RESPONSE=$( make_api_call " $GENERATE_ENDPOINT " " $DATA " )
334+ check_response " $RESPONSE " " $EXPECTED_RESPONSE " " Disabled LoRA + Generate Endpoint Test FAILED." $CHECK_FOR_ERROR || RET=1
335+
200336set -e
201337
202338kill $SERVER_PID
@@ -243,6 +379,22 @@ else
243379 RET=1
244380 fi
245381fi
382+
383+ # Test generate endpoint + LoRA disabled (str flag)
# A request that names a LoRA adapter is expected to be rejected with the
# error below; CHECK_FOR_ERROR makes check_response compare the reply's
# error field, and RET is set to 1 on failure.
384+ EXPECTED_RESPONSE=' "LoRA feature is not enabled."'
385+ DATA=' {
386+ "text_input": "Instruct: What do you think of Computer Science?\nOutput:",
387+ "parameters": {
388+ "stream": false,
389+ "temperature": 0,
390+ "top_p":1,
391+ "lora_name": "doll",
392+ "exclude_input_in_output": true
393+ }
394+ }'
395+ RESPONSE=$( make_api_call " $GENERATE_ENDPOINT " " $DATA " )
396+ check_response " $RESPONSE " " $EXPECTED_RESPONSE " " Disabled LoRA + Generate Endpoint Test FAILED." $CHECK_FOR_ERROR || RET=1
397+
246398set -e
247399
248400kill $SERVER_PID
0 commit comments