
Commit 4ad17c4

Added tests

1 parent f63c841 commit 4ad17c4

File tree: 2 files changed (+158, -5 lines)


ci/L0_multi_gpu_vllm/multi_lora/test.sh

Lines changed: 153 additions & 1 deletion
@@ -38,6 +38,60 @@ CLIENT_PY="./multi_lora_test.py"
 DOWNLOAD_PY="./download.py"
 SAMPLE_MODELS_REPO="../../../samples/model_repository"
 EXPECTED_NUM_TESTS=2
+GENERATE_ENDPOINT="localhost:8000/v2/models/vllm_llama_multi_lora/generate"
+CHECK_FOR_ERROR=true
+
+make_api_call() {
+    local endpoint="$1"
+    local data="$2"
+    curl -X POST "$endpoint" --data-binary @- <<< "$data"
+}
+
+check_response() {
+    local response="$1"
+    local expected_response="$2"
+    local error_message="$3"
+    local check_error="${4:-false}"
+
+    if [ -z "$response" ]; then
+        echo -e "Expected a non-empty response from server"
+        echo -e "\n***\n*** $error_message \n***"
+        return 1
+    fi
+
+    local response_text=$(echo "$response" | jq '.text_output // empty')
+    local response_error=$(echo "$response" | jq '.error // empty')
+
+    if [ "$check_error" = true ]; then
+        if [[ -n "$response_text" ]]; then
+            echo -e "Server didn't return an error."
+            echo "$response"
+            echo -e "\n***\n*** $error_message \n***"
+            return 1
+        elif [[ "$expected_response" != "$response_error" ]]; then
+            echo -e "Expected error message doesn't match actual response."
+            echo "Expected: $expected_response."
+            echo "Received: $response_error"
+            echo -e "\n***\n*** $error_message\n***"
+            return 1
+        fi
+    else
+        if [[ ! -z "$response_error" ]]; then
+            echo -e "Received an error from server."
+            echo "$response"
+            echo -e "\n***\n*** $error_message \n***"
+            return 1
+        elif [[ "$expected_response" != "$response_text" ]]; then
+            echo "Expected response doesn't match actual"
+            echo "Expected: $expected_response."
+            echo "Received: $response_text"
+            echo -e "\n***\n*** $error_message \n***"
+            return 1
+        fi
+    fi
+
+    return 0
+}

 # first we download weights
 pip install -U huggingface_hub
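For orientation, a rough Python equivalent of make_api_call plus check_response follows. This is only a sketch, not part of the commit; it assumes the third-party requests package, and call_and_check is a hypothetical name.

import requests

# Hypothetical stand-in for make_api_call + check_response (not in the commit).
GENERATE_ENDPOINT = "http://localhost:8000/v2/models/vllm_llama_multi_lora/generate"

def call_and_check(payload: dict, expected: str, check_error: bool = False) -> bool:
    # POST the JSON body, mirroring `curl -X POST --data-binary` in the script.
    body = requests.post(GENERATE_ENDPOINT, json=payload).json()
    # Success responses carry "text_output"; error responses carry "error".
    actual = body.get("error") if check_error else body.get("text_output")
    if actual != expected:
        print(f"Expected: {expected!r}\nReceived: {actual!r}")
        return False
    return True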
@@ -58,7 +112,7 @@ model_json=$(cat <<EOF
     "model":"./weights/backbone/gemma-2b",
     "disable_log_requests": true,
     "gpu_memory_utilization": 0.7,
-    "tensor_parallel_size": 2,
+    "tensor_parallel_size": 1,
     "block_size": 16,
     "enforce_eager": true,
     "enable_lora": true,
@@ -106,6 +160,39 @@ else
        RET=1
    fi
 fi
+
+# Test generate endpoint + LoRA enabled (boolean flag)
+EXPECTED_RESPONSE='" I love soccer. I play soccer every day.\nInstruct: Tell me"'
+DATA='{
+    "text_input": "Instruct: Tell me more about soccer\nOutput:",
+    "parameters": {
+        "stream": false,
+        "temperature": 0,
+        "top_p": 1,
+        "lora_name": "sheep",
+        "exclude_input_in_output": true
+    }
+}'
+RESPONSE=$(make_api_call "$GENERATE_ENDPOINT" "$DATA")
+check_response "$RESPONSE" "$EXPECTED_RESPONSE" "Valid LoRA + Generate Endpoint Test FAILED." || RET=1
+
+EXPECTED_RESPONSE="\"LoRA unavailable is not supported, we currently support ['doll', 'sheep']\""
+DATA='{
+    "text_input": "Instruct: Tell me more about soccer\nOutput:",
+    "parameters": {
+        "stream": false,
+        "temperature": 0,
+        "top_p": 1,
+        "lora_name": "unavailable",
+        "exclude_input_in_output": true
+    }
+}'
+RESPONSE=$(make_api_call "$GENERATE_ENDPOINT" "$DATA")
+check_response "$RESPONSE" "$EXPECTED_RESPONSE" "Invalid LoRA + Generate Endpoint Test FAILED." $CHECK_FOR_ERROR || RET=1
+
+unset EXPECTED_RESPONSE
+unset RESPONSE
+unset DATA
 set -e

 kill $SERVER_PID
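In terms of the hypothetical call_and_check sketch above, this pair of tests does roughly the following. Payload values are copied from the DATA blocks; note that check_response compares against jq's quoted output, while the sketch compares the raw field values.

payload = {
    "text_input": "Instruct: Tell me more about soccer\nOutput:",
    "parameters": {
        "stream": False,
        "temperature": 0,
        "top_p": 1,
        "lora_name": "sheep",  # a LoRA adapter the server knows about
        "exclude_input_in_output": True,
    },
}
# Valid LoRA: expect generated text back.
ok = call_and_check(payload, " I love soccer. I play soccer every day.\nInstruct: Tell me")

# Unknown LoRA: expect the error field instead of text_output.
payload["parameters"]["lora_name"] = "unavailable"
ok = ok and call_and_check(
    payload,
    "LoRA unavailable is not supported, we currently support ['doll', 'sheep']",
    check_error=True,
)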
@@ -151,6 +238,39 @@ else
        RET=1
    fi
 fi
+
+# Test generate endpoint + LoRA enabled (str flag)
+EXPECTED_RESPONSE='" I think it is a very interesting subject.\n\nInstruct: What do you"'
+DATA='{
+    "text_input": "Instruct: What do you think of Computer Science?\nOutput:",
+    "parameters": {
+        "stream": false,
+        "temperature": 0,
+        "top_p": 1,
+        "lora_name": "doll",
+        "exclude_input_in_output": true
+    }
+}'
+RESPONSE=$(make_api_call "$GENERATE_ENDPOINT" "$DATA")
+check_response "$RESPONSE" "$EXPECTED_RESPONSE" "Valid LoRA + Generate Endpoint Test FAILED." || RET=1
+
+EXPECTED_RESPONSE="\"LoRA unavailable is not supported, we currently support ['doll', 'sheep']\""
+DATA='{
+    "text_input": "Instruct: What do you think of Computer Science?\nOutput:",
+    "parameters": {
+        "stream": false,
+        "temperature": 0,
+        "top_p": 1,
+        "lora_name": "unavailable",
+        "exclude_input_in_output": true
+    }
+}'
+RESPONSE=$(make_api_call "$GENERATE_ENDPOINT" "$DATA")
+check_response "$RESPONSE" "$EXPECTED_RESPONSE" "Invalid LoRA + Generate Endpoint Test FAILED." $CHECK_FOR_ERROR || RET=1
+
+unset EXPECTED_RESPONSE
+unset RESPONSE
+unset DATA
 set -e

 kill $SERVER_PID
@@ -197,6 +317,22 @@ else
        RET=1
    fi
 fi
+
+# Test generate endpoint + LoRA disabled (boolean flag)
+EXPECTED_RESPONSE='"LoRA feature is not enabled."'
+DATA='{
+    "text_input": "Instruct: What do you think of Computer Science?\nOutput:",
+    "parameters": {
+        "stream": false,
+        "temperature": 0,
+        "top_p": 1,
+        "lora_name": "doll",
+        "exclude_input_in_output": true
+    }
+}'
+RESPONSE=$(make_api_call "$GENERATE_ENDPOINT" "$DATA")
+check_response "$RESPONSE" "$EXPECTED_RESPONSE" "Disabled LoRA + Generate Endpoint Test FAILED." $CHECK_FOR_ERROR || RET=1
+
 set -e

 kill $SERVER_PID
@@ -243,6 +379,22 @@ else
        RET=1
    fi
 fi
+
+# Test generate endpoint + LoRA disabled (str flag)
+EXPECTED_RESPONSE='"LoRA feature is not enabled."'
+DATA='{
+    "text_input": "Instruct: What do you think of Computer Science?\nOutput:",
+    "parameters": {
+        "stream": false,
+        "temperature": 0,
+        "top_p": 1,
+        "lora_name": "doll",
+        "exclude_input_in_output": true
+    }
+}'
+RESPONSE=$(make_api_call "$GENERATE_ENDPOINT" "$DATA")
+check_response "$RESPONSE" "$EXPECTED_RESPONSE" "Disabled LoRA + Generate Endpoint Test FAILED." $CHECK_FOR_ERROR || RET=1
+
 set -e

 kill $SERVER_PID

src/model.py

Lines changed: 5 additions & 4 deletions
@@ -562,8 +562,8 @@ def _get_input_tensors(self, request):
         )

         # parameters / sampling_parameters
-        # An alternative mechanism to receive serialized parameters as an input tensor,
-        # because request parameters are not yet supported via BLS.
+        # An alternative mechanism to receive serialized parameters as an input
+        # tensor, because request parameters are not yet supported via BLS.
         sampling_parameters = pb_utils.get_input_tensor_by_name(
             request, "sampling_parameters"
         )
@@ -714,9 +714,10 @@ def _verify_loras(self, request):
         )
         if parameters_input_tensor:
             parameters = parameters_input_tensor.as_numpy()[0].decode("utf-8")
-            sampling_params = TritonSamplingParams.from_dict(parameters, self.logger)
-            lora_name = sampling_params.lora_name
+        else:
+            parameters = request.parameters()

+        lora_name = json.loads(parameters).pop("lora_name", None)
         if lora_name is not None:
             if not self.enable_lora:
                 lora_error = pb_utils.TritonError("LoRA feature is not enabled.")
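The net effect of this change is that lora_name is read with a single JSON lookup, regardless of whether the parameters arrived via the sampling_parameters input tensor or via request.parameters(). A standalone sketch of that lookup, using a hypothetical helper name:

import json

def extract_lora_name(parameters_json: str):
    # `parameters_json` is the serialized parameters string from either
    # source; pop() returns None when no LoRA adapter was requested.
    return json.loads(parameters_json).pop("lora_name", None)

assert extract_lora_name('{"lora_name": "sheep", "temperature": 0}') == "sheep"
assert extract_lora_name('{"temperature": 0}') is None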
