Skip to content

Commit 13bd540

Browse files
committed
adding ensemble testing
1 parent 17b3710 commit 13bd540

File tree

16 files changed

+50
-160
lines changed

16 files changed

+50
-160
lines changed
Lines changed: 33 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,33 @@
1+
name: "ensemble_model"
2+
platform: "ensemble"
3+
max_batch_size: 1
4+
input [
5+
{
6+
name: "text_input"
7+
data_type: TYPE_STRING
8+
dims: [ -1 ]
9+
}
10+
]
11+
output [
12+
{
13+
name: "text_output"
14+
data_type: TYPE_STRING
15+
dims: [ -1 ]
16+
}
17+
]
18+
ensemble_scheduling {
19+
step [
20+
{
21+
model_name: "vllm_model"
22+
model_version: -1
23+
input_map {
24+
key: "text_input"
25+
value: "text_input"
26+
}
27+
output_map {
28+
key: "text_output"
29+
value: "text_output"
30+
}
31+
}
32+
]
33+
}

ci/L0_backend_vllm/vllm_backend/test.sh

Lines changed: 10 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -35,8 +35,7 @@ SERVER_LOG="./vllm_backend_server.log"
3535
CLIENT_LOG="./vllm_backend_client.log"
3636
TEST_RESULT_FILE='test_results.txt'
3737
CLIENT_PY="./vllm_backend_test.py"
38-
SAMPLE_BASIC_MODELS_REPO="../../../samples/basic_model/model_repository"
39-
SAMPLE_ENSEMBLE_MODELS_REPO="../../../samples/ensemble_model/model_repository"
38+
SAMPLE_MODELS_REPO="../../../samples/model_repository"
4039
EXPECTED_NUM_TESTS=6
4140

4241
# Helpers =======================================
@@ -50,7 +49,7 @@ function assert_curl_success {
5049
}
5150

5251
rm -rf models && mkdir -p models
53-
cp -r ${SAMPLE_BASIC_MODELS_REPO}/vllm_model models/vllm_opt
52+
cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_opt
5453
# `vllm_opt` model will be loaded on server start and stay loaded throughout
5554
# unit testing. To test vllm model load/unload we use a dedicated
5655
# `vllm_load_test`. To ensure that vllm's memory profiler will not error out
@@ -64,13 +63,18 @@ wget -P models/add_sub/1/ https://raw.githubusercontent.com/triton-inference-ser
6463
wget -P models/add_sub https://raw.githubusercontent.com/triton-inference-server/python_backend/main/examples/add_sub/config.pbtxt
6564

6665
# Invalid model attribute
67-
cp -r ${SAMPLE_BASIC_MODELS_REPO}/vllm_model models/vllm_invalid_1/
66+
cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_invalid_1/
6867
sed -i 's/"disable_log_requests"/"invalid_attribute"/' models/vllm_invalid_1/1/model.json
6968

7069
# Invalid model name
71-
cp -r ${SAMPLE_BASIC_MODELS_REPO}/vllm_model models/vllm_invalid_2/
70+
cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_invalid_2/
7271
sed -i 's/"facebook\/opt-125m"/"invalid_model"/' models/vllm_invalid_2/1/model.json
7372

73+
74+
mkdir -p models/ensemble_model/1
75+
76+
cp -r ensemble_config.pbtxt models/ensemble_model/config.pbtxt
77+
7478
RET=0
7579

7680
run_server
@@ -167,4 +171,4 @@ fi
167171

168172
collect_artifacts_from_subdir
169173

170-
exit $RET
174+
exit $RET

ci/L0_backend_vllm/vllm_backend/vllm_backend_test.py

Lines changed: 7 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,7 @@ def setUp(self):
4848
self.triton_client = grpcclient.InferenceServerClient(url="localhost:8001")
4949
self.vllm_model_name = "vllm_opt"
5050
self.python_model_name = "add_sub"
51+
self.enseble_model_name = "ensemble_model"
5152
self.vllm_load_test = "vllm_load_test"
5253

5354
def test_vllm_triton_backend(self):
@@ -163,6 +164,12 @@ def test_exclude_input_in_output_true(self):
163164
expected_output=expected_output,
164165
)
165166

167+
def test_ensemble_model(self):
168+
# Test to ensure that ensemble models are supported in the vLLM container.
169+
# If ensemble support is not present, Triton will error out at the model loading stage.
170+
self.triton_client.load_model(self.enseble_model_name)
171+
self.assertTrue(self.triton_client.is_model_ready(self.enseble_model_name))
172+
166173
def _test_vllm_model(
167174
self,
168175
prompts,
@@ -247,9 +254,6 @@ def _test_python_model(self):
247254
np.allclose(input0_data - input1_data, response.as_numpy("OUTPUT1"))
248255
)
249256

250-
def _test_ensemble_model(self):
251-
pass
252-
253257
def tearDown(self):
254258
self.triton_client.close()
255259

File renamed without changes.

samples/ensemble_model/README.md

Lines changed: 0 additions & 18 deletions
This file was deleted.

samples/ensemble_model/client.py

Whitespace-only changes.

samples/ensemble_model/model_repository/ensemble_model/config.pbtxt

Lines changed: 0 additions & 57 deletions
This file was deleted.

samples/ensemble_model/model_repository/gpt2/1/model.json

Lines changed: 0 additions & 5 deletions
This file was deleted.

samples/ensemble_model/model_repository/gpt2/config.pbtxt

Lines changed: 0 additions & 2 deletions
This file was deleted.

samples/ensemble_model/model_repository/prefix_model/1/model.py

Lines changed: 0 additions & 18 deletions
This file was deleted.

0 commit comments

Comments
 (0)