Skip to content

Commit 0ac4b04

Browse files
committed
Condensed ensemble model testing into existing test function
1 parent 23a0f7a commit 0ac4b04

File tree

13 files changed

+441
-11
lines changed

13 files changed

+441
-11
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -99,8 +99,8 @@ export TRITON_CONTAINER_VERSION=<YY.MM>
9999
--endpoint=vertex-ai
100100
--upstream-container-version=${TRITON_CONTAINER_VERSION}
101101
--backend=python:r${TRITON_CONTAINER_VERSION}
102-
--backend=ensemble
103102
--backend=vllm:r${TRITON_CONTAINER_VERSION}
103+
--backend=ensemble
104104
```
105105

106106
### Option 3. Add the vLLM Backend to the Default Triton Container
Lines changed: 141 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,141 @@
1+
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Redistribution and use in source and binary forms, with or without
4+
# modification, are permitted provided that the following conditions
5+
# are met:
6+
# * Redistributions of source code must retain the above copyright
7+
# notice, this list of conditions and the following disclaimer.
8+
# * Redistributions in binary form must reproduce the above copyright
9+
# notice, this list of conditions and the following disclaimer in the
10+
# documentation and/or other materials provided with the distribution.
11+
# * Neither the name of NVIDIA CORPORATION nor the names of its
12+
# contributors may be used to endorse or promote products derived
13+
# from this software without specific prior written permission.
14+
#
15+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16+
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23+
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
27+
import json
28+
29+
# triton_python_backend_utils is available in every Triton Python model. You
30+
# need to use this module to create inference requests and responses. It also
31+
# contains some utility functions for extracting information from model_config
32+
# and converting Triton input/output types to numpy types.
33+
import triton_python_backend_utils as pb_utils
34+
35+
36+
class TritonPythonModel:
    """Triton Python-backend model. Triton requires every Python model to
    expose exactly this class name ("TritonPythonModel").

    Reads two FP32 input tensors (INPUT0, INPUT1) and produces their
    element-wise sum (OUTPUT0) and difference (OUTPUT1).
    """

    def initialize(self, args):
        """Called once when the model is loaded (optional hook).

        Parameters
        ----------
        args : dict
            String-to-string mapping supplied by Triton. Keys include:
            * model_config: JSON string with the model configuration
            * model_instance_kind: model instance kind
            * model_instance_device_id: model instance device ID
            * model_repository: model repository path
            * model_version: model version
            * model_name: model name
        """
        # Triton passes the configuration as a JSON string; parse it once
        # and keep it for the output-type lookups below.
        self.model_config = config = json.loads(args["model_config"])

        # Resolve each output's declared Triton data type into a numpy dtype
        # so execute() can cast its results accordingly.
        self.output0_dtype = pb_utils.triton_string_to_numpy(
            pb_utils.get_output_config_by_name(config, "OUTPUT0")["data_type"]
        )
        self.output1_dtype = pb_utils.triton_string_to_numpy(
            pb_utils.get_output_config_by_name(config, "OUTPUT1")["data_type"]
        )

    def execute(self, requests):
        """Required entry point: handle a batch of inference requests.

        Depending on the batching configuration (e.g. dynamic batching),
        `requests` may contain several requests at once. Exactly one
        pb_utils.InferenceResponse must be produced per request; an error
        can be attached to an individual response via its error argument,
        e.g.:

            pb_utils.InferenceResponse(
                output_tensors=..., TritonError("An error occurred"))

        Parameters
        ----------
        requests : list
            A list of pb_utils.InferenceRequest.

        Returns
        -------
        list
            pb_utils.InferenceResponse objects, one per request, in order.
        """
        dtype0 = self.output0_dtype
        dtype1 = self.output1_dtype

        responses = []
        # Iterate every request and build a matching response for each.
        for req in requests:
            a = pb_utils.get_input_tensor_by_name(req, "INPUT0").as_numpy()
            b = pb_utils.get_input_tensor_by_name(req, "INPUT1").as_numpy()

            # Output tensors must be wrapped in pb_utils.Tensor before they
            # can go into a pb_utils.InferenceResponse.
            sum_tensor = pb_utils.Tensor("OUTPUT0", (a + b).astype(dtype0))
            diff_tensor = pb_utils.Tensor("OUTPUT1", (a - b).astype(dtype1))

            responses.append(
                pb_utils.InferenceResponse(
                    output_tensors=[sum_tensor, diff_tensor]
                )
            )

        # The returned list's length must match len(requests).
        return responses

    def finalize(self):
        """Called once when the model is unloaded (optional hook); performs
        any clean-up needed before exit."""
        print("Cleaning up...")
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Copyright (c) 2020, NVIDIA CORPORATION. All rights reserved.
2+
#
3+
# Redistribution and use in source and binary forms, with or without
4+
# modification, are permitted provided that the following conditions
5+
# are met:
6+
# * Redistributions of source code must retain the above copyright
7+
# notice, this list of conditions and the following disclaimer.
8+
# * Redistributions in binary form must reproduce the above copyright
9+
# notice, this list of conditions and the following disclaimer in the
10+
# documentation and/or other materials provided with the distribution.
11+
# * Neither the name of NVIDIA CORPORATION nor the names of its
12+
# contributors may be used to endorse or promote products derived
13+
# from this software without specific prior written permission.
14+
#
15+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16+
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23+
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
27+
name: "add_sub"
28+
backend: "python"
29+
30+
input [
31+
{
32+
name: "INPUT0"
33+
data_type: TYPE_FP32
34+
dims: [ 4 ]
35+
}
36+
]
37+
input [
38+
{
39+
name: "INPUT1"
40+
data_type: TYPE_FP32
41+
dims: [ 4 ]
42+
}
43+
]
44+
output [
45+
{
46+
name: "OUTPUT0"
47+
data_type: TYPE_FP32
48+
dims: [ 4 ]
49+
}
50+
]
51+
output [
52+
{
53+
name: "OUTPUT1"
54+
data_type: TYPE_FP32
55+
dims: [ 4 ]
56+
}
57+
]
58+
59+
instance_group [{ kind: KIND_CPU }]
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# Redistribution and use in source and binary forms, with or without
4+
# modification, are permitted provided that the following conditions
5+
# are met:
6+
# * Redistributions of source code must retain the above copyright
7+
# notice, this list of conditions and the following disclaimer.
8+
# * Redistributions in binary form must reproduce the above copyright
9+
# notice, this list of conditions and the following disclaimer in the
10+
# documentation and/or other materials provided with the distribution.
11+
# * Neither the name of NVIDIA CORPORATION nor the names of its
12+
# contributors may be used to endorse or promote products derived
13+
# from this software without specific prior written permission.
14+
#
15+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16+
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23+
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
27+
name: "ensemble_model"
28+
platform: "ensemble"
29+
max_batch_size: 1
30+
input [
31+
{
32+
name: "text_input"
33+
data_type: TYPE_STRING
34+
dims: [ -1 ]
35+
}
36+
]
37+
output [
38+
{
39+
name: "text_output"
40+
data_type: TYPE_STRING
41+
dims: [ -1 ]
42+
}
43+
]
44+
ensemble_scheduling {
45+
step [
46+
{
47+
model_name: "vllm_opt"
48+
model_version: -1
49+
input_map {
50+
key: "text_input"
51+
value: "text_input"
52+
}
53+
output_map {
54+
key: "text_output"
55+
value: "text_output"
56+
}
57+
}
58+
]
59+
}
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"model":"facebook/opt-125m",
3+
"invalid_attribute": true,
4+
"gpu_memory_utilization": 0.5,
5+
"enforce_eager": true
6+
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# Redistribution and use in source and binary forms, with or without
4+
# modification, are permitted provided that the following conditions
5+
# are met:
6+
# * Redistributions of source code must retain the above copyright
7+
# notice, this list of conditions and the following disclaimer.
8+
# * Redistributions in binary form must reproduce the above copyright
9+
# notice, this list of conditions and the following disclaimer in the
10+
# documentation and/or other materials provided with the distribution.
11+
# * Neither the name of NVIDIA CORPORATION nor the names of its
12+
# contributors may be used to endorse or promote products derived
13+
# from this software without specific prior written permission.
14+
#
15+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16+
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23+
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
27+
# Note: You do not need to change any fields in this configuration.
28+
29+
backend: "vllm"
30+
31+
# The usage of device is deferred to the vLLM engine
32+
instance_group [
33+
{
34+
count: 1
35+
kind: KIND_MODEL
36+
}
37+
]
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"model":"invalid_model",
3+
"disable_log_requests": true,
4+
"gpu_memory_utilization": 0.5,
5+
"enforce_eager": true
6+
}
Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
# Copyright 2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
2+
#
3+
# Redistribution and use in source and binary forms, with or without
4+
# modification, are permitted provided that the following conditions
5+
# are met:
6+
# * Redistributions of source code must retain the above copyright
7+
# notice, this list of conditions and the following disclaimer.
8+
# * Redistributions in binary form must reproduce the above copyright
9+
# notice, this list of conditions and the following disclaimer in the
10+
# documentation and/or other materials provided with the distribution.
11+
# * Neither the name of NVIDIA CORPORATION nor the names of its
12+
# contributors may be used to endorse or promote products derived
13+
# from this software without specific prior written permission.
14+
#
15+
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
16+
# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17+
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
18+
# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
19+
# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
20+
# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
21+
# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
22+
# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
23+
# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
24+
# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
25+
# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
26+
27+
# Note: You do not need to change any fields in this configuration.
28+
29+
backend: "vllm"
30+
31+
# The usage of device is deferred to the vLLM engine
32+
instance_group [
33+
{
34+
count: 1
35+
kind: KIND_MODEL
36+
}
37+
]
Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,6 @@
1+
{
2+
"model":"facebook/opt-125m",
3+
"disable_log_requests": true,
4+
"gpu_memory_utilization": 0.4,
5+
"enforce_eager": true
6+
}

0 commit comments

Comments
 (0)