diff --git a/.gitignore b/.gitignore index 9d4769c9..6b696074 100644 --- a/.gitignore +++ b/.gitignore @@ -50,6 +50,7 @@ coverage.xml .hypothesis/ .pytest_cache/ cover/ +*.out # Translations *.mo diff --git a/README.md b/README.md index eb545e77..8a993d99 100644 --- a/README.md +++ b/README.md @@ -100,6 +100,7 @@ export TRITON_CONTAINER_VERSION= --upstream-container-version=${TRITON_CONTAINER_VERSION} --backend=python:r${TRITON_CONTAINER_VERSION} --backend=vllm:r${TRITON_CONTAINER_VERSION} + --backend=ensemble ``` ### Option 3. Add the vLLM Backend to the Default Triton Container diff --git a/ci/L0_backend_vllm/vllm_backend/ensemble_config.pbtxt b/ci/L0_backend_vllm/vllm_backend/ensemble_config.pbtxt new file mode 100644 index 00000000..07977d0d --- /dev/null +++ b/ci/L0_backend_vllm/vllm_backend/ensemble_config.pbtxt @@ -0,0 +1,59 @@ +# Copyright 2024, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +name: "ensemble_model" +platform: "ensemble" +max_batch_size: 1 +input [ + { + name: "text_input" + data_type: TYPE_STRING + dims: [ -1 ] + } +] +output [ + { + name: "text_output" + data_type: TYPE_STRING + dims: [ -1 ] + } +] +ensemble_scheduling { + step [ + { + model_name: "vllm_opt" + model_version: -1 + input_map { + key: "text_input" + value: "text_input" + } + output_map { + key: "text_output" + value: "text_output" + } + } + ] +} \ No newline at end of file diff --git a/ci/L0_backend_vllm/vllm_backend/test.sh b/ci/L0_backend_vllm/vllm_backend/test.sh index 43b20af7..87e04b21 100755 --- a/ci/L0_backend_vllm/vllm_backend/test.sh +++ b/ci/L0_backend_vllm/vllm_backend/test.sh @@ -70,6 +70,11 @@ sed -i 's/"disable_log_requests"/"invalid_attribute"/' models/vllm_invalid_1/1/m cp -r ${SAMPLE_MODELS_REPO}/vllm_model models/vllm_invalid_2/ sed -i 's/"facebook\/opt-125m"/"invalid_model"/' models/vllm_invalid_2/1/model.json + +# Sanity check ensembles are enabled and can successfully be loaded +mkdir -p models/ensemble_model/1 +cp -r ensemble_config.pbtxt models/ensemble_model/config.pbtxt + RET=0 run_server @@ -166,4 +171,4 @@ fi collect_artifacts_from_subdir -exit $RET +exit $RET \ No newline at end of file diff --git a/ci/L0_backend_vllm/vllm_backend/vllm_backend_test.py b/ci/L0_backend_vllm/vllm_backend/vllm_backend_test.py index 8ca206f0..c53c391a 100644 --- a/ci/L0_backend_vllm/vllm_backend/vllm_backend_test.py +++ 
b/ci/L0_backend_vllm/vllm_backend/vllm_backend_test.py @@ -48,6 +48,7 @@ def setUp(self): self.triton_client = grpcclient.InferenceServerClient(url="localhost:8001") self.vllm_model_name = "vllm_opt" self.python_model_name = "add_sub" + self.ensemble_model_name = "ensemble_model" self.vllm_load_test = "vllm_load_test" def test_vllm_triton_backend(self): @@ -57,6 +58,13 @@ self.triton_client.load_model(self.python_model_name) self.assertTrue(self.triton_client.is_model_ready(self.python_model_name)) + # Test to ensure that ensemble models are supported in the vLLM container. + # If ensemble support is not present, Triton will error out at the model loading stage. + # Ensemble Model is a pipeline consisting of 1 model (vllm_opt) + self.triton_client.load_model(self.ensemble_model_name) + self.assertTrue(self.triton_client.is_model_ready(self.ensemble_model_name)) + self.triton_client.unload_model(self.ensemble_model_name) + # Unload vllm model and test add_sub model self.triton_client.unload_model(self.vllm_load_test) self.assertFalse(self.triton_client.is_model_ready(self.vllm_load_test))