From a75062a93b9ce756ceed20025058ce0367016f99 Mon Sep 17 00:00:00 2001 From: Ziqi Fan Date: Tue, 4 Feb 2025 14:46:17 -0800 Subject: [PATCH 1/8] test: Add tests for response parameters support for BLS in Python backend --- .../response_parameters_bls_test.py | 171 ++++++++++++++++++ qa/L0_backend_python/parameters/test.sh | 16 ++ .../response_parameters_bls/config.pbtxt | 52 ++++++ .../response_parameters_bls/model.py | 72 ++++++++ .../config.pbtxt | 56 ++++++ .../model.py | 81 +++++++++ 6 files changed, 448 insertions(+) create mode 100644 qa/L0_backend_python/parameters/response_parameters_bls_test.py create mode 100644 qa/python_models/response_parameters_bls/config.pbtxt create mode 100644 qa/python_models/response_parameters_bls/model.py create mode 100644 qa/python_models/response_parameters_bls_decoupled/config.pbtxt create mode 100644 qa/python_models/response_parameters_bls_decoupled/model.py diff --git a/qa/L0_backend_python/parameters/response_parameters_bls_test.py b/qa/L0_backend_python/parameters/response_parameters_bls_test.py new file mode 100644 index 0000000000..fad5f6e1c6 --- /dev/null +++ b/qa/L0_backend_python/parameters/response_parameters_bls_test.py @@ -0,0 +1,171 @@ +# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+
+import sys
+
+sys.path.append("../../common")
+
+import json
+import unittest
+
+import numpy as np
+import shm_util
+import tritonclient.grpc as grpcclient
+from tritonclient.utils import InferenceServerException
+
+
+class ResponseParametersTest(unittest.TestCase):
+    _server_address_grpc = "localhost:8001"
+    _model_name = "response_parameters_bls"
+    _shape = [1, 1]
+
+    def setUp(self):
+        self._shm_leak_detector = shm_util.ShmLeakDetector()
+
+    def _assert_response_parameters_match(self, infer_result, expected_params):
+        res_params = {}
+        for param_key, param_value in infer_result.get_response().parameters.items():
+            if param_value.HasField("bool_param"):
+                value = param_value.bool_param
+            elif param_value.HasField("int64_param"):
+                value = param_value.int64_param
+            elif param_value.HasField("string_param"):
+                value = param_value.string_param
+            else:
+                raise ValueError(f"Unsupported parameter choice: {param_value}")
+            res_params[param_key] = value
+        self.assertEqual(expected_params, res_params)
+
+    def _assert_response_parameters_infer_success(self, params):
+        params_str = json.dumps(params)
+
+        inputs = [grpcclient.InferInput("RESPONSE_PARAMETERS", self._shape, "BYTES")]
+        inputs[0].set_data_from_numpy(np.array([[params_str]], dtype=np.object_))
+
+        with self._shm_leak_detector.Probe() as shm_probe:
+            with grpcclient.InferenceServerClient(self._server_address_grpc) as client:
+                result = client.infer(self._model_name, inputs)
+
+                # verify the response parameters
+                self._assert_response_parameters_match(result, params)
+
+                # model returns the input as output
+                output = str(result.as_numpy("OUTPUT")[0][0], encoding="utf-8")
+                self.assertEqual(params_str, output)
+
+    def _assert_response_parameters_infer_fail(self, params, expected_err_msg):
+        params_str = json.dumps(params)
+
+        inputs = [grpcclient.InferInput("RESPONSE_PARAMETERS", self._shape, "BYTES")]
+        inputs[0].set_data_from_numpy(np.array([[params_str]], dtype=np.object_))
+
+        with self._shm_leak_detector.Probe() as shm_probe:
+            with grpcclient.InferenceServerClient(self._server_address_grpc) as client:
+                with self.assertRaises(InferenceServerException) as e:
+                    client.infer(self._model_name, inputs)
+
+        self.assertIn("[StatusCode.INVALID_ARGUMENT] ", str(e.exception))
+        self.assertIn(expected_err_msg, str(e.exception))
+
+    def test_setting_empty_response_parameters(self):
+        params = {}
+        self._assert_response_parameters_infer_success(params)
+
+    def test_setting_one_element_response_parameters(self):
+        params = {"many_elements": False}
+        self._assert_response_parameters_infer_success(params)
+
+    def test_setting_three_element_response_parameters(self):
+        params = {"bool": True, "str": "Hello World!", "int": 1024}
+        self._assert_response_parameters_infer_success(params)
+
+    def test_setting_multi_element_response_parameters(self):
+        params = {"a": "1", "b": "2", "c": 3, "d": False, "e": 5, "f": ""}
+        self._assert_response_parameters_infer_success(params)
+
+    def test_setting_wrong_type_response_parameters(self):
+        params = []
+        expected_err_msg = ", got <class 'list'>"
+        self._assert_response_parameters_infer_fail(params, expected_err_msg)
+
+    def test_setting_int_key_type_response_parameters(self):
+        params = {"1": "int key"}
+        expected_err_msg = (
+            "Expect parameters keys to have type str, found type <class 'int'>"
+        )
+        self._assert_response_parameters_infer_fail(params, expected_err_msg)
+
+    def test_setting_float_response_parameters(self):
+        params = {"int": 2, "float": 0.5}
+        expected_err_msg = "Expect parameters values to have type bool/int/str, found type <class 'float'>"
+        self._assert_response_parameters_infer_fail(params, expected_err_msg)
+
+    def test_setting_null_response_parameters(self):
+        params = {"bool": True, "null": None}
+        expected_err_msg = "Expect parameters values to have type bool/int/str, found type <class 'NoneType'>"
+        self._assert_response_parameters_infer_fail(params, expected_err_msg)
+
+    def test_setting_nested_response_parameters(self):
+        params = {"str": "", "list": ["variable"]}
+        expected_err_msg = "Expect parameters values to have type bool/int/str, found type <class 'list'>"
+        self._assert_response_parameters_infer_fail(params, expected_err_msg)
+
+    def test_setting_response_parameters_decoupled(self):
+        model_name = "response_parameters_bls_decoupled"
+        params = [{"bool": False, "int": 2048}, {"str": "Hello World!"}]
+        params_str = json.dumps(params)
+
+        inputs = [grpcclient.InferInput("RESPONSE_PARAMETERS", self._shape, "BYTES")]
+        inputs[0].set_data_from_numpy(np.array([[params_str]], dtype=np.object_))
+
+        responses = []
+        with self._shm_leak_detector.Probe() as shm_probe:
+            with grpcclient.InferenceServerClient(self._server_address_grpc) as client:
+                client.start_stream(
+                    callback=(lambda result, error: responses.append((result, error)))
+                )
+                client.async_stream_infer(model_name=model_name, inputs=inputs)
+                client.stop_stream()
+
+        self.assertEqual(len(params), len(responses))
+        for i in range(len(params)):
+            result, error = responses[i]
+            self.assertIsNone(error)
+
+            # Since this is a decoupled model, the 'triton_final_response' parameter
+            # will be a part of the response parameters, so include it in the expected
+            # parameters. The model sends the complete final flag separately from the
+            # response, so the parameter is always False.
+            expected_params = params[i].copy()
+            expected_params["triton_final_response"] = False
+            self._assert_response_parameters_match(result, expected_params)
+
+            output = str(result.as_numpy("OUTPUT")[0][0], encoding="utf-8")
+            self.assertEqual(json.dumps(params[i]), output)
+
+
+if __name__ == "__main__":
+    unittest.main()
diff --git a/qa/L0_backend_python/parameters/test.sh b/qa/L0_backend_python/parameters/test.sh
index 9d8c86c530..276af0c66c 100755
--- a/qa/L0_backend_python/parameters/test.sh
+++ b/qa/L0_backend_python/parameters/test.sh
@@ -39,8 +39,15 @@ mkdir -p models/response_parameters/1 && \
 mkdir -p models/response_parameters_decoupled/1 && \
     cp ../../python_models/response_parameters_decoupled/model.py models/response_parameters_decoupled/1 && \
     cp ../../python_models/response_parameters_decoupled/config.pbtxt models/response_parameters_decoupled
+mkdir -p models/response_parameters_bls/1 && \
+    cp ../../python_models/response_parameters_bls/model.py models/response_parameters_bls/1 && \
+    cp ../../python_models/response_parameters_bls/config.pbtxt models/response_parameters_bls
+mkdir -p models/response_parameters_bls_decoupled/1 && \
+    cp ../../python_models/response_parameters_bls_decoupled/model.py models/response_parameters_bls_decoupled/1 && \
+    cp ../../python_models/response_parameters_bls_decoupled/config.pbtxt models/response_parameters_bls_decoupled
 
 TEST_LOG="response_parameters_test.log"
+TEST_BLS_LOG="response_parameters_bls_test.log"
 SERVER_LOG="response_parameters_test.server.log"
 SERVER_ARGS="--model-repository=${MODELDIR}/parameters/models --backend-directory=${BACKEND_DIR} --log-verbose=1"
 
@@ -60,6 +67,15 @@ if [ $? -ne 0 ]; then
 fi
 set -e
 
+set +e
+python3 -m pytest --junitxml=response_parameters_bls_test.report.xml response_parameters_bls_test.py > $TEST_BLS_LOG 2>&1
+if [ $? 
-ne 0 ]; then + echo -e "\n***\n*** Response parameters in BLS mode test FAILED\n***" + cat $TEST_BLS_LOG + RET=1 +fi +set -e + kill $SERVER_PID wait $SERVER_PID diff --git a/qa/python_models/response_parameters_bls/config.pbtxt b/qa/python_models/response_parameters_bls/config.pbtxt new file mode 100644 index 0000000000..d8fee6cd42 --- /dev/null +++ b/qa/python_models/response_parameters_bls/config.pbtxt @@ -0,0 +1,52 @@ +# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +name: "response_parameters_bls" +backend: "python" +max_batch_size: 8 + +input [ + { + name: "RESPONSE_PARAMETERS" + data_type: TYPE_STRING + dims: [ 1 ] + } +] + +output [ + { + name: "OUTPUT" + data_type: TYPE_STRING + dims: [ 1 ] + } +] + +instance_group [ + { + count: 1 + kind: KIND_CPU + } +] \ No newline at end of file diff --git a/qa/python_models/response_parameters_bls/model.py b/qa/python_models/response_parameters_bls/model.py new file mode 100644 index 0000000000..f7db580ac4 --- /dev/null +++ b/qa/python_models/response_parameters_bls/model.py @@ -0,0 +1,72 @@ +# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. 
+# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +import json + +import numpy as np +import triton_python_backend_utils as pb_utils + + +class TritonPythonModel: + def execute(self, requests): + responses = [] + + for request in requests: + try: + bls_input_tensor = pb_utils.get_input_tensor_by_name( + request, "RESPONSE_PARAMETERS" + ) + bls_request = pb_utils.InferenceRequest( + model_name="response_parameters", + inputs=[bls_input_tensor], + requested_output_names=["OUTPUT"], + ) + bls_response = bls_request.exec() + response_tensors = bls_response.output_tensors() + response_parameters_str = bls_response.parameters() + if bls_response.has_error(): + print(bls_response.error().message()) + raise Exception(bls_response.error().message()) + res_params = json.loads(response_parameters_str) + + response = pb_utils.InferenceResponse( + output_tensors=response_tensors, parameters=res_params + ) + + res_params_set = {} + if response.parameters() != "": + res_params_set = json.loads(response.parameters()) + if res_params_set != res_params: + raise Exception("Response parameters set differ from provided") + except Exception as e: + error = pb_utils.TritonError( + message=str(e), code=pb_utils.TritonError.INVALID_ARG + ) + response = pb_utils.InferenceResponse(error=error) + + responses.append(response) + + return responses diff --git a/qa/python_models/response_parameters_bls_decoupled/config.pbtxt b/qa/python_models/response_parameters_bls_decoupled/config.pbtxt new file mode 100644 index 0000000000..d1f347c5cb --- /dev/null +++ b/qa/python_models/response_parameters_bls_decoupled/config.pbtxt @@ -0,0 +1,56 @@ +# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. 
IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. + +name: "response_parameters_bls_decoupled" +backend: "python" +max_batch_size: 8 + +input [ + { + name: "RESPONSE_PARAMETERS" + data_type: TYPE_STRING + dims: [ 1 ] + } +] + +output [ + { + name: "OUTPUT" + data_type: TYPE_STRING + dims: [ 1 ] + } +] + +instance_group [ + { + count: 1 + kind: KIND_CPU + } +] + +model_transaction_policy { + decoupled: True +} diff --git a/qa/python_models/response_parameters_bls_decoupled/model.py b/qa/python_models/response_parameters_bls_decoupled/model.py new file mode 100644 index 0000000000..d7862fc3f3 --- /dev/null +++ b/qa/python_models/response_parameters_bls_decoupled/model.py @@ -0,0 +1,81 @@ +# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# +# Redistribution and use in source and binary forms, with or without +# modification, are permitted provided that the following conditions +# are met: +# * Redistributions of source code must retain the above copyright +# notice, this list of conditions and the following disclaimer. +# * Redistributions in binary form must reproduce the above copyright +# notice, this list of conditions and the following disclaimer in the +# documentation and/or other materials provided with the distribution. +# * Neither the name of NVIDIA CORPORATION nor the names of its +# contributors may be used to endorse or promote products derived +# from this software without specific prior written permission. +# +# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY +# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE +# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR +# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR +# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, +# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, +# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR +# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY +# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT +# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE +# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
+ +import json + +import numpy as np +import triton_python_backend_utils as pb_utils + + +class TritonPythonModel: + def execute(self, requests): + for request in requests: + res_params_tensor = pb_utils.get_input_tensor_by_name( + request, "RESPONSE_PARAMETERS" + ).as_numpy() + res_params_str = str(res_params_tensor[0][0], encoding="utf-8") + response_sender = request.get_response_sender() + try: + res_params = json.loads(res_params_str) + for r_params in res_params: + bls_input_tensor = pb_utils.Tensor( + "RESPONSE_PARAMETERS", + np.array([[json.dumps(r_params)]], dtype=np.object_), + ) + bls_request = pb_utils.InferenceRequest( + model_name="response_parameters", + inputs=[bls_input_tensor], + requested_output_names=["OUTPUT"], + ) + bls_response = bls_request.exec() + response_tensors = bls_response.output_tensors() + response_parameters_str = bls_response.parameters() + if bls_response.has_error(): + print(bls_response.error().message()) + raise Exception(bls_response.error().message()) + res_params = json.loads(response_parameters_str) + + response = pb_utils.InferenceResponse( + output_tensors=response_tensors, parameters=res_params + ) + + r_params_set = {} + if response.parameters() != "": + r_params_set = json.loads(response.parameters()) + if r_params_set != r_params: + raise Exception("Response parameters set differ from provided") + + response_sender.send(response) + except Exception as e: + error = pb_utils.TritonError( + message=str(e), code=pb_utils.TritonError.INVALID_ARG + ) + response = pb_utils.InferenceResponse(error=error) + response_sender.send(response) + + response_sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL) + + return None From 41ec8fcefa7c3cd2cd8453c38bda89ff35283a71 Mon Sep 17 00:00:00 2001 From: Ziqi Fan Date: Tue, 4 Feb 2025 15:32:50 -0800 Subject: [PATCH 2/8] add model pydoc --- qa/python_models/response_parameters_bls/model.py | 11 +++++++++++ .../response_parameters_bls_decoupled/model.py | 8 ++++++++ 2 files changed, 19 insertions(+) diff --git a/qa/python_models/response_parameters_bls/model.py b/qa/python_models/response_parameters_bls/model.py index f7db580ac4..038fcb2092 100644 --- a/qa/python_models/response_parameters_bls/model.py +++ b/qa/python_models/response_parameters_bls/model.py @@ -31,6 +31,17 @@ class TritonPythonModel: + """ + This model (A) is designed to test sending back response parameters when using BLS. + It takes one input tensor, which is the RESPONSE_PARAMETERS and uses BLS to + call response_parameters model (B). Model B would set RESPONSE_PARAMETERS (with a bit + of data massage) as its response parameters. In the end, model A would also set its + response parameters from model B's response parameters. + + With above model set up, we can easily test whether the real response parameters are + the same as the input response parameters. + """ + def execute(self, requests): responses = [] diff --git a/qa/python_models/response_parameters_bls_decoupled/model.py b/qa/python_models/response_parameters_bls_decoupled/model.py index d7862fc3f3..8cee0524bc 100644 --- a/qa/python_models/response_parameters_bls_decoupled/model.py +++ b/qa/python_models/response_parameters_bls_decoupled/model.py @@ -31,6 +31,14 @@ class TritonPythonModel: + """ + This model (A) is designed to test sending back response parameters when using BLS + with decoupled model transaction policy. + + The only difference vs. response_parameters_bls model is this model turns on decoupled + model transaction policy. 
For more details, please check response_parameters_bls. + """ + def execute(self, requests): for request in requests: res_params_tensor = pb_utils.get_input_tensor_by_name( From 78dc992b8afa9daca34bad5b7c33bf2c4439c948 Mon Sep 17 00:00:00 2001 From: Ziqi Fan Date: Tue, 4 Feb 2025 15:56:32 -0800 Subject: [PATCH 3/8] remove unused import --- qa/python_models/response_parameters_bls/model.py | 1 - 1 file changed, 1 deletion(-) diff --git a/qa/python_models/response_parameters_bls/model.py b/qa/python_models/response_parameters_bls/model.py index 038fcb2092..8ee26fd7d0 100644 --- a/qa/python_models/response_parameters_bls/model.py +++ b/qa/python_models/response_parameters_bls/model.py @@ -26,7 +26,6 @@ import json -import numpy as np import triton_python_backend_utils as pb_utils From 9fa53881b7932653e724bc9cd8427d71f441b605 Mon Sep 17 00:00:00 2001 From: Ziqi Fan Date: Wed, 5 Feb 2025 12:58:00 -0800 Subject: [PATCH 4/8] address jacky's comments --- .../response_parameters_bls_test.py | 171 ------------------ .../parameters/response_parameters_test.py | 15 +- qa/L0_backend_python/parameters/test.sh | 8 +- .../response_parameters_bls/config.pbtxt | 4 +- .../response_parameters_bls/model.py | 19 +- .../config.pbtxt | 4 - .../model.py | 66 ++++--- 7 files changed, 59 insertions(+), 228 deletions(-) delete mode 100644 qa/L0_backend_python/parameters/response_parameters_bls_test.py diff --git a/qa/L0_backend_python/parameters/response_parameters_bls_test.py b/qa/L0_backend_python/parameters/response_parameters_bls_test.py deleted file mode 100644 index fad5f6e1c6..0000000000 --- a/qa/L0_backend_python/parameters/response_parameters_bls_test.py +++ /dev/null @@ -1,171 +0,0 @@ -# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 
-
-import sys
-
-sys.path.append("../../common")
-
-import json
-import unittest
-
-import numpy as np
-import shm_util
-import tritonclient.grpc as grpcclient
-from tritonclient.utils import InferenceServerException
-
-
-class ResponseParametersTest(unittest.TestCase):
-    _server_address_grpc = "localhost:8001"
-    _model_name = "response_parameters_bls"
-    _shape = [1, 1]
-
-    def setUp(self):
-        self._shm_leak_detector = shm_util.ShmLeakDetector()
-
-    def _assert_response_parameters_match(self, infer_result, expected_params):
-        res_params = {}
-        for param_key, param_value in infer_result.get_response().parameters.items():
-            if param_value.HasField("bool_param"):
-                value = param_value.bool_param
-            elif param_value.HasField("int64_param"):
-                value = param_value.int64_param
-            elif param_value.HasField("string_param"):
-                value = param_value.string_param
-            else:
-                raise ValueError(f"Unsupported parameter choice: {param_value}")
-            res_params[param_key] = value
-        self.assertEqual(expected_params, res_params)
-
-    def _assert_response_parameters_infer_success(self, params):
-        params_str = json.dumps(params)
-
-        inputs = [grpcclient.InferInput("RESPONSE_PARAMETERS", self._shape, "BYTES")]
-        inputs[0].set_data_from_numpy(np.array([[params_str]], dtype=np.object_))
-
-        with self._shm_leak_detector.Probe() as shm_probe:
-            with grpcclient.InferenceServerClient(self._server_address_grpc) as client:
-                result = client.infer(self._model_name, inputs)
-
-                # verify the response parameters
-                self._assert_response_parameters_match(result, params)
-
-                # model returns the input as output
-                output = str(result.as_numpy("OUTPUT")[0][0], encoding="utf-8")
-                self.assertEqual(params_str, output)
-
-    def _assert_response_parameters_infer_fail(self, params, expected_err_msg):
-        params_str = json.dumps(params)
-
-        inputs = [grpcclient.InferInput("RESPONSE_PARAMETERS", self._shape, "BYTES")]
-        inputs[0].set_data_from_numpy(np.array([[params_str]], dtype=np.object_))
-
-        with self._shm_leak_detector.Probe() as shm_probe:
-            with grpcclient.InferenceServerClient(self._server_address_grpc) as client:
-                with self.assertRaises(InferenceServerException) as e:
-                    client.infer(self._model_name, inputs)
-
-        self.assertIn("[StatusCode.INVALID_ARGUMENT] ", str(e.exception))
-        self.assertIn(expected_err_msg, str(e.exception))
-
-    def test_setting_empty_response_parameters(self):
-        params = {}
-        self._assert_response_parameters_infer_success(params)
-
-    def test_setting_one_element_response_parameters(self):
-        params = {"many_elements": False}
-        self._assert_response_parameters_infer_success(params)
-
-    def test_setting_three_element_response_parameters(self):
-        params = {"bool": True, "str": "Hello World!", "int": 1024}
-        self._assert_response_parameters_infer_success(params)
-
-    def test_setting_multi_element_response_parameters(self):
-        params = {"a": "1", "b": "2", "c": 3, "d": False, "e": 5, "f": ""}
-        self._assert_response_parameters_infer_success(params)
-
-    def test_setting_wrong_type_response_parameters(self):
-        params = []
-        expected_err_msg = ", got <class 'list'>"
-        self._assert_response_parameters_infer_fail(params, expected_err_msg)
-
-    def test_setting_int_key_type_response_parameters(self):
-        params = {"1": "int key"}
-        expected_err_msg = (
-            "Expect parameters keys to have type str, found type <class 'int'>"
-        )
-        self._assert_response_parameters_infer_fail(params, expected_err_msg)
-
-    def test_setting_float_response_parameters(self):
-        params = {"int": 2, "float": 0.5}
-        expected_err_msg = "Expect parameters values to have type bool/int/str, found type <class 'float'>"
-        self._assert_response_parameters_infer_fail(params, expected_err_msg)
-
-    def test_setting_null_response_parameters(self):
-        params = {"bool": True, "null": None}
-        expected_err_msg = "Expect parameters values to have type bool/int/str, found type <class 'NoneType'>"
-        self._assert_response_parameters_infer_fail(params, expected_err_msg)
-
-    def test_setting_nested_response_parameters(self):
-        params = {"str": "", "list": ["variable"]}
-        expected_err_msg = "Expect parameters values to have type bool/int/str, found type <class 'list'>"
-        self._assert_response_parameters_infer_fail(params, expected_err_msg)
-
-    def test_setting_response_parameters_decoupled(self):
-        model_name = "response_parameters_bls_decoupled"
-        params = [{"bool": False, "int": 2048}, {"str": "Hello World!"}]
-        params_str = json.dumps(params)
-
-        inputs = [grpcclient.InferInput("RESPONSE_PARAMETERS", self._shape, "BYTES")]
-        inputs[0].set_data_from_numpy(np.array([[params_str]], dtype=np.object_))
-
-        responses = []
-        with self._shm_leak_detector.Probe() as shm_probe:
-            with grpcclient.InferenceServerClient(self._server_address_grpc) as client:
-                client.start_stream(
-                    callback=(lambda result, error: responses.append((result, error)))
-                )
-                client.async_stream_infer(model_name=model_name, inputs=inputs)
-                client.stop_stream()
-
-        self.assertEqual(len(params), len(responses))
-        for i in range(len(params)):
-            result, error = responses[i]
-            self.assertIsNone(error)
-
-            # Since this is a decoupled model, the 'triton_final_response' parameter
-            # will be a part of the response parameters, so include it in the expected
-            # parameters. The model sends the complete final flag separately from the
-            # response, so the parameter is always False.
-            expected_params = params[i].copy()
-            expected_params["triton_final_response"] = False
-            self._assert_response_parameters_match(result, expected_params)
-
-            output = str(result.as_numpy("OUTPUT")[0][0], encoding="utf-8")
-            self.assertEqual(json.dumps(params[i]), output)
-
-
-if __name__ == "__main__":
-    unittest.main()
diff --git a/qa/L0_backend_python/parameters/response_parameters_test.py b/qa/L0_backend_python/parameters/response_parameters_test.py
index e07bb5eb9f..4e0d24d878 100644
--- a/qa/L0_backend_python/parameters/response_parameters_test.py
+++ b/qa/L0_backend_python/parameters/response_parameters_test.py
@@ -24,6 +24,7 @@
 # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
 # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
 
+import os import sys sys.path.append("../../common") @@ -39,7 +40,7 @@ class ResponseParametersTest(unittest.TestCase): _server_address_grpc = "localhost:8001" - _model_name = "response_parameters" + _model_name = os.environ["MODEL_NAME"] _shape = [1, 1] def setUp(self): @@ -166,6 +167,18 @@ def test_setting_response_parameters_decoupled(self): output = str(result.as_numpy("OUTPUT")[0][0], encoding="utf-8") self.assertEqual(json.dumps(params[i]), output) + def test_setting_response_parameters_bls_decoupled(self): + model_name = "response_parameters_bls_decoupled" + params = [{"bool": False, "int": 2048}, {"str": "Hello World!"}] + params_str = json.dumps(params) + + inputs = [grpcclient.InferInput("RESPONSE_PARAMETERS", self._shape, "BYTES")] + inputs[0].set_data_from_numpy(np.array([[params_str]], dtype=np.object_)) + + with self._shm_leak_detector.Probe() as shm_probe: + with grpcclient.InferenceServerClient(self._server_address_grpc) as client: + client.infer(model_name, inputs) + if __name__ == "__main__": unittest.main() diff --git a/qa/L0_backend_python/parameters/test.sh b/qa/L0_backend_python/parameters/test.sh index 276af0c66c..d1f3d4bdff 100755 --- a/qa/L0_backend_python/parameters/test.sh +++ b/qa/L0_backend_python/parameters/test.sh @@ -59,18 +59,16 @@ if [ "$SERVER_PID" == "0" ]; then fi set +e -python3 -m pytest --junitxml=response_parameters_test.report.xml response_parameters_test.py > $TEST_LOG 2>&1 +MODEL_NAME=response_parameters python3 -m pytest --junitxml=response_parameters_test.report.xml response_parameters_test.py > $TEST_LOG 2>&1 if [ $? -ne 0 ]; then echo -e "\n***\n*** Response parameters test FAILED\n***" cat $TEST_LOG RET=1 fi -set -e -set +e -python3 -m pytest --junitxml=response_parameters_bls_test.report.xml response_parameters_bls_test.py > $TEST_BLS_LOG 2>&1 +MODEL_NAME=response_parameters_bls python3 -m pytest -s --junitxml=response_parameters_bls_test.report.xml response_parameters_test.py > $TEST_BLS_LOG 2>&1 if [ $? -ne 0 ]; then - echo -e "\n***\n*** Response parameters in BLS mode test FAILED\n***" + echo -e "\n***\n*** Response parameters BLS test FAILED\n***" cat $TEST_BLS_LOG RET=1 fi diff --git a/qa/python_models/response_parameters_bls/config.pbtxt b/qa/python_models/response_parameters_bls/config.pbtxt index d8fee6cd42..1cf961d671 100644 --- a/qa/python_models/response_parameters_bls/config.pbtxt +++ b/qa/python_models/response_parameters_bls/config.pbtxt @@ -1,4 +1,4 @@ -# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. # # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -49,4 +49,4 @@ instance_group [ count: 1 kind: KIND_CPU } -] \ No newline at end of file +] diff --git a/qa/python_models/response_parameters_bls/model.py b/qa/python_models/response_parameters_bls/model.py index 8ee26fd7d0..ec33237cf9 100644 --- a/qa/python_models/response_parameters_bls/model.py +++ b/qa/python_models/response_parameters_bls/model.py @@ -1,4 +1,4 @@ -# Copyright 2023-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. +# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. 
# # Redistribution and use in source and binary forms, with or without # modification, are permitted provided that the following conditions @@ -45,20 +45,19 @@ def execute(self, requests): responses = [] for request in requests: + bls_input_tensor = pb_utils.get_input_tensor_by_name( + request, "RESPONSE_PARAMETERS" + ) + bls_request = pb_utils.InferenceRequest( + model_name="response_parameters", + inputs=[bls_input_tensor], + requested_output_names=["OUTPUT"], + ) try: - bls_input_tensor = pb_utils.get_input_tensor_by_name( - request, "RESPONSE_PARAMETERS" - ) - bls_request = pb_utils.InferenceRequest( - model_name="response_parameters", - inputs=[bls_input_tensor], - requested_output_names=["OUTPUT"], - ) bls_response = bls_request.exec() response_tensors = bls_response.output_tensors() response_parameters_str = bls_response.parameters() if bls_response.has_error(): - print(bls_response.error().message()) raise Exception(bls_response.error().message()) res_params = json.loads(response_parameters_str) diff --git a/qa/python_models/response_parameters_bls_decoupled/config.pbtxt b/qa/python_models/response_parameters_bls_decoupled/config.pbtxt index d1f347c5cb..578cdca1aa 100644 --- a/qa/python_models/response_parameters_bls_decoupled/config.pbtxt +++ b/qa/python_models/response_parameters_bls_decoupled/config.pbtxt @@ -50,7 +50,3 @@ instance_group [ kind: KIND_CPU } ] - -model_transaction_policy { - decoupled: True -} diff --git a/qa/python_models/response_parameters_bls_decoupled/model.py b/qa/python_models/response_parameters_bls_decoupled/model.py index 8cee0524bc..79bceed9c0 100644 --- a/qa/python_models/response_parameters_bls_decoupled/model.py +++ b/qa/python_models/response_parameters_bls_decoupled/model.py @@ -32,58 +32,54 @@ class TritonPythonModel: """ - This model (A) is designed to test sending back response parameters when using BLS + This model is designed to test sending back response parameters when using BLS with decoupled model transaction policy. - The only difference vs. response_parameters_bls model is this model turns on decoupled - model transaction policy. For more details, please check response_parameters_bls. + The only difference vs. response_parameters_bls model is the BLS composing model + (i.e. response_parameters_decoupled) turns on decoupled model transaction policy. + For more details, please check response_parameters_bls model. 
""" def execute(self, requests): + responses = [] + for request in requests: - res_params_tensor = pb_utils.get_input_tensor_by_name( + bls_input_tensor = pb_utils.get_input_tensor_by_name( request, "RESPONSE_PARAMETERS" - ).as_numpy() - res_params_str = str(res_params_tensor[0][0], encoding="utf-8") - response_sender = request.get_response_sender() + ) + bls_request = pb_utils.InferenceRequest( + model_name="response_parameters_decoupled", + inputs=[bls_input_tensor], + requested_output_names=["OUTPUT"], + ) + + res_params_numpy = bls_input_tensor.as_numpy() + res_params_str = str(res_params_numpy[0][0], encoding="utf-8") + res_params = json.loads(res_params_str) try: - res_params = json.loads(res_params_str) - for r_params in res_params: - bls_input_tensor = pb_utils.Tensor( - "RESPONSE_PARAMETERS", - np.array([[json.dumps(r_params)]], dtype=np.object_), - ) - bls_request = pb_utils.InferenceRequest( - model_name="response_parameters", - inputs=[bls_input_tensor], - requested_output_names=["OUTPUT"], - ) - bls_response = bls_request.exec() - response_tensors = bls_response.output_tensors() - response_parameters_str = bls_response.parameters() + bls_responses = bls_request.exec(decoupled=True) + + for bls_response, r_params in zip(bls_responses, res_params): if bls_response.has_error(): - print(bls_response.error().message()) raise Exception(bls_response.error().message()) - res_params = json.loads(response_parameters_str) - - response = pb_utils.InferenceResponse( - output_tensors=response_tensors, parameters=res_params - ) r_params_set = {} - if response.parameters() != "": - r_params_set = json.loads(response.parameters()) - if r_params_set != r_params: - raise Exception("Response parameters set differ from provided") + if bls_response.parameters() != "": + r_params_set = json.loads(bls_response.parameters()) + if r_params_set != r_params: + raise Exception( + "Response parameters set differ from provided" + ) - response_sender.send(response) + # no need to send back anything in the response since we already do the + # parameters matching checking above. 
+ response = pb_utils.InferenceResponse() except Exception as e: error = pb_utils.TritonError( message=str(e), code=pb_utils.TritonError.INVALID_ARG ) response = pb_utils.InferenceResponse(error=error) - response_sender.send(response) - response_sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL) + responses.append(response) - return None + return responses From 160752d93446f4798d3fd3269c9630233850f22d Mon Sep 17 00:00:00 2001 From: Ziqi Fan Date: Wed, 5 Feb 2025 16:44:37 -0800 Subject: [PATCH 5/8] adopt jacky's suggestion --- .../parameters/response_parameters_test.py | 21 ++++- qa/L0_backend_python/parameters/test.sh | 10 +-- .../response_parameters_bls/config.pbtxt | 5 ++ .../response_parameters_bls/model.py | 69 +++++++++------ .../config.pbtxt | 52 ------------ .../model.py | 85 ------------------- 6 files changed, 68 insertions(+), 174 deletions(-) delete mode 100644 qa/python_models/response_parameters_bls_decoupled/config.pbtxt delete mode 100644 qa/python_models/response_parameters_bls_decoupled/model.py diff --git a/qa/L0_backend_python/parameters/response_parameters_test.py b/qa/L0_backend_python/parameters/response_parameters_test.py index 4e0d24d878..659d86cd91 100644 --- a/qa/L0_backend_python/parameters/response_parameters_test.py +++ b/qa/L0_backend_python/parameters/response_parameters_test.py @@ -169,15 +169,28 @@ def test_setting_response_parameters_decoupled(self): def test_setting_response_parameters_bls_decoupled(self): model_name = "response_parameters_bls_decoupled" - params = [{"bool": False, "int": 2048}, {"str": "Hello World!"}] + params = {"bool": False, "int": 2048, "str": "Hello World!"} + params_decoupled = [{}, {"bool": True, "int": 10000}, {"str": "?"}] params_str = json.dumps(params) - - inputs = [grpcclient.InferInput("RESPONSE_PARAMETERS", self._shape, "BYTES")] + params_decoupled_str = json.dumps(params_decoupled) + + inputs = [ + grpcclient.InferInput("RESPONSE_PARAMETERS", self._shape, "BYTES"), + grpcclient.InferInput( + "RESPONSE_PARAMETERS_DECOUPLED", self._shape, "BYTES" + ), + ] inputs[0].set_data_from_numpy(np.array([[params_str]], dtype=np.object_)) + inputs[1].set_data_from_numpy( + np.array([[params_decoupled_str]], dtype=np.object_) + ) with self._shm_leak_detector.Probe() as shm_probe: with grpcclient.InferenceServerClient(self._server_address_grpc) as client: - client.infer(model_name, inputs) + result = client.infer(model_name, inputs) + + output = str(result.as_numpy("OUTPUT")[0][0], encoding="utf-8") + self.assertEqual(output, "True") if __name__ == "__main__": diff --git a/qa/L0_backend_python/parameters/test.sh b/qa/L0_backend_python/parameters/test.sh index d1f3d4bdff..172bc5909e 100755 --- a/qa/L0_backend_python/parameters/test.sh +++ b/qa/L0_backend_python/parameters/test.sh @@ -47,7 +47,6 @@ mkdir -p models/response_parameters_bls_decoupled/1 && \ cp ../../python_models/response_parameters_bls_decoupled/config.pbtxt models/response_parameters_bls_decoupled TEST_LOG="response_parameters_test.log" -TEST_BLS_LOG="response_parameters_bls_test.log" SERVER_LOG="response_parameters_test.server.log" SERVER_ARGS="--model-repository=${MODELDIR}/parameters/models --backend-directory=${BACKEND_DIR} --log-verbose=1" @@ -59,19 +58,12 @@ if [ "$SERVER_PID" == "0" ]; then fi set +e -MODEL_NAME=response_parameters python3 -m pytest --junitxml=response_parameters_test.report.xml response_parameters_test.py > $TEST_LOG 2>&1 +python3 -m pytest --junitxml=response_parameters_test.report.xml response_parameters_test.py > $TEST_LOG 2>&1 if 
[ $? -ne 0 ]; then echo -e "\n***\n*** Response parameters test FAILED\n***" cat $TEST_LOG RET=1 fi - -MODEL_NAME=response_parameters_bls python3 -m pytest -s --junitxml=response_parameters_bls_test.report.xml response_parameters_test.py > $TEST_BLS_LOG 2>&1 -if [ $? -ne 0 ]; then - echo -e "\n***\n*** Response parameters BLS test FAILED\n***" - cat $TEST_BLS_LOG - RET=1 -fi set -e kill $SERVER_PID diff --git a/qa/python_models/response_parameters_bls/config.pbtxt b/qa/python_models/response_parameters_bls/config.pbtxt index 1cf961d671..1eeb5cd4c2 100644 --- a/qa/python_models/response_parameters_bls/config.pbtxt +++ b/qa/python_models/response_parameters_bls/config.pbtxt @@ -33,6 +33,11 @@ input [ name: "RESPONSE_PARAMETERS" data_type: TYPE_STRING dims: [ 1 ] + }, + { + name: "RESPONSE_PARAMETERS_DECOUPLED" + data_type: TYPE_STRING + dims: [ 1 ] } ] diff --git a/qa/python_models/response_parameters_bls/model.py b/qa/python_models/response_parameters_bls/model.py index ec33237cf9..f112e7e86f 100644 --- a/qa/python_models/response_parameters_bls/model.py +++ b/qa/python_models/response_parameters_bls/model.py @@ -26,6 +26,7 @@ import json +import numpy as np import triton_python_backend_utils as pb_utils @@ -45,37 +46,57 @@ def execute(self, requests): responses = [] for request in requests: - bls_input_tensor = pb_utils.get_input_tensor_by_name( + passed = True + + # test bls response parameters from a regular model + res_params_tensor = pb_utils.get_input_tensor_by_name( request, "RESPONSE_PARAMETERS" - ) - bls_request = pb_utils.InferenceRequest( + ).as_numpy() + res_params_str = str(res_params_tensor[0][0], encoding="utf-8") + res_params = json.loads(res_params_str) + bls_input_tensor = pb_utils.Tensor("RESPONSE_PARAMETERS", res_params_tensor) + bls_req = pb_utils.InferenceRequest( model_name="response_parameters", inputs=[bls_input_tensor], - requested_output_names=["OUTPUT"], ) - try: - bls_response = bls_request.exec() - response_tensors = bls_response.output_tensors() - response_parameters_str = bls_response.parameters() - if bls_response.has_error(): - raise Exception(bls_response.error().message()) - res_params = json.loads(response_parameters_str) - - response = pb_utils.InferenceResponse( - output_tensors=response_tensors, parameters=res_params - ) + bls_res = bls_req.exec() # decoupled=False + bls_res_params_str = bls_res.parameters() + bls_res_params = ( + json.loads(bls_res_params_str) if bls_res_params_str != "" else {} + ) + passed = passed and bls_res_params == res_params - res_params_set = {} - if response.parameters() != "": - res_params_set = json.loads(response.parameters()) - if res_params_set != res_params: - raise Exception("Response parameters set differ from provided") - except Exception as e: - error = pb_utils.TritonError( - message=str(e), code=pb_utils.TritonError.INVALID_ARG + # test bls response parameters from a decoupled model + res_params_decoupled_tensor = pb_utils.get_input_tensor_by_name( + request, "RESPONSE_PARAMETERS_DECOUPLED" + ).as_numpy() + res_params_decoupled_str = str( + res_params_decoupled_tensor[0][0], encoding="utf-8" + ) + res_params_decoupled = json.loads(res_params_decoupled_str) + bls_decoupled_input_tensor = pb_utils.Tensor( + "RESPONSE_PARAMETERS_DECOUPLED", res_params_decoupled_tensor + ) + bls_decoupled_req = pb_utils.InferenceRequest( + model_name="response_parameters_decoupled", + inputs=[bls_decoupled_input_tensor], + ) + bls_decoupled_res = bls_decoupled_req.exec(decoupled=True) + for bls_decoupled_r in bls_decoupled_res: + 
bls_decoupled_r_params_str = bls_decoupled_r.parameters() + bls_decoupled_r_params = ( + json.loads(bls_decoupled_r_params_str) + if bls_decoupled_r_params_str != "" + else {} ) - response = pb_utils.InferenceResponse(error=error) + passed = passed and bls_decoupled_r_params in res_params_decoupled + res_params_decoupled.remove(bls_decoupled_r_params) + passed = passed and len(res_params_decoupled) == 0 + output_tensor = pb_utils.Tensor( + "OUTPUT", np.array([[str(passed)]], dtype=np.object_) + ) + response = pb_utils.InferenceResponse(output_tensors=[output_tensor]) responses.append(response) return responses diff --git a/qa/python_models/response_parameters_bls_decoupled/config.pbtxt b/qa/python_models/response_parameters_bls_decoupled/config.pbtxt deleted file mode 100644 index 578cdca1aa..0000000000 --- a/qa/python_models/response_parameters_bls_decoupled/config.pbtxt +++ /dev/null @@ -1,52 +0,0 @@ -# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. -# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -name: "response_parameters_bls_decoupled" -backend: "python" -max_batch_size: 8 - -input [ - { - name: "RESPONSE_PARAMETERS" - data_type: TYPE_STRING - dims: [ 1 ] - } -] - -output [ - { - name: "OUTPUT" - data_type: TYPE_STRING - dims: [ 1 ] - } -] - -instance_group [ - { - count: 1 - kind: KIND_CPU - } -] diff --git a/qa/python_models/response_parameters_bls_decoupled/model.py b/qa/python_models/response_parameters_bls_decoupled/model.py deleted file mode 100644 index 79bceed9c0..0000000000 --- a/qa/python_models/response_parameters_bls_decoupled/model.py +++ /dev/null @@ -1,85 +0,0 @@ -# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved. -# -# Redistribution and use in source and binary forms, with or without -# modification, are permitted provided that the following conditions -# are met: -# * Redistributions of source code must retain the above copyright -# notice, this list of conditions and the following disclaimer. 
-# * Redistributions in binary form must reproduce the above copyright -# notice, this list of conditions and the following disclaimer in the -# documentation and/or other materials provided with the distribution. -# * Neither the name of NVIDIA CORPORATION nor the names of its -# contributors may be used to endorse or promote products derived -# from this software without specific prior written permission. -# -# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY -# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE -# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR -# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR -# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, -# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, -# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR -# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY -# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT -# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE -# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - -import json - -import numpy as np -import triton_python_backend_utils as pb_utils - - -class TritonPythonModel: - """ - This model is designed to test sending back response parameters when using BLS - with decoupled model transaction policy. - - The only difference vs. response_parameters_bls model is the BLS composing model - (i.e. response_parameters_decoupled) turns on decoupled model transaction policy. - For more details, please check response_parameters_bls model. - """ - - def execute(self, requests): - responses = [] - - for request in requests: - bls_input_tensor = pb_utils.get_input_tensor_by_name( - request, "RESPONSE_PARAMETERS" - ) - bls_request = pb_utils.InferenceRequest( - model_name="response_parameters_decoupled", - inputs=[bls_input_tensor], - requested_output_names=["OUTPUT"], - ) - - res_params_numpy = bls_input_tensor.as_numpy() - res_params_str = str(res_params_numpy[0][0], encoding="utf-8") - res_params = json.loads(res_params_str) - try: - bls_responses = bls_request.exec(decoupled=True) - - for bls_response, r_params in zip(bls_responses, res_params): - if bls_response.has_error(): - raise Exception(bls_response.error().message()) - - r_params_set = {} - if bls_response.parameters() != "": - r_params_set = json.loads(bls_response.parameters()) - if r_params_set != r_params: - raise Exception( - "Response parameters set differ from provided" - ) - - # no need to send back anything in the response since we already do the - # parameters matching checking above. 
- response = pb_utils.InferenceResponse() - except Exception as e: - error = pb_utils.TritonError( - message=str(e), code=pb_utils.TritonError.INVALID_ARG - ) - response = pb_utils.InferenceResponse(error=error) - - responses.append(response) - - return responses From 76570a320679ca0a24fa9888322b9bbc52f30e3e Mon Sep 17 00:00:00 2001 From: Ziqi Fan Date: Wed, 5 Feb 2025 16:46:52 -0800 Subject: [PATCH 6/8] clean up --- qa/L0_backend_python/parameters/response_parameters_test.py | 3 +-- qa/L0_backend_python/parameters/test.sh | 3 --- 2 files changed, 1 insertion(+), 5 deletions(-) diff --git a/qa/L0_backend_python/parameters/response_parameters_test.py b/qa/L0_backend_python/parameters/response_parameters_test.py index 659d86cd91..88a5ba8079 100644 --- a/qa/L0_backend_python/parameters/response_parameters_test.py +++ b/qa/L0_backend_python/parameters/response_parameters_test.py @@ -24,7 +24,6 @@ # (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE # OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. -import os import sys sys.path.append("../../common") @@ -40,7 +39,7 @@ class ResponseParametersTest(unittest.TestCase): _server_address_grpc = "localhost:8001" - _model_name = os.environ["MODEL_NAME"] + _model_name = "response_parameters" _shape = [1, 1] def setUp(self): diff --git a/qa/L0_backend_python/parameters/test.sh b/qa/L0_backend_python/parameters/test.sh index 172bc5909e..6cf12ccd47 100755 --- a/qa/L0_backend_python/parameters/test.sh +++ b/qa/L0_backend_python/parameters/test.sh @@ -42,9 +42,6 @@ mkdir -p models/response_parameters_decoupled/1 && \ mkdir -p models/response_parameters_bls/1 && \ cp ../../python_models/response_parameters_bls/model.py models/response_parameters_bls/1 && \ cp ../../python_models/response_parameters_bls/config.pbtxt models/response_parameters_bls -mkdir -p models/response_parameters_bls_decoupled/1 && \ - cp ../../python_models/response_parameters_bls_decoupled/model.py models/response_parameters_bls_decoupled/1 && \ - cp ../../python_models/response_parameters_bls_decoupled/config.pbtxt models/response_parameters_bls_decoupled TEST_LOG="response_parameters_test.log" SERVER_LOG="response_parameters_test.server.log" From fed80ae101b23f16960d116d82f2a7e1e84b0e3a Mon Sep 17 00:00:00 2001 From: Ziqi Fan Date: Wed, 5 Feb 2025 23:30:06 -0800 Subject: [PATCH 7/8] fix typo --- qa/L0_backend_python/parameters/response_parameters_test.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/qa/L0_backend_python/parameters/response_parameters_test.py b/qa/L0_backend_python/parameters/response_parameters_test.py index 88a5ba8079..d8dbbff6cc 100644 --- a/qa/L0_backend_python/parameters/response_parameters_test.py +++ b/qa/L0_backend_python/parameters/response_parameters_test.py @@ -166,8 +166,8 @@ def test_setting_response_parameters_decoupled(self): output = str(result.as_numpy("OUTPUT")[0][0], encoding="utf-8") self.assertEqual(json.dumps(params[i]), output) - def test_setting_response_parameters_bls_decoupled(self): - model_name = "response_parameters_bls_decoupled" + def test_setting_response_parameters_bls(self): + model_name = "response_parameters_bls" params = {"bool": False, "int": 2048, "str": "Hello World!"} params_decoupled = [{}, {"bool": True, "int": 10000}, {"str": "?"}] params_str = json.dumps(params) From bc8f539a7a236e43484b26230f0268336a45abd9 Mon Sep 17 00:00:00 2001 From: Ziqi Fan Date: Thu, 6 Feb 2025 10:56:09 -0800 Subject: [PATCH 8/8] fix test issues --- 
qa/python_models/response_parameters_bls/model.py | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/qa/python_models/response_parameters_bls/model.py b/qa/python_models/response_parameters_bls/model.py
index f112e7e86f..390c14803e 100644
--- a/qa/python_models/response_parameters_bls/model.py
+++ b/qa/python_models/response_parameters_bls/model.py
@@ -58,6 +58,7 @@ def execute(self, requests):
             bls_req = pb_utils.InferenceRequest(
                 model_name="response_parameters",
                 inputs=[bls_input_tensor],
+                requested_output_names=["OUTPUT"],
             )
             bls_res = bls_req.exec()  # decoupled=False
             bls_res_params_str = bls_res.parameters()
@@ -75,14 +76,17 @@ def execute(self, requests):
             )
             res_params_decoupled = json.loads(res_params_decoupled_str)
             bls_decoupled_input_tensor = pb_utils.Tensor(
-                "RESPONSE_PARAMETERS_DECOUPLED", res_params_decoupled_tensor
-            )
+                "RESPONSE_PARAMETERS", res_params_decoupled_tensor
+            )  # the response_parameters_decoupled model's input is also named RESPONSE_PARAMETERS
             bls_decoupled_req = pb_utils.InferenceRequest(
                 model_name="response_parameters_decoupled",
                 inputs=[bls_decoupled_input_tensor],
+                requested_output_names=["OUTPUT"],
            )
             bls_decoupled_res = bls_decoupled_req.exec(decoupled=True)
             for bls_decoupled_r in bls_decoupled_res:
+                if len(bls_decoupled_r.output_tensors()) == 0:
+                    break  # final response reached (it carries no output tensors)
                 bls_decoupled_r_params_str = bls_decoupled_r.parameters()
                 bls_decoupled_r_params = (
                     json.loads(bls_decoupled_r_params_str)
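
For reference, the model-side pattern this series converges on reduces to a short sketch. Below is a minimal, hypothetical Python backend model (not part of the patches above) that forwards its input to a composing model over BLS and propagates that model's response parameters onto its own response. It assumes only the pb_utils calls already exercised in the diffs: pb_utils.InferenceRequest(...).exec(), the parameters() JSON string on the BLS response, and the parameters argument of pb_utils.InferenceResponse.

# Minimal sketch, assuming the pb_utils API used throughout this series;
# "response_parameters" as the composing model name mirrors the QA setup.
import json

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        responses = []
        for request in requests:
            # Forward the request input to the composing model via BLS.
            bls_request = pb_utils.InferenceRequest(
                model_name="response_parameters",
                inputs=[
                    pb_utils.get_input_tensor_by_name(request, "RESPONSE_PARAMETERS")
                ],
                requested_output_names=["OUTPUT"],
            )
            bls_response = bls_request.exec()
            if bls_response.has_error():
                raise pb_utils.TritonModelException(bls_response.error().message())

            # parameters() returns a JSON string, or "" when none were set;
            # decode it and re-attach it to this model's own response.
            params_str = bls_response.parameters()
            params = json.loads(params_str) if params_str != "" else {}
            responses.append(
                pb_utils.InferenceResponse(
                    output_tensors=bls_response.output_tensors(), parameters=params
                )
            )
        return responses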
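
On the client side, response parameters surface on the raw gRPC response message, which is what the test helper _assert_response_parameters_match walks. A condensed, standalone version of that check, assuming the same environment as the tests (tritonclient.grpc, a server on localhost:8001, and the response_parameters model), looks like this:

import numpy as np
import tritonclient.grpc as grpcclient

with grpcclient.InferenceServerClient("localhost:8001") as client:
    inputs = [grpcclient.InferInput("RESPONSE_PARAMETERS", [1, 1], "BYTES")]
    inputs[0].set_data_from_numpy(np.array([['{"int": 1024}']], dtype=np.object_))
    result = client.infer("response_parameters", inputs)

# Each parameter value is a protobuf oneof: bool_param, int64_param or string_param.
for key, value in result.get_response().parameters.items():
    for field in ("bool_param", "int64_param", "string_param"):
        if value.HasField(field):
            print(key, getattr(value, field))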