diff --git a/qa/L0_backend_python/parameters/response_parameters_test.py b/qa/L0_backend_python/parameters/response_parameters_test.py
index e07bb5eb9f..d8dbbff6cc 100644
--- a/qa/L0_backend_python/parameters/response_parameters_test.py
+++ b/qa/L0_backend_python/parameters/response_parameters_test.py
@@ -166,6 +166,31 @@ def test_setting_response_parameters_decoupled(self):
             output = str(result.as_numpy("OUTPUT")[0][0], encoding="utf-8")
             self.assertEqual(json.dumps(params[i]), output)
 
+    def test_setting_response_parameters_bls(self):
+        model_name = "response_parameters_bls"
+        params = {"bool": False, "int": 2048, "str": "Hello World!"}
+        params_decoupled = [{}, {"bool": True, "int": 10000}, {"str": "?"}]
+        params_str = json.dumps(params)
+        params_decoupled_str = json.dumps(params_decoupled)
+
+        inputs = [
+            grpcclient.InferInput("RESPONSE_PARAMETERS", self._shape, "BYTES"),
+            grpcclient.InferInput(
+                "RESPONSE_PARAMETERS_DECOUPLED", self._shape, "BYTES"
+            ),
+        ]
+        inputs[0].set_data_from_numpy(np.array([[params_str]], dtype=np.object_))
+        inputs[1].set_data_from_numpy(
+            np.array([[params_decoupled_str]], dtype=np.object_)
+        )
+
+        with self._shm_leak_detector.Probe() as shm_probe:
+            with grpcclient.InferenceServerClient(self._server_address_grpc) as client:
+                result = client.infer(model_name, inputs)
+
+        output = str(result.as_numpy("OUTPUT")[0][0], encoding="utf-8")
+        self.assertEqual(output, "True")
+
 
 if __name__ == "__main__":
     unittest.main()
diff --git a/qa/L0_backend_python/parameters/test.sh b/qa/L0_backend_python/parameters/test.sh
index 9d8c86c530..6cf12ccd47 100755
--- a/qa/L0_backend_python/parameters/test.sh
+++ b/qa/L0_backend_python/parameters/test.sh
@@ -39,6 +39,9 @@ mkdir -p models/response_parameters/1 && \
 mkdir -p models/response_parameters_decoupled/1 && \
     cp ../../python_models/response_parameters_decoupled/model.py models/response_parameters_decoupled/1 && \
     cp ../../python_models/response_parameters_decoupled/config.pbtxt models/response_parameters_decoupled
+mkdir -p models/response_parameters_bls/1 && \
+    cp ../../python_models/response_parameters_bls/model.py models/response_parameters_bls/1 && \
+    cp ../../python_models/response_parameters_bls/config.pbtxt models/response_parameters_bls
 
 TEST_LOG="response_parameters_test.log"
 SERVER_LOG="response_parameters_test.server.log"
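Note: the test drives the new response_parameters_bls model (added below) over gRPC. The models it calls via BLS, response_parameters and response_parameters_decoupled, already exist under qa/python_models and echo the requested values back as response parameters. As a rough sketch of that echo pattern (assuming the parameters argument of pb_utils.InferenceResponse accepts a JSON string, as recent Python backend releases do; the actual QA models differ in detail):

    import numpy as np
    import triton_python_backend_utils as pb_utils


    class TritonPythonModel:
        def execute(self, requests):
            responses = []
            for request in requests:
                # the requested parameters arrive as a JSON string in a BYTES tensor
                params_str = str(
                    pb_utils.get_input_tensor_by_name(
                        request, "RESPONSE_PARAMETERS"
                    ).as_numpy()[0][0],
                    encoding="utf-8",
                )
                output = pb_utils.Tensor(
                    "OUTPUT", np.array([[params_str]], dtype=np.object_)
                )
                # echo the requested values back as response parameters
                # (hypothetical sketch, not the upstream model verbatim)
                responses.append(
                    pb_utils.InferenceResponse(
                        output_tensors=[output], parameters=params_str
                    )
                )
            return responses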
diff --git a/qa/python_models/response_parameters_bls/config.pbtxt b/qa/python_models/response_parameters_bls/config.pbtxt
new file mode 100644
index 0000000000..1eeb5cd4c2
--- /dev/null
+++ b/qa/python_models/response_parameters_bls/config.pbtxt
@@ -0,0 +1,57 @@
+# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+name: "response_parameters_bls"
+backend: "python"
+max_batch_size: 8
+
+input [
+  {
+    name: "RESPONSE_PARAMETERS"
+    data_type: TYPE_STRING
+    dims: [ 1 ]
+  },
+  {
+    name: "RESPONSE_PARAMETERS_DECOUPLED"
+    data_type: TYPE_STRING
+    dims: [ 1 ]
+  }
+]
+
+output [
+  {
+    name: "OUTPUT"
+    data_type: TYPE_STRING
+    dims: [ 1 ]
+  }
+]
+
+instance_group [
+  {
+    count: 1
+    kind: KIND_CPU
+  }
+]
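Both inputs are TYPE_STRING (BYTES on the wire), so each parameter set travels as a JSON string in a [1, 1] tensor (the batch dimension plus dims: [ 1 ]). A client-side sketch of building these inputs, mirroring the test above:

    import json

    import numpy as np
    import tritonclient.grpc as grpcclient

    params = {"bool": False, "int": 2048, "str": "Hello World!"}
    params_decoupled = [{}, {"bool": True, "int": 10000}, {"str": "?"}]

    # TYPE_STRING tensors are sent as numpy object arrays of UTF-8 strings
    inputs = [
        grpcclient.InferInput("RESPONSE_PARAMETERS", [1, 1], "BYTES"),
        grpcclient.InferInput("RESPONSE_PARAMETERS_DECOUPLED", [1, 1], "BYTES"),
    ]
    inputs[0].set_data_from_numpy(np.array([[json.dumps(params)]], dtype=np.object_))
    inputs[1].set_data_from_numpy(
        np.array([[json.dumps(params_decoupled)]], dtype=np.object_)
    )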
diff --git a/qa/python_models/response_parameters_bls/model.py b/qa/python_models/response_parameters_bls/model.py
new file mode 100644
index 0000000000..390c14803e
--- /dev/null
+++ b/qa/python_models/response_parameters_bls/model.py
@@ -0,0 +1,106 @@
+# Copyright 2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+#
+# Redistribution and use in source and binary forms, with or without
+# modification, are permitted provided that the following conditions
+# are met:
+#  * Redistributions of source code must retain the above copyright
+#    notice, this list of conditions and the following disclaimer.
+#  * Redistributions in binary form must reproduce the above copyright
+#    notice, this list of conditions and the following disclaimer in the
+#    documentation and/or other materials provided with the distribution.
+#  * Neither the name of NVIDIA CORPORATION nor the names of its
+#    contributors may be used to endorse or promote products derived
+#    from this software without specific prior written permission.
+#
+# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS ``AS IS'' AND ANY
+# EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
+# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
+# PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR
+# CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL,
+# EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO,
+# PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR
+# PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY
+# OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+# (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+# OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+import json
+
+import numpy as np
+import triton_python_backend_utils as pb_utils
+
+
+class TritonPythonModel:
+    """
+    This model (A) tests receiving response parameters over BLS. It takes two
+    input tensors: RESPONSE_PARAMETERS, which it forwards via a regular BLS
+    call to the response_parameters model (B), and
+    RESPONSE_PARAMETERS_DECOUPLED, which it forwards via a decoupled BLS call
+    to the response_parameters_decoupled model (C). Models B and C set the
+    values they receive as their response parameters. Model A then compares
+    the parameters it reads back from each BLS response against the original
+    inputs and reports the result in its OUTPUT tensor.
+
+    With this setup, the test can verify that the response parameters
+    observed through BLS match the response parameters requested by the
+    client.
+    """
+
+    def execute(self, requests):
+        responses = []
+
+        for request in requests:
+            passed = True
+
+            # test bls response parameters from a regular (non-decoupled) model
+            res_params_tensor = pb_utils.get_input_tensor_by_name(
+                request, "RESPONSE_PARAMETERS"
+            ).as_numpy()
+            res_params_str = str(res_params_tensor[0][0], encoding="utf-8")
+            res_params = json.loads(res_params_str)
+            bls_input_tensor = pb_utils.Tensor("RESPONSE_PARAMETERS", res_params_tensor)
+            bls_req = pb_utils.InferenceRequest(
+                model_name="response_parameters",
+                inputs=[bls_input_tensor],
+                requested_output_names=["OUTPUT"],
+            )
+            bls_res = bls_req.exec()  # decoupled=False
+            # response parameters arrive as a JSON string; an empty string
+            # means no parameters were set
+            bls_res_params_str = bls_res.parameters()
+            bls_res_params = (
+                json.loads(bls_res_params_str) if bls_res_params_str != "" else {}
+            )
+            passed = passed and bls_res_params == res_params
+
+            # test bls response parameters from a decoupled model
+            res_params_decoupled_tensor = pb_utils.get_input_tensor_by_name(
+                request, "RESPONSE_PARAMETERS_DECOUPLED"
+            ).as_numpy()
+            res_params_decoupled_str = str(
+                res_params_decoupled_tensor[0][0], encoding="utf-8"
+            )
+            res_params_decoupled = json.loads(res_params_decoupled_str)
+            bls_decoupled_input_tensor = pb_utils.Tensor(
+                "RESPONSE_PARAMETERS", res_params_decoupled_tensor
+            )  # response_parameters_decoupled model input name is RESPONSE_PARAMETERS
+            bls_decoupled_req = pb_utils.InferenceRequest(
+                model_name="response_parameters_decoupled",
+                inputs=[bls_decoupled_input_tensor],
+                requested_output_names=["OUTPUT"],
+            )
+            bls_decoupled_res = bls_decoupled_req.exec(decoupled=True)
+            for bls_decoupled_r in bls_decoupled_res:
+                if len(bls_decoupled_r.output_tensors()) == 0:
+                    break  # an empty response marks the final response
+                bls_decoupled_r_params_str = bls_decoupled_r.parameters()
+                bls_decoupled_r_params = (
+                    json.loads(bls_decoupled_r_params_str)
+                    if bls_decoupled_r_params_str != ""
+                    else {}
+                )
+                # each decoupled response must carry one of the expected
+                # parameter sets; remove matched sets so duplicates are not
+                # double counted (remove() would raise if the set were absent)
+                if bls_decoupled_r_params in res_params_decoupled:
+                    res_params_decoupled.remove(bls_decoupled_r_params)
+                else:
+                    passed = False
+            # every expected parameter set must have been seen exactly once
+            passed = passed and len(res_params_decoupled) == 0
+
+            output_tensor = pb_utils.Tensor(
+                "OUTPUT", np.array([[str(passed)]], dtype=np.object_)
+            )
+            response = pb_utils.InferenceResponse(output_tensors=[output_tensor])
+            responses.append(response)
+
+        return responses
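End to end, the model reports a single "True"/"False" string, so a client only has to check OUTPUT. A minimal sketch, assuming a local server on the default gRPC port and the inputs built as in the snippet above:

    import tritonclient.grpc as grpcclient

    with grpcclient.InferenceServerClient("localhost:8001") as client:
        result = client.infer("response_parameters_bls", inputs)

    # OUTPUT is "True" only if every BLS response parameter matched the input
    output = str(result.as_numpy("OUTPUT")[0][0], encoding="utf-8")
    assert output == "True"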