-// Copyright 2021-2023, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
+// Copyright 2021-2025, NVIDIA CORPORATION & AFFILIATES. All rights reserved.
 //
 // Redistribution and use in source and binary forms, with or without
 // modification, are permitted provided that the following conditions
@@ -39,8 +39,10 @@ namespace triton { namespace backend { namespace python {

 InferResponse::InferResponse(
     const std::vector<std::shared_ptr<PbTensor>>& output_tensors,
-    std::shared_ptr<PbError> error, const bool is_last_response, void* id)
-    : error_(error), is_last_response_(is_last_response), id_(id)
+    std::shared_ptr<PbError> error, std::string parameters,
+    const bool is_last_response, void* id)
+    : error_(error), is_last_response_(is_last_response), id_(id),
+      parameters_(std::move(parameters))
 {
   for (auto& output : output_tensors) {
     if (!output) {
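
With this change the stub-side constructor takes the response parameters as a
serialized JSON string. A minimal construction sketch (the values here are
hypothetical; in practice the string comes from the Python model's response):

  // Hypothetical values for illustration only.
  auto response = std::make_shared<InferResponse>(
      output_tensors, nullptr /* error */,
      R"({"sequence_id": 7})" /* parameters, JSON-encoded */,
      true /* is_last_response */, nullptr /* id */);
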
@@ -58,6 +60,12 @@ InferResponse::OutputTensors()
   return output_tensors_;
 }
 
+std::string&
+InferResponse::Parameters()
+{
+  return parameters_;
+}
+
 bool
 InferResponse::HasError()
 {
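
The new accessor exposes the raw JSON string rather than a parsed structure, so
a caller that needs typed values parses it itself. A small usage sketch (the
key shown is an assumption, not fixed by this diff):

  // Returns the parameters as a JSON-encoded string, e.g.
  // R"({"triton_final_response": true})".
  std::string& params_json = response->Parameters();
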
@@ -106,6 +114,9 @@ InferResponse::SaveToSharedMemory(
       j++;
     }
     response_shm_ptr->id = id_;
+
+    parameters_shm_ = PbString::Create(shm_pool, parameters_);
+    response_shm_ptr->parameters = parameters_shm_->ShmHandle();
   }
 }
 
@@ -143,6 +154,8 @@ InferResponse::LoadFromSharedMemory(
 
   std::shared_ptr<PbError> pb_error;
   std::vector<std::shared_ptr<PbTensor>> output_tensors;
+  std::shared_ptr<PbString> parameters_shm;
+  std::string parameters;
 
   // If the error field is set, do not load output tensors from shared memory.
   if (response_shm_ptr->has_error && response_shm_ptr->is_error_set) {
@@ -154,33 +167,43 @@ InferResponse::LoadFromSharedMemory(
     bi::managed_external_buffer::handle_t* tensor_handle_shm =
         reinterpret_cast<bi::managed_external_buffer::handle_t*>(
             response_shm.data_.get() + sizeof(ResponseShm));
+    {
 #ifdef TRITON_PB_STUB
-    // Need to acquire the GIL to avoid hangs.
-    py::gil_scoped_acquire acquire;
+      // Need to acquire the GIL to avoid hangs.
+      py::gil_scoped_acquire acquire;
 #endif
-    for (size_t idx = 0; idx < requested_output_count; ++idx) {
-      std::shared_ptr<PbTensor> pb_tensor = PbTensor::LoadFromSharedMemory(
-          shm_pool, tensor_handle_shm[idx], open_cuda_handle);
-      output_tensors.emplace_back(std::move(pb_tensor));
+      for (size_t idx = 0; idx < requested_output_count; ++idx) {
+        std::shared_ptr<PbTensor> pb_tensor = PbTensor::LoadFromSharedMemory(
+            shm_pool, tensor_handle_shm[idx], open_cuda_handle);
+        output_tensors.emplace_back(std::move(pb_tensor));
+      }
     }
+
+    parameters_shm = std::move(
+        PbString::LoadFromSharedMemory(shm_pool, response_shm_ptr->parameters));
+    parameters = parameters_shm->String();
   }
 
   return std::unique_ptr<InferResponse>(new InferResponse(
       response_shm, output_tensors, pb_error,
-      response_shm_ptr->is_last_response, response_shm_ptr->id));
+      response_shm_ptr->is_last_response, response_shm_ptr->id, parameters_shm,
+      parameters));
 }
 
 InferResponse::InferResponse(
     AllocatedSharedMemory<char>& response_shm,
     std::vector<std::shared_ptr<PbTensor>>& output_tensors,
-    std::shared_ptr<PbError>& pb_error, const bool is_last_response, void* id)
+    std::shared_ptr<PbError>& pb_error, const bool is_last_response, void* id,
+    std::shared_ptr<PbString>& parameters_shm, std::string& parameters)
 {
   response_shm_ = std::move(response_shm);
   output_tensors_ = std::move(output_tensors);
   error_ = std::move(pb_error);
   shm_handle_ = response_shm_.handle_;
   id_ = id;
   is_last_response_ = is_last_response;
+  parameters_shm_ = std::move(parameters_shm);
+  parameters_ = std::move(parameters);
 }
 
 std::shared_ptr<PbError>&
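
Taken together, the save and load paths round-trip the parameters through a
PbString in the shared-memory pool: the writer copies the JSON string into shm
and stores its handle in ResponseShm::parameters; the reader maps the handle
back, keeps the PbString alive, and takes an owned std::string copy. A
condensed sketch of that round trip (same calls as the diff above):

  // Writer (SaveToSharedMemory): place the string in shm, record the handle.
  parameters_shm_ = PbString::Create(shm_pool, parameters_);
  response_shm_ptr->parameters = parameters_shm_->ShmHandle();

  // Reader (LoadFromSharedMemory): map the handle, copy out the string.
  auto parameters_shm =
      PbString::LoadFromSharedMemory(shm_pool, response_shm_ptr->parameters);
  std::string parameters = parameters_shm->String();
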
@@ -387,6 +410,38 @@ InferResponse::Send(
     cuda_copy |= cuda_used;
   }
 
+  if (!parameters_.empty()) {
+    triton::common::TritonJson::Value param;
+    THROW_IF_TRITON_ERROR(
+        param.Parse(parameters_.c_str(), parameters_.length()));
+    std::vector<std::string> param_keys;
+    THROW_IF_TRITON_ERROR(param.Members(&param_keys));
+    for (const auto& key : param_keys) {
+      triton::common::TritonJson::Value value;
+      if (!param.Find(key.c_str(), &value)) {
+        throw PythonBackendException("Unexpected missing key on parameters");
+      }
+      if (value.IsString()) {
+        std::string string_value;
+        THROW_IF_TRITON_ERROR(value.AsString(&string_value));
+        THROW_IF_TRITON_ERROR(TRITONBACKEND_ResponseSetStringParameter(
+            response, key.c_str(), string_value.c_str()));
+      } else if (value.IsInt()) {
+        int64_t int_value = 0;
+        THROW_IF_TRITON_ERROR(value.AsInt(&int_value));
+        THROW_IF_TRITON_ERROR(TRITONBACKEND_ResponseSetIntParameter(
+            response, key.c_str(), int_value));
+      } else if (value.IsBool()) {
+        bool bool_value = false;
+        THROW_IF_TRITON_ERROR(value.AsBool(&bool_value));
+        THROW_IF_TRITON_ERROR(TRITONBACKEND_ResponseSetBoolParameter(
+            response, key.c_str(), bool_value));
+      } else {
+        throw PythonBackendException("Unsupported value type on parameters");
+      }
+    }
+  }
+
 #ifdef TRITON_ENABLE_GPU
   if (cuda_copy) {
     cudaStreamSynchronize(reinterpret_cast<cudaStream_t>(cuda_stream));
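
The parameters string consumed by this Send() path is expected to be a flat
JSON object whose values are strings, integers, or booleans; any other value
type throws. A standalone sketch of the decoding step (the payload below is
hypothetical; TritonJson is the real triton::common JSON wrapper used above):

  // Hypothetical payload; the real one is produced by the Python model.
  std::string parameters =
      R"({"model": "densenet", "batch": 4, "cached": false})";
  triton::common::TritonJson::Value param;
  THROW_IF_TRITON_ERROR(param.Parse(parameters.c_str(), parameters.length()));
  std::vector<std::string> param_keys;
  THROW_IF_TRITON_ERROR(param.Members(&param_keys));
  // "model"  -> TRITONBACKEND_ResponseSetStringParameter,
  // "batch"  -> TRITONBACKEND_ResponseSetIntParameter,
  // "cached" -> TRITONBACKEND_ResponseSetBoolParameter.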