@@ -58,6 +58,7 @@ def execute(self, requests):
             bls_req = pb_utils.InferenceRequest(
                 model_name="response_parameters",
                 inputs=[bls_input_tensor],
+                requested_output_names=["OUTPUT"],
             )
             bls_res = bls_req.exec()  # decoupled=False
             bls_res_params_str = bls_res.parameters()
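For reference, the non-decoupled `response_parameters` model called above is expected to attach the request's parameters to its response so that `bls_res.parameters()` returns them. A minimal sketch of such a callee, assuming its input is named RESPONSE_PARAMETERS, its output OUTPUT, and that `pb_utils.InferenceResponse` accepts a `parameters` argument as part of this feature:

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        responses = []
        for request in requests:
            # Hypothetical callee: read the serialized parameters from the
            # RESPONSE_PARAMETERS input (assumed to be a [1]-shaped string
            # tensor) and echo them back as response parameters.
            params_tensor = pb_utils.get_input_tensor_by_name(
                request, "RESPONSE_PARAMETERS"
            )
            params_str = params_tensor.as_numpy()[0].decode("utf-8")
            out_tensor = pb_utils.Tensor("OUTPUT", params_tensor.as_numpy())
            responses.append(
                pb_utils.InferenceResponse(
                    output_tensors=[out_tensor], parameters=params_str
                )
            )
        return responses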
@@ -75,14 +76,17 @@ def execute(self, requests):
             )
             res_params_decoupled = json.loads(res_params_decoupled_str)
             bls_decoupled_input_tensor = pb_utils.Tensor(
-                "RESPONSE_PARAMETERS_DECOUPLED", res_params_decoupled_tensor
-            )
+                "RESPONSE_PARAMETERS", res_params_decoupled_tensor
+            )  # response_parameters_decoupled model input name is RESPONSE_PARAMETERS
             bls_decoupled_req = pb_utils.InferenceRequest(
                 model_name="response_parameters_decoupled",
                 inputs=[bls_decoupled_input_tensor],
+                requested_output_names=["OUTPUT"],
             )
             bls_decoupled_res = bls_decoupled_req.exec(decoupled=True)
             for bls_decoupled_r in bls_decoupled_res:
+                if len(bls_decoupled_r.output_tensors()) == 0:
+                    break  # empty final response reached
                 bls_decoupled_r_params_str = bls_decoupled_r.parameters()
                 bls_decoupled_r_params = (
                     json.loads(bls_decoupled_r_params_str)
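On the decoupled side, the caller's loop above stops once it sees a response with no output tensors, i.e. the empty final response. A rough sketch of a matching `response_parameters_decoupled` callee, under the same naming assumptions, using the response sender API:

import triton_python_backend_utils as pb_utils


class TritonPythonModel:
    def execute(self, requests):
        for request in requests:
            sender = request.get_response_sender()
            params_tensor = pb_utils.get_input_tensor_by_name(
                request, "RESPONSE_PARAMETERS"
            )
            params_str = params_tensor.as_numpy()[0].decode("utf-8")
            out_tensor = pb_utils.Tensor("OUTPUT", params_tensor.as_numpy())
            # Data-carrying response with the parameters attached (assumes
            # InferenceResponse takes a `parameters` argument, as above).
            sender.send(
                pb_utils.InferenceResponse(
                    output_tensors=[out_tensor], parameters=params_str
                )
            )
            # Empty final response that closes the stream; this is what the
            # len(output_tensors()) == 0 check in the caller detects.
            sender.send(flags=pb_utils.TRITONSERVER_RESPONSE_COMPLETE_FINAL)
        return None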