@@ -654,7 +654,6 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr)
654654 py::list py_request_list =
655655 LoadRequestsFromSharedMemory (request_batch_shm_ptr);
656656 std::unique_ptr<IPCMessage> execute_response;
657- // IPCMessage::Create(shm_pool_, false /* Inline response */);
658657
659658 std::optional<AllocatedSharedMemory<char >> response_batch;
660659 bool has_exception = false ;
@@ -675,8 +674,7 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr)
675674 {
676675 NVTX_RANGE (nvtx_, " PyExecute " + name_);
677676
678- execute_return =
679- model_instance_.attr (" execute" )(py_request_list);
677+ execute_return = model_instance_.attr (" execute" )(py_request_list);
680678
681679 bool is_coroutine = py::module::import (" asyncio" )
682680 .attr (" iscoroutine" )(execute_return)
@@ -688,10 +686,12 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr)
688686 } else {
689687 py::object coroutine_return =
690688 RunCoroutine (execute_return, false /* in_background */ );
691- ProcessReturnedResponses (py_request_list, coroutine_return, response_batch);
689+ ProcessReturnedResponses (
690+ py_request_list, coroutine_return, response_batch);
692691 }
693692 } else {
694- ProcessReturnedResponses (py_request_list, execute_return, response_batch);
693+ ProcessReturnedResponses (
694+ py_request_list, execute_return, response_batch);
695695 }
696696 }
697697 }
@@ -712,11 +712,14 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr)
712712 error_string;
713713 LOG_ERROR << err_message.c_str ();
714714 if (!response_batch) {
715- response_batch = shm_pool_->Construct <char >(sizeof (ResponseBatch) + sizeof (IPCMessageShm));
716- }
717- ResponseBatch* response_batch_shm_ptr = reinterpret_cast <ResponseBatch*>(response_batch.value ().data_ .get () + sizeof (IPCMessageShm));
715+ response_batch = shm_pool_->Construct <char >(
716+ sizeof (ResponseBatch) + sizeof (IPCMessageShm));
717+ }
718+ ResponseBatch* response_batch_shm_ptr = reinterpret_cast <ResponseBatch*>(
719+ response_batch.value ().data_ .get () + sizeof (IPCMessageShm));
718720
719- response_batch_shm_ptr = reinterpret_cast <ResponseBatch*>(response_batch.value ().data_ .get ());
721+ response_batch_shm_ptr =
722+ reinterpret_cast <ResponseBatch*>(response_batch.value ().data_ .get ());
720723 response_batch_shm_ptr->has_error = true ;
721724 error_string_shm = PbString::Create (shm_pool_, err_message);
722725 response_batch_shm_ptr->error = error_string_shm->ShmHandle ();
@@ -732,14 +735,19 @@ Stub::ProcessRequests(RequestBatch* request_batch_shm_ptr)
732735 }
733736
734737 if (!response_batch) {
735- response_batch = shm_pool_->Construct <char >(sizeof (ResponseBatch) + sizeof (IPCMessageShm));
736- ResponseBatch* response_batch_shm_ptr =reinterpret_cast <ResponseBatch*>(response_batch.value ().data_ .get () + sizeof (IPCMessageShm));
737- response_batch_shm_ptr->batch_size = 0 ;
738- }
739- ResponseBatch* response_batch_shm_ptr = reinterpret_cast <ResponseBatch*>(response_batch.value ().data_ .get () + sizeof (IPCMessageShm));
738+ response_batch = shm_pool_->Construct <char >(
739+ sizeof (ResponseBatch) + sizeof (IPCMessageShm));
740+ ResponseBatch* response_batch_shm_ptr = reinterpret_cast <ResponseBatch*>(
741+ response_batch.value ().data_ .get () + sizeof (IPCMessageShm));
742+ response_batch_shm_ptr->batch_size = 0 ;
743+ }
744+ ResponseBatch* response_batch_shm_ptr = reinterpret_cast <ResponseBatch*>(
745+ response_batch.value ().data_ .get () + sizeof (IPCMessageShm));
740746 response_batch_shm_ptr->has_error = false ;
741747 response_batch_shm_ptr->is_error_set = false ;
742- execute_response = IPCMessage::Create (reinterpret_cast <IPCMessageShm*>(response_batch.value ().data_ .get ()), response_batch.value ().handle_ );
748+ execute_response = IPCMessage::Create (
749+ reinterpret_cast <IPCMessageShm*>(response_batch.value ().data_ .get ()),
750+ response_batch.value ().handle_ );
743751 execute_response->Args () = response_batch.value ().handle_ ;
744752 execute_response->InlineResponse () = false ;
745753 execute_response->Command () = PYTHONSTUB_ExecuteResponse;
@@ -761,7 +769,8 @@ Stub::ProcessResponse(InferResponse* response)
761769
762770void
763771Stub::ProcessReturnedResponses (
764- py::list py_requests, py::object py_responses_obj, std::optional<AllocatedSharedMemory<char >>& response_batch)
772+ py::list py_requests, py::object py_responses_obj,
773+ std::optional<AllocatedSharedMemory<char >>& response_batch)
765774{
766775 // Return if there is nothing to process.
767776 if (py::isinstance<py::none>(py_responses_obj)) {
@@ -812,29 +821,34 @@ Stub::ProcessReturnedResponses(
812821
813822 std::shared_ptr<InferResponse> response =
814823 py_responses[i].cast <std::shared_ptr<InferResponse>>();
815- request->GetResponseSender ()->UpdateStateAndCounters (response, TRITONSERVER_RESPONSE_COMPLETE_FINAL);
824+ request->GetResponseSender ()->UpdateStateAndCounters (
825+ response, TRITONSERVER_RESPONSE_COMPLETE_FINAL);
816826 }
817827 }
818- response_batch = std::move (shm_pool_->Construct <char >(sizeof (IPCMessageShm) +
828+ // Return all the created responses using response_batch. The reason
829+ // that both of the paths are available is that sending the responses
830+ // using response_batch is faster than using `response_sender`.
831+ response_batch = std::move (shm_pool_->Construct <char >(
832+ sizeof (IPCMessageShm) +
819833 requests_size * sizeof (bi::managed_external_buffer::handle_t ) +
820834 sizeof (ResponseBatch)));
821- ResponseBatch* response_batch_shm_ptr =
822- reinterpret_cast <ResponseBatch*>( response_batch.value ().data_ .get () + sizeof (IPCMessageShm));
835+ ResponseBatch* response_batch_shm_ptr = reinterpret_cast <ResponseBatch*>(
836+ response_batch.value ().data_ .get () + sizeof (IPCMessageShm));
823837
824838 bi::managed_external_buffer::handle_t * responses_shm_handle =
825839 reinterpret_cast <bi::managed_external_buffer::handle_t *>(
826- response_batch.value ().data_ .get () + sizeof (ResponseBatch) + sizeof (IPCMessageShm));
827-
828- for ( size_t i = 0 ; i < responses_size; i++) {
829- // Check the return type of execute function.
830- InferRequest* infer_request = py_requests[i]. cast <InferRequest*>();
831- InferResponse* infer_response = py_responses [i].cast <InferResponse *>();
832- infer_response-> PruneOutputTensors (
833- infer_request->RequestedOutputNames ());
834- ProcessResponse (infer_response);
835- responses_shm_handle[i] = infer_response->ShmHandle ();
836- }
837- response_batch_shm_ptr->batch_size = requests_size;
840+ response_batch.value ().data_ .get () + sizeof (ResponseBatch) +
841+ sizeof (IPCMessageShm));
842+
843+ for ( size_t i = 0 ; i < responses_size; i++) {
844+ // Check the return type of execute function.
845+ InferRequest* infer_request = py_requests [i].cast <InferRequest *>();
846+ InferResponse* infer_response = py_responses[i]. cast <InferResponse*>();
847+ infer_response-> PruneOutputTensors ( infer_request->RequestedOutputNames ());
848+ ProcessResponse (infer_response);
849+ responses_shm_handle[i] = infer_response->ShmHandle ();
850+ }
851+ response_batch_shm_ptr->batch_size = requests_size;
838852}
839853
840854py::object
0 commit comments