@@ -290,8 +290,8 @@ ModelInstanceState::SaveRequestsToSharedMemory(
290290 request, &request_timeout));
291291
292292 std::unique_ptr<InferRequest> infer_request;
293- TRITONBACKEND_ResponseFactory* factory_ptr;
294- RETURN_IF_ERROR (TRITONBACKEND_ResponseFactoryNew (&factory_ptr, request));
293+ TRITONBACKEND_ResponseFactory* factory_ptr = nullptr ;
294+ // RETURN_IF_ERROR(TRITONBACKEND_ResponseFactoryNew(&factory_ptr, request));
295295
296296 infer_request = std::make_unique<InferRequest>(
297297 id, correlation_id, pb_input_tensors, requested_output_names,
@@ -322,8 +322,6 @@ ModelInstanceState::LaunchStubProcess()
322322 thread_pool_ = std::make_unique<boost::asio::thread_pool>(
323323 model_state->StateForBackend ()->thread_pool_size );
324324
325- queue_monitor_thread_ = true ;
326- queue_monitor_ = std::thread (&ModelInstanceState::MessageQueueMonitor, this );
327325 request_executor_ = std::make_unique<RequestExecutor>(
328326 Stub ()->ShmPool (), model_state->TritonServer ());
329327
@@ -685,44 +683,6 @@ ModelInstanceState::ExecuteBLSRequest(
685683 }
686684}
687685
688- void
689- ModelInstanceState::MessageQueueMonitor ()
690- {
691- while (queue_monitor_thread_) {
692- bi::managed_external_buffer::handle_t handle =
693- Stub ()->ParentMessageQueue ()->Pop ();
694- if (handle == DUMMY_MESSAGE) {
695- break ;
696- }
697- std::unique_ptr<IPCMessage> message =
698- IPCMessage::LoadFromSharedMemory (Stub ()->ShmPool (), handle);
699-
700- // Need to notify the model instance thread that the execute response has
701- // been received.
702- if (message->Command () == PYTHONSTUB_ExecuteResponse) {
703- std::lock_guard<std::mutex> guard{mu_};
704- received_message_ = std::move (message);
705- cv_.notify_one ();
706- } else if (message->Command () == PYTHONSTUB_ResponseSend) {
707- std::shared_ptr<IPCMessage> response_send_message = std::move (message);
708- std::packaged_task<void ()> task ([this , response_send_message] {
709- ResponseSendDecoupled (response_send_message);
710- });
711- boost::asio::post (*thread_pool_, std::move (task));
712- } else if (
713- message->Command () == PYTHONSTUB_InferExecRequest ||
714- message->Command () == PYTHONSTUB_InferStreamExecRequest) {
715- std::shared_ptr<IPCMessage> bls_execute = std::move (message);
716- std::packaged_task<void ()> task ([this , bls_execute] {
717- ExecuteBLSRequest (
718- bls_execute,
719- (bls_execute->Command () == PYTHONSTUB_InferStreamExecRequest));
720- });
721- boost::asio::post (*thread_pool_, std::move (task));
722- }
723- }
724- }
725-
726686void
727687ModelInstanceState::StubToParentMQMonitor ()
728688{
@@ -769,6 +729,25 @@ ModelInstanceState::StubToParentMQMonitor()
769729 ProcessModelControlRequest (message);
770730 break ;
771731 }
732+ case PYTHONSTUB_ResponseSend: {
733+ std::shared_ptr<IPCMessage> response_send_message = std::move (message);
734+ std::packaged_task<void ()> task ([this , response_send_message] {
735+ ResponseSendDecoupled (response_send_message);
736+ });
737+ boost::asio::post (*thread_pool_, std::move (task));
738+ break ;
739+ }
740+ case PYTHONSTUB_InferExecRequest:
741+ case PYTHONSTUB_InferStreamExecRequest: {
742+ std::shared_ptr<IPCMessage> bls_execute = std::move (message);
743+ std::packaged_task<void ()> task ([this , bls_execute] {
744+ ExecuteBLSRequest (
745+ bls_execute,
746+ (bls_execute->Command () == PYTHONSTUB_InferStreamExecRequest));
747+ });
748+ boost::asio::post (*thread_pool_, std::move (task));
749+ break ;
750+ }
772751 default : {
773752 LOG_MESSAGE (
774753 TRITONSERVER_LOG_ERROR, " Unexpected message type received." );
@@ -1228,26 +1207,23 @@ ModelInstanceState::ProcessRequests(
12281207 IPCMessage::Create (Stub ()->ShmPool (), false /* inline_response*/ ));
12291208 ipc_message->Command () = PYTHONSTUB_CommandType::PYTHONSTUB_ExecuteRequest;
12301209 ipc_message->Args () = request_batch.handle_ ;
1231- received_message_ = nullptr ;
1210+
12321211 ScopedDefer execute_finalize ([this ] {
12331212 // Push a dummy message to signal the thread to terminate.
12341213 Stub ()->StubMessageQueue ()->Push (DUMMY_MESSAGE);
12351214 });
12361215
1216+ std::unique_ptr<IPCMessage> response;
12371217 {
1238- std::unique_lock<std::mutex> guard{mu_};
12391218 Stub ()->StubMessageQueue ()->Push (ipc_message->ShmHandle ());
1240- cv_.wait (guard, [this ] { return received_message_ != nullptr ; });
1219+ bi::managed_external_buffer::handle_t response_message;
1220+ Stub ()->ReceiveMessageFromStub (response_message);
1221+ response = IPCMessage::LoadFromSharedMemory (Stub ()->ShmPool (), response_message);
12411222 }
1242-
1243-
1244- AllocatedSharedMemory<char > response_batch = Stub ()->ShmPool ()->Load <char >(received_message_->Args ());
1245-
1223+ char * ipc_message_shm = reinterpret_cast <char *>(response->GetAllocatedSharedMemory ().data_ .get ());
12461224 ResponseBatch* response_batch_shm_ptr =
1247- reinterpret_cast <ResponseBatch*>(response_batch. data_ . get ( ));
1225+ reinterpret_cast <ResponseBatch*>(ipc_message_shm + sizeof (IPCMessageShm ));
12481226
1249- received_message_.reset ();
1250-
12511227 uint64_t compute_end_ns = 0 ;
12521228 SET_TIMESTAMP (compute_end_ns);
12531229 reporter.SetComputeEndNs (compute_end_ns);
@@ -1282,7 +1258,7 @@ ModelInstanceState::ProcessRequests(
12821258 }
12831259 bi::managed_external_buffer::handle_t * response_shm_handle =
12841260 reinterpret_cast <bi::managed_external_buffer::handle_t *>(
1285- response_batch. data_ . get ( ) + sizeof (ResponseBatch ));
1261+ ipc_message_shm + sizeof (ResponseBatch ) + sizeof (IPCMessageShm ));
12861262
12871263 // If the output provided by the model is in GPU, we will pass the list of
12881264 // buffers provided by Triton to the stub process.
@@ -1390,8 +1366,6 @@ ModelInstanceState::ProcessRequests(
13901366 }
13911367 }
13921368
1393- // Finalize the execute.
1394- execute_finalize.Complete ();
13951369 }
13961370
13971371 // If the output tensor is in GPU, there will be a second round trip
@@ -1610,7 +1584,6 @@ ModelInstanceState::~ModelInstanceState()
16101584 Stub ()->TerminateStub ();
16111585 TerminateMonitor ();
16121586 Stub ()->ClearQueues ();
1613- received_message_.reset ();
16141587 Stub ().reset ();
16151588}
16161589