@@ -301,18 +301,21 @@ StubLauncher::Launch()
301301 //
302302 // The reason it is broken into two steps is that creation of the health
303303 // monitoring thread may take longer which can make the server process think
304- // that the stub process is unhealthy and return early. Waiting until the
305- // health thread is spawn would make sure would prevent this issue.
304+ // that the stub process is unhealthy and return early. Waiting with a longer
305+ // timeout prevents this issue.
306+ const uint64_t initialization_timeout_ms = 5000 ; // 5 sec
307+ LOG_MESSAGE (
308+ TRITONSERVER_LOG_VERBOSE,
309+ " Waiting for the stub health monitoring thread to start" );
310+
306311 bi::managed_external_buffer::handle_t message;
307- auto err = ReceiveMessageFromStub (message);
312+ auto err = ReceiveMessageFromStub (message, initialization_timeout_ms );
308313 if (err != nullptr ) {
309314 KillStubProcess ();
310315 }
311316
312317 if (stub_process_kind_ == " AUTOCOMPLETE_STUB" ) {
313- if (err != nullptr ) {
314- throw BackendModelException (err);
315- }
318+ THROW_IF_BACKEND_MODEL_ERROR (err);
316319 try {
317320 AutocompleteStubProcess ();
318321 }
@@ -468,18 +471,21 @@ StubLauncher::Launch()
468471 //
469472 // The reason it is broken into two steps is that creation of the health
470473 // monitoring thread may take longer which can make the server process think
471- // that the stub process is unhealthy and return early. Waiting until the
472- // health thread is spawn would prevent this issue.
474+ // that the stub process is unhealthy and return early. Waiting with a
475+ // longer timeout prevents this issue.
476+ const uint64_t initialization_timeout_ms = 5000 ; // 5 sec
477+ LOG_MESSAGE (
478+ TRITONSERVER_LOG_VERBOSE,
479+ " Waiting for the stub health monitoring thread to start" );
480+
473481 bi::managed_external_buffer::handle_t message;
474- auto err = ReceiveMessageFromStub (message);
482+ auto err = ReceiveMessageFromStub (message, initialization_timeout_ms );
475483 if (err != nullptr ) {
476484 KillStubProcess ();
477485 }
478486
479487 if (stub_process_kind_ == " AUTOCOMPLETE_STUB" ) {
480- if (err != nullptr ) {
481- throw BackendModelException (err);
482- }
488+ THROW_IF_BACKEND_MODEL_ERROR (err);
483489 try {
484490 AutocompleteStubProcess ();
485491 }
@@ -612,8 +618,13 @@ StubLauncher::ModelInstanceStubProcess()
612618 initialize_message->Args () = initialize_map_handle;
613619 stub_message_queue_->Push (initialize_message->ShmHandle ());
614620
621+ const uint64_t initialization_timeout_ms = 5000 ; // 5 sec
622+ LOG_MESSAGE (
623+ TRITONSERVER_LOG_VERBOSE,
624+ " Waiting for the stub process initialization response" );
625+
615626 bi::managed_external_buffer::handle_t message;
616- RETURN_IF_ERROR (ReceiveMessageFromStub (message));
627+ RETURN_IF_ERROR (ReceiveMessageFromStub (message, initialization_timeout_ms ));
617628
618629 std::unique_ptr<IPCMessage> initialize_response_message =
619630 IPCMessage::LoadFromSharedMemory (shm_pool_, message);
@@ -746,11 +757,11 @@ StubLauncher::KillStubProcess()
746757
747758TRITONSERVER_Error*
748759StubLauncher::ReceiveMessageFromStub (
749- bi::managed_external_buffer::handle_t & message)
760+ bi::managed_external_buffer::handle_t & message,
761+ uint64_t timeout_miliseconds)
750762{
751763 bool success = false ;
752764 while (!success) {
753- uint64_t timeout_miliseconds = 1000 ;
754765 {
755766 boost::posix_time::ptime timeout =
756767 boost::get_system_time () +
0 commit comments