@@ -301,18 +301,21 @@ StubLauncher::Launch()
301301  // 
302302  //  The reason it is broken into two steps is that creation of the health
303303  //  monitoring thread may take longer which can make the server process think
304-   //  that the stub process is unhealthy and return early. Waiting until the
305-   //  health thread is spawn would make sure would prevent this issue.
304+   //  that the stub process is unhealthy and return early. Waiting with a longer
305+   //  timeout prevents this issue.
306+   const  uint64_t  initialization_timeout_ms = 5000 ;  //  5 sec
307+   LOG_MESSAGE (
308+       TRITONSERVER_LOG_VERBOSE,
309+       " Waiting for the stub health monitoring thread to start"  );
310+ 
306311  bi::managed_external_buffer::handle_t  message;
307-   auto  err = ReceiveMessageFromStub (message);
312+   auto  err = ReceiveMessageFromStub (message, initialization_timeout_ms );
308313  if  (err != nullptr ) {
309314    KillStubProcess ();
310315  }
311316
312317  if  (stub_process_kind_ == " AUTOCOMPLETE_STUB"  ) {
313-     if  (err != nullptr ) {
314-       throw  BackendModelException (err);
315-     }
318+     THROW_IF_BACKEND_MODEL_ERROR (err);
316319    try  {
317320      AutocompleteStubProcess ();
318321    }
@@ -468,18 +471,21 @@ StubLauncher::Launch()
468471    // 
469472    //  The reason it is broken into two steps is that creation of the health
470473    //  monitoring thread may take longer which can make the server process think
471-     //  that the stub process is unhealthy and return early. Waiting until the
472-     //  health thread is spawn would prevent this issue.
474+     //  that the stub process is unhealthy and return early. Waiting with a
475+     //  longer timeout prevents this issue.
476+     const  uint64_t  initialization_timeout_ms = 5000 ;  //  5 sec
477+     LOG_MESSAGE (
478+         TRITONSERVER_LOG_VERBOSE,
479+         " Waiting for the stub health monitoring thread to start"  );
480+ 
473481    bi::managed_external_buffer::handle_t  message;
474-     auto  err = ReceiveMessageFromStub (message);
482+     auto  err = ReceiveMessageFromStub (message, initialization_timeout_ms );
475483    if  (err != nullptr ) {
476484      KillStubProcess ();
477485    }
478486
479487    if  (stub_process_kind_ == " AUTOCOMPLETE_STUB"  ) {
480-       if  (err != nullptr ) {
481-         throw  BackendModelException (err);
482-       }
488+       THROW_IF_BACKEND_MODEL_ERROR (err);
483489      try  {
484490        AutocompleteStubProcess ();
485491      }
@@ -612,8 +618,13 @@ StubLauncher::ModelInstanceStubProcess()
612618  initialize_message->Args () = initialize_map_handle;
613619  stub_message_queue_->Push (initialize_message->ShmHandle ());
614620
621+   const  uint64_t  initialization_timeout_ms = 5000 ;  //  5 sec
622+   LOG_MESSAGE (
623+       TRITONSERVER_LOG_VERBOSE,
624+       " Waiting for the stub process initialization response"  );
625+ 
615626  bi::managed_external_buffer::handle_t  message;
616-   RETURN_IF_ERROR (ReceiveMessageFromStub (message));
627+   RETURN_IF_ERROR (ReceiveMessageFromStub (message, initialization_timeout_ms ));
617628
618629  std::unique_ptr<IPCMessage> initialize_response_message =
619630      IPCMessage::LoadFromSharedMemory (shm_pool_, message);
@@ -746,11 +757,11 @@ StubLauncher::KillStubProcess()
746757
747758TRITONSERVER_Error*
748759StubLauncher::ReceiveMessageFromStub (
749-     bi::managed_external_buffer::handle_t & message)
760+     bi::managed_external_buffer::handle_t & message,
761+     uint64_t  timeout_miliseconds)
750762{
751763  bool  success = false ;
752764  while  (!success) {
753-     uint64_t  timeout_miliseconds = 1000 ;
754765    {
755766      boost::posix_time::ptime timeout =
756767          boost::get_system_time () +
0 commit comments