@@ -301,18 +301,21 @@ StubLauncher::Launch()
301
301
//
302
302
// The reason it is broken into two steps is that creation of the health
303
303
// monitoring thread may take longer which can make the server process think
304
- // that the stub process is unhealthy and return early. Waiting until the
305
- // health thread is spawn would make sure would prevent this issue.
304
+ // that the stub process is unhealthy and return early. Waiting with a longer
305
+ // timeout prevents this issue.
306
+ const uint64_t initialization_timeout_ms = 5000 ; // 5 sec
307
+ LOG_MESSAGE (
308
+ TRITONSERVER_LOG_VERBOSE,
309
+ " Waiting for the stub health monitoring thread to start" );
310
+
306
311
bi::managed_external_buffer::handle_t message;
307
- auto err = ReceiveMessageFromStub (message);
312
+ auto err = ReceiveMessageFromStub (message, initialization_timeout_ms );
308
313
if (err != nullptr ) {
309
314
KillStubProcess ();
310
315
}
311
316
312
317
if (stub_process_kind_ == " AUTOCOMPLETE_STUB" ) {
313
- if (err != nullptr ) {
314
- throw BackendModelException (err);
315
- }
318
+ THROW_IF_BACKEND_MODEL_ERROR (err);
316
319
try {
317
320
AutocompleteStubProcess ();
318
321
}
@@ -468,18 +471,21 @@ StubLauncher::Launch()
468
471
//
469
472
// The reason it is broken into two steps is that creation of the health
470
473
// monitoring thread may take longer which can make the server process think
471
- // that the stub process is unhealthy and return early. Waiting until the
472
- // health thread is spawn would prevent this issue.
474
+ // that the stub process is unhealthy and return early. Waiting with a
475
+ // longer timeout prevents this issue.
476
+ const uint64_t initialization_timeout_ms = 5000 ; // 5 sec
477
+ LOG_MESSAGE (
478
+ TRITONSERVER_LOG_VERBOSE,
479
+ " Waiting for the stub health monitoring thread to start" );
480
+
473
481
bi::managed_external_buffer::handle_t message;
474
- auto err = ReceiveMessageFromStub (message);
482
+ auto err = ReceiveMessageFromStub (message, initialization_timeout_ms );
475
483
if (err != nullptr ) {
476
484
KillStubProcess ();
477
485
}
478
486
479
487
if (stub_process_kind_ == " AUTOCOMPLETE_STUB" ) {
480
- if (err != nullptr ) {
481
- throw BackendModelException (err);
482
- }
488
+ THROW_IF_BACKEND_MODEL_ERROR (err);
483
489
try {
484
490
AutocompleteStubProcess ();
485
491
}
@@ -612,8 +618,13 @@ StubLauncher::ModelInstanceStubProcess()
612
618
initialize_message->Args () = initialize_map_handle;
613
619
stub_message_queue_->Push (initialize_message->ShmHandle ());
614
620
621
+ const uint64_t initialization_timeout_ms = 5000 ; // 5 sec
622
+ LOG_MESSAGE (
623
+ TRITONSERVER_LOG_VERBOSE,
624
+ " Waiting for the stub process initialization response" );
625
+
615
626
bi::managed_external_buffer::handle_t message;
616
- RETURN_IF_ERROR (ReceiveMessageFromStub (message));
627
+ RETURN_IF_ERROR (ReceiveMessageFromStub (message, initialization_timeout_ms ));
617
628
618
629
std::unique_ptr<IPCMessage> initialize_response_message =
619
630
IPCMessage::LoadFromSharedMemory (shm_pool_, message);
@@ -746,11 +757,11 @@ StubLauncher::KillStubProcess()
746
757
747
758
TRITONSERVER_Error*
748
759
StubLauncher::ReceiveMessageFromStub (
749
- bi::managed_external_buffer::handle_t & message)
760
+ bi::managed_external_buffer::handle_t & message,
761
+ uint64_t timeout_miliseconds)
750
762
{
751
763
bool success = false ;
752
764
while (!success) {
753
- uint64_t timeout_miliseconds = 1000 ;
754
765
{
755
766
boost::posix_time::ptime timeout =
756
767
boost::get_system_time () +
0 commit comments