Skip to content

Commit 94a4e2c

Browse files
committed
Merge remote-tracking branch 'origin/main' into mwittwer/explicit_model_load_parsing
2 parents 86a61ed + 8f2df55 commit 94a4e2c

File tree

3 files changed

+59
-19
lines changed

3 files changed

+59
-19
lines changed

src/pb_stub.cc

Lines changed: 11 additions & 7 deletions
Original file line number | Diff line number | Diff line change
@@ -1040,11 +1040,13 @@ Stub::~Stub()
10401040
{
10411041
#ifdef TRITON_ENABLE_GPU
10421042
try {
1043-
CUDAHandler& cuda_api = CUDAHandler::getInstance();
1044-
for (auto& m :
1045-
shm_pool_->GetCUDAMemoryPoolManager()->CUDAPoolAddressMap()) {
1046-
if (m.second != nullptr) {
1047-
cuda_api.CloseCudaHandle(m.first, m.second);
1043+
if (shm_pool_ != nullptr) {
1044+
CUDAHandler& cuda_api = CUDAHandler::getInstance();
1045+
for (auto& m :
1046+
shm_pool_->GetCUDAMemoryPoolManager()->CUDAPoolAddressMap()) {
1047+
if (m.second != nullptr) {
1048+
cuda_api.CloseCudaHandle(m.first, m.second);
1049+
}
10481050
}
10491051
}
10501052
}
@@ -1053,13 +1055,14 @@ Stub::~Stub()
10531055
}
10541056
#endif
10551057

1056-
{
1058+
// Ensure the interpreter is active before trying to clean up.
1059+
if (Py_IsInitialized()) {
10571060
py::gil_scoped_acquire acquire;
10581061
py::object async_event_loop_local(std::move(async_event_loop_));
10591062
py::object background_futures_local(std::move(background_futures_));
10601063
py::object model_instance_local(std::move(model_instance_));
10611064
}
1062-
stub_instance_.reset();
1065+
10631066
stub_message_queue_.reset();
10641067
parent_message_queue_.reset();
10651068
stub_to_parent_mq_.reset();
@@ -2030,6 +2033,7 @@ main(int argc, char** argv)
20302033
catch (const PythonBackendException& pb_exception) {
20312034
LOG_INFO << "Failed to preinitialize Python stub: " << pb_exception.what();
20322035
logger.reset();
2036+
stub.reset();
20332037
exit(1);
20342038
}
20352039

src/stub_launcher.cc

Lines changed: 46 additions & 11 deletions
Original file line number | Diff line number | Diff line change
@@ -283,7 +283,9 @@ StubLauncher::Launch()
283283
// Push a dummy message to the message queue so that the stub
284284
// process is notified that it can release the object stored in
285285
// shared memory.
286-
stub_message_queue_->Push(DUMMY_MESSAGE);
286+
if (stub_message_queue_) {
287+
stub_message_queue_->Push(DUMMY_MESSAGE);
288+
}
287289

288290
// If the model is not initialized, wait for the stub process to exit.
289291
if (!is_initialized_) {
@@ -302,11 +304,23 @@ StubLauncher::Launch()
302304
//
303305
// The reason it is broken into two steps is that creation of the health
304306
// monitoring thread may take longer which can make the server process think
305-
// that the stub process is unhealthy and return early. Waiting until the
306-
// health thread is spawn would make sure would prevent this issue.
307-
parent_message_queue_->Pop();
307+
// that the stub process is unhealthy and return early. Waiting with a longer
308+
// timeout prevents this issue.
309+
const uint64_t initialization_timeout_ms = 10000; // 10 sec
310+
LOG_MESSAGE(
311+
TRITONSERVER_LOG_VERBOSE,
312+
"Waiting for the stub health monitoring thread to start");
313+
314+
bi::managed_external_buffer::handle_t message;
315+
auto err = ReceiveMessageFromStub(message, initialization_timeout_ms);
316+
if (err != nullptr) {
317+
KillStubProcess();
318+
}
308319

309320
if (stub_process_kind_ == "AUTOCOMPLETE_STUB") {
321+
if (err != nullptr) {
322+
throw BackendModelException(err);
323+
}
310324
try {
311325
AutocompleteStubProcess();
312326
}
@@ -317,6 +331,7 @@ StubLauncher::Launch()
317331
TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, ex.what()));
318332
}
319333
} else if (stub_process_kind_ == "MODEL_INSTANCE_STUB") {
334+
RETURN_IF_ERROR(err);
320335
RETURN_IF_ERROR(ModelInstanceStubProcess());
321336
} else {
322337
return TRITONSERVER_ErrorNew(
@@ -509,7 +524,9 @@ StubLauncher::Launch()
509524
// Push a dummy message to the message queue so that the stub
510525
// process is notified that it can release the object stored in
511526
// shared memory.
512-
stub_message_queue_->Push(DUMMY_MESSAGE);
527+
if (stub_message_queue_) {
528+
stub_message_queue_->Push(DUMMY_MESSAGE);
529+
}
513530

514531
// If the model is not initialized, wait for the stub process to exit.
515532
if (!is_initialized_) {
@@ -528,11 +545,23 @@ StubLauncher::Launch()
528545
//
529546
// The reason it is broken into two steps is that creation of the health
530547
// monitoring thread may take longer which can make the server process think
531-
// that the stub process is unhealthy and return early. Waiting until the
532-
// health thread is spawn would prevent this issue.
533-
parent_message_queue_->Pop();
548+
// that the stub process is unhealthy and return early. Waiting with a
549+
// longer timeout prevents this issue.
550+
const uint64_t initialization_timeout_ms = 10000; // 10 sec
551+
LOG_MESSAGE(
552+
TRITONSERVER_LOG_VERBOSE,
553+
"Waiting for the stub health monitoring thread to start");
554+
555+
bi::managed_external_buffer::handle_t message;
556+
auto err = ReceiveMessageFromStub(message, initialization_timeout_ms);
557+
if (err != nullptr) {
558+
KillStubProcess();
559+
}
534560

535561
if (stub_process_kind_ == "AUTOCOMPLETE_STUB") {
562+
if (err != nullptr) {
563+
throw BackendModelException(err);
564+
}
536565
try {
537566
AutocompleteStubProcess();
538567
}
@@ -543,6 +572,7 @@ StubLauncher::Launch()
543572
TRITONSERVER_ErrorNew(TRITONSERVER_ERROR_INTERNAL, ex.what()));
544573
}
545574
} else if (stub_process_kind_ == "MODEL_INSTANCE_STUB") {
575+
RETURN_IF_ERROR(err);
546576
RETURN_IF_ERROR(ModelInstanceStubProcess());
547577
} else {
548578
return TRITONSERVER_ErrorNew(
@@ -663,8 +693,13 @@ StubLauncher::ModelInstanceStubProcess()
663693
initialize_message->Args() = initialize_map_handle;
664694
stub_message_queue_->Push(initialize_message->ShmHandle());
665695

696+
const uint64_t initialization_timeout_ms = 5000; // 5 sec
697+
LOG_MESSAGE(
698+
TRITONSERVER_LOG_VERBOSE,
699+
"Waiting for the stub process initialization response");
700+
666701
bi::managed_external_buffer::handle_t message;
667-
RETURN_IF_ERROR(ReceiveMessageFromStub(message));
702+
RETURN_IF_ERROR(ReceiveMessageFromStub(message, initialization_timeout_ms));
668703

669704
std::unique_ptr<IPCMessage> initialize_response_message =
670705
IPCMessage::LoadFromSharedMemory(shm_pool_, message);
@@ -797,11 +832,11 @@ StubLauncher::KillStubProcess()
797832

798833
TRITONSERVER_Error*
799834
StubLauncher::ReceiveMessageFromStub(
800-
bi::managed_external_buffer::handle_t& message)
835+
bi::managed_external_buffer::handle_t& message,
836+
uint64_t timeout_miliseconds)
801837
{
802838
bool success = false;
803839
while (!success) {
804-
uint64_t timeout_miliseconds = 1000;
805840
{
806841
boost::posix_time::ptime timeout =
807842
boost::get_system_time() +

src/stub_launcher.h

Lines changed: 2 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -147,7 +147,8 @@ class StubLauncher {
147147

148148
// Get a message from the stub process
149149
TRITONSERVER_Error* ReceiveMessageFromStub(
150-
bi::managed_external_buffer::handle_t& message);
150+
bi::managed_external_buffer::handle_t& message,
151+
uint64_t timeout_miliseconds = 1000);
151152

152153
// Wait for stub process
153154
void WaitForStubProcess();

0 commit comments

Comments
 (0)