Skip to content

Commit 0c82aa0

Browse files
author
lizhenneng
committed
Improve Python backend shutdown speed
When the Linux operating system shuts down, Triton server takes 1 to 2 seconds to stop, and the shutdown takes even longer when the Python backend has already been started. Signed-off-by: lizhenneng <[email protected]>
1 parent cbb42dd commit 0c82aa0

File tree

1 file changed

+10
-1
lines changed

1 file changed

+10
-1
lines changed

src/pb_stub.cc

Lines changed: 10 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,9 @@ using cudaStream_t = void*;
7777
namespace triton { namespace backend { namespace python {
7878

7979
std::atomic<bool> non_graceful_exit = {false};
80+
std::atomic<bool> should_exit{false};
81+
std::condition_variable exit_cv;
82+
std::mutex exit_mutex;
8083

8184
void
8285
SignalHandler(int signum)
@@ -2058,7 +2061,11 @@ main(int argc, char** argv)
20582061
// shared memory and will be set to false by the parent process.
20592062
// The parent process expects that the stub process sets this
20602063
// variable to true within 1 second.
2061-
std::this_thread::sleep_for(std::chrono::milliseconds(300));
2064+
std::unique_lock<std::mutex> lock(exit_mutex);
2065+
if (exit_cv.wait_for(lock, std::chrono::milliseconds(300), []{
2066+
return should_exit.load();
2067+
})) {
2068+
}
20622069

20632070
stub->UpdateHealth();
20642071

@@ -2099,6 +2106,8 @@ main(int argc, char** argv)
20992106
stub->TerminateParentToStubQueueMonitor();
21002107
}
21012108
background_thread_running = false;
2109+
should_exit = true;
2110+
exit_cv.notify_all();
21022111
background_thread.join();
21032112
break;
21042113
}

0 commit comments

Comments
 (0)