@@ -1714,6 +1714,8 @@ struct server_queue {
17141714};
17151715
17161716struct server_response {
1717+ bool running = true ;
1718+
17171719 // for keeping track of all tasks waiting for the result
17181720 std::unordered_set<int > waiting_task_ids;
17191721
@@ -1768,6 +1770,10 @@ struct server_response {
17681770 while (true ) {
17691771 std::unique_lock<std::mutex> lock (mutex_results);
17701772 condition_results.wait (lock, [&]{
1773+ if (!running) {
1774+ SRV_DBG (" %s : queue result stop\n " , __func__);
1775+ std::terminate (); // we cannot return here since the caller is HTTP code
1776+ }
17711777 return !queue_results.empty ();
17721778 });
17731779
@@ -1798,6 +1804,10 @@ struct server_response {
17981804 }
17991805
18001806 std::cv_status cr_res = condition_results.wait_for (lock, std::chrono::seconds (timeout));
1807+ if (!running) {
1808+ SRV_DBG (" %s : queue result stop\n " , __func__);
1809+ std::terminate (); // we cannot return here since the caller is HTTP code
1810+ }
18011811 if (cr_res == std::cv_status::timeout) {
18021812 return nullptr ;
18031813 }
@@ -1827,6 +1837,12 @@ struct server_response {
18271837 }
18281838 }
18291839 }
1840+
1841+ // terminate the waiting loop
1842+ void terminate () {
1843+ running = false ;
1844+ condition_results.notify_all ();
1845+ }
18301846};
18311847
18321848struct server_context {
@@ -4503,9 +4519,10 @@ int main(int argc, char ** argv) {
45034519 svr->new_task_queue = [&params] { return new httplib::ThreadPool (params.n_threads_http ); };
45044520
45054521 // clean up function, to be called before exit
4506- auto clean_up = [&svr]() {
4522+ auto clean_up = [&svr, &ctx_server ]() {
45074523 SRV_INF (" %s: cleaning up before exit...\n " , __func__);
45084524 svr->stop ();
4525+ ctx_server.queue_results .terminate ();
45094526 llama_backend_free ();
45104527 };
45114528
@@ -4546,7 +4563,7 @@ int main(int argc, char ** argv) {
45464563
45474564 if (!ctx_server.load_model (params)) {
45484565 clean_up ();
4549- // t.join(); // FIXME: see below
4566+ t.join ();
45504567 LOG_ERR (" %s: exiting due to model loading error\n " , __func__);
45514568 return 1 ;
45524569 }
@@ -4594,7 +4611,7 @@ int main(int argc, char ** argv) {
45944611 ctx_server.queue_tasks .start_loop ();
45954612
45964613 clean_up ();
4597- // t.join(); // FIXME: http thread may stuck if there is an on-going request. we don't need to care about this for now as the HTTP connection will already be closed at this point, but it's better to fix this
4614+ t.join ();
45984615
45994616 return 0 ;
46004617}
0 commit comments