@@ -1705,6 +1705,8 @@ struct server_queue {
17051705};
17061706
17071707struct server_response {
1708+ bool running = true ;
1709+
17081710 // for keeping track of all tasks waiting for the result
17091711 std::unordered_set<int > waiting_task_ids;
17101712
@@ -1759,6 +1761,10 @@ struct server_response {
17591761 while (true ) {
17601762 std::unique_lock<std::mutex> lock (mutex_results);
17611763 condition_results.wait (lock, [&]{
1764+ if (!running) {
1765+ SRV_DBG (" %s : queue result stop\n " , __func__);
1766+ std::terminate (); // we cannot return here since the caller is HTTP code
1767+ }
17621768 return !queue_results.empty ();
17631769 });
17641770
@@ -1789,6 +1795,10 @@ struct server_response {
17891795 }
17901796
17911797 std::cv_status cr_res = condition_results.wait_for (lock, std::chrono::seconds (timeout));
1798+ if (!running) {
1799+ SRV_DBG (" %s : queue result stop\n " , __func__);
1800+ std::terminate (); // we cannot return here since the caller is HTTP code
1801+ }
17921802 if (cr_res == std::cv_status::timeout) {
17931803 return nullptr ;
17941804 }
@@ -1818,6 +1828,12 @@ struct server_response {
18181828 }
18191829 }
18201830 }
1831+
1832+ // terminate the waiting loop
1833+ void terminate () {
1834+ running = false ;
1835+ condition_results.notify_all ();
1836+ }
18211837};
18221838
18231839struct server_context {
@@ -4491,9 +4507,10 @@ int main(int argc, char ** argv) {
44914507 svr->new_task_queue = [&params] { return new httplib::ThreadPool (params.n_threads_http ); };
44924508
44934509 // clean up function, to be called before exit
4494- auto clean_up = [&svr]() {
4510+ auto clean_up = [&svr, &ctx_server ]() {
44954511 SRV_INF (" %s: cleaning up before exit...\n " , __func__);
44964512 svr->stop ();
4513+ ctx_server.queue_results .terminate ();
44974514 llama_backend_free ();
44984515 };
44994516
@@ -4534,7 +4551,7 @@ int main(int argc, char ** argv) {
45344551
45354552 if (!ctx_server.load_model (params)) {
45364553 clean_up ();
4537- // t.join(); // FIXME: see below
4554+ t.join ();
45384555 LOG_ERR (" %s: exiting due to model loading error\n " , __func__);
45394556 return 1 ;
45404557 }
@@ -4582,7 +4599,7 @@ int main(int argc, char ** argv) {
45824599 ctx_server.queue_tasks .start_loop ();
45834600
45844601 clean_up ();
4585- // t.join(); // FIXME: http thread may stuck if there is an on-going request. we don't need to care about this for now as the HTTP connection will already be closed at this point, but it's better to fix this
4602+ t.join ();
45864603
45874604 return 0 ;
45884605}
0 commit comments