@@ -1497,6 +1497,7 @@ struct server_queue {
14971497 // queues
14981498 lock_free::linked_list<server_task> queue_tasks;
14991499 lock_free::linked_list<server_task> queue_tasks_deferred;
1500+ std::atomic<int > n_queue_tasks_deferred = 0 ;
15001501
15011502 lock_free::hash_map<int , int > cancel_tasks = {10000 };
15021503
@@ -1543,6 +1544,7 @@ struct server_queue {
15431544 void defer (server_task task) {
15441545 QUE_DBG (" defer task, id = %d\n " , task.id );
15451546 queue_tasks_deferred.insertHead (std::move (task));
1547+ n_queue_tasks_deferred++;
15461548 condition_tasks.notify_one ();
15471549 }
15481550
@@ -1565,9 +1567,10 @@ struct server_queue {
15651567 // Call when the state of one slot is changed, it will move one task from deferred to main queue
15661568 void pop_deferred_task () {
15671569 if (!queue_tasks_deferred.empty ()) {
1568- queue_tasks_deferred.sweepOnce ([&](server_task & task) {
1570+ queue_tasks_deferred.sweepOnce ([&](server_task && task) {
15691571 queue_tasks.insertHead (std::move (task));
15701572 });
1573+ n_queue_tasks_deferred--;
15711574 }
15721575 condition_tasks.notify_one ();
15731576 }
@@ -1599,7 +1602,7 @@ struct server_queue {
15991602 if (queue_tasks.empty ()) {
16001603 break ;
16011604 }
1602- queue_tasks.sweepOnce ([&](server_task & task) {
1605+ queue_tasks.sweepOnce ([&](server_task && task) {
16031606 QUE_DBG (" processing task, id = %d\n " , task.id );
16041607 if (cancel_tasks.erase (task.id ) > 0 ) {
16051608 QUE_DBG (" task id = %d is canceled\n " , task.id );
@@ -1620,6 +1623,7 @@ struct server_queue {
16201623 return ;
16211624 }
16221625 if (queue_tasks.empty ()) {
1626+ std::unique_lock<std::mutex> lock (mutex_tasks);
16231627 condition_tasks.wait (lock, [&]{
16241628 return (!queue_tasks.empty () || !running);
16251629 });
@@ -2595,7 +2599,7 @@ struct server_context {
25952599 res->slots_data = std::move (slots_data);
25962600 res->n_idle_slots = n_idle_slots;
25972601 res->n_processing_slots = n_processing_slots;
2598- res->n_tasks_deferred = queue_tasks.queue_tasks_deferred . size () ;
2602+ res->n_tasks_deferred = queue_tasks.n_queue_tasks_deferred ;
25992603 res->t_start = metrics.t_start ;
26002604
26012605 res->kv_cache_tokens_count = llama_get_kv_cache_token_count (ctx);
0 commit comments