@@ -1563,14 +1563,15 @@ struct server_queue {
         if (task.type == SERVER_TASK_TYPE_CANCEL) {
             cleanup_pending_task(task.id_target);
         }
-        QUE_DBG("new task, id = %d, front = %d\n", task.id, front);
+        const int task_id = task.id;
+        QUE_DBG("new task, id = %d, front = %d\n", task_id, front);
         if (front) {
             queue_tasks.push_front(std::move(task));
         } else {
             queue_tasks.push_back(std::move(task));
        }
         condition_tasks.notify_one();
-        return task.id;
+        return task_id;
     }

     // multi-task version of post()
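This first hunk fixes a use-after-move: the return statement read task.id after task had already been moved into queue_tasks, so the id is now cached before the move. A minimal sketch of the pitfall, using a hypothetical Task type rather than the server's real server_task:

#include <deque>
#include <string>
#include <utility>

struct Task {
    int         id = 0;
    std::string payload; // left in a valid but unspecified state after a move
};

std::deque<Task> queue_tasks;

int post_buggy(Task task) {
    queue_tasks.push_back(std::move(task));
    return task.id; // use-after-move: reads the moved-from object
}

int post_fixed(Task task) {
    const int task_id = task.id; // cache before the move, as the patch does
    queue_tasks.push_back(std::move(task));
    return task_id;
}

For a plain int member the moved-from read happens to preserve the value, so the old code worked by accident; it is exactly the fragile pattern that analyzers such as clang-tidy's bugprone-use-after-move flag.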
@@ -2105,18 +2106,18 @@ struct server_context {
         return true;
     }

-    bool launch_slot_with_task(server_slot & slot, const server_task && task) {
+    bool launch_slot_with_task(server_slot & slot, server_task && task) {
         slot.reset();
         slot.id_task       = task.id;
         slot.index         = task.index;
         slot.task_type     = task.type;
         slot.params        = std::move(task.params);
         slot.prompt_tokens = std::move(task.prompt_tokens);

-        if (!are_lora_equal(task.params.lora, slot.lora)) {
+        if (!are_lora_equal(slot.params.lora, slot.lora)) {
             // if lora is changed, we cannot reuse cached tokens
             slot.cache_tokens.clear();
-            slot.lora = task.params.lora;
+            slot.lora = slot.params.lora;
         }

         bool can_detokenize = can_be_detokenized(ctx, slot.prompt_tokens);
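Two fixes land in this hunk. Dropping const from the rvalue reference matters because std::move applied to a member of a const object yields a const rvalue, which binds to the copy constructor, so slot.params = std::move(task.params) silently copied instead of moving. And because task.params is moved out just above, the LoRA check must read from its new home, slot.params, not from the moved-from task. A compilable sketch with simplified stand-ins (hypothetical Params/Task/Slot types, not the server's real ones):

#include <string>
#include <utility>

struct Params { std::string lora; };
struct Task   { int id = 0; Params params; };

struct Slot {
    Params      params;
    std::string lora;

    // with a 'const Task && task' parameter, std::move(task.params) would be
    // const and the assignment below would quietly copy instead of move
    void launch(Task && task) {
        params = std::move(task.params);
        // task.params is moved-from at this point; compare against the
        // slot-owned copy instead
        if (lora != params.lora) { // not task.params.lora
            lora = params.lora;
        }
    }
};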
@@ -3952,44 +3953,42 @@ int main(int argc, char ** argv) {

         auto completion_id = gen_chatcmplid();
         std::unordered_set<int> task_ids;
-        {
+        try {
             std::vector<server_task> tasks;

-            try {
-                const auto & prompt = data.at("prompt");
-                // TODO: this log can become very long, put it behind a flag or think about a more compact format
-                // SRV_DBG("Prompt: %s\n", prompt.is_string() ? prompt.get<std::string>().c_str() : prompt.dump(2).c_str());
-
-                std::vector<llama_tokens> tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, prompt, true, true);
-                tasks.reserve(tokenized_prompts.size());
-                for (size_t i = 0; i < tokenized_prompts.size(); i++) {
-                    server_task task = server_task(type);
-
-                    task.id    = ctx_server.queue_tasks.get_new_id();
-                    task.index = i;
-
-                    task.prompt_tokens    = std::move(tokenized_prompts[i]);
-                    task.params           = server_task::params_from_json_cmpl(
-                        ctx_server.ctx,
-                        ctx_server.params_base,
-                        data);
-                    task.id_selected_slot = json_value(data, "id_slot", -1);
-
-                    // OAI-compat
-                    task.params.oaicompat         = oaicompat;
-                    task.params.oaicompat_cmpl_id = completion_id;
-                    // oaicompat_model is already populated by params_from_json_cmpl
-
-                    tasks.push_back(std::move(task));
-                }
-            } catch (const std::exception & e) {
-                res_error(res, format_error_response(e.what(), ERROR_TYPE_INVALID_REQUEST));
-                return;
+            const auto & prompt = data.at("prompt");
+            // TODO: this log can become very long, put it behind a flag or think about a more compact format
+            // SRV_DBG("Prompt: %s\n", prompt.is_string() ? prompt.get<std::string>().c_str() : prompt.dump(2).c_str());
+
+            std::vector<llama_tokens> tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, prompt, true, true);
+            tasks.reserve(tokenized_prompts.size());
+            for (size_t i = 0; i < tokenized_prompts.size(); i++) {
+                server_task task = server_task(type);
+
+                task.id    = ctx_server.queue_tasks.get_new_id();
+                task.index = i;
+
+                task.prompt_tokens    = std::move(tokenized_prompts[i]);
+                task.params           = server_task::params_from_json_cmpl(
+                    ctx_server.ctx,
+                    ctx_server.params_base,
+                    data);
+                task.id_selected_slot = json_value(data, "id_slot", -1);
+
+                // OAI-compat
+                task.params.oaicompat         = oaicompat;
+                task.params.oaicompat_cmpl_id = completion_id;
+                // oaicompat_model is already populated by params_from_json_cmpl
+
+                tasks.push_back(std::move(task));
             }

             task_ids = server_task::get_list_id(tasks);
             ctx_server.queue_results.add_waiting_tasks(tasks);
             ctx_server.queue_tasks.post(std::move(tasks));
+        } catch (const std::exception & e) {
+            res_error(res, format_error_response(e.what(), ERROR_TYPE_INVALID_REQUEST));
+            return;
        }

         bool stream = json_value(data, "stream", false);
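The last hunk widens the error handling: previously only prompt parsing and tokenization sat inside the try, so an exception thrown while registering or posting the tasks would have escaped the handler; now the whole sequence is guarded and any std::exception becomes an ERROR_TYPE_INVALID_REQUEST response. The same pattern, sketched with hypothetical helpers (build_tasks, post_tasks) rather than the server's real API:

#include <exception>
#include <iostream>
#include <stdexcept>
#include <string>
#include <utility>
#include <vector>

struct Task { int id; }; // stand-in for server_task

std::vector<Task> build_tasks(const std::string & prompt) {
    if (prompt.empty()) {
        throw std::invalid_argument("missing prompt"); // bad input throws
    }
    return { Task{ 0 } };
}

void post_tasks(std::vector<Task> && tasks) {
    (void) tasks; // registering/posting could throw too (e.g. std::bad_alloc)
}

void handle(const std::string & prompt) {
    try {
        // every step that can throw lives in the same try block, so all
        // failures funnel into the single error response below
        auto tasks = build_tasks(prompt);
        post_tasks(std::move(tasks));
    } catch (const std::exception & e) {
        std::cerr << "error: " << e.what() << "\n"; // stand-in for res_error(...)
        return;
    }
}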