Commit c64d6bc

ngxson and ggerganov committed
apply changes from suggestion
Co-authored-by: ggerganov <[email protected]>
1 parent 8fc8941 commit c64d6bc


examples/server/server.cpp

Lines changed: 35 additions & 36 deletions
@@ -1563,14 +1563,15 @@ struct server_queue {
         if (task.type == SERVER_TASK_TYPE_CANCEL) {
             cleanup_pending_task(task.id_target);
         }
-        QUE_DBG("new task, id = %d, front = %d\n", task.id, front);
+        const int task_id = task.id;
+        QUE_DBG("new task, id = %d, front = %d\n", task_id, front);
         if (front) {
             queue_tasks.push_front(std::move(task));
         } else {
             queue_tasks.push_back(std::move(task));
         }
         condition_tasks.notify_one();
-        return task.id;
+        return task_id;
     }

     // multi-task version of post()
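
Note on this hunk: `task` is moved into `queue_tasks`, yet the old code still read `task.id` afterwards for the return value, a use-after-move. Caching the id in a local before the move fixes it. A minimal standalone sketch of the pattern, using an illustrative `Task` struct rather than the server's real types:

    #include <deque>
    #include <iostream>
    #include <utility>

    // Illustrative stand-in, not the server's type.
    struct Task {
        int id = 0;
    };

    // Cache the member you still need before std::move() hands the object away.
    static int post(std::deque<Task> & queue, Task && task) {
        const int task_id = task.id;       // read while task is still valid
        queue.push_back(std::move(task));  // task is moved-from after this line
        return task_id;                    // safe: no access to the moved-from object
    }

    int main() {
        std::deque<Task> queue;
        std::cout << post(queue, Task{42}) << "\n"; // prints 42
    }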
@@ -2105,18 +2106,18 @@ struct server_context {
         return true;
     }

-    bool launch_slot_with_task(server_slot & slot, const server_task && task) {
+    bool launch_slot_with_task(server_slot & slot, server_task && task) {
         slot.reset();
         slot.id_task = task.id;
         slot.index = task.index;
         slot.task_type = task.type;
         slot.params = std::move(task.params);
         slot.prompt_tokens = std::move(task.prompt_tokens);

-        if (!are_lora_equal(task.params.lora, slot.lora)) {
+        if (!are_lora_equal(slot.params.lora, slot.lora)) {
             // if lora is changed, we cannot reuse cached tokens
             slot.cache_tokens.clear();
-            slot.lora = task.params.lora;
+            slot.lora = slot.params.lora;
         }

         bool can_detokenize = can_be_detokenized(ctx, slot.prompt_tokens);
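
Note on this hunk: two fixes interact. Dropping `const` from the `server_task &&` parameter matters because `std::move` of a `const` object binds to the copy constructor, so the member moves were silent copies before; and once `task.params` has genuinely been moved into `slot.params`, the lora comparison and assignment must read the destination, not the moved-from source. A minimal sketch with made-up stand-in types:

    #include <string>
    #include <utility>

    // Illustrative stand-ins, not the server's real types.
    struct Params { std::string lora; };
    struct Task   { Params params; };
    struct Slot   { Params params; std::string lora; };

    // With `const Task && task`, std::move(task.params) would silently copy
    // (you cannot move from a const member). With `Task &&` the move is real,
    // so later reads must go through slot.params, not task.params.
    static void launch(Slot & slot, Task && task) {
        slot.params = std::move(task.params);  // task.params is moved-from now
        if (slot.params.lora != slot.lora) {   // read the destination, not the source
            slot.lora = slot.params.lora;
        }
    }

    int main() {
        Slot slot;
        launch(slot, Task{{"adapter-a"}});
    }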
@@ -3952,44 +3953,42 @@ int main(int argc, char ** argv) {

         auto completion_id = gen_chatcmplid();
         std::unordered_set<int> task_ids;
-        {
+        try {
             std::vector<server_task> tasks;

-            try {
-                const auto & prompt = data.at("prompt");
-                // TODO: this log can become very long, put it behind a flag or think about a more compact format
-                //SRV_DBG("Prompt: %s\n", prompt.is_string() ? prompt.get<std::string>().c_str() : prompt.dump(2).c_str());
-
-                std::vector<llama_tokens> tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, prompt, true, true);
-                tasks.reserve(tokenized_prompts.size());
-                for (size_t i = 0; i < tokenized_prompts.size(); i++) {
-                    server_task task = server_task(type);
-
-                    task.id = ctx_server.queue_tasks.get_new_id();
-                    task.index = i;
-
-                    task.prompt_tokens = std::move(tokenized_prompts[i]);
-                    task.params = server_task::params_from_json_cmpl(
-                        ctx_server.ctx,
-                        ctx_server.params_base,
-                        data);
-                    task.id_selected_slot = json_value(data, "id_slot", -1);
-
-                    // OAI-compat
-                    task.params.oaicompat = oaicompat;
-                    task.params.oaicompat_cmpl_id = completion_id;
-                    // oaicompat_model is already populated by params_from_json_cmpl
-
-                    tasks.push_back(std::move(task));
-                }
-            } catch (const std::exception & e) {
-                res_error(res, format_error_response(e.what(), ERROR_TYPE_INVALID_REQUEST));
-                return;
+            const auto & prompt = data.at("prompt");
+            // TODO: this log can become very long, put it behind a flag or think about a more compact format
+            //SRV_DBG("Prompt: %s\n", prompt.is_string() ? prompt.get<std::string>().c_str() : prompt.dump(2).c_str());
+
+            std::vector<llama_tokens> tokenized_prompts = tokenize_input_prompts(ctx_server.vocab, prompt, true, true);
+            tasks.reserve(tokenized_prompts.size());
+            for (size_t i = 0; i < tokenized_prompts.size(); i++) {
+                server_task task = server_task(type);
+
+                task.id = ctx_server.queue_tasks.get_new_id();
+                task.index = i;
+
+                task.prompt_tokens = std::move(tokenized_prompts[i]);
+                task.params = server_task::params_from_json_cmpl(
+                    ctx_server.ctx,
+                    ctx_server.params_base,
+                    data);
+                task.id_selected_slot = json_value(data, "id_slot", -1);
+
+                // OAI-compat
+                task.params.oaicompat = oaicompat;
+                task.params.oaicompat_cmpl_id = completion_id;
+                // oaicompat_model is already populated by params_from_json_cmpl
+
+                tasks.push_back(std::move(task));
             }

             task_ids = server_task::get_list_id(tasks);
             ctx_server.queue_results.add_waiting_tasks(tasks);
             ctx_server.queue_tasks.post(std::move(tasks));
+        } catch (const std::exception & e) {
+            res_error(res, format_error_response(e.what(), ERROR_TYPE_INVALID_REQUEST));
+            return;
         }

         bool stream = json_value(data, "stream", false);
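
Note on this hunk: the restructure widens the exception boundary. Previously only prompt parsing and task construction sat inside the `try`, so a throw from `get_list_id`, `add_waiting_tasks`, or `post` would leave this block unguarded; now every step funnels into the same `ERROR_TYPE_INVALID_REQUEST` response. A minimal sketch of the control flow, with invented helper names for illustration only:

    #include <iostream>
    #include <stdexcept>
    #include <vector>

    // Invented stand-ins for illustration, not the server's API.
    static std::vector<int> build_tasks(bool bad_input) {
        if (bad_input) throw std::invalid_argument("missing \"prompt\"");
        return {1, 2, 3};
    }

    static void post_tasks(const std::vector<int> &, bool fail) {
        if (fail) throw std::runtime_error("queue rejected tasks");
    }

    // One try block around the whole path: building and posting both map to
    // the same error response instead of escaping the handler.
    static void handle(bool bad_input, bool fail_post) {
        try {
            auto tasks = build_tasks(bad_input);
            post_tasks(tasks, fail_post);  // previously outside the guarded region
            std::cout << "ok\n";
        } catch (const std::exception & e) {
            std::cout << "error response: " << e.what() << "\n";
        }
    }

    int main() {
        handle(false, false);  // ok
        handle(true,  false);  // error response: missing "prompt"
        handle(false, true);   // error response: queue rejected tasks
    }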
