
Commit d86a1ae (parent: 77c60e6)
Author: ochafik

Unify content + message in server_task_result_cmpl_final (+ avoid string copy)
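
The "avoid string copy" part of the change comes from moving the slot's generated text into the single unified message instead of copying it into a separate content field. Below is a minimal standalone sketch of that pattern, not the server code itself; common_chat_msg_like is a stand-in whose layout is only inferred from this diff, and the tool_calls element type is a placeholder.

#include <string>
#include <utility>
#include <vector>

// Stand-in for common_chat_msg; the real type lives in llama.cpp's common code.
struct common_chat_msg_like {
    std::string role;
    std::string content;
    std::vector<std::string> tool_calls; // placeholder element type
};

int main() {
    std::string generated_text(1 << 20, 'x'); // pretend this is a long completion

    // Before this commit the text was assigned both to res->content and to the
    // OAI-compat chat message, implying at least one full copy of the string.
    // After: a single field, populated with std::move, so the buffer is reused.
    common_chat_msg_like message {
        /* .role       = */ "assistant",
        /* .content    = */ std::move(generated_text),
        /* .tool_calls = */ {}
    };

    // generated_text is now in a valid but unspecified (typically empty) state.
    return message.content.size() == (1u << 20) ? 0 : 1;
}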

1 file changed: examples/server/server.cpp (+12 −12 lines)

@@ -533,7 +533,7 @@ struct completion_token_output {
 struct server_task_result_cmpl_final : server_task_result {
     int index = 0;
 
-    std::string content;
+    common_chat_msg message;
     llama_tokens tokens;
 
     bool stream;
@@ -559,7 +559,6 @@ struct server_task_result_cmpl_final : server_task_result {
     oaicompat_type oaicompat = OAICOMPAT_TYPE_NONE;
     std::string oaicompat_model;
     std::string oaicompat_cmpl_id;
-    common_chat_msg oaicompat_chat_msg;
 
     virtual int get_index() override {
         return index;
@@ -585,7 +584,7 @@ struct server_task_result_cmpl_final : server_task_result {
     json to_json_non_oaicompat() {
         json res = json {
             {"index", index},
-            {"content", stream ? "" : content}, // in stream mode, content is already in last partial chunk
+            {"content", stream ? "" : message.content}, // in stream mode, content is already in last partial chunk
             {"tokens", stream ? llama_tokens {} : tokens},
             {"id_slot", id_slot},
             {"stop", true},
@@ -622,7 +621,7 @@ struct server_task_result_cmpl_final : server_task_result {
         json res = json {
             {"choices", json::array({
                 json{
-                    {"text", stream ? "" : content}, // in stream mode, content is already in last partial chunk
+                    {"text", stream ? "" : message.content}, // in stream mode, content is already in last partial chunk
                     {"index", index},
                     {"logprobs", logprobs},
                     {"finish_reason", finish_reason},
@@ -654,13 +653,13 @@ struct server_task_result_cmpl_final : server_task_result {
     json to_json_oaicompat_chat() {
         std::string finish_reason = "length";
         if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) {
-            finish_reason = oaicompat_chat_msg.tool_calls.empty() ? "stop" : "tool_calls";
+            finish_reason = message.tool_calls.empty() ? "stop" : "tool_calls";
         }
 
         json tool_calls;
-        if (!oaicompat_chat_msg.tool_calls.empty()) {
+        if (!message.tool_calls.empty()) {
             tool_calls = json::array();
-            for (const auto & tc : oaicompat_chat_msg.tool_calls) {
+            for (const auto & tc : message.tool_calls) {
                 tool_calls.push_back({
                     {"type", "function"},
                     {"function", {
@@ -676,7 +675,7 @@ struct server_task_result_cmpl_final : server_task_result {
             {"finish_reason", finish_reason},
             {"index", 0},
             {"message", json {
-                {"content", oaicompat_chat_msg.content},
+                {"content", message.content},
                 {"tool_calls", tool_calls},
                 {"role", "assistant"},
             }},
@@ -2283,7 +2282,6 @@ struct server_context {
         res->id_slot = slot.id;
 
         res->index = slot.index;
-        res->content = slot.generated_text;
         res->tokens = slot.generated_tokens;
         res->timings = slot.get_timings();
         res->prompt = common_detokenize(ctx, slot.prompt_tokens, true);
@@ -2304,11 +2302,11 @@ struct server_context {
         res->oaicompat_model = slot.params.oaicompat_model;
         res->oaicompat_cmpl_id = slot.params.oaicompat_cmpl_id;
         if (slot.params.chat_parser) {
-            res->oaicompat_chat_msg = slot.params.chat_parser(slot.generated_text);
+            res->message = slot.params.chat_parser(slot.generated_text);
        } else {
-            res->oaicompat_chat_msg = {
+            res->message = {
                 /* .role = */ "assistant",
-                /* .content = */ slot.generated_text,
+                /* .content = */ std::move(slot.generated_text),
                 /* .tool_calls = */ {}
             };
         }
@@ -3838,6 +3836,8 @@ int main(int argc, char ** argv) {
             // OAI-compat
             task.params.oaicompat = oaicompat;
             task.params.oaicompat_cmpl_id = completion_id;
+
+            // Grammar & tool-calls
             task.params.sampling.grammar = chat_params.grammar;
             task.params.sampling.grammar_lazy = chat_params.grammar_lazy;
             for (const auto & trigger : chat_params.grammar_triggers) {
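
After this change, both the plain completion JSON and the OAI-compatible chat JSON read from the single message field, as in to_json_non_oaicompat() and to_json_oaicompat_chat() above. The following is a rough standalone sketch of that idea using nlohmann::json, not the server's actual API; the chat_msg struct and any keys beyond those visible in the diff are assumptions.

#include <string>
#include <vector>
#include <nlohmann/json.hpp>

using json = nlohmann::json;

struct chat_msg {                 // assumed shape of common_chat_msg
    std::string role;
    std::string content;
    std::vector<json> tool_calls; // simplified: already-serialized tool calls
};

int main() {
    chat_msg message { "assistant", "Hello!", {} };

    // Plain /completion style: just the text.
    json plain = { {"content", message.content} };

    // OAI-compatible chat style: the same field feeds the "message" object,
    // and tool_calls decides the finish_reason.
    json oai = {
        {"message", {
            {"role",       message.role},
            {"content",    message.content},
            {"tool_calls", message.tool_calls.empty() ? json() : json(message.tool_calls)},
        }},
        {"finish_reason", message.tool_calls.empty() ? "stop" : "tool_calls"},
    };

    return plain["content"] == oai["message"]["content"] ? 0 : 1;
}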
