@@ -533,7 +533,7 @@ struct completion_token_output {
533533struct server_task_result_cmpl_final : server_task_result {
534534 int index = 0 ;
535535
536- std::string content ;
536+ common_chat_msg message ;
537537 llama_tokens tokens;
538538
539539 bool stream;
@@ -559,7 +559,6 @@ struct server_task_result_cmpl_final : server_task_result {
559559 oaicompat_type oaicompat = OAICOMPAT_TYPE_NONE;
560560 std::string oaicompat_model;
561561 std::string oaicompat_cmpl_id;
562- common_chat_msg oaicompat_chat_msg;
563562
564563 virtual int get_index () override {
565564 return index;
@@ -585,7 +584,7 @@ struct server_task_result_cmpl_final : server_task_result {
585584 json to_json_non_oaicompat () {
586585 json res = json {
587586 {" index" , index},
588- {" content" , stream ? " " : content}, // in stream mode, content is already in last partial chunk
587+ {" content" , stream ? " " : message. content }, // in stream mode, content is already in last partial chunk
589588 {" tokens" , stream ? llama_tokens {} : tokens},
590589 {" id_slot" , id_slot},
591590 {" stop" , true },
@@ -622,7 +621,7 @@ struct server_task_result_cmpl_final : server_task_result {
622621 json res = json {
623622 {" choices" , json::array ({
624623 json{
625- {" text" , stream ? " " : content}, // in stream mode, content is already in last partial chunk
624+ {" text" , stream ? " " : message. content }, // in stream mode, content is already in last partial chunk
626625 {" index" , index},
627626 {" logprobs" , logprobs},
628627 {" finish_reason" , finish_reason},
@@ -654,13 +653,13 @@ struct server_task_result_cmpl_final : server_task_result {
654653 json to_json_oaicompat_chat () {
655654 std::string finish_reason = " length" ;
656655 if (stop == STOP_TYPE_WORD || stop == STOP_TYPE_EOS) {
657- finish_reason = oaicompat_chat_msg .tool_calls .empty () ? " stop" : " tool_calls" ;
656+ finish_reason = message .tool_calls .empty () ? " stop" : " tool_calls" ;
658657 }
659658
660659 json tool_calls;
661- if (!oaicompat_chat_msg .tool_calls .empty ()) {
660+ if (!message .tool_calls .empty ()) {
662661 tool_calls = json::array ();
663- for (const auto & tc : oaicompat_chat_msg .tool_calls ) {
662+ for (const auto & tc : message .tool_calls ) {
664663 tool_calls.push_back ({
665664 {" type" , " function" },
666665 {" function" , {
@@ -676,7 +675,7 @@ struct server_task_result_cmpl_final : server_task_result {
676675 {" finish_reason" , finish_reason},
677676 {" index" , 0 },
678677 {" message" , json {
679- {" content" , oaicompat_chat_msg .content },
678+ {" content" , message .content },
680679 {" tool_calls" , tool_calls},
681680 {" role" , " assistant" },
682681 }},
@@ -2283,7 +2282,6 @@ struct server_context {
22832282 res->id_slot = slot.id ;
22842283
22852284 res->index = slot.index ;
2286- res->content = slot.generated_text ;
22872285 res->tokens = slot.generated_tokens ;
22882286 res->timings = slot.get_timings ();
22892287 res->prompt = common_detokenize (ctx, slot.prompt_tokens , true );
@@ -2304,11 +2302,11 @@ struct server_context {
23042302 res->oaicompat_model = slot.params .oaicompat_model ;
23052303 res->oaicompat_cmpl_id = slot.params .oaicompat_cmpl_id ;
23062304 if (slot.params .chat_parser ) {
2307- res->oaicompat_chat_msg = slot.params .chat_parser (slot.generated_text );
2305+ res->message = slot.params .chat_parser (slot.generated_text );
23082306 } else {
2309- res->oaicompat_chat_msg = {
2307+ res->message = {
23102308 /* .role = */ " assistant" ,
2311- /* .content = */ slot.generated_text ,
2309+ /* .content = */ std::move ( slot.generated_text ) ,
23122310 /* .tool_calls = */ {}
23132311 };
23142312 }
@@ -3838,6 +3836,8 @@ int main(int argc, char ** argv) {
38383836 // OAI-compat
38393837 task.params .oaicompat = oaicompat;
38403838 task.params .oaicompat_cmpl_id = completion_id;
3839+
3840+ // Grammar & tool-calls
38413841 task.params .sampling .grammar = chat_params.grammar ;
38423842 task.params .sampling .grammar_lazy = chat_params.grammar_lazy ;
38433843 for (const auto & trigger : chat_params.grammar_triggers ) {
0 commit comments