@@ -494,6 +494,18 @@ int32_t server_tokens::process_chunk(
494494 return 0 ;
495495}
496496
497+ server_tokens server_tokens::clone () const {
498+ server_tokens res;
499+ res.has_mtmd = has_mtmd;
500+ res.tokens = tokens;
501+ for (auto it = map_idx_to_media.begin (); it != map_idx_to_media.end (); ++it) {
502+ size_t idx = it->first ;
503+ const mtmd::input_chunk_ptr & chunk = it->second ;
504+ res.map_idx_to_media [idx] = mtmd::input_chunk_ptr (mtmd_input_chunk_copy (chunk.get ()));
505+ }
506+ return res;
507+ }
508+
497509//
498510// tokenizer and input processing utils
499511//
@@ -745,12 +757,6 @@ json oaicompat_completion_params_parse(const json & body) {
745757 llama_params[" stop" ] = json_value (body, " stop" , json::array ());
746758 }
747759
748- // Handle "n" field
749- int n_choices = json_value (body, " n" , 1 );
750- if (n_choices != 1 ) {
751- throw std::runtime_error (" Only one completion choice is allowed" );
752- }
753-
754760 // Handle "echo" field
755761 if (json_value (body, " echo" , false )) {
756762 throw std::runtime_error (" Only no echo is supported" );
@@ -791,7 +797,7 @@ static void handle_media(
791797 SRV_INF (" downloading image from '%s'\n " , url.c_str ());
792798 auto res = common_remote_get_content (url, params);
793799 if (200 <= res.first && res.first < 300 ) {
794- SRV_INF (" downloaded %ld bytes\n " , res.second .size ());
800+ SRV_INF (" downloaded %zu bytes\n " , res.second .size ());
795801 raw_buffer data;
796802 data.insert (data.end (), res.second .begin (), res.second .end ());
797803 out_files.push_back (data);
@@ -966,6 +972,9 @@ json oaicompat_chat_params_parse(
966972 inputs.parallel_tool_calls = json_value (body, " parallel_tool_calls" , false );
967973 inputs.add_generation_prompt = json_value (body, " add_generation_prompt" , true );
968974 inputs.reasoning_format = opt.reasoning_format ;
975+ if (body.contains (" reasoning_format" )) {
976+ inputs.reasoning_format = common_reasoning_format_from_name (body.at (" reasoning_format" ).get <std::string>());
977+ }
969978 inputs.enable_thinking = opt.enable_thinking ;
970979 if (!inputs.tools .empty () && inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_NONE) {
971980 if (body.contains (" grammar" )) {
@@ -1045,11 +1054,8 @@ json oaicompat_chat_params_parse(
10451054 for (const auto & stop : chat_params.additional_stops ) {
10461055 llama_params[" stop" ].push_back (stop);
10471056 }
1048-
1049- // Handle "n" field
1050- int n_choices = json_value (body, " n" , 1 );
1051- if (n_choices != 1 ) {
1052- throw std::invalid_argument (" Only one completion choice is allowed" );
1057+ if (!chat_params.parser .empty ()) {
1058+ llama_params[" chat_parser" ] = chat_params.parser ;
10531059 }
10541060
10551061 // Handle "logprobs" field
0 commit comments