Skip to content

Commit 4294712

Browse files
committed
Fix /completions endpoint by separating completion and chat completion parsing
- Add a new oaicompat_completion_params_parse() for simple completions.
- Rename the existing function to oaicompat_chat_completion_params_parse().
- Update the /completions endpoint to use the simple parser (no chat templates).
- Update the /chat/completions endpoint to use the chat parser (with tools support).
- Fixes the compatibility issue introduced in tool calling PR ikawrakow#677.
- Incorporates fixes from upstream PR ikawrakow#684.
1 parent df660f7 commit 4294712

File tree

2 files changed

+43
-5
lines changed

2 files changed

+43
-5
lines changed

examples/server/server.cpp

Lines changed: 2 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -3602,9 +3602,7 @@ int main(int argc, char ** argv) {
36023602
}
36033603

36043604
res.set_header("Access-Control-Allow-Origin", req.get_header_value("Origin"));
3605-
auto body = json::parse(req.body);
3606-
const auto& chat_template = body.contains("tools") && ctx_server.chat_templates.template_tool_use ? *ctx_server.chat_templates.template_tool_use : *ctx_server.chat_templates.template_default;
3607-
json data = oaicompat_completion_params_parse(ctx_server.model, body, chat_template, ctx_server.params.use_jinja);
3605+
json data = oaicompat_completion_params_parse(json::parse(req.body));
36083606

36093607
const int id_task = ctx_server.queue_tasks.get_new_id();
36103608

@@ -3707,7 +3705,7 @@ int main(int argc, char ** argv) {
37073705

37083706
auto body = json::parse(req.body);
37093707
const auto& chat_template = body.contains("tools") && ctx_server.chat_templates.template_tool_use ? *ctx_server.chat_templates.template_tool_use : *ctx_server.chat_templates.template_default;
3710-
json data = oaicompat_completion_params_parse(ctx_server.model,body, chat_template, params.use_jinja);
3708+
json data = oaicompat_chat_completion_params_parse(ctx_server.model,body, chat_template, params.use_jinja);
37113709

37123710

37133711
const int id_task = ctx_server.queue_tasks.get_new_id();

examples/server/utils.hpp

Lines changed: 41 additions & 1 deletion
Original file line number | Diff line number | Diff line change
@@ -424,7 +424,47 @@ static tool_choice_type tool_choice_parse_oaicompat(const std::string & tool_cho
424424
// OAI utils
425425
//
426426

427-
static json oaicompat_completion_params_parse(
427+
static json oaicompat_completion_params_parse(const json& body) {
428+
json llama_params;
429+
430+
if (!body.contains("prompt")) {
431+
throw std::runtime_error("\"prompt\" is required");
432+
}
433+
434+
// Handle "stop" field
435+
if (body.contains("stop") && body.at("stop").is_string()) {
436+
llama_params["stop"] = json::array({ body.at("stop").get<std::string>() });
437+
}
438+
else {
439+
llama_params["stop"] = json_value(body, "stop", json::array());
440+
}
441+
442+
// Handle "n" field
443+
int n_choices = json_value(body, "n", 1);
444+
if (n_choices != 1) {
445+
throw std::runtime_error("Only one completion choice is allowed");
446+
}
447+
448+
// Params supported by OAI but unsupported by llama.cpp
449+
static const std::vector<std::string> unsupported_params{ "best_of", "echo", "suffix" };
450+
for (const auto& param : unsupported_params) {
451+
if (body.contains(param)) {
452+
throw std::runtime_error("Unsupported param: " + param);
453+
}
454+
}
455+
456+
// Copy remaining properties to llama_params
457+
for (const auto& item : body.items()) {
458+
// Exception: if "n_predict" is present, we overwrite the value specified earlier by "max_tokens"
459+
if (!llama_params.contains(item.key()) || item.key() == "n_predict") {
460+
llama_params[item.key()] = item.value();
461+
}
462+
}
463+
464+
return llama_params;
465+
}
466+
467+
static json oaicompat_chat_completion_params_parse(
428468
const struct llama_model * model,
429469
const json & body, /* openai api json semantics */
430470
const common_chat_template& tmpl,

0 commit comments

Comments (0)