Skip to content
Merged
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions common/chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ struct templates_params {
json extra_context;
bool add_bos;
bool add_eos;
bool is_inference = true;
};

common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice) {
Expand Down Expand Up @@ -1336,6 +1337,17 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
common_chat_params data;
auto prompt = apply(tmpl, inputs);

// Check if we need to replace the return token with end token during
// inference and without generation prompt. For more details see:
// https://github.com/ggml-org/llama.cpp/issues/15417
if (inputs.is_inference && !inputs.add_generation_prompt) {
static constexpr std::string_view return_token = "<|return|>";
static constexpr std::string_view end_token = "<|end|>";
if (size_t pos = prompt.rfind(return_token); pos != std::string::npos) {
prompt.replace(pos, return_token.length(), end_token);
}
}

data.prompt = prompt;
data.format = COMMON_CHAT_FORMAT_GPT_OSS;

Expand Down
Loading