Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
49 changes: 44 additions & 5 deletions common/chat.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -1314,17 +1314,56 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
data.prompt = prompt;
data.format = COMMON_CHAT_FORMAT_GPT_OSS;

// These special tokens are required to parse properly, so we include them
// even if parse_tool_calls is false.
data.preserved_tokens = {
"<|channel|>",
"<|constrain|>",
"<|message|>",
"<|start|>",
"<|end|>",
};

// TODO: support tool calls in GPT-OSS?

return data;
}
// Parses a GPT-OSS completion held by `builder`.
//
// Layout handled here:
//   <|channel|>analysis<|message|>{reasoning}<|end|> ... <|channel|>final<|message|>{reply}[<|end|>]
//
// - The analysis section is optional and is only recognized at the current
//   parse position (try_consume_regex).
// - The reasoning is emitted either as reasoning content or, depending on the
//   builder's syntax settings, inline in the content wrapped in <think> tags.
// - Whatever remains after the final section is consumed and discarded,
//   because tool calls are not parsed yet.
static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
    static const common_regex end_regex("<\\|end\\|>");
    static const common_regex analysis_regex("<\\|channel\\|>analysis<\\|message\\|>");
    static const common_regex final_regex("<\\|channel\\|>final<\\|message\\|>");

    if (builder.try_consume_regex(analysis_regex)) {
        std::string reasoning;
        bool has_end = false;
        if (auto res = builder.try_find_regex(end_regex, std::string::npos, false)) {
            // everything before <|end|> is the reasoning text
            reasoning = res->prelude;
            has_end = true;
        } else {
            // no <|end|> found: take all remaining input as reasoning
            // (presumably a partial / still-streaming response - confirm against callers)
            reasoning = builder.consume_rest();
        }

        if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
            // the templates raise an exception if <|channel|> is present in
            // an assistant's content, so wrap it in think tags
            builder.add_content("<think>");
            builder.add_content(reasoning);
            // only close the tag once the end of the reasoning has been seen
            if (has_end) {
                builder.add_content("</think>");
            }
        } else {
            builder.add_reasoning_content(reasoning);
        }
    }

    if (builder.try_find_regex(final_regex, std::string::npos, false)) {
        // NOTE(review): with its default arguments try_find_regex presumably adds
        // the text before <|end|> to the content itself - confirm in common_chat_msg_parser.
        if (!builder.try_find_regex(end_regex)) {
            // no <|end|> found: the rest of the input is the reply
            builder.add_content(builder.consume_rest());
        }
    }

    // no tool call support yet, so we have to consume everything else
    builder.consume_rest();
}

static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
Expand Down
7 changes: 0 additions & 7 deletions src/llama-vocab.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2339,13 +2339,6 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
}
}

// @ngxson : quick hack for gpt-oss, always render these tokens
for (const auto & t : token_to_id) {
if (t.first == "<|channel|>" || t.first == "<|message|>" || t.first == "<|start|>") {
id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
}
}

Comment on lines -2342 to -2348
Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Are we sure about removing this? It will prevent rendering these tokens without --special

Copy link
Collaborator Author

@aldehir aldehir Aug 11, 2025

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I am admittedly new to the code base; however, for the web server it seems that placing those tokens in preserved_tokens is sufficient to make them render.

I tested it with llama-cli and I see now it does omit them there. I will revert it.

This comment was marked as outdated.

Copy link
Collaborator

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

In addition to this, I think <|constrain|> and <|end|> should also be added to the condition

// sanity checks
if (special_eos_id != LLAMA_TOKEN_NULL && special_eog_ids.count(special_eos_id) == 0) {
special_eog_ids.insert(special_eos_id);
Expand Down
Loading