diff --git a/common/chat.cpp b/common/chat.cpp
index 316bd24170c9e..fb6f457e31132 100644
--- a/common/chat.cpp
+++ b/common/chat.cpp
@@ -1314,17 +1314,56 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
     data.prompt = prompt;
     data.format = COMMON_CHAT_FORMAT_GPT_OSS;
 
+    // These special tokens are required to parse properly, so we include them
+    // even if parse_tool_calls is false.
+    data.preserved_tokens = {
+        "<|channel|>",
+        "<|constrain|>",
+        "<|message|>",
+        "<|start|>",
+        "<|end|>",
+    };
+
     // TODO: support tool calls in GPT-OSS?
 
     return data;
 }
 static void common_chat_parse_gpt_oss(common_chat_msg_parser & builder) {
-    // TODO @ngxson : this won't work with --special enabled, we should fix that
-    builder.try_parse_reasoning("<|channel|>analysis<|message|>", "<|start|>assistant<|channel|>final<|message|>");
-    if (!builder.syntax().parse_tool_calls) {
-        builder.add_content(builder.consume_rest());
-        return;
+    static const common_regex end_regex("<\\|end\\|>");
+    static const common_regex analysis_regex("<\\|channel\\|>analysis<\\|message\\|>");
+    static const common_regex final_regex("<\\|channel\\|>final<\\|message\\|>");
+
+    if (builder.try_consume_regex(analysis_regex)) {
+        std::string reasoning;
+        bool has_end = false;
+        if (auto res = builder.try_find_regex(end_regex, std::string::npos, false)) {
+            reasoning = res->prelude;
+            has_end = true;
+        } else {
+            reasoning = builder.consume_rest();
+        }
+
+        if (builder.syntax().reasoning_format == COMMON_REASONING_FORMAT_NONE || builder.syntax().reasoning_in_content) {
+            // the templates raise an exception if <|channel|> is present in
+            // an assistant's content, so wrap it in think tags
+            builder.add_content("<think>");
+            builder.add_content(reasoning);
+            if (has_end) {
+                builder.add_content("</think>");
+            }
+        } else {
+            builder.add_reasoning_content(reasoning);
+        }
+    }
+
+    if (builder.try_find_regex(final_regex, std::string::npos, false)) {
+        if (!builder.try_find_regex(end_regex)) {
+            builder.add_content(builder.consume_rest());
+        }
     }
+
+    // no tool call support yet, so we have to consume everything else
+    builder.consume_rest();
 }
 
 static common_chat_params common_chat_params_init_firefunction_v2(const common_chat_template & tmpl, const struct templates_params & inputs) {
diff --git a/src/llama-vocab.cpp b/src/llama-vocab.cpp
index f7e03e702ed19..6639cd630b627 100644
--- a/src/llama-vocab.cpp
+++ b/src/llama-vocab.cpp
@@ -2339,13 +2339,6 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
         }
     }
 
-    // @ngxson : quick hack for gpt-oss, always render these tokens
-    for (const auto & t : token_to_id) {
-        if (t.first == "<|channel|>" || t.first == "<|message|>" || t.first == "<|start|>") {
-            id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
-        }
-    }
-
     // sanity checks
     if (special_eos_id != LLAMA_TOKEN_NULL && special_eog_ids.count(special_eos_id) == 0) {
         special_eog_ids.insert(special_eos_id);
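
Note (not part of the diff): a minimal usage sketch of what the rewritten parser does end to end, assuming the common_chat_parse() entry point and common_chat_syntax struct from common/chat.h; the transcript string is a hypothetical GPT-OSS response with the special tokens preserved.

// Sketch: the analysis channel carries the chain of thought, the final
// channel carries the user-visible answer; the parser splits them into
// reasoning_content and content respectively.
#include <cassert>
#include <string>

#include "chat.h"

int main() {
    common_chat_syntax syntax;
    syntax.format           = COMMON_CHAT_FORMAT_GPT_OSS;
    syntax.reasoning_format = COMMON_REASONING_FORMAT_AUTO;

    // hypothetical raw model output, as seen with --special enabled
    const std::string raw =
        "<|channel|>analysis<|message|>Let me think...<|end|>"
        "<|start|>assistant<|channel|>final<|message|>Hello!<|end|>";

    common_chat_msg msg = common_chat_parse(raw, /* is_partial */ false, syntax);
    assert(msg.reasoning_content == "Let me think...");
    assert(msg.content == "Hello!");
    return 0;
}

With COMMON_REASONING_FORMAT_NONE (or reasoning_in_content) the same input would instead yield the analysis text in msg.content, wrapped in <think> tags so the templates do not reject the raw <|channel|> markers.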