Skip to content

Commit 3997b47

Browse files
committed
gpt-oss : clean up workarounds
1 parent 3b57133 commit 3997b47

File tree

2 files changed

+10
-16
lines changed

2 files changed

+10
-16
lines changed

common/chat.cpp

Lines changed: 10 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -1339,6 +1339,16 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
13391339
data.prompt = prompt;
13401340
data.format = COMMON_CHAT_FORMAT_GPT_OSS;
13411341

1342+
// These special tokens are required to parse properly, so we include them
1343+
// even if parse_tool_calls is false.
1344+
data.preserved_tokens = {
1345+
"<|channel|>",
1346+
"<|constrain|>",
1347+
"<|message|>",
1348+
"<|start|>",
1349+
"<|end|>",
1350+
};
1351+
13421352
if (inputs.tools.is_array() && !inputs.tools.empty()) {
13431353
data.grammar_lazy = inputs.tool_choice != COMMON_CHAT_TOOL_CHOICE_REQUIRED;
13441354
data.grammar = build_grammar([&](const common_grammar_builder & builder) {
@@ -1397,14 +1407,6 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
13971407
COMMON_GRAMMAR_TRIGGER_TYPE_PATTERN,
13981408
"<\\|start\\|>assistant to"
13991409
});
1400-
1401-
data.preserved_tokens = {
1402-
"<|channel|>",
1403-
"<|constrain|>",
1404-
"<|message|>",
1405-
"<|start|>",
1406-
"<|end|>",
1407-
};
14081410
});
14091411
}
14101412

src/llama-vocab.cpp

Lines changed: 0 additions & 8 deletions
Original file line numberDiff line numberDiff line change
@@ -2339,13 +2339,6 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
23392339
}
23402340
}
23412341

2342-
// @ngxson : quick hack for gpt-oss, always render these tokens
2343-
for (const auto & t : token_to_id) {
2344-
if (t.first == "<|channel|>" || t.first == "<|message|>" || t.first == "<|start|>" || t.first == "<|constrain|>") {
2345-
id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
2346-
}
2347-
}
2348-
23492342
// sanity checks
23502343
if (special_eos_id != LLAMA_TOKEN_NULL && special_eog_ids.count(special_eos_id) == 0) {
23512344
special_eog_ids.insert(special_eos_id);
@@ -2388,7 +2381,6 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
23882381

23892382
if (has_return && has_call && has_end) {
23902383
special_eog_ids.erase(end_id);
2391-
id_to_token[end_id].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
23922384
LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
23932385
}
23942386
}

0 commit comments

Comments (0)