1 file changed (+0, −12)

@@ -147,7 +147,6 @@ struct templates_params {
     json extra_context;
     bool add_bos;
     bool add_eos;
-    bool is_inference = true;
 };

 common_chat_tool_choice common_chat_tool_choice_parse_oaicompat(const std::string & tool_choice) {
@@ -1337,17 +1336,6 @@ static common_chat_params common_chat_params_init_gpt_oss(const common_chat_temp
     common_chat_params data;
     auto prompt = apply(tmpl, inputs);

-    // Check if we need to replace the return token with end token during
-    // inference and without generation prompt. For more details see:
-    // https://github.com/ggml-org/llama.cpp/issues/15417
-    if (inputs.is_inference && !inputs.add_generation_prompt) {
-        static constexpr std::string_view return_token = "<|return|>";
-        static constexpr std::string_view end_token    = "<|end|>";
-        if (size_t pos = prompt.rfind(return_token); pos != std::string::npos) {
-            prompt.replace(pos, return_token.length(), end_token);
-        }
-    }
-
     data.prompt = prompt;
     data.format = COMMON_CHAT_FORMAT_GPT_OSS;
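For context, the removed hunk was a post-render fixup: when the template was applied for inference without a generation prompt, a trailing "<|return|>" in the rendered prompt was replaced with "<|end|>" (see https://github.com/ggml-org/llama.cpp/issues/15417). Below is a minimal standalone sketch of that replacement logic, assuming a C++17 compiler; the helper name and the sample prompt are illustrative, not llama.cpp code.

// Sketch (hypothetical helper, not llama.cpp code) of the behavior the
// removed hunk implemented: swap the last "<|return|>" token in a rendered
// prompt for "<|end|>".
#include <cstddef>
#include <iostream>
#include <string>
#include <string_view>

static void replace_return_with_end(std::string & prompt) {
    static constexpr std::string_view return_token = "<|return|>";
    static constexpr std::string_view end_token    = "<|end|>";
    // Find the last occurrence of the return token and splice in the end token.
    if (size_t pos = prompt.rfind(return_token); pos != std::string::npos) {
        prompt.replace(pos, return_token.length(), end_token);
    }
}

int main() {
    std::string prompt = "<|start|>assistant<|message|>Hi there<|return|>";
    replace_return_with_end(prompt);
    std::cout << prompt << "\n"; // prints ...Hi there<|end|>
}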