Skip to content

Commit 04e1626

Browse files
committed
gpt-oss : set special tokens back to user defined
1 parent eca4d65 commit 04e1626

File tree

1 file changed

+8
-0
lines changed

1 file changed

+8
-0
lines changed

src/llama-vocab.cpp

Lines changed: 8 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2339,6 +2339,13 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
23392339
}
23402340
}
23412341

2342+
// @ngxson : quick hack for gpt-oss, always render these tokens
2343+
for (const auto & t : token_to_id) {
2344+
if (t.first == "<|channel|>" || t.first == "<|message|>" || t.first == "<|start|>" || t.first == "<|constrain|>") {
2345+
id_to_token[t.second].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
2346+
}
2347+
}
2348+
23422349
// sanity checks
23432350
if (special_eos_id != LLAMA_TOKEN_NULL && special_eog_ids.count(special_eos_id) == 0) {
23442351
special_eog_ids.insert(special_eos_id);
@@ -2381,6 +2388,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
23812388

23822389
if (has_return && has_call && has_end) {
23832390
special_eog_ids.erase(end_id);
2391+
id_to_token[end_id].attr = LLAMA_TOKEN_ATTR_USER_DEFINED;
23842392
LLAMA_LOG_WARN("%s: special_eog_ids contains both '<|return|>' and '<|call|>' tokens, removing '<|end|>' token from EOG list\n", __func__);
23852393
}
23862394
}

0 commit comments

Comments
 (0)