Skip to content

Commit e0393c0

Browse files
committed
Revert "Try standard gpt-2 pretokenizer?"
This reverts commit 9b94cb6.
1 parent 62ebee4 commit e0393c0

File tree

1 file changed

+2
-3
lines changed

1 file changed

+2
-3
lines changed

src/llama-vocab.cpp

Lines changed: 2 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -1866,7 +1866,6 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
1866 1866   tokenizer_pre == "jina-v2-es" ||
1867 1867   tokenizer_pre == "jina-v2-de" ||
1868 1868   tokenizer_pre == "a.x-4.0" ||
1869      - tokenizer_pre == "minimax-m2" ||
1870 1869   tokenizer_pre == "mellum") {
1871 1870   pre_type = LLAMA_VOCAB_PRE_TYPE_GPT2;
1872 1871   } else if (
@@ -1994,11 +1993,11 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
1994 1993   tokenizer_pre == "grok-2") {
1995 1994   pre_type = LLAMA_VOCAB_PRE_TYPE_GROK_2;
1996 1995   clean_spaces = false;
1997      - } else /*if (
     1996 + } else if (
1998 1997   tokenizer_pre == "minimax-m2") {
1999 1998   pre_type = LLAMA_VOCAB_PRE_TYPE_MINIMAX_M2;
2000 1999   clean_spaces = false;
2001      - } else*/ {
     2000 + } else {
2002 2001   throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
2003 2002   }
2004 2003   } else if (type == LLAMA_VOCAB_TYPE_SPM) {

0 commit comments

Comments
 (0)