Skip to content

Commit a10e37f

Browse files
committed
Update llama-vocab.cpp for Trillion
1 parent 8a26252 commit a10e37f

File tree

1 file changed

+2
-0
lines changed

1 file changed

+2
-0
lines changed

src/llama-vocab.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -332,6 +332,7 @@ struct llm_tokenizer_bpe : llm_tokenizer {
332 332
case LLAMA_VOCAB_PRE_TYPE_SMOLLM:
333 333
case LLAMA_VOCAB_PRE_TYPE_CODESHELL:
334 334
case LLAMA_VOCAB_PRE_TYPE_EXAONE:
335 +
case LLAMA_VOCAB_PRE_TYPE_TRILLION:
335 336
case LLAMA_VOCAB_PRE_TYPE_MINERVA:
336 337
regex_exprs = {
337 338
"\\p{N}",
@@ -1617,6 +1618,7 @@ void llama_vocab::impl::load(llama_model_loader & ml, const LLM_KV & kv) {
1617 1618
} else if (
1618 1619
tokenizer_pre == "trillion") {
1619 1620
pre_type = LLAMA_VOCAB_PRE_TYPE_TRILLION;
1621 +
clean_spaces = false;
1620 1622
} else {
1621 1623
throw std::runtime_error(format("unknown pre-tokenizer type: '%s'", tokenizer_pre.c_str()));
1622 1624
}

0 commit comments

Comments
 (0)