3 files changed: +11 -0 lines changed
Original file line number | Diff line number | Diff line change
@@ -103,6 +103,7 @@ extern "C" {
103103 LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24 ,
104104 LLAMA_VOCAB_PRE_TYPE_EXAONE = 25 ,
105105 LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26 ,
106+ LLAMA_VOCAB_PRE_TYPE_FALCON_3 = 27 ,
106107 };
107108
108109 enum llama_rope_type {
Original file line number Diff line number Diff line change @@ -412,6 +412,13 @@ struct llm_tokenizer_bpe : llm_tokenizer {
412412 " [0-9][0-9][0-9]" ,
413413 };
414414 break ;
415+ case LLAMA_VOCAB_PRE_TYPE_FALCON_3:
416+ regex_exprs = {
417+ "[\\p{P}\\$\\+<=>\\^~\\|`]+" ,
418+ "'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)" ,
419+ "[0-9]" ,
420+ };
421+ break ;
415422 case LLAMA_VOCAB_PRE_TYPE_STARCODER:
416423 case LLAMA_VOCAB_PRE_TYPE_REFACT:
417424 case LLAMA_VOCAB_PRE_TYPE_COMMAND_R:
Original file line number Diff line number Diff line change @@ -6351,6 +6351,9 @@ static void llm_load_vocab(
63516351 } else if (
63526352 tokenizer_pre == "falcon") {
63536353 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_FALCON;
6354+ } else if (
6355+ tokenizer_pre == "falcon3") {
6356+ vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_FALCON_3;
63546357 } else if (
63556358 tokenizer_pre == "mpt") {
63566359 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_MPT;
You can’t perform that action at this time.
0 commit comments