File tree Expand file tree Collapse file tree 3 files changed +20
-0
lines changed Expand file tree Collapse file tree 3 files changed +20
-0
lines changed Original file line number Diff line number Diff line change @@ -103,6 +103,7 @@ extern "C" {
103103 LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24 ,
104104 LLAMA_VOCAB_PRE_TYPE_EXAONE = 25 ,
105105 LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26 ,
106+ LLAMA_VOCAB_PRE_TYPE_FALCON_3 = 27 ,
106107 };
107108
108109 enum llama_rope_type {
Original file line number Diff line number Diff line change @@ -412,6 +412,13 @@ struct llm_tokenizer_bpe : llm_tokenizer {
412412 " [0-9][0-9][0-9]" ,
413413 };
414414 break ;
415+ case LLAMA_VOCAB_PRE_TYPE_FALCON_3:
416+ regex_exprs = {
417+ " [\\ p{P}\\ $\\ +<=>\\ ^~\\ |`]+" ,
418+ " 's|'t|'re|'ve|'m|'ll|'d| ?\\ p{L}+| ?\\ p{N}+| ?[^\\ s\\ p{L}\\ p{N}]+|\\ s+(?!\\ S)" ,
419+ " [0-9]" ,
420+ };
421+ break ;
415422 case LLAMA_VOCAB_PRE_TYPE_STARCODER:
416423 case LLAMA_VOCAB_PRE_TYPE_REFACT:
417424 case LLAMA_VOCAB_PRE_TYPE_COMMAND_R:
Original file line number Diff line number Diff line change @@ -6351,6 +6351,9 @@ static void llm_load_vocab(
63516351 } else if (
63526352 tokenizer_pre == "falcon") {
63536353 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_FALCON;
6354+ } else if (
6355+ tokenizer_pre == "falcon3") {
6356+ vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_FALCON_3;
63546357 } else if (
63556358 tokenizer_pre == "mpt") {
63566359 vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_MPT;
@@ -21613,6 +21616,15 @@ static int32_t llama_chat_apply_template_internal(
2161321616 if (add_ass) {
2161421617 ss << "<|assistant|>\n";
2161521618 }
21619+ } else if (tmpl == "falcon3" || (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>"))) {
21620+ // Falcon 3
21621+ for (auto message : chat) {
21622+ std::string role(message->role);
21623+ ss << "<|" << role << "|>\n" << message->content << "\n";
21624+ }
21625+ if (add_ass) {
21626+ ss << "<|assistant|>\n";
21627+ }
2161621628 } else if (tmpl == "zephyr" || tmpl_contains("<|user|>")) {
2161721629 // zephyr template
2161821630 for (auto message : chat) {
You can’t perform that action at this time.
0 commit comments