Skip to content

Commit a270792

Browse files
Merge pull request #1 from younesbelkada/fc3
Changes for Falcon 3 release
2 parents 874ed80 + ec5ce2a commit a270792

File tree

3 files changed

+20
-0
lines changed

3 files changed

+20
-0
lines changed

include/llama.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ extern "C" {
103103
LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
104104
LLAMA_VOCAB_PRE_TYPE_EXAONE = 25,
105105
LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26,
106+
LLAMA_VOCAB_PRE_TYPE_FALCON_3 = 27,
106107
};
107108

108109
enum llama_rope_type {

src/llama-vocab.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,13 @@ struct llm_tokenizer_bpe : llm_tokenizer {
412412
"[0-9][0-9][0-9]",
413413
};
414414
break;
415+
case LLAMA_VOCAB_PRE_TYPE_FALCON_3:
416+
regex_exprs = {
417+
"[\\p{P}\\$\\+<=>\\^~\\|`]+",
418+
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
419+
"[0-9]",
420+
};
421+
break;
415422
case LLAMA_VOCAB_PRE_TYPE_STARCODER:
416423
case LLAMA_VOCAB_PRE_TYPE_REFACT:
417424
case LLAMA_VOCAB_PRE_TYPE_COMMAND_R:

src/llama.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6351,6 +6351,9 @@ static void llm_load_vocab(
63516351
} else if (
63526352
tokenizer_pre == "falcon") {
63536353
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_FALCON;
6354+
} else if (
6355+
tokenizer_pre == "falcon3") {
6356+
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_FALCON_3;
63546357
} else if (
63556358
tokenizer_pre == "mpt") {
63566359
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_MPT;
@@ -21613,6 +21616,15 @@ static int32_t llama_chat_apply_template_internal(
2161321616
if (add_ass) {
2161421617
ss << "<|assistant|>\n";
2161521618
}
21619+
} else if (tmpl == "falcon3" || (tmpl_contains("<|assistant|>") && tmpl_contains("<|user|>"))) {
21620+
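// Falcon 3: each message is emitted as "<|role|>\n" + content + "\n". NOTE(review): this branch is tested before the zephyr branch below (which matches on "<|user|>" alone), so any template containing both "<|user|>" and "<|assistant|>" is formatted here and never reaches zephyr — confirm this ordering is intended.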
21621+
for (auto message : chat) {
21622+
std::string role(message->role);
21623+
ss << "<|" << role << "|>\n" << message->content << "\n";
21624+
}
21625+
if (add_ass) {
21626+
ss << "<|assistant|>\n";
21627+
}
2161621628
} else if (tmpl == "zephyr" || tmpl_contains("<|user|>")) {
2161721629
// zephyr template
2161821630
for (auto message : chat) {

0 commit comments

Comments
 (0)