Skip to content

Commit 2ce8604

Browse files
committed
changes for fc3 (Falcon 3 tokenizer pre-type)
1 parent 814d0ee commit 2ce8604

File tree

3 files changed

+11
-0
lines changed

3 files changed

+11
-0
lines changed

include/llama.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -103,6 +103,7 @@ extern "C" {
103 103
LLAMA_VOCAB_PRE_TYPE_GPT3_FINNISH = 24,
104 104
LLAMA_VOCAB_PRE_TYPE_EXAONE = 25,
105 105
LLAMA_VOCAB_PRE_TYPE_CHAMELEON = 26,
106+
LLAMA_VOCAB_PRE_TYPE_FALCON_3 = 27,
106 107
};
107 108

108 109
enum llama_rope_type {

src/llama-vocab.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -412,6 +412,13 @@ struct llm_tokenizer_bpe : llm_tokenizer {
412 412
"[0-9][0-9][0-9]",
413 413
};
414 414
break;
415+
case LLAMA_VOCAB_PRE_TYPE_FALCON_3:
416+
regex_exprs = {
417+
"[\\p{P}\\$\\+<=>\\^~\\|`]+",
418+
"'s|'t|'re|'ve|'m|'ll|'d| ?\\p{L}+| ?\\p{N}+| ?[^\\s\\p{L}\\p{N}]+|\\s+(?!\\S)",
419+
"[0-9]",
420+
};
421+
break;
415 422
case LLAMA_VOCAB_PRE_TYPE_STARCODER:
416 423
case LLAMA_VOCAB_PRE_TYPE_REFACT:
417 424
case LLAMA_VOCAB_PRE_TYPE_COMMAND_R:

src/llama.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -6351,6 +6351,9 @@ static void llm_load_vocab(
6351 6351
} else if (
6352 6352
tokenizer_pre == "falcon") {
6353 6353
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_FALCON;
6354+
} else if (
6355+
tokenizer_pre == "falcon3") {
6356+
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_FALCON_3;
6354 6357
} else if (
6355 6358
tokenizer_pre == "mpt") {
6356 6359
vocab.type_pre = LLAMA_VOCAB_PRE_TYPE_MPT;

0 commit comments

Comments
 (0)