Skip to content

Commit e47dc34

Browse files
committed
Merge branch 'smoldocling-support' of github.com:ryan-mangeno/llama.cpp into smoldocling-support
2 parents f5e4d56 + 69fa672 commit e47dc34

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

src/llama-vocab.cpp

Lines changed: 4 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -429,9 +429,10 @@ struct llm_tokenizer_bpe : llm_tokenizer {
429 429
// uses digits and byte level pre tokenizers defined in the pre_tokenizer section of
430 430
// https://huggingface.co/ds4sd/SmolDocling-256M-preview/raw/main/tokenizer.json
431 431
regex_exprs = {
432-
"[0-9]",
433-
"[a-zA-Z0-9_]+|[^a-zA-Z0-9_\\s]+",
434-
};
432+
"[0-9]",
433+
"[a-zA-Z0-9_]+|[^a-zA-Z0-9_\\s]+",
434+
};
435+
break;
435 436
default:
436 437
// default regex for BPE tokenization pre-processing
437 438
regex_exprs = {

0 commit comments

Comments
 (0)