Skip to content

Commit 69fa672

Browse files
ryan-mangeno and CISC authored
Update src/llama-vocab.cpp
Co-authored-by: Sigbjørn Skjæret <[email protected]>
1 parent 5c3d948 commit 69fa672

File tree

1 file changed

+4
-3
lines changed

1 file changed

+4
-3
lines changed

src/llama-vocab.cpp

Lines changed: 4 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -429,9 +429,10 @@ struct llm_tokenizer_bpe : llm_tokenizer {
429 429
// uses digits and byte level pre tokenizers defined in the pre_tokenizer section of
430 430
// https://huggingface.co/ds4sd/SmolDocling-256M-preview/raw/main/tokenizer.json
431 431
regex_exprs = {
432-
"[0-9]",
433-
"[a-zA-Z0-9_]+|[^a-zA-Z0-9_\\s]+",
434-
};
432+
"[0-9]",
433+
"[a-zA-Z0-9_]+|[^a-zA-Z0-9_\\s]+",
434+
};
435+
break;
435 436
default:
436 437
// default regex for BPE tokenization pre-processing
437 438
regex_exprs = {

0 commit comments

Comments (0)