We read every piece of feedback, and take your input very seriously.
To see all available qualifiers, see our documentation.
There was an error while loading. Please reload this page.
2 parents f5e4d56 + 69fa672, commit e47dc34 (Copy full SHA for e47dc34)
src/llama-vocab.cpp
@@ -429,9 +429,10 @@ struct llm_tokenizer_bpe : llm_tokenizer {
429
// uses digits and byte level pre tokenizers defined in the pre_tokenizer section of
430
// https://huggingface.co/ds4sd/SmolDocling-256M-preview/raw/main/tokenizer.json
431
regex_exprs = {
432
- "[0-9]",
433
- "[a-zA-Z0-9_]+|[^a-zA-Z0-9_\\s]+",
434
- };
+ "[0-9]",
+ "[a-zA-Z0-9_]+|[^a-zA-Z0-9_\\s]+",
+ };
435
+ break;
436
default:
437
// default regex for BPE tokenization pre-processing
438
0 commit comments