1 file changed: +4 -3 lines changed
Original file line number | Diff line number | Diff line change
@@ -393,9 +393,10 @@ struct llm_tokenizer_bpe : llm_tokenizer {
393 393 };
394 394 break;
395395 case LLAMA_VOCAB_PRE_TYPE_VELVET:
396- regex_exprs = {
397- "[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))*((?=[\\p{L}])([^A-Z]))+|[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))+((?=[\\p{L}])([^A-Z]))*|\\p{N}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
398- // "[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]*[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]+|[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]+[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]*|\\p{N}|[\\p{P}\\p{S}]{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
396+ // original regex from tokenizer.json
397+ // "[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]*[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]+|[^\\r\\n\\p{L}\\p{N}]?[\\p{Lu}\\p{Lt}\\p{Lm}\\p{Lo}\\p{M}]+[\\p{Ll}\\p{Lm}\\p{Lo}\\p{M}]*|\\p{N}|[\\p{P}\\p{S}]{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
398+ regex_exprs = {
399+ "[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))*((?=[\\p{L}])([^A-Z]))+|[^\\r\\n\\p{L}\\p{N}]?((?=[\\p{L}])([^a-z]))+((?=[\\p{L}])([^A-Z]))*|\\p{N}|[\\p{P}\\p{S}]{1,3}| ?[^\\s\\p{L}\\p{N}]+[\\r\\n/]*|\\s*[\\r\\n]+|\\s+(?!\\S)|\\s+"
399 400 };
400 401 break;
401 402 default:
You can’t perform that action at this time.
0 commit comments