File tree Expand file tree Collapse file tree 1 file changed +2
-1
lines changed Expand file tree Collapse file tree 1 file changed +2
-1
lines changed Original file line number Diff line number Diff line change @@ -411,7 +411,8 @@ struct llm_tokenizer_bpe : llm_tokenizer {
411411                regex_exprs = {
412412                    //  original regex from tokenizer.json
413413                    //  "'(?i:[sdmt]|ll|ve|re)|[^\\r\\n\\p{L}\\p{N}]?+\\p{L}+|\\p{N}| ?[^\\s\\p{L}\\p{N}]++[\\r\\n]*|\\s*[\\r\\n]|\\s+(?!\\S)|\\s+"
414-                     " '(?:[sSdDmMtT]|[lL][lL]|[vV][eE]|[rR][eE])|[^\\ r\\ n\\ p{L}\\ p{N}]?+\\ p{L}+|\\ p{N}| ?[^\\ s\\ p{L}\\ p{N}]++[\\ r\\ n]*|\\ s*[\\ r\\ n]|\\ s+(?!\\ S)|\\ s+"  ,
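414+                     //  FIXME: possessive quantifiers (?+ and ++) were replaced with their greedy forms (? and +) to avoid regex errors and imatrix hanging; atomic grouping was tried but does not appear to be supported. Verify that tokenization still matches the original regex.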
415+                     " '(?:[sSdDmMtT]|[lL][lL]|[vV][eE]|[rR][eE])|[^\\ r\\ n\\ p{L}\\ p{N}]?\\ p{L}+|\\ p{N}| ?[^\\ s\\ p{L}\\ p{N}]+[\\ r\\ n]*|\\ s*[\\ r\\ n]|\\ s+(?!\\ S)|\\ s+"  ,
415416                };
416417                break ;
417418            default :
    
 
   
 
     
   
   
          
     
  
    
     
 
    
      
     
 
     
    You can’t perform that action at this time.
  
 
    
  
     
    
      
        
     
 
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments