File tree: 1 file changed, +6 -1 lines changed
Original file line number | Diff line number | Diff line change
@@ -608,7 +608,12 @@ static std::vector<size_t> unicode_regex_split_custom_kimi_k2(const std::string
608608            //  Pattern 2 & 3: Letter words excluding Han characters with optional contractions
609609            //  [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+(?:'s|'t|'re|'ve|'m|'ll|'d)?
610610            //  [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?:'s|'t|'re|'ve|'m|'ll|'d)?
611-             if  (flags.is_letter  && !unicode_cpt_is_han (cpt)) {
611+             //  Check if current char is a letter OR if current char could be a leading char and next char is a letter
612+             bool  is_letter_pattern = (flags.is_letter  && !unicode_cpt_is_han (cpt)) ||
613+                                      (!(cpt == ' \r '   || cpt == ' \n '   || flags.is_letter  || flags.is_number ) && 
614+                                       _get_flags (pos + 1 ).is_letter  && !unicode_cpt_is_han (_get_cpt (pos + 1 )));
615+             
616+             if  (is_letter_pattern) {
612617                //  Handle optional leading non-letter/non-number character
613618                bool  has_leading_char = false ;
614619                if  (!(cpt == ' \r '   || cpt == ' \n '   || flags.is_letter  || flags.is_number )) {
    
 
   
 
     
   
   
          
     
  
    
     
 
    
      
     
 
     
    You can’t perform that action at this time.
  
 
    
  
     
    
      
        
     
 
       
      
     
   
 
    
    
  
 
  
 
     
    
0 commit comments