File tree: 1 file changed, +6 -1 lines changed
lines changed Original file line number Diff line number Diff line change @@ -608,7 +608,12 @@ static std::vector<size_t> unicode_regex_split_custom_kimi_k2(const std::string
608608 // Pattern 2 & 3: Letter words excluding Han characters with optional contractions
609609 // [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+(?:'s|'t|'re|'ve|'m|'ll|'d)?
610610 // [^\r\n\p{L}\p{N}]?[\p{Lu}\p{Lt}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]+[\p{Ll}\p{Lm}\p{Lo}\p{M}&&[^\p{Han}]]*(?:'s|'t|'re|'ve|'m|'ll|'d)?
611- if (flags.is_letter && !unicode_cpt_is_han (cpt)) {
611+ // Enter the letter-word branch when the current code point is a non-Han letter, OR when it is a permitted leading character (not \r, \n, a letter, or a number) and the next code point is a non-Han letter
612+ bool is_letter_pattern = (flags.is_letter && !unicode_cpt_is_han (cpt)) ||
613+ (!(cpt == ' \r ' || cpt == ' \n ' || flags.is_letter || flags.is_number ) &&
614+ _get_flags (pos + 1 ).is_letter && !unicode_cpt_is_han (_get_cpt (pos + 1 )));
615+
616+ if (is_letter_pattern) {
612617 // Handle optional leading non-letter/non-number character
613618 bool has_leading_char = false ;
614619 if (!(cpt == ' \r ' || cpt == ' \n ' || flags.is_letter || flags.is_number )) {
You can’t perform that action at this time.
0 commit comments