Skip to content

Commit 29c1532

Browse files
Remove more trailing whitespace from unicode.cpp
1 parent 6fda4fe commit 29c1532

File tree

1 file changed

+15
-15
lines changed

1 file changed

+15
-15
lines changed

src/unicode.cpp

Lines changed: 15 additions & 15 deletions
Original file line number | Diff line number | Diff line change
@@ -612,31 +612,31 @@ static std::vector<size_t> unicode_regex_split_custom_kimi_k2(const std::string
612612
bool is_letter_pattern = (flags.is_letter && !unicode_cpt_is_han(cpt)) ||
613613
(!(cpt == '\r' || cpt == '\n' || flags.is_letter || flags.is_number) &&
614614
_get_flags(pos + 1).is_letter && !unicode_cpt_is_han(_get_cpt(pos + 1)));
615-
615+
616616
if (is_letter_pattern) {
617617
// Handle optional leading non-letter/non-number character
618618
bool has_leading_char = false;
619619
if (!(cpt == '\r' || cpt == '\n' || flags.is_letter || flags.is_number)) {
620620
has_leading_char = true;
621621
pos++;
622622
}
623-
623+
624624
// Match letter sequence (excluding Han characters)
625625
bool has_letters = false;
626626
while (_get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos))) {
627627
has_letters = true;
628628
pos++;
629629
}
630-
630+
631631
// Only proceed if we found letters (after potentially skipping leading char)
632632
if (has_letters || (!has_leading_char && _get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos)))) {
633633
if (!has_letters) pos++; // consume the first letter if we didn't already
634-
634+
635635
// Continue consuming letters
636636
while (_get_flags(pos).is_letter && !unicode_cpt_is_han(_get_cpt(pos))) {
637637
pos++;
638638
}
639-
639+
640640
// Check for optional contractions (?:'s|'t|'re|'ve|'m|'ll|'d)
641641
if (_get_cpt(pos) == '\'' && pos + 1 < offset_end) {
642642
uint32_t cpt_next = unicode_tolower(_get_cpt(pos + 1));
@@ -651,7 +651,7 @@ static std::vector<size_t> unicode_regex_split_custom_kimi_k2(const std::string
651651
}
652652
}
653653
}
654-
654+
655655
_add_token(pos);
656656
continue;
657657
} else if (has_leading_char) {
@@ -851,31 +851,31 @@ bool unicode_cpt_is_han(uint32_t cpt) {
851851
// Han character ranges (Chinese/CJK characters)
852852
// CJK Unified Ideographs (most common)
853853
if (cpt >= 0x4E00 && cpt <= 0x9FFF) return true;
854-
854+
855855
// CJK Extension A
856856
if (cpt >= 0x3400 && cpt <= 0x4DBF) return true;
857-
857+
858858
// CJK Extension B
859859
if (cpt >= 0x20000 && cpt <= 0x2A6DF) return true;
860-
860+
861861
// CJK Extension C
862862
if (cpt >= 0x2A700 && cpt <= 0x2B73F) return true;
863-
863+
864864
// CJK Extension D
865865
if (cpt >= 0x2B740 && cpt <= 0x2B81F) return true;
866-
866+
867867
// CJK Extension E
868868
if (cpt >= 0x2B820 && cpt <= 0x2CEAF) return true;
869-
869+
870870
// CJK Extension F
871871
if (cpt >= 0x2CEB0 && cpt <= 0x2EBEF) return true;
872-
872+
873873
// CJK Compatibility Ideographs
874874
if (cpt >= 0xF900 && cpt <= 0xFAFF) return true;
875-
875+
876876
// CJK Compatibility Ideographs Supplement
877877
if (cpt >= 0x2F800 && cpt <= 0x2FA1F) return true;
878-
878+
879879
return false;
880880
}
881881

0 commit comments

Comments
 (0)