Skip to content

Commit 20dbef3

Browse files
committed
Fix: Folding "中ABC" on Ice Lake
1 parent bbea84f commit 20dbef3

File tree

1 file changed

+5
-3
lines changed

1 file changed

+5
-3
lines changed

include/stringzilla/utf8_case.h

Lines changed: 5 additions & 3 deletions
Original file line number | Diff line number | Diff line change
@@ -1232,8 +1232,9 @@ SZ_PUBLIC sz_size_t sz_utf8_case_fold_ice(sz_cptr_t source, sz_size_t source_len
1232 1232
_mm512_cmp_epu8_mask(_mm512_sub_epi8(source_vec.zmm, _mm512_set1_epi8((char)0xAD)),
1233 1233
_mm512_set1_epi8(0x02), _MM_CMPINT_LT)); // 0xAD-0xAE
1234 1234
if (!(is_e1 | is_e2_folding | is_ea_folding | is_ef)) {
1235-
// Pure safe 3-byte content (E0, E3-E9, EB-EE) - no case folding needed
1236-
// Just need to avoid splitting a 3-byte sequence at the end
1235+
// Safe 3-byte content (E0, E3-E9, EB-EE) - no 3-byte case folding needed
1236+
// But ASCII mixed in still needs folding! Use sz_ice_fold_ascii_in_prefix_.
1237+
// Just need to avoid splitting a 3-byte sequence at the end.
1237 1238
sz_size_t copy_len = chunk_size;
1238 1239
if (copy_len < 64) {
1239 1240
// Check if last 1-2 bytes are an incomplete sequence
@@ -1245,7 +1246,8 @@ SZ_PUBLIC sz_size_t sz_utf8_case_fold_ice(sz_cptr_t source, sz_size_t source_len
1245 1246
}
1246 1247
if (copy_len > 0) {
1247 1248
__mmask64 copy_mask = sz_u64_mask_until_(copy_len);
1248-
_mm512_mask_storeu_epi8(target, copy_mask, source_vec.zmm);
1249+
_mm512_mask_storeu_epi8(target, copy_mask,
1250+
sz_ice_fold_ascii_in_prefix_(source_vec.zmm, copy_mask));
1249 1251
target += copy_len, source += copy_len, source_length -= copy_len;
1250 1252
continue;
1251 1253
}

0 commit comments

Comments
 (0)