@@ -1232,8 +1232,9 @@ SZ_PUBLIC sz_size_t sz_utf8_case_fold_ice(sz_cptr_t source, sz_size_t source_len
12321232 _mm512_cmp_epu8_mask (_mm512_sub_epi8 (source_vec .zmm , _mm512_set1_epi8 ((char )0xAD )),
12331233 _mm512_set1_epi8 (0x02 ), _MM_CMPINT_LT )); // 0xAD-0xAE
12341234 if (!(is_e1 | is_e2_folding | is_ea_folding | is_ef )) {
1235- // Pure safe 3-byte content (E0, E3-E9, EB-EE) - no case folding needed
1236- // Just need to avoid splitting a 3-byte sequence at the end
1235+ // Safe 3-byte content (E0, E3-E9, EB-EE) - no 3-byte case folding is needed here.
1236+ // Any ASCII bytes mixed in still need folding, so use sz_ice_fold_ascii_in_prefix_.
1237+ // Also avoid splitting a 3-byte sequence at the end of the chunk.
12371238 sz_size_t copy_len = chunk_size ;
12381239 if (copy_len < 64 ) {
12391240 // Check if last 1-2 bytes are an incomplete sequence
@@ -1245,7 +1246,8 @@ SZ_PUBLIC sz_size_t sz_utf8_case_fold_ice(sz_cptr_t source, sz_size_t source_len
12451246 }
12461247 if (copy_len > 0 ) {
12471248 __mmask64 copy_mask = sz_u64_mask_until_ (copy_len );
1248- _mm512_mask_storeu_epi8 (target , copy_mask , source_vec .zmm );
1249+ _mm512_mask_storeu_epi8 (target , copy_mask ,
1250+ sz_ice_fold_ascii_in_prefix_ (source_vec .zmm , copy_mask ));
12491251 target += copy_len , source += copy_len , source_length -= copy_len ;
12501252 continue ;
12511253 }
0 commit comments