Skip to content

Commit 09ca314

Browse files
committed
Fix: Micro case-fold in Georgian path
1 parent 916b23e commit 09ca314

File tree

1 file changed

+9
-0
lines changed

1 file changed

+9
-0
lines changed

include/stringzilla/utf8_case.h

Lines changed: 9 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -2999,6 +2999,15 @@ SZ_PUBLIC sz_size_t sz_utf8_case_fold_ice(sz_cptr_t source, sz_size_t source_len
2999 2999
folded = _mm512_mask_add_epi8(folded, sz_ice_is_ascii_upper_(source_vec.zmm) & prefix_mask,
3000 3000
folded, ascii_case_offset);
3001 3001

3002+
// Fold Micro Sign: C2 B5 → CE BC (U+00B5 → U+03BC)
3003+
__mmask64 c2_in_prefix = is_c2_lead & prefix_mask;
3004+
__mmask64 c2_second_pos = c2_in_prefix << 1;
3005+
__mmask64 is_micro_second =
3006+
c2_second_pos & _mm512_cmpeq_epi8_mask(source_vec.zmm, _mm512_set1_epi8((char)0xB5));
3007+
__mmask64 is_micro_lead = is_micro_second >> 1;
3008+
folded = _mm512_mask_blend_epi8(is_micro_lead, folded, _mm512_set1_epi8((char)0xCE));
3009+
folded = _mm512_mask_blend_epi8(is_micro_second, folded, _mm512_set1_epi8((char)0xBC));
3010+
3002 3011
_mm512_mask_storeu_epi8(target, prefix_mask, folded);
3003 3012
target += georgian_length, source += georgian_length, source_length -= georgian_length;
3004 3013
continue;

0 commit comments

Comments
 (0)