@@ -27,6 +27,7 @@ constexpr uint32_t MASK_ENCODED_BITS =
2727 mask_trailing_ones<uint32_t , ENCODED_BITS_PER_UTF8>();
2828// Maximum value for utf-32 for a utf-8 sequence of a given length
2929constexpr char32_t MAX_VALUE_PER_UTF8_LEN[] = {0x7f , 0x7ff , 0xffff , 0x10ffff };
30+ constexpr int MAX_UTF8_LENGTH = 4 ;
3031
3132CharacterConverter::CharacterConverter (mbstate *mbstate) { state = mbstate; }
3233
@@ -43,12 +44,14 @@ bool CharacterConverter::isFull() {
4344bool CharacterConverter::isEmpty () { return state->bytes_stored == 0 ; }
4445
4546bool CharacterConverter::isValidState () {
47+ if (state->total_bytes > MAX_UTF8_LENGTH) // reject before total_bytes - 1 indexes MAX_VALUE_PER_UTF8_LEN
48+ return false ;
49+
4650 const char32_t max_utf32_value =
4751 state->total_bytes == 0 ? 0
4852 : MAX_VALUE_PER_UTF8_LEN[state->total_bytes - 1 ];
4953 return state->bytes_stored <= state->total_bytes &&
50- state->bytes_stored >= 0 && state->total_bytes <= 4 &&
51- state->partial <= max_utf32_value;
54+ state->bytes_stored >= 0 && state->partial <= max_utf32_value;
5255}
5356
5457int CharacterConverter::push (char8_t utf8_byte) {
@@ -101,8 +104,7 @@ int CharacterConverter::push(char32_t utf32) {
101104 state->partial = utf32;
102105
103106 // determine number of utf-8 bytes needed to represent this utf32 value
104- constexpr int NUM_RANGES = 4 ;
105- for (uint8_t i = 0 ; i < NUM_RANGES; i++) {
107+ for (uint8_t i = 0 ; i < MAX_UTF8_LENGTH; i++) {
106108 if (state->partial <= MAX_VALUE_PER_UTF8_LEN[i]) {
107109 state->total_bytes = i + 1 ;
108110 state->bytes_stored = i + 1 ;
0 commit comments