1010#include " hdr/types/char8_t.h"
1111#include " src/__support/CPP/bit.h"
1212#include " src/__support/error_or.h"
13+ #include " src/__support/math_extras.h"
1314#include " src/__support/wchar/mbstate.h"
1415#include " src/__support/wchar/utf_ret.h"
1516
@@ -35,8 +36,12 @@ int CharacterConverter::push(char8_t utf8_byte) {
3536 }
3637 // 2 through 4 bytes total
3738 else if (numOnes >= 2 && numOnes <= 4 ) {
39+ /* The lead byte has the form 110xxxxx, 1110xxxx, or 11110xxx for 2-, 3-, and
40+ 4-byte sequences, so we start from a base mask of 7 ones and right shift it by numOnes to keep only the x bits. */
41+ const size_t significant_bits = 7 ;
3842 state->total_bytes = numOnes;
39- utf8_byte &= (0x7F >> numOnes);
43+ utf8_byte &=
44+ (mask_trailing_ones<uint32_t , significant_bits>() >> numOnes);
4045 }
4146 // Invalid first byte
4247 else {
@@ -48,9 +53,9 @@ int CharacterConverter::push(char8_t utf8_byte) {
4853 }
4954 // Any subsequent push
5055 // Each subsequent byte adds 6 more bits, so the partial value is left shifted by 6 first
51- const int BITS_PER_UTF8 = 6 ;
56+ const size_t BITS_PER_UTF8 = 6 ;
5257 if (cpp::countl_one (utf8_byte) == 1 && !isComplete ()) {
53- char32_t byte = utf8_byte & 0x3F ;
58+ char32_t byte = utf8_byte & mask_trailing_ones< uint32_t , BITS_PER_UTF8>() ;
5459 state->partial = state->partial << BITS_PER_UTF8;
5560 state->partial |= byte;
5661 state->bytes_processed ++;
0 commit comments