88
99#include " hdr/types/char32_t.h"
1010#include " hdr/types/char8_t.h"
11- #include " src/__support/CPP/bit.h"
1211#include " src/__support/common.h"
1312#include " src/__support/error_or.h"
1413#include " src/__support/math_extras.h"
@@ -31,49 +30,6 @@ bool CharacterConverter::isComplete() {
3130 return state->bytes_processed == state->total_bytes ;
3231}
3332
34- int CharacterConverter::push (char8_t utf8_byte) {
35- uint8_t num_ones = static_cast <uint8_t >(cpp::countl_one (utf8_byte));
36- // Checking the first byte if first push
37- if (state->bytes_processed == 0 ) {
38- // UTF-8 char has 1 byte total
39- if (num_ones == 0 ) {
40- state->total_bytes = 1 ;
41- }
42- // UTF-8 char has 2 through 4 bytes total
43- else if (num_ones >= 2 && num_ones <= 4 ) {
44- /* Since the format is 110xxxxx, 1110xxxx, and 11110xxx for 2, 3, and 4,
45- we will make the base mask with 7 ones and right shift it as necessary. */
46- constexpr size_t SIGNIFICANT_BITS = 7 ;
47- uint32_t base_mask = mask_trailing_ones<uint32_t , SIGNIFICANT_BITS>();
48- state->total_bytes = num_ones;
49- utf8_byte &= (base_mask >> num_ones);
50- }
51- // Invalid first byte
52- else {
53- // bytes_processed and total_bytes will always be 0 here
54- state->partial = static_cast <char32_t >(0 );
55- return -1 ;
56- }
57- state->partial = static_cast <char32_t >(utf8_byte);
58- state->bytes_processed ++;
59- return 0 ;
60- }
61- // Any subsequent push
62- // Adding 6 more bits so need to left shift
63- constexpr size_t ENCODED_BITS_PER_UTF8 = 6 ;
64- if (num_ones == 1 && !isComplete ()) {
65- char32_t byte =
66- utf8_byte & mask_trailing_ones<uint32_t , ENCODED_BITS_PER_UTF8>();
67- state->partial = state->partial << ENCODED_BITS_PER_UTF8;
68- state->partial |= byte;
69- state->bytes_processed ++;
70- return 0 ;
71- }
72- // Invalid byte -> reset the state
73- clear ();
74- return -1 ;
75- }
76-
7733int CharacterConverter::push (char32_t utf32) {
7834 // we can't be partially through a conversion when pushing a utf32 value
7935 if (!isComplete ())
@@ -98,17 +54,6 @@ int CharacterConverter::push(char32_t utf32) {
9854 return -1 ;
9955}
10056
101- ErrorOr<char32_t > CharacterConverter::pop_utf32 () {
102- // If pop is called too early, do not reset the state, use error to determine
103- // whether enough bytes have been pushed
104- if (!isComplete () || state->bytes_processed == 0 )
105- return Error (-1 );
106- char32_t utf32 = state->partial ;
107- // reset if successful pop
108- clear ();
109- return utf32;
110- }
111-
11257ErrorOr<char8_t > CharacterConverter::pop_utf8 () {
11358 if (isComplete ())
11459 return Error (-1 );
0 commit comments