Skip to content

Commit 19fabd3

Browse files
author
Sriya Pratipati
committed
Cleaned up pushing first byte and some readability changes
1 parent 3323d0d commit 19fabd3

File tree

2 files changed

+17
-24
lines changed

2 files changed

+17
-24
lines changed

libc/src/__support/wchar/character_converter.cpp

Lines changed: 12 additions & 21 deletions
Original file line numberDiff line numberDiff line change
@@ -28,39 +28,30 @@ int CharacterConverter::push(char8_t utf8_byte) {
2828
// Checking the first byte if first push
2929
if (state->bytes_processed == 0 && state->total_bytes == 0) {
3030
state->partial = static_cast<char32_t>(0);
31-
int numOnes = cpp::countl_one(utf8_byte);
32-
switch (numOnes) {
31+
uint8_t numOnes = static_cast<uint8_t>(cpp::countl_one(utf8_byte));
3332
// 1 byte total
34-
case 0:
33+
if (numOnes == 0) {
3534
state->total_bytes = 1;
36-
break;
37-
// 2 bytes total
38-
case 2:
39-
state->total_bytes = 2;
40-
utf8_byte &= 0x1F;
41-
break;
42-
// 3 bytes total
43-
case 3:
44-
state->total_bytes = 3;
45-
utf8_byte &= 0x0F;
46-
break;
47-
// 4 bytes total
48-
case 4:
49-
state->total_bytes = 4;
50-
utf8_byte &= 0x07;
51-
break;
35+
}
36+
// 2 through 4 bytes total
37+
else if (numOnes >= 2 && numOnes <= 4) {
38+
state->total_bytes = numOnes;
39+
utf8_byte &= (0x7F >> numOnes);
40+
}
5241
// Invalid first byte
53-
default:
42+
else {
5443
return -1;
5544
}
5645
state->partial = static_cast<char32_t>(utf8_byte);
5746
state->bytes_processed++;
5847
return 0;
5948
}
6049
// Any subsequent push
50+
// Adding 6 more bits so need to left shift
51+
const int shift_amount = 6;
6152
if (cpp::countl_one(utf8_byte) == 1 && !isComplete()) {
6253
char32_t byte = utf8_byte & 0x3F;
63-
state->partial = state->partial << 6;
54+
state->partial = state->partial << shift_amount;
6455
state->partial |= byte;
6556
state->bytes_processed++;
6657
return 0;

libc/test/src/__support/wchar/utf8_to_32_test.cpp

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -31,7 +31,8 @@ TEST(LlvmLibcCharacterConverterUTF8To32Test, TwoBytes) {
3131
LIBC_NAMESPACE::internal::mbstate state;
3232
state.bytes_processed = 0;
3333
state.total_bytes = 0;
34-
const char ch[2] = {static_cast<char>(0xC2), static_cast<char>(0x8E)}; // Ž
34+
const char ch[2] = {static_cast<char>(0xC2),
35+
static_cast<char>(0x8E)}; // Ž car symbol
3536

3637
LIBC_NAMESPACE::internal::CharacterConverter char_conv(&state);
3738
char_conv.push(static_cast<char8_t>(ch[0]));
@@ -47,7 +48,7 @@ TEST(LlvmLibcCharacterConverterUTF8To32Test, ThreeBytes) {
4748
state.bytes_processed = 0;
4849
state.total_bytes = 0;
4950
const char ch[3] = {static_cast<char>(0xE2), static_cast<char>(0x88),
50-
static_cast<char>(0x91)}; // ∑
51+
static_cast<char>(0x91)}; // ∑ sigma symbol
5152

5253
LIBC_NAMESPACE::internal::CharacterConverter char_conv(&state);
5354
char_conv.push(static_cast<char8_t>(ch[0]));
@@ -64,7 +65,8 @@ TEST(LlvmLibcCharacterConverterUTF8To32Test, FourBytes) {
6465
state.bytes_processed = 0;
6566
state.total_bytes = 0;
6667
const char ch[4] = {static_cast<char>(0xF0), static_cast<char>(0x9F),
67-
static_cast<char>(0xA4), static_cast<char>(0xA1)}; // 🤡
68+
static_cast<char>(0xA4),
69+
static_cast<char>(0xA1)}; // 🤡 clown emoji
6870

6971
LIBC_NAMESPACE::internal::CharacterConverter char_conv(&state);
7072
char_conv.push(static_cast<char8_t>(ch[0]));

0 commit comments

Comments
 (0)