Skip to content

Commit 708e289

Browse files
committed
cleanup
1 parent ee2172d commit 708e289

File tree

2 files changed

+30
-10
lines changed

2 files changed

+30
-10
lines changed

libc/src/__support/wchar/character_converter.cpp

Lines changed: 14 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -31,27 +31,31 @@ bool CharacterConverter::isComplete() {
3131
}
3232

3333
int CharacterConverter::push(char32_t utf32) {
34+
// a new utf32 value can only be pushed when no conversion is in progress
35+
if (!isComplete())
36+
return -1;
37+
3438
state->partial = utf32;
3539
state->bytes_processed = 0;
36-
state->total_bytes = 0;
3740

3841
// determine number of utf-8 bytes needed to represent this utf32 value
39-
constexpr char32_t ranges[] = {0x7f, 0x7ff, 0xffff, 0x10ffff};
40-
constexpr int num_ranges = 4;
41-
for (uint8_t i = 0; i < num_ranges; i++) {
42-
if (state->partial <= ranges[i]) {
42+
constexpr char32_t MAX_VALUE_PER_UTF8_LEN[] = {0x7f, 0x7ff, 0xffff, 0x10ffff};
43+
constexpr int NUM_RANGES = 4;
44+
for (uint8_t i = 0; i < NUM_RANGES; i++) {
45+
if (state->partial <= MAX_VALUE_PER_UTF8_LEN[i]) {
4346
state->total_bytes = i + 1;
44-
break;
47+
return 0;
4548
}
4649
}
47-
if (state->total_bytes == 0)
48-
return -1;
4950

50-
return 0;
51+
// `utf32` is larger than 0x10ffff, so it cannot represent a valid
52+
// Unicode code point
53+
clear();
54+
return -1;
5155
}
5256

5357
ErrorOr<char8_t> CharacterConverter::pop_utf8() {
54-
if (state->bytes_processed >= state->total_bytes)
58+
if (isComplete())
5559
return Error(-1);
5660

5761
constexpr char8_t FIRST_BYTE_HEADERS[] = {0, 0xC0, 0xE0, 0xF0};

libc/test/src/__support/wchar/utf32_to_8_test.cpp

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -162,3 +162,19 @@ TEST(LlvmLibcCharacterConverterUTF32To8Test, FourByte) {
162162
popped = cr.pop_utf8();
163163
ASSERT_FALSE(popped.has_value());
164164
}
165+
166+
TEST(LlvmLibcCharacterConverterUTF32To8Test, CantPushMidConversion) {
167+
LIBC_NAMESPACE::internal::mbstate state;
168+
LIBC_NAMESPACE::internal::CharacterConverter cr(&state);
169+
cr.clear();
170+
171+
// testing utf32: 0x12121 -> utf8: 0xf0 0x92 0x84 0xa1
172+
char32_t utf32 = 0x12121;
173+
ASSERT_EQ(cr.push(utf32), 0);
174+
auto popped = cr.pop_utf8();
175+
ASSERT_TRUE(popped.has_value());
176+
177+
// pushing another utf32 value must fail until all pending utf8 bytes are popped
178+
int err = cr.push(utf32);
179+
ASSERT_EQ(err, -1);
180+
}

0 commit comments

Comments
 (0)