Skip to content
Merged
Show file tree
Hide file tree
Changes from 1 commit
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 7 additions & 1 deletion libc/src/__support/wchar/string_converter.h
Original file line number Diff line number Diff line change
Expand Up @@ -56,11 +56,14 @@ template <typename T> class StringConverter {
// TODO: following functions are almost identical
// look into templating CharacterConverter pop functions
ErrorOr<char32_t> popUTF32() {
if (num_to_write == 0)
return Error(-1);

if (cr.isEmpty() || src_idx == 0) {
auto src_elements_read = pushFullCharacter();
if (!src_elements_read.has_value())
return Error(src_elements_read.error());

if (cr.sizeAsUTF32() > num_to_write) {
cr.clear();
return Error(-1);
Expand All @@ -79,6 +82,9 @@ template <typename T> class StringConverter {
}

ErrorOr<char8_t> popUTF8() {
if (num_to_write == 0)
return Error(-1);

if (cr.isEmpty() || src_idx == 0) {
auto src_elements_read = pushFullCharacter();
if (!src_elements_read.has_value())
Expand Down
57 changes: 57 additions & 0 deletions libc/test/src/__support/wchar/string_converter_test.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -245,6 +245,63 @@ TEST(LlvmLibcStringConverterTest, UTF8To32ErrorHandling) {
ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 4);
}

TEST(LlvmLibcStringConverterTest, InvalidCharacterOutsideBounds) {
  // An invalid character that lies beyond the space we are allowed to write
  // must be reported as the "stop converting" error (-1) and not as an
  // invalid character error (EILSEQ).

  // UTF-8 -> UTF-32 with room for exactly one output character.
  {
    // Bytes 0-3 encode the clown emoji (🤡); the following 3 bytes form an
    // invalid character.
    const char *utf8_input = "\xF0\x9F\xA4\xA1\x90\x88\x30";
    LIBC_NAMESPACE::internal::mbstate state;
    LIBC_NAMESPACE::internal::StringConverter<char8_t> converter(
        reinterpret_cast<const char8_t *>(utf8_input), &state, 1);

    // The single permitted character converts successfully and consumes the
    // four bytes of the emoji.
    auto popped = converter.popUTF32();
    ASSERT_TRUE(popped.has_value());
    ASSERT_EQ(static_cast<int>(popped.value()), 0x1f921);
    ASSERT_EQ(static_cast<int>(converter.getSourceIndex()), 4);

    // Out of space: expect the no-space error, NOT EILSEQ, and the source
    // index must not advance.
    popped = converter.popUTF32();
    ASSERT_FALSE(popped.has_value());
    ASSERT_EQ(static_cast<int>(popped.error()), -1);
    ASSERT_EQ(static_cast<int>(converter.getSourceIndex()), 4);
  }

  // UTF-32 -> UTF-8 with room for exactly four output bytes.
  {
    // Clown emoji, then an invalid UTF-32 value, then the terminator.
    const wchar_t utf32_input[] = {static_cast<wchar_t>(0x1f921),
                                   static_cast<wchar_t>(0xffffff),
                                   static_cast<wchar_t>(0x0)};
    LIBC_NAMESPACE::internal::mbstate state;
    LIBC_NAMESPACE::internal::StringConverter<char32_t> converter(
        reinterpret_cast<const char32_t *>(utf32_input), &state, 4);

    // The emoji comes out as four UTF-8 bytes; the source index reads 1 after
    // every one of them.
    const int expected_bytes[] = {0xF0, 0x9F, 0xA4, 0xA1};
    for (int expected : expected_bytes) {
      auto byte = converter.popUTF8();
      ASSERT_TRUE(byte.has_value());
      ASSERT_EQ(static_cast<int>(byte.value()), expected);
      ASSERT_EQ(static_cast<int>(converter.getSourceIndex()), 1);
    }

    // Out of space: expect the no-space error, NOT EILSEQ, and the source
    // index must not advance.
    auto overflow = converter.popUTF8();
    ASSERT_FALSE(overflow.has_value());
    ASSERT_EQ(static_cast<int>(overflow.error()), -1);
    ASSERT_EQ(static_cast<int>(converter.getSourceIndex()), 1);
  }
}

TEST(LlvmLibcStringConverterTest, MultipleStringConverters32To8) {
/*
We do NOT test partially popping a character and expecting the next
Expand Down
Loading