Skip to content

Commit 6f28eec

Browse files
authored
[libc] Fixed StringConverter Error Edge Case (llvm#149356)
Fixed StringConverter edge case related to destination limit If we call pop() but there is no space in the dest array, we should always return the "no space in destination" error even if the following character is invalid (since we shouldn't really have to look at the next character)
1 parent 13549fd commit 6f28eec

File tree

2 files changed

+63
-0
lines changed

2 files changed

+63
-0
lines changed

libc/src/__support/wchar/string_converter.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ template <typename T> class StringConverter {
5656
// TODO: following functions are almost identical
5757
// look into templating CharacterConverter pop functions
5858
ErrorOr<char32_t> popUTF32() {
59+
if (num_to_write == 0)
60+
return Error(-1);
61+
5962
if (cr.isEmpty() || src_idx == 0) {
6063
auto src_elements_read = pushFullCharacter();
6164
if (!src_elements_read.has_value())
@@ -79,6 +82,9 @@ template <typename T> class StringConverter {
7982
}
8083

8184
ErrorOr<char8_t> popUTF8() {
85+
if (num_to_write == 0)
86+
return Error(-1);
87+
8288
if (cr.isEmpty() || src_idx == 0) {
8389
auto src_elements_read = pushFullCharacter();
8490
if (!src_elements_read.has_value())

libc/test/src/__support/wchar/string_converter_test.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,63 @@ TEST(LlvmLibcStringConverterTest, UTF8To32ErrorHandling) {
245245
ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 4);
246246
}
247247

248+
TEST(LlvmLibcStringConverterTest, InvalidCharacterOutsideBounds) {
249+
// if an invalid character exists in the source string but we don't have space
250+
// to write it, we should return a "stop converting" error rather than an
251+
// invalid character error
252+
253+
// first 4 bytes are clown emoji (🤡)
254+
// next 3 form an invalid character
255+
const char *src1 = "\xF0\x9F\xA4\xA1\x90\x88\x30";
256+
LIBC_NAMESPACE::internal::mbstate ps1;
257+
LIBC_NAMESPACE::internal::StringConverter<char8_t> sc1(
258+
reinterpret_cast<const char8_t *>(src1), &ps1, 1);
259+
260+
auto res1 = sc1.popUTF32();
261+
ASSERT_TRUE(res1.has_value());
262+
ASSERT_EQ(static_cast<int>(res1.value()), 0x1f921);
263+
ASSERT_EQ(static_cast<int>(sc1.getSourceIndex()), 4);
264+
265+
res1 = sc1.popUTF32();
266+
ASSERT_FALSE(res1.has_value());
267+
// no space to write error NOT invalid character error (EILSEQ)
268+
ASSERT_EQ(static_cast<int>(res1.error()), -1);
269+
ASSERT_EQ(static_cast<int>(sc1.getSourceIndex()), 4);
270+
271+
const wchar_t src2[] = {
272+
static_cast<wchar_t>(0x1f921), static_cast<wchar_t>(0xffffff),
273+
static_cast<wchar_t>(0x0)}; // clown emoji, invalid utf32
274+
LIBC_NAMESPACE::internal::mbstate ps2;
275+
LIBC_NAMESPACE::internal::StringConverter<char32_t> sc2(
276+
reinterpret_cast<const char32_t *>(src2), &ps2, 4);
277+
278+
auto res2 = sc2.popUTF8();
279+
ASSERT_TRUE(res2.has_value());
280+
ASSERT_EQ(static_cast<int>(res2.value()), 0xF0);
281+
ASSERT_EQ(static_cast<int>(sc2.getSourceIndex()), 1);
282+
283+
res2 = sc2.popUTF8();
284+
ASSERT_TRUE(res2.has_value());
285+
ASSERT_EQ(static_cast<int>(res2.value()), 0x9F);
286+
ASSERT_EQ(static_cast<int>(sc2.getSourceIndex()), 1);
287+
288+
res2 = sc2.popUTF8();
289+
ASSERT_TRUE(res2.has_value());
290+
ASSERT_EQ(static_cast<int>(res2.value()), 0xA4);
291+
ASSERT_EQ(static_cast<int>(sc2.getSourceIndex()), 1);
292+
293+
res2 = sc2.popUTF8();
294+
ASSERT_TRUE(res2.has_value());
295+
ASSERT_EQ(static_cast<int>(res2.value()), 0xA1);
296+
ASSERT_EQ(static_cast<int>(sc2.getSourceIndex()), 1);
297+
298+
res2 = sc2.popUTF8();
299+
ASSERT_FALSE(res2.has_value());
300+
// no space to write error NOT invalid character error (EILSEQ)
301+
ASSERT_EQ(static_cast<int>(res2.error()), -1);
302+
ASSERT_EQ(static_cast<int>(sc2.getSourceIndex()), 1);
303+
}
304+
248305
TEST(LlvmLibcStringConverterTest, MultipleStringConverters32To8) {
249306
/*
250307
We do NOT test partially popping a character and expecting the next

0 commit comments

Comments
 (0)