Skip to content

Commit 73c6875

Browse files
author
git apple-llvm automerger
committed
Merge commit '6f28eec6dc68' from llvm.org/main into next
2 parents f20b51c + 6f28eec commit 73c6875

File tree

2 files changed

+63
-0
lines changed

2 files changed

+63
-0
lines changed

libc/src/__support/wchar/string_converter.h

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -56,6 +56,9 @@ template <typename T> class StringConverter {
5656
// TODO: following functions are almost identical
5757
// look into templating CharacterConverter pop functions
5858
ErrorOr<char32_t> popUTF32() {
59+
if (num_to_write == 0)
60+
return Error(-1);
61+
5962
if (cr.isEmpty() || src_idx == 0) {
6063
auto src_elements_read = pushFullCharacter();
6164
if (!src_elements_read.has_value())
@@ -79,6 +82,9 @@ template <typename T> class StringConverter {
7982
}
8083

8184
ErrorOr<char8_t> popUTF8() {
85+
if (num_to_write == 0)
86+
return Error(-1);
87+
8288
if (cr.isEmpty() || src_idx == 0) {
8389
auto src_elements_read = pushFullCharacter();
8490
if (!src_elements_read.has_value())

libc/test/src/__support/wchar/string_converter_test.cpp

Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -245,6 +245,63 @@ TEST(LlvmLibcStringConverterTest, UTF8To32ErrorHandling) {
245245
ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 4);
246246
}
247247

248+
TEST(LlvmLibcStringConverterTest, InvalidCharacterOutsideBounds) {
249+
// if an invalid character exists in the source string but we don't have space
250+
// to write it, we should return a "stop converting" error rather than an
251+
// invalid character error
252+
253+
// first 4 bytes are clown emoji (🤡)
254+
// next 3 form an invalid character
255+
const char *src1 = "\xF0\x9F\xA4\xA1\x90\x88\x30";
256+
LIBC_NAMESPACE::internal::mbstate ps1;
257+
LIBC_NAMESPACE::internal::StringConverter<char8_t> sc1(
258+
reinterpret_cast<const char8_t *>(src1), &ps1, 1);
259+
260+
auto res1 = sc1.popUTF32();
261+
ASSERT_TRUE(res1.has_value());
262+
ASSERT_EQ(static_cast<int>(res1.value()), 0x1f921);
263+
ASSERT_EQ(static_cast<int>(sc1.getSourceIndex()), 4);
264+
265+
res1 = sc1.popUTF32();
266+
ASSERT_FALSE(res1.has_value());
267+
// no space to write error NOT invalid character error (EILSEQ)
268+
ASSERT_EQ(static_cast<int>(res1.error()), -1);
269+
ASSERT_EQ(static_cast<int>(sc1.getSourceIndex()), 4);
270+
271+
const wchar_t src2[] = {
272+
static_cast<wchar_t>(0x1f921), static_cast<wchar_t>(0xffffff),
273+
static_cast<wchar_t>(0x0)}; // clown emoji, invalid utf32
274+
LIBC_NAMESPACE::internal::mbstate ps2;
275+
LIBC_NAMESPACE::internal::StringConverter<char32_t> sc2(
276+
reinterpret_cast<const char32_t *>(src2), &ps2, 4);
277+
278+
auto res2 = sc2.popUTF8();
279+
ASSERT_TRUE(res2.has_value());
280+
ASSERT_EQ(static_cast<int>(res2.value()), 0xF0);
281+
ASSERT_EQ(static_cast<int>(sc2.getSourceIndex()), 1);
282+
283+
res2 = sc2.popUTF8();
284+
ASSERT_TRUE(res2.has_value());
285+
ASSERT_EQ(static_cast<int>(res2.value()), 0x9F);
286+
ASSERT_EQ(static_cast<int>(sc2.getSourceIndex()), 1);
287+
288+
res2 = sc2.popUTF8();
289+
ASSERT_TRUE(res2.has_value());
290+
ASSERT_EQ(static_cast<int>(res2.value()), 0xA4);
291+
ASSERT_EQ(static_cast<int>(sc2.getSourceIndex()), 1);
292+
293+
res2 = sc2.popUTF8();
294+
ASSERT_TRUE(res2.has_value());
295+
ASSERT_EQ(static_cast<int>(res2.value()), 0xA1);
296+
ASSERT_EQ(static_cast<int>(sc2.getSourceIndex()), 1);
297+
298+
res2 = sc2.popUTF8();
299+
ASSERT_FALSE(res2.has_value());
300+
// no space to write error NOT invalid character error (EILSEQ)
301+
ASSERT_EQ(static_cast<int>(res2.error()), -1);
302+
ASSERT_EQ(static_cast<int>(sc2.getSourceIndex()), 1);
303+
}
304+
248305
TEST(LlvmLibcStringConverterTest, MultipleStringConverters32To8) {
249306
/*
250307
We do NOT test partially popping a character and expecting the next

0 commit comments

Comments
 (0)