diff --git a/libc/src/__support/wchar/string_converter.h b/libc/src/__support/wchar/string_converter.h index 0635bc57bf3e2..6e52e2c82ba1d 100644 --- a/libc/src/__support/wchar/string_converter.h +++ b/libc/src/__support/wchar/string_converter.h @@ -26,6 +26,7 @@ template class StringConverter { const T *src; size_t src_len; size_t src_idx; + bool completed = false; // # of pops we are allowed to perform (essentially size of the dest buffer) size_t num_to_write; @@ -70,8 +71,10 @@ template class StringConverter { } auto out = cr.pop_utf32(); - if (out.has_value() && out.value() == L'\0') + if (out.has_value() && out.value() == L'\0') { + completed = true; src_len = src_idx; + } num_to_write--; @@ -93,8 +96,10 @@ template class StringConverter { } auto out = cr.pop_utf8(); - if (out.has_value() && out.value() == '\0') + if (out.has_value() && out.value() == '\0') { + completed = true; src_len = src_idx; + } num_to_write--; @@ -102,6 +107,8 @@ template class StringConverter { } size_t getSourceIndex() { return src_idx; } + + bool isConversionComplete() { return completed; } }; } // namespace internal diff --git a/libc/test/src/__support/wchar/string_converter_test.cpp b/libc/test/src/__support/wchar/string_converter_test.cpp index 14d074156d033..548baaf4bf655 100644 --- a/libc/test/src/__support/wchar/string_converter_test.cpp +++ b/libc/test/src/__support/wchar/string_converter_test.cpp @@ -38,26 +38,31 @@ TEST(LlvmLibcStringConverterTest, UTF8To32) { ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x1f921); ASSERT_EQ(static_cast(sc.getSourceIndex()), 4); + ASSERT_FALSE(sc.isConversionComplete()); res = sc.popUTF32(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x2211); ASSERT_EQ(static_cast(sc.getSourceIndex()), 7); + ASSERT_FALSE(sc.isConversionComplete()); res = sc.popUTF32(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xff); ASSERT_EQ(static_cast(sc.getSourceIndex()), 9); + ASSERT_FALSE(sc.isConversionComplete()); res = sc.popUTF32(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x41); ASSERT_EQ(static_cast(sc.getSourceIndex()), 10); + ASSERT_FALSE(sc.isConversionComplete()); res = sc.popUTF32(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0); ASSERT_EQ(static_cast(sc.getSourceIndex()), 11); + ASSERT_TRUE(sc.isConversionComplete()); res = sc.popUTF32(); ASSERT_FALSE(res.has_value()); @@ -79,60 +84,71 @@ TEST(LlvmLibcStringConverterTest, UTF32To8) { ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xF0); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); + ASSERT_FALSE(sc.isConversionComplete()); res = sc.popUTF8(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x9F); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); + ASSERT_FALSE(sc.isConversionComplete()); res = sc.popUTF8(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xA4); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); + ASSERT_FALSE(sc.isConversionComplete()); res = sc.popUTF8(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xA1); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); + ASSERT_FALSE(sc.isConversionComplete()); // end of clown emoji, sigma symbol begins res = sc.popUTF8(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xE2); ASSERT_EQ(static_cast(sc.getSourceIndex()), 2); + ASSERT_FALSE(sc.isConversionComplete()); res = sc.popUTF8(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x88); ASSERT_EQ(static_cast(sc.getSourceIndex()), 2); + ASSERT_FALSE(sc.isConversionComplete()); res = sc.popUTF8(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x91); ASSERT_EQ(static_cast(sc.getSourceIndex()), 2); + ASSERT_FALSE(sc.isConversionComplete()); // end of sigma symbol, y with diaeresis begins res = sc.popUTF8(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xC3); ASSERT_EQ(static_cast(sc.getSourceIndex()), 3); + ASSERT_FALSE(sc.isConversionComplete()); res = sc.popUTF8(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xBF); ASSERT_EQ(static_cast(sc.getSourceIndex()), 3); + ASSERT_FALSE(sc.isConversionComplete()); // end of y with diaeresis, letter A begins res = sc.popUTF8(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x41); ASSERT_EQ(static_cast(sc.getSourceIndex()), 4); + ASSERT_FALSE(sc.isConversionComplete()); // null byte res = sc.popUTF8(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0); ASSERT_EQ(static_cast(sc.getSourceIndex()), 5); + ASSERT_TRUE(sc.isConversionComplete()); res = sc.popUTF8(); ASSERT_FALSE(res.has_value());