diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt index 6aade4ccc84a6..86a47319f278a 100644 --- a/libc/src/__support/wchar/CMakeLists.txt +++ b/libc/src/__support/wchar/CMakeLists.txt @@ -27,6 +27,7 @@ add_object_library( SRCS wcrtomb.cpp DEPENDS + libc.hdr.errno_macros libc.hdr.types.char32_t libc.hdr.types.size_t libc.hdr.types.wchar_t @@ -43,6 +44,7 @@ add_object_library( SRCS mbrtowc.cpp DEPENDS + libc.hdr.errno_macros libc.hdr.types.wchar_t libc.hdr.types.size_t libc.src.__support.common diff --git a/libc/src/__support/wchar/mbrtowc.cpp b/libc/src/__support/wchar/mbrtowc.cpp index 954c7458f4dfb..3b8f7666026c3 100644 --- a/libc/src/__support/wchar/mbrtowc.cpp +++ b/libc/src/__support/wchar/mbrtowc.cpp @@ -7,6 +7,7 @@ //===----------------------------------------------------------------------===// #include "src/__support/wchar/mbrtowc.h" +#include "hdr/errno_macros.h" #include "hdr/types/mbstate_t.h" #include "hdr/types/size_t.h" #include "hdr/types/wchar_t.h" @@ -22,6 +23,8 @@ namespace internal { ErrorOr mbrtowc(wchar_t *__restrict pwc, const char *__restrict s, size_t n, mbstate *__restrict ps) { CharacterConverter char_conv(ps); + if (!char_conv.isValidState()) + return Error(EINVAL); if (s == nullptr) return 0; size_t i = 0; @@ -30,7 +33,7 @@ ErrorOr mbrtowc(wchar_t *__restrict pwc, const char *__restrict s, int err = char_conv.push(static_cast(s[i])); // Encoding error if (err == -1) - return Error(-1); + return Error(EILSEQ); } auto wc = char_conv.pop_utf32(); if (wc.has_value()) { diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp index 8ca3d17ad6ce1..a74a6f3ec34a6 100644 --- a/libc/src/__support/wchar/wcrtomb.cpp +++ b/libc/src/__support/wchar/wcrtomb.cpp @@ -11,6 +11,7 @@ #include "src/__support/wchar/character_converter.h" #include "src/__support/wchar/mbstate.h" +#include "hdr/errno_macros.h" #include "hdr/types/char32_t.h" #include "hdr/types/size_t.h" #include "hdr/types/wchar_t.h" @@ -26,12 +27,15 @@ ErrorOr wcrtomb(char *__restrict s, wchar_t wc, CharacterConverter cr(ps); + if (!cr.isValidState()) + return Error(EINVAL); + if (s == nullptr) - return Error(-1); + return Error(EILSEQ); int status = cr.push(static_cast(wc)); if (status != 0) - return Error(status); + return Error(EILSEQ); size_t count = 0; while (!cr.isEmpty()) { diff --git a/libc/src/wchar/mbrtowc.cpp b/libc/src/wchar/mbrtowc.cpp index cd429ab8d30e2..6ecf4828da9bf 100644 --- a/libc/src/wchar/mbrtowc.cpp +++ b/libc/src/wchar/mbrtowc.cpp @@ -29,7 +29,7 @@ LLVM_LIBC_FUNCTION(size_t, mbrtowc, : reinterpret_cast(ps)); if (!ret.has_value()) { // Encoding failure - libc_errno = EILSEQ; + libc_errno = ret.error(); return -1; } return ret.value(); diff --git a/libc/src/wchar/wcrtomb.cpp b/libc/src/wchar/wcrtomb.cpp index 6d604a00599ee..3e9df0599431e 100644 --- a/libc/src/wchar/wcrtomb.cpp +++ b/libc/src/wchar/wcrtomb.cpp @@ -35,7 +35,7 @@ LLVM_LIBC_FUNCTION(size_t, wcrtomb, : reinterpret_cast(ps)); if (!result.has_value()) { - libc_errno = EILSEQ; + libc_errno = result.error(); return -1; } diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt index ddf8709a6a2a2..bf16fdd7f8c4d 100644 --- a/libc/test/src/wchar/CMakeLists.txt +++ b/libc/test/src/wchar/CMakeLists.txt @@ -31,10 +31,12 @@ add_libc_test( mbrtowc_test.cpp DEPENDS libc.src.__support.libc_errno + libc.src.__support.wchar.mbstate libc.src.string.memset libc.src.wchar.mbrtowc libc.hdr.types.mbstate_t libc.hdr.types.wchar_t + libc.test.UnitTest.ErrnoCheckingTest ) add_libc_test( @@ -72,6 +74,8 @@ add_libc_test( libc.hdr.types.wchar_t libc.hdr.types.mbstate_t libc.src.__support.libc_errno + libc.src.__support.wchar.mbstate + libc.test.UnitTest.ErrnoCheckingTest ) add_libc_test( diff --git a/libc/test/src/wchar/mbrtowc_test.cpp b/libc/test/src/wchar/mbrtowc_test.cpp index 69dcf00fde207..5a14d8e25935c 100644 --- a/libc/test/src/wchar/mbrtowc_test.cpp +++ b/libc/test/src/wchar/mbrtowc_test.cpp @@ -9,11 +9,15 @@ #include "hdr/types/mbstate_t.h" #include "hdr/types/wchar_t.h" #include "src/__support/libc_errno.h" +#include "src/__support/wchar/mbstate.h" #include "src/string/memset.h" #include "src/wchar/mbrtowc.h" +#include "test/UnitTest/ErrnoCheckingTest.h" #include "test/UnitTest/Test.h" -TEST(LlvmLibcMBRToWC, OneByte) { +using LlvmLibcMBRToWCTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest; + +TEST_F(LlvmLibcMBRToWCTest, OneByte) { const char *ch = "A"; wchar_t dest[2]; // Testing if it works with nullptr mbstate_t @@ -21,13 +25,15 @@ TEST(LlvmLibcMBRToWC, OneByte) { size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, mb); ASSERT_EQ(static_cast(*dest), 'A'); ASSERT_EQ(static_cast(n), 1); + ASSERT_ERRNO_SUCCESS(); // Should fail since we have not read enough n = LIBC_NAMESPACE::mbrtowc(dest, ch, 0, mb); ASSERT_EQ(static_cast(n), -2); + ASSERT_ERRNO_SUCCESS(); } -TEST(LlvmLibcMBRToWC, TwoByte) { +TEST_F(LlvmLibcMBRToWCTest, TwoByte) { const char ch[2] = {static_cast(0xC2), static_cast(0x8E)}; // Ž car symbol wchar_t dest[2]; @@ -36,6 +42,7 @@ TEST(LlvmLibcMBRToWC, TwoByte) { size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 2, mb); ASSERT_EQ(static_cast(*dest), 142); ASSERT_EQ(static_cast(n), 2); + ASSERT_ERRNO_SUCCESS(); // Should fail since we have not read enough n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, mb); @@ -44,9 +51,10 @@ TEST(LlvmLibcMBRToWC, TwoByte) { n = LIBC_NAMESPACE::mbrtowc(dest, ch + 1, 1, mb); ASSERT_EQ(static_cast(n), 1); ASSERT_EQ(static_cast(*dest), 142); + ASSERT_ERRNO_SUCCESS(); } -TEST(LlvmLibcMBRToWC, ThreeByte) { +TEST_F(LlvmLibcMBRToWCTest, ThreeByte) { const char ch[3] = {static_cast(0xE2), static_cast(0x88), static_cast(0x91)}; // ∑ sigma symbol wchar_t dest[2]; @@ -55,17 +63,20 @@ TEST(LlvmLibcMBRToWC, ThreeByte) { size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 3, mb); ASSERT_EQ(static_cast(*dest), 8721); ASSERT_EQ(static_cast(n), 3); + ASSERT_ERRNO_SUCCESS(); // Should fail since we have not read enough n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, mb); ASSERT_EQ(static_cast(n), -2); + ASSERT_ERRNO_SUCCESS(); // Should pass after reading two more bytes n = LIBC_NAMESPACE::mbrtowc(dest, ch + 1, 2, mb); ASSERT_EQ(static_cast(n), 2); ASSERT_EQ(static_cast(*dest), 8721); + ASSERT_ERRNO_SUCCESS(); } -TEST(LlvmLibcMBRToWC, FourByte) { +TEST_F(LlvmLibcMBRToWCTest, FourByte) { const char ch[4] = {static_cast(0xF0), static_cast(0x9F), static_cast(0xA4), static_cast(0xA1)}; // 🤡 clown emoji @@ -75,27 +86,29 @@ TEST(LlvmLibcMBRToWC, FourByte) { size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 4, mb); ASSERT_EQ(static_cast(*dest), 129313); ASSERT_EQ(static_cast(n), 4); - + ASSERT_ERRNO_SUCCESS(); // Should fail since we have not read enough n = LIBC_NAMESPACE::mbrtowc(dest, ch, 2, mb); ASSERT_EQ(static_cast(n), -2); + ASSERT_ERRNO_SUCCESS(); // Should pass after reading two more bytes n = LIBC_NAMESPACE::mbrtowc(dest, ch + 2, 2, mb); ASSERT_EQ(static_cast(n), 2); ASSERT_EQ(static_cast(*dest), 129313); + ASSERT_ERRNO_SUCCESS(); } -TEST(LlvmLibcMBRToWC, InvalidByte) { +TEST_F(LlvmLibcMBRToWCTest, InvalidByte) { const char ch[1] = {static_cast(0x80)}; wchar_t dest[2]; mbstate_t *mb; LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t)); size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, mb); ASSERT_EQ(static_cast(n), -1); - ASSERT_EQ(static_cast(libc_errno), EILSEQ); + ASSERT_ERRNO_EQ(EILSEQ); } -TEST(LlvmLibcMBRToWC, InvalidMultiByte) { +TEST_F(LlvmLibcMBRToWCTest, InvalidMultiByte) { const char ch[4] = {static_cast(0x80), static_cast(0x00), static_cast(0x80), static_cast(0x00)}; // invalid sequence of bytes @@ -105,18 +118,19 @@ TEST(LlvmLibcMBRToWC, InvalidMultiByte) { // Trying to push all 4 should error size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 4, mb); ASSERT_EQ(static_cast(n), -1); - ASSERT_EQ(static_cast(libc_errno), EILSEQ); + ASSERT_ERRNO_EQ(EILSEQ); // Trying to push just the first one should error n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, mb); ASSERT_EQ(static_cast(n), -1); - ASSERT_EQ(static_cast(libc_errno), EILSEQ); + ASSERT_ERRNO_EQ(EILSEQ); // Trying to push the second and third should correspond to null wc n = LIBC_NAMESPACE::mbrtowc(dest, ch + 1, 2, mb); ASSERT_EQ(static_cast(n), 0); ASSERT_TRUE(*dest == L'\0'); + ASSERT_ERRNO_SUCCESS(); } -TEST(LlvmLibcMBRToWC, InvalidLastByte) { +TEST_F(LlvmLibcMBRToWCTest, InvalidLastByte) { // Last byte is invalid since it does not have correct starting sequence. // 0xC0 --> 11000000 starting sequence should be 10xxxxxx const char ch[4] = {static_cast(0xF1), static_cast(0x80), @@ -127,10 +141,10 @@ TEST(LlvmLibcMBRToWC, InvalidLastByte) { // Trying to push all 4 should error size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 4, mb); ASSERT_EQ(static_cast(n), -1); - ASSERT_EQ(static_cast(libc_errno), EILSEQ); + ASSERT_ERRNO_EQ(EILSEQ); } -TEST(LlvmLibcMBRToWC, ValidTwoByteWithExtraRead) { +TEST_F(LlvmLibcMBRToWCTest, ValidTwoByteWithExtraRead) { const char ch[3] = {static_cast(0xC2), static_cast(0x8E), static_cast(0x80)}; wchar_t dest[2]; @@ -140,9 +154,10 @@ TEST(LlvmLibcMBRToWC, ValidTwoByteWithExtraRead) { size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 3, mb); ASSERT_EQ(static_cast(n), 2); ASSERT_EQ(static_cast(*dest), 142); + ASSERT_ERRNO_SUCCESS(); } -TEST(LlvmLibcMBRToWC, TwoValidTwoBytes) { +TEST_F(LlvmLibcMBRToWCTest, TwoValidTwoBytes) { const char ch[4] = {static_cast(0xC2), static_cast(0x8E), static_cast(0xC7), static_cast(0x8C)}; wchar_t dest[2]; @@ -152,12 +167,14 @@ TEST(LlvmLibcMBRToWC, TwoValidTwoBytes) { size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 2, mb); ASSERT_EQ(static_cast(n), 2); ASSERT_EQ(static_cast(*dest), 142); + ASSERT_ERRNO_SUCCESS(); n = LIBC_NAMESPACE::mbrtowc(dest + 1, ch + 2, 2, mb); ASSERT_EQ(static_cast(n), 2); ASSERT_EQ(static_cast(*(dest + 1)), 460); + ASSERT_ERRNO_SUCCESS(); } -TEST(LlvmLibcMBRToWC, NullString) { +TEST_F(LlvmLibcMBRToWCTest, NullString) { wchar_t dest[2] = {L'O', L'K'}; mbstate_t *mb; LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t)); @@ -165,8 +182,24 @@ TEST(LlvmLibcMBRToWC, NullString) { size_t n = LIBC_NAMESPACE::mbrtowc(dest, nullptr, 2, mb); ASSERT_EQ(static_cast(n), 0); ASSERT_TRUE(dest[0] == L'O'); + ASSERT_ERRNO_SUCCESS(); // reading a null terminator should return 0 const char *ch = "\0"; n = LIBC_NAMESPACE::mbrtowc(dest, ch, 1, mb); ASSERT_EQ(static_cast(n), 0); + ASSERT_ERRNO_SUCCESS(); +} + +TEST_F(LlvmLibcMBRToWCTest, InvalidMBState) { + const char ch[4] = {static_cast(0xC2), static_cast(0x8E), + static_cast(0xC7), static_cast(0x8C)}; + wchar_t dest[2] = {L'O', L'K'}; + mbstate_t *mb; + LIBC_NAMESPACE::internal::mbstate inv; + inv.total_bytes = 6; + mb = reinterpret_cast(&inv); + // invalid mbstate should error + size_t n = LIBC_NAMESPACE::mbrtowc(dest, ch, 2, mb); + ASSERT_EQ(static_cast(n), -1); + ASSERT_ERRNO_EQ(EINVAL); } diff --git a/libc/test/src/wchar/wcrtomb_test.cpp b/libc/test/src/wchar/wcrtomb_test.cpp index c06b39ae0143f..b29624e87f7af 100644 --- a/libc/test/src/wchar/wcrtomb_test.cpp +++ b/libc/test/src/wchar/wcrtomb_test.cpp @@ -9,11 +9,15 @@ #include "hdr/types/mbstate_t.h" #include "hdr/types/wchar_t.h" #include "src/__support/libc_errno.h" +#include "src/__support/wchar/mbstate.h" #include "src/string/memset.h" #include "src/wchar/wcrtomb.h" +#include "test/UnitTest/ErrnoCheckingTest.h" #include "test/UnitTest/Test.h" -TEST(LlvmLibcWCRToMBTest, OneByte) { +using LlvmLibcWCRToMBTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest; + +TEST_F(LlvmLibcWCRToMBTest, OneByte) { mbstate_t state; LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); wchar_t wc = L'U'; @@ -21,9 +25,10 @@ TEST(LlvmLibcWCRToMBTest, OneByte) { size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state); ASSERT_EQ(cnt, static_cast(1)); ASSERT_EQ(mb[0], 'U'); + ASSERT_ERRNO_SUCCESS(); } -TEST(LlvmLibcWCRToMBTest, TwoByte) { +TEST_F(LlvmLibcWCRToMBTest, TwoByte) { mbstate_t state; LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); // testing utf32: 0xff -> utf8: 0xc3 0xbf @@ -33,9 +38,10 @@ TEST(LlvmLibcWCRToMBTest, TwoByte) { ASSERT_EQ(cnt, static_cast(2)); ASSERT_EQ(mb[0], static_cast(0xc3)); ASSERT_EQ(mb[1], static_cast(0xbf)); + ASSERT_ERRNO_SUCCESS(); } -TEST(LlvmLibcWCRToMBTest, ThreeByte) { +TEST_F(LlvmLibcWCRToMBTest, ThreeByte) { mbstate_t state; LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); // testing utf32: 0xac15 -> utf8: 0xea 0xb0 0x95 @@ -46,9 +52,10 @@ TEST(LlvmLibcWCRToMBTest, ThreeByte) { ASSERT_EQ(mb[0], static_cast(0xea)); ASSERT_EQ(mb[1], static_cast(0xb0)); ASSERT_EQ(mb[2], static_cast(0x95)); + ASSERT_ERRNO_SUCCESS(); } -TEST(LlvmLibcWCRToMBTest, FourByte) { +TEST_F(LlvmLibcWCRToMBTest, FourByte) { mbstate_t state; LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); // testing utf32: 0x1f921 -> utf8: 0xf0 0x9f 0xa4 0xa1 @@ -60,9 +67,10 @@ TEST(LlvmLibcWCRToMBTest, FourByte) { ASSERT_EQ(mb[1], static_cast(0x9f)); ASSERT_EQ(mb[2], static_cast(0xa4)); ASSERT_EQ(mb[3], static_cast(0xa1)); + ASSERT_ERRNO_SUCCESS(); } -TEST(LlvmLibcWCRToMBTest, NullString) { +TEST_F(LlvmLibcWCRToMBTest, NullString) { mbstate_t state; LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); wchar_t wc = L'A'; @@ -70,24 +78,39 @@ TEST(LlvmLibcWCRToMBTest, NullString) { // should be equivalent to the call wcrtomb(buf, L'\0', state) size_t cnt1 = LIBC_NAMESPACE::wcrtomb(nullptr, wc, &state); + ASSERT_ERRNO_SUCCESS(); size_t cnt2 = LIBC_NAMESPACE::wcrtomb(mb, L'\0', &state); + ASSERT_ERRNO_SUCCESS(); ASSERT_EQ(cnt1, cnt2); } -TEST(LlvmLibcWCRToMBTest, NullState) { +TEST_F(LlvmLibcWCRToMBTest, NullState) { wchar_t wc = L'A'; char mb[4]; size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, nullptr); + ASSERT_ERRNO_SUCCESS(); ASSERT_EQ(cnt, static_cast(1)); } -TEST(LlvmLibcWCRToMBTest, InvalidWchar) { +TEST_F(LlvmLibcWCRToMBTest, InvalidWchar) { mbstate_t state; LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); wchar_t wc = 0x12ffff; char mb[4]; size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state); ASSERT_EQ(cnt, static_cast(-1)); - ASSERT_EQ(static_cast(libc_errno), EILSEQ); + ASSERT_ERRNO_EQ(EILSEQ); +} + +TEST_F(LlvmLibcWCRToMBTest, InvalidMBState) { + mbstate_t *state; + LIBC_NAMESPACE::internal::mbstate inv; + inv.total_bytes = 6; + state = reinterpret_cast(&inv); + wchar_t wc = L'A'; + char mb[4]; + size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, state); + ASSERT_EQ(cnt, static_cast(-1)); + ASSERT_ERRNO_EQ(EINVAL); }