diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt index 86a47319f278a..db57d6488bd37 100644 --- a/libc/src/__support/wchar/CMakeLists.txt +++ b/libc/src/__support/wchar/CMakeLists.txt @@ -21,11 +21,11 @@ add_object_library( ) add_object_library( - wcrtomb + wcrtomb_bounded HDRS - wcrtomb.h + wcrtomb_bounded.h SRCS - wcrtomb.cpp + wcrtomb_bounded.cpp DEPENDS libc.hdr.errno_macros libc.hdr.types.char32_t diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb_bounded.cpp similarity index 67% rename from libc/src/__support/wchar/wcrtomb.cpp rename to libc/src/__support/wchar/wcrtomb_bounded.cpp index a74a6f3ec34a6..3355c08a938ef 100644 --- a/libc/src/__support/wchar/wcrtomb.cpp +++ b/libc/src/__support/wchar/wcrtomb_bounded.cpp @@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "src/__support/wchar/wcrtomb.h" +#include "src/__support/wchar/wcrtomb_bounded.h" #include "src/__support/error_or.h" #include "src/__support/wchar/character_converter.h" #include "src/__support/wchar/mbstate.h" @@ -21,8 +21,8 @@ namespace LIBC_NAMESPACE_DECL { namespace internal { -ErrorOr wcrtomb(char *__restrict s, wchar_t wc, - mbstate *__restrict ps) { +ErrorOr wcrtomb_bounded(char *__restrict s, wchar_t wc, + mbstate *__restrict ps, size_t max_written) { static_assert(sizeof(wchar_t) == 4); CharacterConverter cr(ps); @@ -30,15 +30,19 @@ ErrorOr wcrtomb(char *__restrict s, wchar_t wc, if (!cr.isValidState()) return Error(EINVAL); + char buf[sizeof(wchar_t) / sizeof(char)]; if (s == nullptr) - return Error(EILSEQ); + s = buf; - int status = cr.push(static_cast(wc)); - if (status != 0) - return Error(EILSEQ); + // if cr isnt empty, it should be represented in mbstate already + if (cr.isEmpty()) { + int status = cr.push(static_cast(wc)); + if (status != 0) + return Error(EILSEQ); + } size_t count = 0; - while (!cr.isEmpty()) { + while (!cr.isEmpty() && count < max_written) { auto utf8 = cr.pop_utf8(); // can never fail as long as the push succeeded LIBC_ASSERT(utf8.has_value()); @@ -46,6 +50,10 @@ ErrorOr wcrtomb(char *__restrict s, wchar_t wc, s++; count++; } + + if (!cr.isEmpty()) // didn't complete the conversion + return -1; + return count; } diff --git a/libc/src/__support/wchar/wcrtomb.h b/libc/src/__support/wchar/wcrtomb_bounded.h similarity index 68% rename from libc/src/__support/wchar/wcrtomb.h rename to libc/src/__support/wchar/wcrtomb_bounded.h index bcd39a92a3b76..fb310dcef40a8 100644 --- a/libc/src/__support/wchar/wcrtomb.h +++ b/libc/src/__support/wchar/wcrtomb_bounded.h @@ -6,8 +6,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H -#define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H +#ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_BOUNDED_H +#define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_BOUNDED_H #include "hdr/types/size_t.h" #include "hdr/types/wchar_t.h" @@ -18,9 +18,10 @@ namespace LIBC_NAMESPACE_DECL { namespace internal { -ErrorOr wcrtomb(char *__restrict s, wchar_t wc, mbstate *__restrict ps); +ErrorOr wcrtomb_bounded(char *__restrict s, wchar_t wc, + mbstate *__restrict ps, size_t max_written); } // namespace internal } // namespace LIBC_NAMESPACE_DECL -#endif // LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H +#endif // LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_BOUNDED_H diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index 16664100d42c7..cc610502f5c7d 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -44,7 +44,7 @@ add_entrypoint_object( libc.hdr.types.wchar_t libc.hdr.types.mbstate_t libc.src.__support.libc_errno - libc.src.__support.wchar.wcrtomb + libc.src.__support.wchar.wcrtomb_bounded libc.src.__support.wchar.mbstate ) @@ -56,7 +56,7 @@ add_entrypoint_object( wctomb.h DEPENDS libc.hdr.types.wchar_t - libc.src.__support.wchar.wcrtomb + libc.src.__support.wchar.wcrtomb_bounded libc.src.__support.wchar.mbstate libc.src.__support.libc_errno ) diff --git a/libc/src/wchar/wcrtomb.cpp b/libc/src/wchar/wcrtomb.cpp index 3e9df0599431e..56d03e9fe5ebf 100644 --- a/libc/src/wchar/wcrtomb.cpp +++ b/libc/src/wchar/wcrtomb.cpp @@ -14,7 +14,7 @@ #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include "src/__support/wchar/mbstate.h" -#include "src/__support/wchar/wcrtomb.h" +#include "src/__support/wchar/wcrtomb_bounded.h" namespace LIBC_NAMESPACE_DECL { @@ -23,16 +23,14 @@ LLVM_LIBC_FUNCTION(size_t, wcrtomb, static internal::mbstate internal_mbstate; // when s is nullptr, this is equivalent to wcrtomb(buf, L'\0', ps) - char buf[sizeof(wchar_t) / sizeof(char)]; - if (s == nullptr) { - s = buf; + if (s == nullptr) wc = L'\0'; - } - auto result = internal::wcrtomb( + auto result = internal::wcrtomb_bounded( s, wc, ps == nullptr ? &internal_mbstate - : reinterpret_cast(ps)); + : reinterpret_cast(ps), + sizeof(wchar_t)); if (!result.has_value()) { libc_errno = result.error(); diff --git a/libc/src/wchar/wctomb.cpp b/libc/src/wchar/wctomb.cpp index 142302e6ae09b..2b2c67f4dc535 100644 --- a/libc/src/wchar/wctomb.cpp +++ b/libc/src/wchar/wctomb.cpp @@ -13,7 +13,7 @@ #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" #include "src/__support/wchar/mbstate.h" -#include "src/__support/wchar/wcrtomb.h" +#include "src/__support/wchar/wcrtomb_bounded.h" namespace LIBC_NAMESPACE_DECL { @@ -22,7 +22,8 @@ LLVM_LIBC_FUNCTION(int, wctomb, (char *s, wchar_t wc)) { if (s == nullptr) return 0; - auto result = internal::wcrtomb(s, wc, &internal_mbstate); + auto result = + internal::wcrtomb_bounded(s, wc, &internal_mbstate, sizeof(wchar_t)); if (!result.has_value()) { // invalid wide character libc_errno = EILSEQ; diff --git a/libc/test/src/__support/wchar/CMakeLists.txt b/libc/test/src/__support/wchar/CMakeLists.txt index 5176bfd4b024b..44bf4c7038d80 100644 --- a/libc/test/src/__support/wchar/CMakeLists.txt +++ b/libc/test/src/__support/wchar/CMakeLists.txt @@ -19,3 +19,14 @@ add_libc_test( DEPENDS libc.src.__support.wchar.character_converter ) + +add_libc_test( + wcrtomb_bounded_test + SUITE + libc-support-tests + SRCS + wcrtomb_bounded_test.cpp + DEPENDS + libc.src.__support.wchar.wcrtomb_bounded + libc.src.__support.wchar.mbstate +) diff --git a/libc/test/src/__support/wchar/wcrtomb_bounded_test.cpp b/libc/test/src/__support/wchar/wcrtomb_bounded_test.cpp new file mode 100644 index 0000000000000..8ba5cfb119d29 --- /dev/null +++ b/libc/test/src/__support/wchar/wcrtomb_bounded_test.cpp @@ -0,0 +1,129 @@ +//===-- Unittests for wcrtomb_bounded -------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/errno_macros.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/wchar/mbstate.h" +#include "src/__support/wchar/wcrtomb_bounded.h" +#include "test/UnitTest/Test.h" + +// The majority of the following tests are the same as +// tests/src/wchar/wcrtomb_test.cpp + +TEST(LlvmLibcWCRToMBBoundedTest, OneByte) { + LIBC_NAMESPACE::internal::mbstate state; + wchar_t wc = L'U'; + char mb[4]; + auto result = LIBC_NAMESPACE::internal::wcrtomb_bounded(mb, wc, &state, 4); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(result.value(), static_cast(1)); + ASSERT_EQ(mb[0], 'U'); +} + +TEST(LlvmLibcWCRToMBBoundedTest, TwoByte) { + LIBC_NAMESPACE::internal::mbstate state; + // testing utf32: 0xff -> utf8: 0xc3 0xbf + wchar_t wc = 0xff; + char mb[4]; + auto result = LIBC_NAMESPACE::internal::wcrtomb_bounded(mb, wc, &state, 4); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(result.value(), static_cast(2)); + ASSERT_EQ(mb[0], static_cast(0xc3)); + ASSERT_EQ(mb[1], static_cast(0xbf)); +} + +TEST(LlvmLibcWCRToMBBoundedTest, ThreeByte) { + LIBC_NAMESPACE::internal::mbstate state; + // testing utf32: 0xac15 -> utf8: 0xea 0xb0 0x95 + wchar_t wc = 0xac15; + char mb[4]; + auto result = LIBC_NAMESPACE::internal::wcrtomb_bounded(mb, wc, &state, 4); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(result.value(), static_cast(3)); + ASSERT_EQ(mb[0], static_cast(0xea)); + ASSERT_EQ(mb[1], static_cast(0xb0)); + ASSERT_EQ(mb[2], static_cast(0x95)); +} + +TEST(LlvmLibcWCRToMBTest, FourByte) { + LIBC_NAMESPACE::internal::mbstate state; + // testing utf32: 0x1f921 -> utf8: 0xf0 0x9f 0xa4 0xa1 + wchar_t wc = 0x1f921; + char mb[4]; + auto result = LIBC_NAMESPACE::internal::wcrtomb_bounded(mb, wc, &state, 4); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(result.value(), static_cast(4)); + ASSERT_EQ(mb[0], static_cast(0xf0)); + ASSERT_EQ(mb[1], static_cast(0x9f)); + ASSERT_EQ(mb[2], static_cast(0xa4)); + ASSERT_EQ(mb[3], static_cast(0xa1)); +} + +TEST(LlvmLibcWCRToMBBoundedTest, NullString) { + LIBC_NAMESPACE::internal::mbstate state; + wchar_t wc = L'A'; + + // should return the multi byte length of the widechar + auto result = + LIBC_NAMESPACE::internal::wcrtomb_bounded(nullptr, wc, &state, 4); + + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(result.value(), static_cast(1)); +} + +TEST(LlvmLibcWCRToMBBoundedTest, InvalidWchar) { + LIBC_NAMESPACE::internal::mbstate state; + wchar_t wc = 0x12ffff; + char mb[4]; + auto result = LIBC_NAMESPACE::internal::wcrtomb_bounded(mb, wc, &state, 4); + ASSERT_FALSE(result.has_value()); + ASSERT_EQ(result.error(), EILSEQ); +} + +TEST(LlvmLibcWCRToMBBoundedTest, InvalidMBState) { + LIBC_NAMESPACE::internal::mbstate inv; + inv.total_bytes = 6; + wchar_t wc = L'A'; + char mb[4]; + auto result = LIBC_NAMESPACE::internal::wcrtomb_bounded(mb, wc, &inv, 4); + ASSERT_FALSE(result.has_value()); + ASSERT_EQ(result.error(), EINVAL); +} + +// wcrtomb_bounded unique tests + +TEST(LlvmLibcWCRToMBBoundedTest, ContinueConversion) { + LIBC_NAMESPACE::internal::mbstate state; + // utf32: 0x1f921 -> utf8: 0xf0 0x9f 0xa4 0xa1 + wchar_t wc = 0x1f921; + char mb[5] = {'\x01', '\x01', '\x01', '\x01', '\x01'}; + auto result = LIBC_NAMESPACE::internal::wcrtomb_bounded(mb, wc, &state, 1); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(result.value(), + static_cast(-1)); // conversion not completed + ASSERT_EQ(mb[0], '\xF0'); + ASSERT_EQ(mb[1], '\x01'); + + result = LIBC_NAMESPACE::internal::wcrtomb_bounded(mb + 1, wc, &state, 2); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(result.value(), + static_cast(-1)); // conversion not completed + ASSERT_EQ(mb[0], '\xF0'); + ASSERT_EQ(mb[1], '\x9F'); + ASSERT_EQ(mb[2], '\xA4'); + ASSERT_EQ(mb[3], '\x01'); + + result = LIBC_NAMESPACE::internal::wcrtomb_bounded(mb + 3, wc, &state, 100); + ASSERT_TRUE(result.has_value()); + ASSERT_EQ(result.value(), static_cast(1)); + ASSERT_EQ(mb[0], '\xF0'); + ASSERT_EQ(mb[1], '\x9F'); + ASSERT_EQ(mb[2], '\xA4'); + ASSERT_EQ(mb[3], '\xA1'); + ASSERT_EQ(mb[4], '\x01'); +} \ No newline at end of file