From bc5e83f4f84479babd932cc0b2632d028eb47508 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Tue, 17 Jun 2025 18:55:20 +0000 Subject: [PATCH 01/12] Implemented wcrtomb internal and public function --- libc/config/linux/x86_64/entrypoints.txt | 1 + libc/hdr/types/CMakeLists.txt | 8 ++ libc/hdr/types/mbstate_t.h | 14 +++ libc/include/llvm-libc-types/mbstate_t.h | 6 +- libc/include/wchar.yaml | 8 ++ libc/src/__support/wchar/CMakeLists.txt | 16 ++++ .../__support/wchar/character_converter.cpp | 3 +- libc/src/__support/wchar/wcrtomb.cpp | 48 ++++++++++ libc/src/__support/wchar/wcrtomb.h | 27 ++++++ libc/src/wchar/CMakeLists.txt | 13 +++ libc/src/wchar/wcrtomb.cpp | 34 +++++++ libc/src/wchar/wcrtomb.h | 23 +++++ libc/test/src/wchar/CMakeLists.txt | 14 +++ libc/test/src/wchar/wcrtomb_test.cpp | 93 +++++++++++++++++++ 14 files changed, 306 insertions(+), 2 deletions(-) create mode 100644 libc/hdr/types/mbstate_t.h create mode 100644 libc/src/__support/wchar/wcrtomb.cpp create mode 100644 libc/src/__support/wchar/wcrtomb.h create mode 100644 libc/src/wchar/wcrtomb.cpp create mode 100644 libc/src/wchar/wcrtomb.h create mode 100644 libc/test/src/wchar/wcrtomb_test.cpp diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index aa2079faed409..0373c23db04e5 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -365,6 +365,7 @@ set(TARGET_LIBC_ENTRYPOINTS # wchar.h entrypoints libc.src.wchar.btowc + libc.src.wchar.wcrtomb libc.src.wchar.wcslen libc.src.wchar.wctob libc.src.wchar.wmemmove diff --git a/libc/hdr/types/CMakeLists.txt b/libc/hdr/types/CMakeLists.txt index c88c357009072..58f500a6343f6 100644 --- a/libc/hdr/types/CMakeLists.txt +++ b/libc/hdr/types/CMakeLists.txt @@ -20,6 +20,14 @@ add_proxy_header_library( libc.include.uchar ) +add_proxy_header_library( + mbstate_t + HDRS + mbstate_t.h + DEPENDS + libc.include.llvm-libc-types.mbstate_t +) + add_proxy_header_library( div_t HDRS diff --git a/libc/hdr/types/mbstate_t.h b/libc/hdr/types/mbstate_t.h new file mode 100644 index 0000000000000..1e232af08e4db --- /dev/null +++ b/libc/hdr/types/mbstate_t.h @@ -0,0 +1,14 @@ +//===-- Definition of macros from mbstate_t.h -----------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_HDR_TYPES_MBSTATE_T_H +#define LLVM_LIBC_HDR_TYPES_MBSTATE_T_H + +#include "include/llvm-libc-types/mbstate_t.h" + +#endif // LLVM_LIBC_HDR_TYPES_MBSTATE_T_H diff --git a/libc/include/llvm-libc-types/mbstate_t.h b/libc/include/llvm-libc-types/mbstate_t.h index 540d50975a264..40e693355dcd0 100644 --- a/libc/include/llvm-libc-types/mbstate_t.h +++ b/libc/include/llvm-libc-types/mbstate_t.h @@ -9,8 +9,12 @@ #ifndef LLVM_LIBC_TYPES_MBSTATE_T_H #define LLVM_LIBC_TYPES_MBSTATE_T_H -// TODO: Complete this once we implement functions that operate on this type. +#include "../llvm-libc-macros/stdint-macros.h" + typedef struct { + uint32_t __field1; + uint8_t __field2; + uint8_t __field3; } mbstate_t; #endif // LLVM_LIBC_TYPES_MBSTATE_T_H diff --git a/libc/include/wchar.yaml b/libc/include/wchar.yaml index 84db73d8f01ea..3c373f8ec34c0 100644 --- a/libc/include/wchar.yaml +++ b/libc/include/wchar.yaml @@ -150,6 +150,14 @@ functions: - type: wchar_t *__restrict - type: const wchar_t *__restrict - type: size_t + - name: wcrtomb + standards: + - stdc + return_type: size_t + arguments: + - type: char *__restrict + - type: wchar_t + - type: mbstate_t *__restrict - name: wcscpy standards: - stdc diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt index 6715e354e23e5..e4a3bfd7b9c5c 100644 --- a/libc/src/__support/wchar/CMakeLists.txt +++ b/libc/src/__support/wchar/CMakeLists.txt @@ -19,3 +19,19 @@ add_object_library( libc.src.__support.math_extras .mbstate ) + +add_object_library( + wcrtomb + HDRS + wcrtomb.h + SRCS + wcrtomb.cpp + DEPENDS + libc.hdr.types.char32_t + libc.hdr.types.size_t + libc.hdr.types.mbstate_t + libc.hdr.types.wchar_t + libc.src.__support.error_or + .character_converter + .mbstate +) diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp index ca709769616c3..fc8c9aaaa3d50 100644 --- a/libc/src/__support/wchar/character_converter.cpp +++ b/libc/src/__support/wchar/character_converter.cpp @@ -12,6 +12,7 @@ #include "src/__support/error_or.h" #include "src/__support/math_extras.h" #include "src/__support/wchar/mbstate.h" +#include "src/__support/libc_errno.h" // for error numbers #include "character_converter.h" @@ -51,7 +52,7 @@ int CharacterConverter::push(char32_t utf32) { // `utf32` contains a value that is too large to actually represent a valid // unicode character clear(); - return -1; + return EILSEQ; } ErrorOr CharacterConverter::pop_utf8() { diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp new file mode 100644 index 0000000000000..abd18aa391d75 --- /dev/null +++ b/libc/src/__support/wchar/wcrtomb.cpp @@ -0,0 +1,48 @@ +//===-- Implementation of wcrtomb -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/__support/wchar/wcrtomb.h" +#include "src/__support/error_or.h" +#include "src/__support/wchar/character_converter.h" +#include "src/__support/wchar/mbstate.h" + +#include "hdr/types/char32_t.h" +#include "hdr/types/mbstate_t.h" +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/common.h" + +namespace LIBC_NAMESPACE_DECL { +namespace internal { + +ErrorOr wcrtomb(char *__restrict s, wchar_t wc, + mbstate_t *__restrict ps) { + CharacterConverter cr((internal::mbstate *)ps); + + char buf[sizeof(wchar_t) / sizeof(char)]; + if (s == nullptr) { + s = buf; + wc = L'\0'; + } + + int status = cr.push((char32_t)wc); + if (status != 0) + return Error(status); + + size_t count = 0; + while (!cr.isComplete()) { + auto utf8 = cr.pop_utf8(); // can never fail as long as the push succeeded + *s = utf8.value(); + s++; + count++; + } + return count; +} + +} // namespace internal +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/__support/wchar/wcrtomb.h b/libc/src/__support/wchar/wcrtomb.h new file mode 100644 index 0000000000000..35a472548b67a --- /dev/null +++ b/libc/src/__support/wchar/wcrtomb.h @@ -0,0 +1,27 @@ +//===-- Implementation header for wcrtomb ---------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H +#define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H + +#include "hdr/types/mbstate_t.h" +#include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/error_or.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { +namespace internal { + +ErrorOr wcrtomb(char *__restrict s, wchar_t wc, + mbstate_t *__restrict ps); + +} // namespace internal +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index 491dd5b34340a..a106a8ee9aa41 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -34,6 +34,19 @@ add_entrypoint_object( libc.src.__support.wctype_utils ) +add_entrypoint_object( + wcrtomb + SRCS + wcrtomb.cpp + HDRS + wcrtomb.h + DEPENDS + libc.hdr.types.wchar_t + libc.hdr.types.mbstate_t + libc.src.__support.libc_errno + libc.src.__support.wchar.wcrtomb +) + add_entrypoint_object( wmemset SRCS diff --git a/libc/src/wchar/wcrtomb.cpp b/libc/src/wchar/wcrtomb.cpp new file mode 100644 index 0000000000000..82c813747e0f6 --- /dev/null +++ b/libc/src/wchar/wcrtomb.cpp @@ -0,0 +1,34 @@ +//===-- Implementation of wcrtomb -----------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "src/wchar/wcrtomb.h" + +#include "hdr/types/mbstate_t.h" +#include "src/__support/common.h" +#include "src/__support/libc_errno.h" +#include "src/__support/macros/config.h" +#include "src/__support/wchar/wcrtomb.h" + +namespace LIBC_NAMESPACE_DECL { + +LLVM_LIBC_FUNCTION(size_t, wcrtomb, + (char *__restrict s, wchar_t wc, mbstate_t *__restrict ps)) { + static mbstate_t internal_mbstate{0, 0, 0}; + + auto result = + internal::wcrtomb(s, wc, ps == nullptr ? &internal_mbstate : ps); + + if (!result.has_value()) { + libc_errno = result.error(); + return -1; + } + + return result.value(); +} + +} // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/wcrtomb.h b/libc/src/wchar/wcrtomb.h new file mode 100644 index 0000000000000..3cfb1a6f2dc84 --- /dev/null +++ b/libc/src/wchar/wcrtomb.h @@ -0,0 +1,23 @@ +//===-- Implementation header for wcrtomb -----------------------*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIBC_SRC_WCHAR_WCRTOMB_H +#define LLVM_LIBC_SRC_WCHAR_WCRTOMB_H + +#include "hdr/types/wchar_t.h" +#include "hdr/types/mbstate_t.h" +#include "hdr/types/size_t.h" +#include "src/__support/macros/config.h" + +namespace LIBC_NAMESPACE_DECL { + +size_t wcrtomb(char *__restrict s, wchar_t wc, mbstate_t *__restrict ps); + +} // namespace LIBC_NAMESPACE_DECL + +#endif // LLVM_LIBC_SRC_WCHAR_WCRTOMB_H diff --git a/libc/test/src/wchar/CMakeLists.txt b/libc/test/src/wchar/CMakeLists.txt index 4990b6953348b..c4f51b5f94a12 100644 --- a/libc/test/src/wchar/CMakeLists.txt +++ b/libc/test/src/wchar/CMakeLists.txt @@ -33,6 +33,20 @@ add_libc_test( libc.src.wchar.wctob ) +add_libc_test( + wcrtomb_test + SUITE + libc_wchar_unittests + SRCS + wcrtomb_test.cpp + DEPENDS + libc.src.wchar.wcrtomb + libc.src.string.memset + libc.hdr.types.wchar_t + libc.hdr.types.mbstate_t + libc.src.__support.libc_errno +) + add_libc_test( wmemset_test SUITE diff --git a/libc/test/src/wchar/wcrtomb_test.cpp b/libc/test/src/wchar/wcrtomb_test.cpp new file mode 100644 index 0000000000000..be249f4f6dae4 --- /dev/null +++ b/libc/test/src/wchar/wcrtomb_test.cpp @@ -0,0 +1,93 @@ +//===-- Unittests for wcrtomb --------------------------------------------===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "hdr/types/wchar_t.h" +#include "hdr/types/mbstate_t.h" +#include "src/wchar/wcrtomb.h" +#include "src/string/memset.h" +#include "test/UnitTest/Test.h" +#include "src/__support/libc_errno.h" + +TEST(LlvmLibcWCRToMBTest, OneByte) { + mbstate_t state; + LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t)); + wchar_t wc = L'U'; + char mb[4]; + size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state); + ASSERT_EQ(cnt, static_cast(1)); + ASSERT_EQ(mb[0], 'U'); +} + +TEST(LlvmLibcWCRToMBTest, TwoByte) { + mbstate_t state; + LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t)); + // testing utf32: 0xff -> utf8: 0xc3 0xbf + wchar_t wc = 0xff; + char mb[4]; + size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state); + ASSERT_EQ(cnt, static_cast(2)); + ASSERT_EQ(mb[0], static_cast(0xc3)); + ASSERT_EQ(mb[1], static_cast(0xbf)); +} + +TEST(LlvmLibcWCRToMBTest, ThreeByte) { + mbstate_t state; + LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t)); + // testing utf32: 0xac15 -> utf8: 0xea 0xb0 0x95 + wchar_t wc = 0xac15; + char mb[4]; + size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state); + ASSERT_EQ(cnt, static_cast(3)); + ASSERT_EQ(mb[0], static_cast(0xea)); + ASSERT_EQ(mb[1], static_cast(0xb0)); + ASSERT_EQ(mb[2], static_cast(0x95)); +} + +TEST(LlvmLibcWCRToMBTest, FourByte) { + mbstate_t state; + LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t)); + // testing utf32: 0x1f921 -> utf8: 0xf0 0x9f 0xa4 0xa1 + wchar_t wc = 0x1f921; + char mb[4]; + size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state); + ASSERT_EQ(cnt, static_cast(4)); + ASSERT_EQ(mb[0], static_cast(0xf0)); + ASSERT_EQ(mb[1], static_cast(0x9f)); + ASSERT_EQ(mb[2], static_cast(0xa4)); + ASSERT_EQ(mb[3], static_cast(0xa1)); +} + +TEST(LlvmLibcWCRToMBTest, NullString) { + mbstate_t state; + LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t)); + wchar_t wc = L'A'; + char mb[4]; + + // should be equivalent to the call wcrtomb(buf, L'\0', state) + size_t cnt1 = LIBC_NAMESPACE::wcrtomb(nullptr, wc, &state); + size_t cnt2 = LIBC_NAMESPACE::wcrtomb(mb, L'\0', &state); + + ASSERT_EQ(cnt1, cnt2); +} + +TEST(LlvmLibcWCRToMBTest, NullState) { + wchar_t wc = L'A'; + char mb[4]; + size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, nullptr); + ASSERT_EQ(cnt, static_cast(1)); +} + +TEST(LlvmLibcWCRToMBTest, InvalidWchar) { + mbstate_t state; + LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t)); + wchar_t wc = 0x12ffff; + char mb[4]; + size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state); + ASSERT_EQ(cnt, static_cast(-1)); + ASSERT_EQ(static_cast(libc_errno), EILSEQ); +} From bb072dff5747e5e110d08838c57f846e56420634 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Tue, 17 Jun 2025 20:21:14 +0000 Subject: [PATCH 02/12] formatting --- libc/include/llvm-libc-types/mbstate_t.h | 6 +++--- .../__support/wchar/character_converter.cpp | 2 +- libc/src/__support/wchar/wcrtomb.cpp | 2 +- libc/test/src/wchar/wcrtomb_test.cpp | 18 +++++++++--------- 4 files changed, 14 insertions(+), 14 deletions(-) diff --git a/libc/include/llvm-libc-types/mbstate_t.h b/libc/include/llvm-libc-types/mbstate_t.h index 40e693355dcd0..009fe57da50e2 100644 --- a/libc/include/llvm-libc-types/mbstate_t.h +++ b/libc/include/llvm-libc-types/mbstate_t.h @@ -12,9 +12,9 @@ #include "../llvm-libc-macros/stdint-macros.h" typedef struct { - uint32_t __field1; - uint8_t __field2; - uint8_t __field3; + uint32_t __field1; + uint8_t __field2; + uint8_t __field3; } mbstate_t; #endif // LLVM_LIBC_TYPES_MBSTATE_T_H diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp index fc8c9aaaa3d50..61b3a4abd24fd 100644 --- a/libc/src/__support/wchar/character_converter.cpp +++ b/libc/src/__support/wchar/character_converter.cpp @@ -10,9 +10,9 @@ #include "hdr/types/char8_t.h" #include "src/__support/common.h" #include "src/__support/error_or.h" +#include "src/__support/libc_errno.h" // for error numbers #include "src/__support/math_extras.h" #include "src/__support/wchar/mbstate.h" -#include "src/__support/libc_errno.h" // for error numbers #include "character_converter.h" diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp index abd18aa391d75..847ffd162afc0 100644 --- a/libc/src/__support/wchar/wcrtomb.cpp +++ b/libc/src/__support/wchar/wcrtomb.cpp @@ -21,7 +21,7 @@ namespace LIBC_NAMESPACE_DECL { namespace internal { ErrorOr wcrtomb(char *__restrict s, wchar_t wc, - mbstate_t *__restrict ps) { + mbstate_t *__restrict ps) { CharacterConverter cr((internal::mbstate *)ps); char buf[sizeof(wchar_t) / sizeof(char)]; diff --git a/libc/test/src/wchar/wcrtomb_test.cpp b/libc/test/src/wchar/wcrtomb_test.cpp index be249f4f6dae4..c06b39ae0143f 100644 --- a/libc/test/src/wchar/wcrtomb_test.cpp +++ b/libc/test/src/wchar/wcrtomb_test.cpp @@ -6,16 +6,16 @@ // //===----------------------------------------------------------------------===// -#include "hdr/types/wchar_t.h" #include "hdr/types/mbstate_t.h" -#include "src/wchar/wcrtomb.h" +#include "hdr/types/wchar_t.h" +#include "src/__support/libc_errno.h" #include "src/string/memset.h" +#include "src/wchar/wcrtomb.h" #include "test/UnitTest/Test.h" -#include "src/__support/libc_errno.h" TEST(LlvmLibcWCRToMBTest, OneByte) { mbstate_t state; - LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t)); + LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); wchar_t wc = L'U'; char mb[4]; size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state); @@ -25,7 +25,7 @@ TEST(LlvmLibcWCRToMBTest, OneByte) { TEST(LlvmLibcWCRToMBTest, TwoByte) { mbstate_t state; - LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t)); + LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); // testing utf32: 0xff -> utf8: 0xc3 0xbf wchar_t wc = 0xff; char mb[4]; @@ -37,7 +37,7 @@ TEST(LlvmLibcWCRToMBTest, TwoByte) { TEST(LlvmLibcWCRToMBTest, ThreeByte) { mbstate_t state; - LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t)); + LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); // testing utf32: 0xac15 -> utf8: 0xea 0xb0 0x95 wchar_t wc = 0xac15; char mb[4]; @@ -50,7 +50,7 @@ TEST(LlvmLibcWCRToMBTest, ThreeByte) { TEST(LlvmLibcWCRToMBTest, FourByte) { mbstate_t state; - LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t)); + LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); // testing utf32: 0x1f921 -> utf8: 0xf0 0x9f 0xa4 0xa1 wchar_t wc = 0x1f921; char mb[4]; @@ -64,7 +64,7 @@ TEST(LlvmLibcWCRToMBTest, FourByte) { TEST(LlvmLibcWCRToMBTest, NullString) { mbstate_t state; - LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t)); + LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); wchar_t wc = L'A'; char mb[4]; @@ -84,7 +84,7 @@ TEST(LlvmLibcWCRToMBTest, NullState) { TEST(LlvmLibcWCRToMBTest, InvalidWchar) { mbstate_t state; - LIBC_NAMESPACE::memset(&state, 0, sizeof (mbstate_t)); + LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t)); wchar_t wc = 0x12ffff; char mb[4]; size_t cnt = LIBC_NAMESPACE::wcrtomb(mb, wc, &state); From a16caf9834ad4001f7410b34ee613230cc3cf43d Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Tue, 17 Jun 2025 20:34:25 +0000 Subject: [PATCH 03/12] formatting --- libc/src/wchar/wcrtomb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/src/wchar/wcrtomb.h b/libc/src/wchar/wcrtomb.h index 3cfb1a6f2dc84..06c42f158122c 100644 --- a/libc/src/wchar/wcrtomb.h +++ b/libc/src/wchar/wcrtomb.h @@ -9,9 +9,9 @@ #ifndef LLVM_LIBC_SRC_WCHAR_WCRTOMB_H #define LLVM_LIBC_SRC_WCHAR_WCRTOMB_H -#include "hdr/types/wchar_t.h" #include "hdr/types/mbstate_t.h" #include "hdr/types/size_t.h" +#include "hdr/types/wchar_t.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { From fc563be2635bb8247e56ad325da674fe639ffb18 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Tue, 17 Jun 2025 20:59:08 +0000 Subject: [PATCH 04/12] fix mbstate_t in overlay mode --- libc/hdr/types/mbstate_t.h | 8 ++++++++ 1 file changed, 8 insertions(+) diff --git a/libc/hdr/types/mbstate_t.h b/libc/hdr/types/mbstate_t.h index 1e232af08e4db..15b2614341d7d 100644 --- a/libc/hdr/types/mbstate_t.h +++ b/libc/hdr/types/mbstate_t.h @@ -9,6 +9,14 @@ #ifndef LLVM_LIBC_HDR_TYPES_MBSTATE_T_H #define LLVM_LIBC_HDR_TYPES_MBSTATE_T_H +#ifdef LIBC_FULL_BUILD + #include "include/llvm-libc-types/mbstate_t.h" +#else // Overlay mode + +#include "hdr/wchar_overlay.h" + +#endif // LLVM_LIBC_FULL_BUILD + #endif // LLVM_LIBC_HDR_TYPES_MBSTATE_T_H From 094d2c4b8a40076aaa31ef0d32f34aa1579b6450 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Wed, 18 Jun 2025 16:46:26 +0000 Subject: [PATCH 05/12] moved libc error code to public function for consistency --- libc/src/__support/wchar/CMakeLists.txt | 1 + libc/src/__support/wchar/character_converter.cpp | 3 +-- libc/src/__support/wchar/wcrtomb.cpp | 1 + libc/src/wchar/wcrtomb.cpp | 2 +- 4 files changed, 4 insertions(+), 3 deletions(-) diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt index e4a3bfd7b9c5c..38dfd76513be1 100644 --- a/libc/src/__support/wchar/CMakeLists.txt +++ b/libc/src/__support/wchar/CMakeLists.txt @@ -32,6 +32,7 @@ add_object_library( libc.hdr.types.mbstate_t libc.hdr.types.wchar_t libc.src.__support.error_or + libc.src.__support.common .character_converter .mbstate ) diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp index 61b3a4abd24fd..ca709769616c3 100644 --- a/libc/src/__support/wchar/character_converter.cpp +++ b/libc/src/__support/wchar/character_converter.cpp @@ -10,7 +10,6 @@ #include "hdr/types/char8_t.h" #include "src/__support/common.h" #include "src/__support/error_or.h" -#include "src/__support/libc_errno.h" // for error numbers #include "src/__support/math_extras.h" #include "src/__support/wchar/mbstate.h" @@ -52,7 +51,7 @@ int CharacterConverter::push(char32_t utf32) { // `utf32` contains a value that is too large to actually represent a valid // unicode character clear(); - return EILSEQ; + return -1; } ErrorOr CharacterConverter::pop_utf8() { diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp index 847ffd162afc0..43d714d436898 100644 --- a/libc/src/__support/wchar/wcrtomb.cpp +++ b/libc/src/__support/wchar/wcrtomb.cpp @@ -24,6 +24,7 @@ ErrorOr wcrtomb(char *__restrict s, wchar_t wc, mbstate_t *__restrict ps) { CharacterConverter cr((internal::mbstate *)ps); + // when s is nullptr, this is equivalent to wcrtomb(buf, L'\0', ps) char buf[sizeof(wchar_t) / sizeof(char)]; if (s == nullptr) { s = buf; diff --git a/libc/src/wchar/wcrtomb.cpp b/libc/src/wchar/wcrtomb.cpp index 82c813747e0f6..5917769af9d4e 100644 --- a/libc/src/wchar/wcrtomb.cpp +++ b/libc/src/wchar/wcrtomb.cpp @@ -24,7 +24,7 @@ LLVM_LIBC_FUNCTION(size_t, wcrtomb, internal::wcrtomb(s, wc, ps == nullptr ? &internal_mbstate : ps); if (!result.has_value()) { - libc_errno = result.error(); + libc_errno = EILSEQ; return -1; } From 591882f2b91051a79bd51027952a5938c4a208f1 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Wed, 18 Jun 2025 17:16:18 +0000 Subject: [PATCH 06/12] use internal mbstate in internal function; used explicit casts --- libc/src/__support/wchar/CMakeLists.txt | 1 - libc/src/__support/wchar/mbstate.h | 6 +++--- libc/src/__support/wchar/wcrtomb.cpp | 9 +++++---- libc/src/__support/wchar/wcrtomb.h | 5 ++--- libc/src/wchar/CMakeLists.txt | 1 + libc/src/wchar/wcrtomb.cpp | 9 ++++++--- 6 files changed, 17 insertions(+), 14 deletions(-) diff --git a/libc/src/__support/wchar/CMakeLists.txt b/libc/src/__support/wchar/CMakeLists.txt index 38dfd76513be1..41ae0cdc00f39 100644 --- a/libc/src/__support/wchar/CMakeLists.txt +++ b/libc/src/__support/wchar/CMakeLists.txt @@ -29,7 +29,6 @@ add_object_library( DEPENDS libc.hdr.types.char32_t libc.hdr.types.size_t - libc.hdr.types.mbstate_t libc.hdr.types.wchar_t libc.src.__support.error_or libc.src.__support.common diff --git a/libc/src/__support/wchar/mbstate.h b/libc/src/__support/wchar/mbstate.h index fb08fb4eaa188..087d3c0fbc4f0 100644 --- a/libc/src/__support/wchar/mbstate.h +++ b/libc/src/__support/wchar/mbstate.h @@ -18,17 +18,17 @@ namespace internal { struct mbstate { // store a partial codepoint (in UTF-32) - char32_t partial; + char32_t partial = 0; /* Progress towards a conversion For utf8 -> utf32, increases with each CharacterConverter::push(utf8_byte) For utf32 -> utf8, increases with each CharacterConverter::pop_utf8() */ - uint8_t bytes_processed; + uint8_t bytes_processed = 0; // Total number of bytes that will be needed to represent this character - uint8_t total_bytes; + uint8_t total_bytes = 0; }; } // namespace internal diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp index 43d714d436898..cf0db4ee2763b 100644 --- a/libc/src/__support/wchar/wcrtomb.cpp +++ b/libc/src/__support/wchar/wcrtomb.cpp @@ -12,7 +12,6 @@ #include "src/__support/wchar/mbstate.h" #include "hdr/types/char32_t.h" -#include "hdr/types/mbstate_t.h" #include "hdr/types/size_t.h" #include "hdr/types/wchar_t.h" #include "src/__support/common.h" @@ -21,8 +20,10 @@ namespace LIBC_NAMESPACE_DECL { namespace internal { ErrorOr wcrtomb(char *__restrict s, wchar_t wc, - mbstate_t *__restrict ps) { - CharacterConverter cr((internal::mbstate *)ps); + mbstate *__restrict ps) { + static_assert(sizeof(wchar_t) == 4); + + CharacterConverter cr(ps); // when s is nullptr, this is equivalent to wcrtomb(buf, L'\0', ps) char buf[sizeof(wchar_t) / sizeof(char)]; @@ -31,7 +32,7 @@ ErrorOr wcrtomb(char *__restrict s, wchar_t wc, wc = L'\0'; } - int status = cr.push((char32_t)wc); + int status = cr.push(static_cast(wc)); if (status != 0) return Error(status); diff --git a/libc/src/__support/wchar/wcrtomb.h b/libc/src/__support/wchar/wcrtomb.h index 35a472548b67a..5e36b76eb01de 100644 --- a/libc/src/__support/wchar/wcrtomb.h +++ b/libc/src/__support/wchar/wcrtomb.h @@ -9,17 +9,16 @@ #ifndef LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H #define LLVM_LIBC_SRC__SUPPORT_WCHAR_WCRTOMB_H -#include "hdr/types/mbstate_t.h" #include "hdr/types/size_t.h" #include "hdr/types/wchar_t.h" #include "src/__support/error_or.h" +#include "src/__support/wchar/mbstate.h" #include "src/__support/macros/config.h" namespace LIBC_NAMESPACE_DECL { namespace internal { -ErrorOr wcrtomb(char *__restrict s, wchar_t wc, - mbstate_t *__restrict ps); +ErrorOr wcrtomb(char *__restrict s, wchar_t wc, mbstate *__restrict ps); } // namespace internal } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/CMakeLists.txt b/libc/src/wchar/CMakeLists.txt index a106a8ee9aa41..6fe6da513ba04 100644 --- a/libc/src/wchar/CMakeLists.txt +++ b/libc/src/wchar/CMakeLists.txt @@ -45,6 +45,7 @@ add_entrypoint_object( libc.hdr.types.mbstate_t libc.src.__support.libc_errno libc.src.__support.wchar.wcrtomb + libc.src.__support.wchar.mbstate ) add_entrypoint_object( diff --git a/libc/src/wchar/wcrtomb.cpp b/libc/src/wchar/wcrtomb.cpp index 5917769af9d4e..7951bea2120bc 100644 --- a/libc/src/wchar/wcrtomb.cpp +++ b/libc/src/wchar/wcrtomb.cpp @@ -12,16 +12,19 @@ #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" +#include "src/__support/wchar/mbstate.h" #include "src/__support/wchar/wcrtomb.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(size_t, wcrtomb, (char *__restrict s, wchar_t wc, mbstate_t *__restrict ps)) { - static mbstate_t internal_mbstate{0, 0, 0}; + static internal::mbstate internal_mbstate; - auto result = - internal::wcrtomb(s, wc, ps == nullptr ? &internal_mbstate : ps); + auto result = internal::wcrtomb( + s, wc, + ps == nullptr ? &internal_mbstate + : reinterpret_cast(ps)); if (!result.has_value()) { libc_errno = EILSEQ; From babc9984281287c08fbfb4bac5cfe82e7e324168 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Wed, 18 Jun 2025 17:23:46 +0000 Subject: [PATCH 07/12] formatting --- libc/src/__support/wchar/wcrtomb.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/src/__support/wchar/wcrtomb.h b/libc/src/__support/wchar/wcrtomb.h index 5e36b76eb01de..bcd39a92a3b76 100644 --- a/libc/src/__support/wchar/wcrtomb.h +++ b/libc/src/__support/wchar/wcrtomb.h @@ -12,8 +12,8 @@ #include "hdr/types/size_t.h" #include "hdr/types/wchar_t.h" #include "src/__support/error_or.h" -#include "src/__support/wchar/mbstate.h" #include "src/__support/macros/config.h" +#include "src/__support/wchar/mbstate.h" namespace LIBC_NAMESPACE_DECL { namespace internal { From 8b1d981deadd330a2318130a4aacd367dab366b9 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Fri, 20 Jun 2025 17:21:10 +0000 Subject: [PATCH 08/12] prevent overlaying mbstate_t --- libc/hdr/types/mbstate_t.h | 2 +- libc/src/__support/wchar/wcrtomb.cpp | 3 +++ 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/libc/hdr/types/mbstate_t.h b/libc/hdr/types/mbstate_t.h index 15b2614341d7d..ad826d49c14c7 100644 --- a/libc/hdr/types/mbstate_t.h +++ b/libc/hdr/types/mbstate_t.h @@ -15,7 +15,7 @@ #else // Overlay mode -#include "hdr/wchar_overlay.h" +#error "type not available in overlay mode" #endif // LLVM_LIBC_FULL_BUILD diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp index cf0db4ee2763b..fdbbcb5526917 100644 --- a/libc/src/__support/wchar/wcrtomb.cpp +++ b/libc/src/__support/wchar/wcrtomb.cpp @@ -15,6 +15,7 @@ #include "hdr/types/size_t.h" #include "hdr/types/wchar_t.h" #include "src/__support/common.h" +#include "src/__support/libc_assert.h" namespace LIBC_NAMESPACE_DECL { namespace internal { @@ -39,6 +40,8 @@ ErrorOr wcrtomb(char *__restrict s, wchar_t wc, size_t count = 0; while (!cr.isComplete()) { auto utf8 = cr.pop_utf8(); // can never fail as long as the push succeeded + LIBC_ASSERT(utf8.has_value()); + *s = utf8.value(); s++; count++; From f2be9eafdd1e97955195eb367326e04a28f93ccf Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Fri, 20 Jun 2025 17:27:17 +0000 Subject: [PATCH 09/12] move internal buffer to public function --- libc/src/__support/wchar/wcrtomb.cpp | 10 +++------- libc/src/wchar/wcrtomb.cpp | 7 +++++++ 2 files changed, 10 insertions(+), 7 deletions(-) diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp index fdbbcb5526917..b51e4cd7982bf 100644 --- a/libc/src/__support/wchar/wcrtomb.cpp +++ b/libc/src/__support/wchar/wcrtomb.cpp @@ -26,12 +26,8 @@ ErrorOr wcrtomb(char *__restrict s, wchar_t wc, CharacterConverter cr(ps); - // when s is nullptr, this is equivalent to wcrtomb(buf, L'\0', ps) - char buf[sizeof(wchar_t) / sizeof(char)]; - if (s == nullptr) { - s = buf; - wc = L'\0'; - } + if (s == nullptr) + return Error(-1); int status = cr.push(static_cast(wc)); if (status != 0) @@ -41,7 +37,7 @@ ErrorOr wcrtomb(char *__restrict s, wchar_t wc, while (!cr.isComplete()) { auto utf8 = cr.pop_utf8(); // can never fail as long as the push succeeded LIBC_ASSERT(utf8.has_value()); - + *s = utf8.value(); s++; count++; diff --git a/libc/src/wchar/wcrtomb.cpp b/libc/src/wchar/wcrtomb.cpp index 7951bea2120bc..45494b65cf275 100644 --- a/libc/src/wchar/wcrtomb.cpp +++ b/libc/src/wchar/wcrtomb.cpp @@ -21,6 +21,13 @@ LLVM_LIBC_FUNCTION(size_t, wcrtomb, (char *__restrict s, wchar_t wc, mbstate_t *__restrict ps)) { static internal::mbstate internal_mbstate; + // when s is nullptr, this is equivalent to wcrtomb(buf, L'\0', ps) + char buf[sizeof(wchar_t) / sizeof(char)]; + if (s == nullptr) { + s = buf; + wc = L'\0'; + } + auto result = internal::wcrtomb( s, wc, ps == nullptr ? &internal_mbstate From ddf318383c29352ba4249730f78c810e43c49a0e Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Fri, 20 Jun 2025 19:30:47 +0000 Subject: [PATCH 10/12] fixed uninitialized variable --- libc/src/__support/wchar/mbstate.h | 2 +- libc/src/__support/wchar/wcrtomb.cpp | 2 +- libc/src/wchar/wcrtomb.cpp | 1 + 3 files changed, 3 insertions(+), 2 deletions(-) diff --git a/libc/src/__support/wchar/mbstate.h b/libc/src/__support/wchar/mbstate.h index 1fb46818cdfba..32304a5215241 100644 --- a/libc/src/__support/wchar/mbstate.h +++ b/libc/src/__support/wchar/mbstate.h @@ -25,7 +25,7 @@ struct mbstate { Increases with each push(...) until it reaches total_bytes Decreases with each pop(...) until it reaches 0 */ - uint8_t bytes_stored; + uint8_t bytes_stored = 0; // Total number of bytes that will be needed to represent this character uint8_t total_bytes = 0; diff --git a/libc/src/__support/wchar/wcrtomb.cpp b/libc/src/__support/wchar/wcrtomb.cpp index b51e4cd7982bf..8ca3d17ad6ce1 100644 --- a/libc/src/__support/wchar/wcrtomb.cpp +++ b/libc/src/__support/wchar/wcrtomb.cpp @@ -34,7 +34,7 @@ ErrorOr wcrtomb(char *__restrict s, wchar_t wc, return Error(status); size_t count = 0; - while (!cr.isComplete()) { + while (!cr.isEmpty()) { auto utf8 = cr.pop_utf8(); // can never fail as long as the push succeeded LIBC_ASSERT(utf8.has_value()); diff --git a/libc/src/wchar/wcrtomb.cpp b/libc/src/wchar/wcrtomb.cpp index 45494b65cf275..6d604a00599ee 100644 --- a/libc/src/wchar/wcrtomb.cpp +++ b/libc/src/wchar/wcrtomb.cpp @@ -9,6 +9,7 @@ #include "src/wchar/wcrtomb.h" #include "hdr/types/mbstate_t.h" +#include "hdr/types/wchar_t.h" #include "src/__support/common.h" #include "src/__support/libc_errno.h" #include "src/__support/macros/config.h" From edf8aea1e415adffa711788f460d75c2df9feaa6 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Fri, 20 Jun 2025 19:50:42 +0000 Subject: [PATCH 11/12] updated entrypoings --- libc/config/linux/x86_64/entrypoints.txt | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 0373c23db04e5..5674aa50a6051 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -365,7 +365,6 @@ set(TARGET_LIBC_ENTRYPOINTS # wchar.h entrypoints libc.src.wchar.btowc - libc.src.wchar.wcrtomb libc.src.wchar.wcslen libc.src.wchar.wctob libc.src.wchar.wmemmove @@ -1245,6 +1244,9 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.sys.socket.recv libc.src.sys.socket.recvfrom libc.src.sys.socket.recvmsg + + # wchar entrypoints + libc.src.wchar.wcrtomb ) endif() From 25adc75dd422834300dbbd7b7383e9e8dea9eb11 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Fri, 20 Jun 2025 21:37:37 +0000 Subject: [PATCH 12/12] whitespace --- libc/config/linux/x86_64/entrypoints.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/config/linux/x86_64/entrypoints.txt b/libc/config/linux/x86_64/entrypoints.txt index 11625f65489e6..4d94f10196fd7 100644 --- a/libc/config/linux/x86_64/entrypoints.txt +++ b/libc/config/linux/x86_64/entrypoints.txt @@ -1244,7 +1244,7 @@ if(LLVM_LIBC_FULL_BUILD) libc.src.sys.socket.recv libc.src.sys.socket.recvfrom libc.src.sys.socket.recvmsg - + # wchar.h entrypoints libc.src.wchar.mbrtowc libc.src.wchar.wcrtomb