From 6a2a111610354f276b2ce11a461fa4c7e7b970a5 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Tue, 5 Aug 2025 20:43:51 +0000 Subject: [PATCH 1/5] template characterconverter and stringconverter pop functions; update tests to use templated functions --- .../src/__support/wchar/character_converter.h | 7 ++ libc/src/__support/wchar/mbsnrtowcs.h | 4 +- libc/src/__support/wchar/string_converter.h | 41 ++----- libc/src/__support/wchar/wcsnrtombs.h | 4 +- .../__support/wchar/string_converter_test.cpp | 110 +++++++++--------- 5 files changed, 76 insertions(+), 90 deletions(-) diff --git a/libc/src/__support/wchar/character_converter.h b/libc/src/__support/wchar/character_converter.h index b6d918f2d2edc..edb53214c9be9 100644 --- a/libc/src/__support/wchar/character_converter.h +++ b/libc/src/__support/wchar/character_converter.h @@ -12,6 +12,7 @@ #include "hdr/types/char32_t.h" #include "hdr/types/char8_t.h" #include "hdr/types/size_t.h" +#include "src/__support/CPP/type_traits.h" #include "src/__support/common.h" #include "src/__support/error_or.h" #include "src/__support/wchar/mbstate.h" @@ -39,6 +40,12 @@ class CharacterConverter { ErrorOr pop_utf8(); ErrorOr pop_utf32(); + template ErrorOr pop() { + if constexpr (cpp::is_same_v) + return pop_utf8(); + else + return pop_utf32(); + } }; } // namespace internal diff --git a/libc/src/__support/wchar/mbsnrtowcs.h b/libc/src/__support/wchar/mbsnrtowcs.h index 54e315210d95c..6abb836635772 100644 --- a/libc/src/__support/wchar/mbsnrtowcs.h +++ b/libc/src/__support/wchar/mbsnrtowcs.h @@ -36,7 +36,7 @@ LIBC_INLINE static ErrorOr mbsnrtowcs(wchar_t *__restrict dst, StringConverter str_conv(reinterpret_cast(*src), ps, len, nmc); size_t dst_idx = 0; - ErrorOr converted = str_conv.popUTF32(); + ErrorOr converted = str_conv.pop(); while (converted.has_value()) { if (dst != nullptr) dst[dst_idx] = converted.value(); @@ -47,7 +47,7 @@ LIBC_INLINE static ErrorOr mbsnrtowcs(wchar_t *__restrict dst, return dst_idx; } dst_idx++; - 
converted = str_conv.popUTF32(); + converted = str_conv.pop(); } if (converted.error() == -1) { // if we hit conversion limit diff --git a/libc/src/__support/wchar/string_converter.h b/libc/src/__support/wchar/string_converter.h index 869ebdfc8b390..85f93f39a08e7 100644 --- a/libc/src/__support/wchar/string_converter.h +++ b/libc/src/__support/wchar/string_converter.h @@ -12,6 +12,7 @@ #include "hdr/types/char32_t.h" #include "hdr/types/char8_t.h" #include "hdr/types/size_t.h" +#include "src/__support/CPP/type_traits.h" #include "src/__support/common.h" #include "src/__support/error_or.h" #include "src/__support/wchar/character_converter.h" @@ -53,9 +54,7 @@ template class StringConverter { size_t srclen = SIZE_MAX) : cr(ps), src(s), src_len(srclen), src_idx(0), num_to_write(dstlen) {} - // TODO: following functions are almost identical - // look into templating CharacterConverter pop functions - ErrorOr popUTF32() { + template ErrorOr pop() { if (num_to_write == 0) return Error(-1); @@ -64,33 +63,13 @@ template class StringConverter { if (!src_elements_read.has_value()) return Error(src_elements_read.error()); - if (cr.sizeAsUTF32() > num_to_write) { - cr.clear(); - return Error(-1); - } - - src_idx += src_elements_read.value(); - } + size_t size; + if constexpr (cpp::is_same_v) + size = cr.sizeAsUTF8(); + else + size = cr.sizeAsUTF32(); - auto out = cr.pop_utf32(); - if (out.has_value() && out.value() == L'\0') - src_len = src_idx; - - num_to_write--; - - return out; - } - - ErrorOr popUTF8() { - if (num_to_write == 0) - return Error(-1); - - if (cr.isEmpty() || src_idx == 0) { - auto src_elements_read = pushFullCharacter(); - if (!src_elements_read.has_value()) - return Error(src_elements_read.error()); - - if (cr.sizeAsUTF8() > num_to_write) { + if (size > num_to_write) { cr.clear(); return Error(-1); } @@ -98,8 +77,8 @@ template class StringConverter { src_idx += src_elements_read.value(); } - auto out = cr.pop_utf8(); - if (out.has_value() && out.value() == 
'\0') + ErrorOr out = cr.pop(); + if (out.has_value() && out.value() == L'\0') src_len = src_idx; num_to_write--; diff --git a/libc/src/__support/wchar/wcsnrtombs.h b/libc/src/__support/wchar/wcsnrtombs.h index 433097c937a42..f593a0e0dba87 100644 --- a/libc/src/__support/wchar/wcsnrtombs.h +++ b/libc/src/__support/wchar/wcsnrtombs.h @@ -39,7 +39,7 @@ wcsnrtombs(char *__restrict dest, const wchar_t **__restrict ptr_to_src, reinterpret_cast(*ptr_to_src), ps, dest_len, num_src_widechars); size_t dst_idx = 0; - ErrorOr converted = str_conv.popUTF8(); + ErrorOr converted = str_conv.pop(); while (converted.has_value()) { if (dest != nullptr) dest[dst_idx] = converted.value(); @@ -51,7 +51,7 @@ wcsnrtombs(char *__restrict dest, const wchar_t **__restrict ptr_to_src, } dst_idx++; - converted = str_conv.popUTF8(); + converted = str_conv.pop(); } if (dest != nullptr) diff --git a/libc/test/src/__support/wchar/string_converter_test.cpp b/libc/test/src/__support/wchar/string_converter_test.cpp index d514df9317852..e45358ddc68c4 100644 --- a/libc/test/src/__support/wchar/string_converter_test.cpp +++ b/libc/test/src/__support/wchar/string_converter_test.cpp @@ -34,32 +34,32 @@ TEST(LlvmLibcStringConverterTest, UTF8To32) { LIBC_NAMESPACE::internal::StringConverter sc( reinterpret_cast(src), &state, SIZE_MAX); - auto res = sc.popUTF32(); + auto res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x1f921); ASSERT_EQ(static_cast(sc.getSourceIndex()), 4); - res = sc.popUTF32(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x2211); ASSERT_EQ(static_cast(sc.getSourceIndex()), 7); - res = sc.popUTF32(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xff); ASSERT_EQ(static_cast(sc.getSourceIndex()), 9); - res = sc.popUTF32(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x41); ASSERT_EQ(static_cast(sc.getSourceIndex()), 10); - res = 
sc.popUTF32(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0); ASSERT_EQ(static_cast(sc.getSourceIndex()), 11); - res = sc.popUTF32(); + res = sc.pop(); ASSERT_FALSE(res.has_value()); ASSERT_EQ(res.error(), -1); ASSERT_EQ(static_cast(sc.getSourceIndex()), 11); @@ -75,66 +75,66 @@ TEST(LlvmLibcStringConverterTest, UTF32To8) { LIBC_NAMESPACE::internal::StringConverter sc( reinterpret_cast(src), &state, SIZE_MAX); - auto res = sc.popUTF8(); + auto res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xF0); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x9F); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xA4); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xA1); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); // end of clown emoji, sigma symbol begins - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xE2); ASSERT_EQ(static_cast(sc.getSourceIndex()), 2); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x88); ASSERT_EQ(static_cast(sc.getSourceIndex()), 2); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x91); ASSERT_EQ(static_cast(sc.getSourceIndex()), 2); // end of sigma symbol, y with diaeresis begins - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xC3); ASSERT_EQ(static_cast(sc.getSourceIndex()), 3); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xBF); 
ASSERT_EQ(static_cast(sc.getSourceIndex()), 3); // end of y with diaeresis, letter A begins - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x41); ASSERT_EQ(static_cast(sc.getSourceIndex()), 4); // null byte - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0); ASSERT_EQ(static_cast(sc.getSourceIndex()), 5); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_FALSE(res.has_value()); ASSERT_EQ(res.error(), -1); ASSERT_EQ(static_cast(sc.getSourceIndex()), 5); @@ -148,28 +148,28 @@ TEST(LlvmLibcStringConverterTest, UTF32To8PartialRead) { LIBC_NAMESPACE::internal::StringConverter sc( reinterpret_cast(src), &state, SIZE_MAX, 1); - auto res = sc.popUTF8(); + auto res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xF0); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x9F); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xA4); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xA1); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); // can only read 1 character from source string, so error on next pop - res = sc.popUTF8(); + res = sc.pop(); ASSERT_FALSE(res.has_value()); ASSERT_EQ(res.error(), -1); } @@ -181,12 +181,12 @@ TEST(LlvmLibcStringConverterTest, UTF8To32PartialRead) { LIBC_NAMESPACE::internal::StringConverter sc( reinterpret_cast(src), &state, SIZE_MAX, 5); - auto res = sc.popUTF32(); + auto res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x1f921); ASSERT_EQ(static_cast(sc.getSourceIndex()), 4); - res = sc.popUTF32(); + res = sc.pop(); ASSERT_FALSE(res.has_value()); 
ASSERT_EQ(static_cast(res.error()), -1); ASSERT_EQ(static_cast(sc.getSourceIndex()), 5); @@ -200,27 +200,27 @@ TEST(LlvmLibcStringConverterTest, UTF32To8ErrorHandling) { LIBC_NAMESPACE::internal::StringConverter sc( reinterpret_cast(src), &state, SIZE_MAX); - auto res = sc.popUTF8(); + auto res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xF0); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x9F); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xA4); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xA1); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_FALSE(res.has_value()); ASSERT_EQ(static_cast(res.error()), EILSEQ); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); @@ -234,12 +234,12 @@ TEST(LlvmLibcStringConverterTest, UTF8To32ErrorHandling) { LIBC_NAMESPACE::internal::StringConverter sc( reinterpret_cast(src), &state, SIZE_MAX); - auto res = sc.popUTF32(); + auto res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x1f921); ASSERT_EQ(static_cast(sc.getSourceIndex()), 4); - res = sc.popUTF32(); + res = sc.pop(); ASSERT_FALSE(res.has_value()); ASSERT_EQ(static_cast(res.error()), EILSEQ); ASSERT_EQ(static_cast(sc.getSourceIndex()), 4); @@ -257,12 +257,12 @@ TEST(LlvmLibcStringConverterTest, InvalidCharacterOutsideBounds) { LIBC_NAMESPACE::internal::StringConverter sc1( reinterpret_cast(src1), &ps1, 1); - auto res1 = sc1.popUTF32(); + auto res1 = sc1.pop(); ASSERT_TRUE(res1.has_value()); ASSERT_EQ(static_cast(res1.value()), 0x1f921); ASSERT_EQ(static_cast(sc1.getSourceIndex()), 4); - res1 = sc1.popUTF32(); + res1 = sc1.pop(); 
ASSERT_FALSE(res1.has_value()); // no space to write error NOT invalid character error (EILSEQ) ASSERT_EQ(static_cast(res1.error()), -1); @@ -275,27 +275,27 @@ TEST(LlvmLibcStringConverterTest, InvalidCharacterOutsideBounds) { LIBC_NAMESPACE::internal::StringConverter sc2( reinterpret_cast(src2), &ps2, 4); - auto res2 = sc2.popUTF8(); + auto res2 = sc2.pop(); ASSERT_TRUE(res2.has_value()); ASSERT_EQ(static_cast(res2.value()), 0xF0); ASSERT_EQ(static_cast(sc2.getSourceIndex()), 1); - res2 = sc2.popUTF8(); + res2 = sc2.pop(); ASSERT_TRUE(res2.has_value()); ASSERT_EQ(static_cast(res2.value()), 0x9F); ASSERT_EQ(static_cast(sc2.getSourceIndex()), 1); - res2 = sc2.popUTF8(); + res2 = sc2.pop(); ASSERT_TRUE(res2.has_value()); ASSERT_EQ(static_cast(res2.value()), 0xA4); ASSERT_EQ(static_cast(sc2.getSourceIndex()), 1); - res2 = sc2.popUTF8(); + res2 = sc2.pop(); ASSERT_TRUE(res2.has_value()); ASSERT_EQ(static_cast(res2.value()), 0xA1); ASSERT_EQ(static_cast(sc2.getSourceIndex()), 1); - res2 = sc2.popUTF8(); + res2 = sc2.pop(); ASSERT_FALSE(res2.has_value()); // no space to write error NOT invalid character error (EILSEQ) ASSERT_EQ(static_cast(res2.error()), -1); @@ -315,22 +315,22 @@ TEST(LlvmLibcStringConverterTest, MultipleStringConverters32To8) { LIBC_NAMESPACE::internal::StringConverter sc1( reinterpret_cast(src), &state, SIZE_MAX, 1); - auto res = sc1.popUTF8(); + auto res = sc1.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xF0); ASSERT_EQ(static_cast(sc1.getSourceIndex()), 1); - res = sc1.popUTF8(); + res = sc1.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x9F); ASSERT_EQ(static_cast(sc1.getSourceIndex()), 1); - res = sc1.popUTF8(); + res = sc1.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xA4); ASSERT_EQ(static_cast(sc1.getSourceIndex()), 1); - res = sc1.popUTF8(); + res = sc1.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xA1); 
ASSERT_EQ(static_cast(sc1.getSourceIndex()), 1); @@ -340,12 +340,12 @@ TEST(LlvmLibcStringConverterTest, MultipleStringConverters32To8) { reinterpret_cast(src) + sc1.getSourceIndex(), &state, SIZE_MAX, 1); - res = sc2.popUTF8(); + res = sc2.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xC3); ASSERT_EQ(static_cast(sc2.getSourceIndex()), 1); - res = sc2.popUTF8(); + res = sc2.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xBF); ASSERT_EQ(static_cast(sc2.getSourceIndex()), 1); @@ -357,7 +357,7 @@ TEST(LlvmLibcStringConverterTest, MultipleStringConverters8To32) { LIBC_NAMESPACE::internal::StringConverter sc1( reinterpret_cast(src), &state, SIZE_MAX, 2); - auto res = sc1.popUTF32(); + auto res = sc1.pop(); ASSERT_FALSE(res.has_value()); ASSERT_EQ(static_cast(res.error()), -1); ASSERT_EQ(static_cast(sc1.getSourceIndex()), 2); @@ -367,12 +367,12 @@ TEST(LlvmLibcStringConverterTest, MultipleStringConverters8To32) { reinterpret_cast(src) + sc1.getSourceIndex(), &state, SIZE_MAX, 3); - res = sc2.popUTF32(); + res = sc2.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x1f921); ASSERT_EQ(static_cast(sc2.getSourceIndex()), 2); - res = sc2.popUTF32(); + res = sc2.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0); ASSERT_EQ(static_cast(sc2.getSourceIndex()), 3); @@ -384,11 +384,11 @@ TEST(LlvmLibcStringConverterTest, DestLimitUTF8To32) { LIBC_NAMESPACE::internal::StringConverter sc( reinterpret_cast(src), &state, 1); - auto res = sc.popUTF32(); + auto res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(sc.getSourceIndex()), 4); - res = sc.popUTF32(); // no space to pop this into + res = sc.pop(); // no space to pop this into ASSERT_FALSE(res.has_value()); } @@ -399,23 +399,23 @@ TEST(LlvmLibcStringConverterTest, DestLimitUTF32To8) { LIBC_NAMESPACE::internal::StringConverter sc( reinterpret_cast(src), &state, 5); - auto res = sc.popUTF8(); + auto res = 
sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_FALSE(res.has_value()); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); } From 194b8e0f89ccf984e141484fb139faab9bd663a0 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Tue, 5 Aug 2025 20:49:09 +0000 Subject: [PATCH 2/5] fix incorrect null terminator check --- libc/src/__support/wchar/string_converter.h | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/libc/src/__support/wchar/string_converter.h b/libc/src/__support/wchar/string_converter.h index 85f93f39a08e7..d1559ef11b673 100644 --- a/libc/src/__support/wchar/string_converter.h +++ b/libc/src/__support/wchar/string_converter.h @@ -78,7 +78,7 @@ template class StringConverter { } ErrorOr out = cr.pop(); - if (out.has_value() && out.value() == L'\0') + if (out.has_value() && out.value() == 0) // if out isn't null terminator or an error src_len = src_idx; num_to_write--; From fdb8401c0f38e9e898682973d6774cd6b57f78bc Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Tue, 5 Aug 2025 21:00:25 +0000 Subject: [PATCH 3/5] format --- libc/src/__support/wchar/string_converter.h | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/libc/src/__support/wchar/string_converter.h b/libc/src/__support/wchar/string_converter.h index d1559ef11b673..d7f7e76923757 100644 --- a/libc/src/__support/wchar/string_converter.h +++ b/libc/src/__support/wchar/string_converter.h @@ -78,7 +78,8 @@ template class StringConverter { } ErrorOr out = cr.pop(); - if (out.has_value() && out.value() == 0) // if out isn't null terminator or an 
error + // if out is a null terminator (a value, not an error), stop reading src + if (out.has_value() && out.value() == 0) src_len = src_idx; num_to_write--; From e9ba0bad4aab03dd3a0087486d4f6e8e38d4c571 Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Wed, 6 Aug 2025 16:11:37 +0000 Subject: [PATCH 4/5] template CharacterConverter size method --- libc/src/__support/wchar/character_converter.cpp | 6 ------ libc/src/__support/wchar/character_converter.h | 8 ++++++-- libc/src/__support/wchar/string_converter.h | 8 +------- 3 files changed, 7 insertions(+), 15 deletions(-) diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp index 15d0f478a18a9..278248c5c4c4a 100644 --- a/libc/src/__support/wchar/character_converter.cpp +++ b/libc/src/__support/wchar/character_converter.cpp @@ -132,12 +132,6 @@ ErrorOr CharacterConverter::pop_utf32() { return utf32; } -size_t CharacterConverter::sizeAsUTF32() { - return 1; // a single utf-32 value can fit an entire character -} - -size_t CharacterConverter::sizeAsUTF8() { return state->total_bytes; } - ErrorOr CharacterConverter::pop_utf8() { if (isEmpty()) return Error(-1); diff --git a/libc/src/__support/wchar/character_converter.h b/libc/src/__support/wchar/character_converter.h index edb53214c9be9..f09997a85a43c 100644 --- a/libc/src/__support/wchar/character_converter.h +++ b/libc/src/__support/wchar/character_converter.h @@ -32,8 +32,12 @@ class CharacterConverter { bool isEmpty(); bool isValidState(); - size_t sizeAsUTF32(); - size_t sizeAsUTF8(); + template size_t sizeAs() { + if constexpr (cpp::is_same_v) + return state->total_bytes; + else // char32_t + return 1; // every character fits in a single char32_t + } int push(char8_t utf8_byte); int push(char32_t utf32); diff --git a/libc/src/__support/wchar/string_converter.h b/libc/src/__support/wchar/string_converter.h index d7f7e76923757..ba628bd34cdc0 100644 --- a/libc/src/__support/wchar/string_converter.h +++ 
b/libc/src/__support/wchar/string_converter.h @@ -63,13 +63,7 @@ template class StringConverter { if (!src_elements_read.has_value()) return Error(src_elements_read.error()); - size_t size; - if constexpr (cpp::is_same_v) - size = cr.sizeAsUTF8(); - else - size = cr.sizeAsUTF32(); - - if (size > num_to_write) { + if (cr.sizeAs() > num_to_write) { cr.clear(); return Error(-1); } From efad91f5e3ebe885dd1bf0aa493a057fc48e8bca Mon Sep 17 00:00:00 2001 From: Uzair Nawaz Date: Wed, 6 Aug 2025 17:41:25 +0000 Subject: [PATCH 5/5] template specialization --- libc/src/__support/wchar/character_converter.h | 18 ++++++------------ 1 file changed, 6 insertions(+), 12 deletions(-) diff --git a/libc/src/__support/wchar/character_converter.h b/libc/src/__support/wchar/character_converter.h index f09997a85a43c..fef30f7ce43fa 100644 --- a/libc/src/__support/wchar/character_converter.h +++ b/libc/src/__support/wchar/character_converter.h @@ -32,24 +32,18 @@ class CharacterConverter { bool isEmpty(); bool isValidState(); - template size_t sizeAs() { - if constexpr (cpp::is_same_v) - return state->total_bytes; - else // char32_t - return 1; // every character fits in a single char32_t - } + template size_t sizeAs(); + template <> size_t sizeAs() { return state->total_bytes; } + template <> size_t sizeAs() { return 1; } int push(char8_t utf8_byte); int push(char32_t utf32); ErrorOr pop_utf8(); ErrorOr pop_utf32(); - template ErrorOr pop() { - if constexpr (cpp::is_same_v) - return pop_utf8(); - else - return pop_utf32(); - } + template ErrorOr pop(); + template <> ErrorOr pop() { return pop_utf8(); } + template <> ErrorOr pop() { return pop_utf32(); } }; } // namespace internal