diff --git a/libc/src/__support/wchar/character_converter.cpp b/libc/src/__support/wchar/character_converter.cpp index 15d0f478a18a9..278248c5c4c4a 100644 --- a/libc/src/__support/wchar/character_converter.cpp +++ b/libc/src/__support/wchar/character_converter.cpp @@ -132,12 +132,6 @@ ErrorOr CharacterConverter::pop_utf32() { return utf32; } -size_t CharacterConverter::sizeAsUTF32() { - return 1; // a single utf-32 value can fit an entire character -} - -size_t CharacterConverter::sizeAsUTF8() { return state->total_bytes; } - ErrorOr CharacterConverter::pop_utf8() { if (isEmpty()) return Error(-1); diff --git a/libc/src/__support/wchar/character_converter.h b/libc/src/__support/wchar/character_converter.h index b6d918f2d2edc..fef30f7ce43fa 100644 --- a/libc/src/__support/wchar/character_converter.h +++ b/libc/src/__support/wchar/character_converter.h @@ -12,6 +12,7 @@ #include "hdr/types/char32_t.h" #include "hdr/types/char8_t.h" #include "hdr/types/size_t.h" +#include "src/__support/CPP/type_traits.h" #include "src/__support/common.h" #include "src/__support/error_or.h" #include "src/__support/wchar/mbstate.h" @@ -31,14 +32,18 @@ class CharacterConverter { bool isEmpty(); bool isValidState(); - size_t sizeAsUTF32(); - size_t sizeAsUTF8(); + template size_t sizeAs(); + template <> size_t sizeAs() { return state->total_bytes; } + template <> size_t sizeAs() { return 1; } int push(char8_t utf8_byte); int push(char32_t utf32); ErrorOr pop_utf8(); ErrorOr pop_utf32(); + template ErrorOr pop(); + template <> ErrorOr pop() { return pop_utf8(); } + template <> ErrorOr pop() { return pop_utf32(); } }; } // namespace internal diff --git a/libc/src/__support/wchar/mbsnrtowcs.h b/libc/src/__support/wchar/mbsnrtowcs.h index 54e315210d95c..6abb836635772 100644 --- a/libc/src/__support/wchar/mbsnrtowcs.h +++ b/libc/src/__support/wchar/mbsnrtowcs.h @@ -36,7 +36,7 @@ LIBC_INLINE static ErrorOr mbsnrtowcs(wchar_t *__restrict dst, StringConverter str_conv(reinterpret_cast(*src), ps, 
len, nmc); size_t dst_idx = 0; - ErrorOr converted = str_conv.popUTF32(); + ErrorOr converted = str_conv.pop(); while (converted.has_value()) { if (dst != nullptr) dst[dst_idx] = converted.value(); @@ -47,7 +47,7 @@ LIBC_INLINE static ErrorOr mbsnrtowcs(wchar_t *__restrict dst, return dst_idx; } dst_idx++; - converted = str_conv.popUTF32(); + converted = str_conv.pop(); } if (converted.error() == -1) { // if we hit conversion limit diff --git a/libc/src/__support/wchar/string_converter.h b/libc/src/__support/wchar/string_converter.h index 869ebdfc8b390..ba628bd34cdc0 100644 --- a/libc/src/__support/wchar/string_converter.h +++ b/libc/src/__support/wchar/string_converter.h @@ -12,6 +12,7 @@ #include "hdr/types/char32_t.h" #include "hdr/types/char8_t.h" #include "hdr/types/size_t.h" +#include "src/__support/CPP/type_traits.h" #include "src/__support/common.h" #include "src/__support/error_or.h" #include "src/__support/wchar/character_converter.h" @@ -53,9 +54,7 @@ template class StringConverter { size_t srclen = SIZE_MAX) : cr(ps), src(s), src_len(srclen), src_idx(0), num_to_write(dstlen) {} - // TODO: following functions are almost identical - // look into templating CharacterConverter pop functions - ErrorOr popUTF32() { + template ErrorOr pop() { if (num_to_write == 0) return Error(-1); @@ -64,7 +63,7 @@ template class StringConverter { if (!src_elements_read.has_value()) return Error(src_elements_read.error()); - if (cr.sizeAsUTF32() > num_to_write) { + if (cr.sizeAs() > num_to_write) { cr.clear(); return Error(-1); } @@ -72,34 +71,9 @@ template class StringConverter { src_idx += src_elements_read.value(); } - auto out = cr.pop_utf32(); - if (out.has_value() && out.value() == L'\0') - src_len = src_idx; - - num_to_write--; - - return out; - } - - ErrorOr popUTF8() { - if (num_to_write == 0) - return Error(-1); - - if (cr.isEmpty() || src_idx == 0) { - auto src_elements_read = pushFullCharacter(); - if (!src_elements_read.has_value()) - return 
Error(src_elements_read.error()); - - if (cr.sizeAsUTF8() > num_to_write) { - cr.clear(); - return Error(-1); - } - - src_idx += src_elements_read.value(); - } - - auto out = cr.pop_utf8(); - if (out.has_value() && out.value() == '\0') + ErrorOr out = cr.pop(); + // if out is a value (not an error) and is the null terminator, cap src_len at src_idx + if (out.has_value() && out.value() == 0) src_len = src_idx; num_to_write--; diff --git a/libc/src/__support/wchar/wcsnrtombs.h b/libc/src/__support/wchar/wcsnrtombs.h index 433097c937a42..f593a0e0dba87 100644 --- a/libc/src/__support/wchar/wcsnrtombs.h +++ b/libc/src/__support/wchar/wcsnrtombs.h @@ -39,7 +39,7 @@ wcsnrtombs(char *__restrict dest, const wchar_t **__restrict ptr_to_src, reinterpret_cast(*ptr_to_src), ps, dest_len, num_src_widechars); size_t dst_idx = 0; - ErrorOr converted = str_conv.popUTF8(); + ErrorOr converted = str_conv.pop(); while (converted.has_value()) { if (dest != nullptr) dest[dst_idx] = converted.value(); @@ -51,7 +51,7 @@ wcsnrtombs(char *__restrict dest, const wchar_t **__restrict ptr_to_src, } dst_idx++; - converted = str_conv.popUTF8(); + converted = str_conv.pop(); } if (dest != nullptr) diff --git a/libc/test/src/__support/wchar/string_converter_test.cpp b/libc/test/src/__support/wchar/string_converter_test.cpp index d514df9317852..e45358ddc68c4 100644 --- a/libc/test/src/__support/wchar/string_converter_test.cpp +++ b/libc/test/src/__support/wchar/string_converter_test.cpp @@ -34,32 +34,32 @@ TEST(LlvmLibcStringConverterTest, UTF8To32) { LIBC_NAMESPACE::internal::StringConverter sc( reinterpret_cast(src), &state, SIZE_MAX); - auto res = sc.popUTF32(); + auto res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x1f921); ASSERT_EQ(static_cast(sc.getSourceIndex()), 4); - res = sc.popUTF32(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x2211); ASSERT_EQ(static_cast(sc.getSourceIndex()), 7); - res = sc.popUTF32(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); 
ASSERT_EQ(static_cast(res.value()), 0xff); ASSERT_EQ(static_cast(sc.getSourceIndex()), 9); - res = sc.popUTF32(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x41); ASSERT_EQ(static_cast(sc.getSourceIndex()), 10); - res = sc.popUTF32(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0); ASSERT_EQ(static_cast(sc.getSourceIndex()), 11); - res = sc.popUTF32(); + res = sc.pop(); ASSERT_FALSE(res.has_value()); ASSERT_EQ(res.error(), -1); ASSERT_EQ(static_cast(sc.getSourceIndex()), 11); @@ -75,66 +75,66 @@ TEST(LlvmLibcStringConverterTest, UTF32To8) { LIBC_NAMESPACE::internal::StringConverter sc( reinterpret_cast(src), &state, SIZE_MAX); - auto res = sc.popUTF8(); + auto res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xF0); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x9F); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xA4); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xA1); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); // end of clown emoji, sigma symbol begins - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xE2); ASSERT_EQ(static_cast(sc.getSourceIndex()), 2); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x88); ASSERT_EQ(static_cast(sc.getSourceIndex()), 2); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x91); ASSERT_EQ(static_cast(sc.getSourceIndex()), 2); // end of sigma symbol, y with diaeresis begins - res = sc.popUTF8(); + res = sc.pop(); 
ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xC3); ASSERT_EQ(static_cast(sc.getSourceIndex()), 3); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xBF); ASSERT_EQ(static_cast(sc.getSourceIndex()), 3); // end of y with diaeresis, letter A begins - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x41); ASSERT_EQ(static_cast(sc.getSourceIndex()), 4); // null byte - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0); ASSERT_EQ(static_cast(sc.getSourceIndex()), 5); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_FALSE(res.has_value()); ASSERT_EQ(res.error(), -1); ASSERT_EQ(static_cast(sc.getSourceIndex()), 5); @@ -148,28 +148,28 @@ TEST(LlvmLibcStringConverterTest, UTF32To8PartialRead) { LIBC_NAMESPACE::internal::StringConverter sc( reinterpret_cast(src), &state, SIZE_MAX, 1); - auto res = sc.popUTF8(); + auto res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xF0); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x9F); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xA4); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xA1); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); // can only read 1 character from source string, so error on next pop - res = sc.popUTF8(); + res = sc.pop(); ASSERT_FALSE(res.has_value()); ASSERT_EQ(res.error(), -1); } @@ -181,12 +181,12 @@ TEST(LlvmLibcStringConverterTest, UTF8To32PartialRead) { LIBC_NAMESPACE::internal::StringConverter sc( reinterpret_cast(src), &state, SIZE_MAX, 5); - auto res = sc.popUTF32(); + 
auto res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x1f921); ASSERT_EQ(static_cast(sc.getSourceIndex()), 4); - res = sc.popUTF32(); + res = sc.pop(); ASSERT_FALSE(res.has_value()); ASSERT_EQ(static_cast(res.error()), -1); ASSERT_EQ(static_cast(sc.getSourceIndex()), 5); @@ -200,27 +200,27 @@ TEST(LlvmLibcStringConverterTest, UTF32To8ErrorHandling) { LIBC_NAMESPACE::internal::StringConverter sc( reinterpret_cast(src), &state, SIZE_MAX); - auto res = sc.popUTF8(); + auto res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xF0); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x9F); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xA4); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xA1); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_FALSE(res.has_value()); ASSERT_EQ(static_cast(res.error()), EILSEQ); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); @@ -234,12 +234,12 @@ TEST(LlvmLibcStringConverterTest, UTF8To32ErrorHandling) { LIBC_NAMESPACE::internal::StringConverter sc( reinterpret_cast(src), &state, SIZE_MAX); - auto res = sc.popUTF32(); + auto res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x1f921); ASSERT_EQ(static_cast(sc.getSourceIndex()), 4); - res = sc.popUTF32(); + res = sc.pop(); ASSERT_FALSE(res.has_value()); ASSERT_EQ(static_cast(res.error()), EILSEQ); ASSERT_EQ(static_cast(sc.getSourceIndex()), 4); @@ -257,12 +257,12 @@ TEST(LlvmLibcStringConverterTest, InvalidCharacterOutsideBounds) { LIBC_NAMESPACE::internal::StringConverter sc1( reinterpret_cast(src1), &ps1, 1); - auto res1 = 
sc1.popUTF32(); + auto res1 = sc1.pop(); ASSERT_TRUE(res1.has_value()); ASSERT_EQ(static_cast(res1.value()), 0x1f921); ASSERT_EQ(static_cast(sc1.getSourceIndex()), 4); - res1 = sc1.popUTF32(); + res1 = sc1.pop(); ASSERT_FALSE(res1.has_value()); // no space to write error NOT invalid character error (EILSEQ) ASSERT_EQ(static_cast(res1.error()), -1); @@ -275,27 +275,27 @@ TEST(LlvmLibcStringConverterTest, InvalidCharacterOutsideBounds) { LIBC_NAMESPACE::internal::StringConverter sc2( reinterpret_cast(src2), &ps2, 4); - auto res2 = sc2.popUTF8(); + auto res2 = sc2.pop(); ASSERT_TRUE(res2.has_value()); ASSERT_EQ(static_cast(res2.value()), 0xF0); ASSERT_EQ(static_cast(sc2.getSourceIndex()), 1); - res2 = sc2.popUTF8(); + res2 = sc2.pop(); ASSERT_TRUE(res2.has_value()); ASSERT_EQ(static_cast(res2.value()), 0x9F); ASSERT_EQ(static_cast(sc2.getSourceIndex()), 1); - res2 = sc2.popUTF8(); + res2 = sc2.pop(); ASSERT_TRUE(res2.has_value()); ASSERT_EQ(static_cast(res2.value()), 0xA4); ASSERT_EQ(static_cast(sc2.getSourceIndex()), 1); - res2 = sc2.popUTF8(); + res2 = sc2.pop(); ASSERT_TRUE(res2.has_value()); ASSERT_EQ(static_cast(res2.value()), 0xA1); ASSERT_EQ(static_cast(sc2.getSourceIndex()), 1); - res2 = sc2.popUTF8(); + res2 = sc2.pop(); ASSERT_FALSE(res2.has_value()); // no space to write error NOT invalid character error (EILSEQ) ASSERT_EQ(static_cast(res2.error()), -1); @@ -315,22 +315,22 @@ TEST(LlvmLibcStringConverterTest, MultipleStringConverters32To8) { LIBC_NAMESPACE::internal::StringConverter sc1( reinterpret_cast(src), &state, SIZE_MAX, 1); - auto res = sc1.popUTF8(); + auto res = sc1.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xF0); ASSERT_EQ(static_cast(sc1.getSourceIndex()), 1); - res = sc1.popUTF8(); + res = sc1.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x9F); ASSERT_EQ(static_cast(sc1.getSourceIndex()), 1); - res = sc1.popUTF8(); + res = sc1.pop(); ASSERT_TRUE(res.has_value()); 
ASSERT_EQ(static_cast(res.value()), 0xA4); ASSERT_EQ(static_cast(sc1.getSourceIndex()), 1); - res = sc1.popUTF8(); + res = sc1.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xA1); ASSERT_EQ(static_cast(sc1.getSourceIndex()), 1); @@ -340,12 +340,12 @@ TEST(LlvmLibcStringConverterTest, MultipleStringConverters32To8) { reinterpret_cast(src) + sc1.getSourceIndex(), &state, SIZE_MAX, 1); - res = sc2.popUTF8(); + res = sc2.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xC3); ASSERT_EQ(static_cast(sc2.getSourceIndex()), 1); - res = sc2.popUTF8(); + res = sc2.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0xBF); ASSERT_EQ(static_cast(sc2.getSourceIndex()), 1); @@ -357,7 +357,7 @@ TEST(LlvmLibcStringConverterTest, MultipleStringConverters8To32) { LIBC_NAMESPACE::internal::StringConverter sc1( reinterpret_cast(src), &state, SIZE_MAX, 2); - auto res = sc1.popUTF32(); + auto res = sc1.pop(); ASSERT_FALSE(res.has_value()); ASSERT_EQ(static_cast(res.error()), -1); ASSERT_EQ(static_cast(sc1.getSourceIndex()), 2); @@ -367,12 +367,12 @@ TEST(LlvmLibcStringConverterTest, MultipleStringConverters8To32) { reinterpret_cast(src) + sc1.getSourceIndex(), &state, SIZE_MAX, 3); - res = sc2.popUTF32(); + res = sc2.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0x1f921); ASSERT_EQ(static_cast(sc2.getSourceIndex()), 2); - res = sc2.popUTF32(); + res = sc2.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(res.value()), 0); ASSERT_EQ(static_cast(sc2.getSourceIndex()), 3); @@ -384,11 +384,11 @@ TEST(LlvmLibcStringConverterTest, DestLimitUTF8To32) { LIBC_NAMESPACE::internal::StringConverter sc( reinterpret_cast(src), &state, 1); - auto res = sc.popUTF32(); + auto res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(sc.getSourceIndex()), 4); - res = sc.popUTF32(); // no space to pop this into + res = sc.pop(); // no space to pop this into ASSERT_FALSE(res.has_value()); } 
@@ -399,23 +399,23 @@ TEST(LlvmLibcStringConverterTest, DestLimitUTF32To8) { LIBC_NAMESPACE::internal::StringConverter sc( reinterpret_cast(src), &state, 5); - auto res = sc.popUTF8(); + auto res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_TRUE(res.has_value()); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); - res = sc.popUTF8(); + res = sc.pop(); ASSERT_FALSE(res.has_value()); ASSERT_EQ(static_cast(sc.getSourceIndex()), 1); }