[libc] Template StringConverter pop function to avoid duplicate code #152204

uzairnawaz · 2025-08-05T20:51:03Z

Addressed TODO to template the StringConverter pop functions to have a single implementation (combine popUTF8 and popUTF32 into a single templated pop function)

Commit: template CharacterConverter and StringConverter pop functions; update tests to use templated functions

llvmbot · 2025-08-05T20:51:35Z

@llvm/pr-subscribers-libc

Author: Uzair Nawaz (uzairnawaz)

Changes

Addressed TODO to template the StringConverter pop functions to have a single implementation (combine popUTF8 and popUTF32 into a single templated pop function)

Full diff: https://github.com/llvm/llvm-project/pull/152204.diff

5 Files Affected:

(modified) libc/src/__support/wchar/character_converter.h (+7)
(modified) libc/src/__support/wchar/mbsnrtowcs.h (+2-2)
(modified) libc/src/__support/wchar/string_converter.h (+10-31)
(modified) libc/src/__support/wchar/wcsnrtombs.h (+2-2)
(modified) libc/test/src/__support/wchar/string_converter_test.cpp (+55-55)

diff --git a/libc/src/__support/wchar/character_converter.h b/libc/src/__support/wchar/character_converter.h
index b6d918f2d2edc..edb53214c9be9 100644
--- a/libc/src/__support/wchar/character_converter.h
+++ b/libc/src/__support/wchar/character_converter.h
@@ -12,6 +12,7 @@
 #include "hdr/types/char32_t.h"
 #include "hdr/types/char8_t.h"
 #include "hdr/types/size_t.h"
+#include "src/__support/CPP/type_traits.h"
 #include "src/__support/common.h"
 #include "src/__support/error_or.h"
 #include "src/__support/wchar/mbstate.h"
@@ -39,6 +40,12 @@ class CharacterConverter {
 
   ErrorOr<char8_t> pop_utf8();
   ErrorOr<char32_t> pop_utf32();
+  template <typename CharType> ErrorOr<CharType> pop() {
+    if constexpr (cpp::is_same_v<CharType, char8_t>)
+      return pop_utf8();
+    else
+      return pop_utf32();
+  }
 };
 
 } // namespace internal
diff --git a/libc/src/__support/wchar/mbsnrtowcs.h b/libc/src/__support/wchar/mbsnrtowcs.h
index 54e315210d95c..6abb836635772 100644
--- a/libc/src/__support/wchar/mbsnrtowcs.h
+++ b/libc/src/__support/wchar/mbsnrtowcs.h
@@ -36,7 +36,7 @@ LIBC_INLINE static ErrorOr<size_t> mbsnrtowcs(wchar_t *__restrict dst,
   StringConverter<char8_t> str_conv(reinterpret_cast<const char8_t *>(*src), ps,
                                     len, nmc);
   size_t dst_idx = 0;
-  ErrorOr<char32_t> converted = str_conv.popUTF32();
+  ErrorOr<char32_t> converted = str_conv.pop<char32_t>();
   while (converted.has_value()) {
     if (dst != nullptr)
       dst[dst_idx] = converted.value();
@@ -47,7 +47,7 @@ LIBC_INLINE static ErrorOr<size_t> mbsnrtowcs(wchar_t *__restrict dst,
       return dst_idx;
     }
     dst_idx++;
-    converted = str_conv.popUTF32();
+    converted = str_conv.pop<char32_t>();
   }
 
   if (converted.error() == -1) { // if we hit conversion limit
diff --git a/libc/src/__support/wchar/string_converter.h b/libc/src/__support/wchar/string_converter.h
index 869ebdfc8b390..d1559ef11b673 100644
--- a/libc/src/__support/wchar/string_converter.h
+++ b/libc/src/__support/wchar/string_converter.h
@@ -12,6 +12,7 @@
 #include "hdr/types/char32_t.h"
 #include "hdr/types/char8_t.h"
 #include "hdr/types/size_t.h"
+#include "src/__support/CPP/type_traits.h"
 #include "src/__support/common.h"
 #include "src/__support/error_or.h"
 #include "src/__support/wchar/character_converter.h"
@@ -53,9 +54,7 @@ template <typename T> class StringConverter {
                   size_t srclen = SIZE_MAX)
       : cr(ps), src(s), src_len(srclen), src_idx(0), num_to_write(dstlen) {}
 
-  // TODO: following functions are almost identical
-  // look into templating CharacterConverter pop functions
-  ErrorOr<char32_t> popUTF32() {
+  template <typename CharType> ErrorOr<CharType> pop() {
     if (num_to_write == 0)
       return Error(-1);
 
@@ -64,33 +63,13 @@ template <typename T> class StringConverter {
       if (!src_elements_read.has_value())
         return Error(src_elements_read.error());
 
-      if (cr.sizeAsUTF32() > num_to_write) {
-        cr.clear();
-        return Error(-1);
-      }
-
-      src_idx += src_elements_read.value();
-    }
-
-    auto out = cr.pop_utf32();
-    if (out.has_value() && out.value() == L'\0')
-      src_len = src_idx;
-
-    num_to_write--;
-
-    return out;
-  }
-
-  ErrorOr<char8_t> popUTF8() {
-    if (num_to_write == 0)
-      return Error(-1);
-
-    if (cr.isEmpty() || src_idx == 0) {
-      auto src_elements_read = pushFullCharacter();
-      if (!src_elements_read.has_value())
-        return Error(src_elements_read.error());
+      size_t size;
+      if constexpr (cpp::is_same_v<CharType, char8_t>)
+        size = cr.sizeAsUTF8();
+      else
+        size = cr.sizeAsUTF32();
 
-      if (cr.sizeAsUTF8() > num_to_write) {
+      if (size > num_to_write) {
         cr.clear();
         return Error(-1);
       }
@@ -98,8 +77,8 @@ template <typename T> class StringConverter {
       src_idx += src_elements_read.value();
     }
 
-    auto out = cr.pop_utf8();
-    if (out.has_value() && out.value() == '\0')
+    ErrorOr<CharType> out = cr.pop<CharType>();
+    if (out.has_value() && out.value() == 0) // popped a null terminator (not an error)
       src_len = src_idx;
 
     num_to_write--;
diff --git a/libc/src/__support/wchar/wcsnrtombs.h b/libc/src/__support/wchar/wcsnrtombs.h
index 433097c937a42..f593a0e0dba87 100644
--- a/libc/src/__support/wchar/wcsnrtombs.h
+++ b/libc/src/__support/wchar/wcsnrtombs.h
@@ -39,7 +39,7 @@ wcsnrtombs(char *__restrict dest, const wchar_t **__restrict ptr_to_src,
       reinterpret_cast<const char32_t *>(*ptr_to_src), ps, dest_len,
       num_src_widechars);
   size_t dst_idx = 0;
-  ErrorOr<char8_t> converted = str_conv.popUTF8();
+  ErrorOr<char8_t> converted = str_conv.pop<char8_t>();
   while (converted.has_value()) {
     if (dest != nullptr)
       dest[dst_idx] = converted.value();
@@ -51,7 +51,7 @@ wcsnrtombs(char *__restrict dest, const wchar_t **__restrict ptr_to_src,
     }
 
     dst_idx++;
-    converted = str_conv.popUTF8();
+    converted = str_conv.pop<char8_t>();
   }
 
   if (dest != nullptr)
diff --git a/libc/test/src/__support/wchar/string_converter_test.cpp b/libc/test/src/__support/wchar/string_converter_test.cpp
index d514df9317852..e45358ddc68c4 100644
--- a/libc/test/src/__support/wchar/string_converter_test.cpp
+++ b/libc/test/src/__support/wchar/string_converter_test.cpp
@@ -34,32 +34,32 @@ TEST(LlvmLibcStringConverterTest, UTF8To32) {
   LIBC_NAMESPACE::internal::StringConverter<char8_t> sc(
       reinterpret_cast<const char8_t *>(src), &state, SIZE_MAX);
 
-  auto res = sc.popUTF32();
+  auto res = sc.pop<char32_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0x1f921);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 4);
 
-  res = sc.popUTF32();
+  res = sc.pop<char32_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0x2211);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 7);
 
-  res = sc.popUTF32();
+  res = sc.pop<char32_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xff);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 9);
 
-  res = sc.popUTF32();
+  res = sc.pop<char32_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0x41);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 10);
 
-  res = sc.popUTF32();
+  res = sc.pop<char32_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 11);
 
-  res = sc.popUTF32();
+  res = sc.pop<char32_t>();
   ASSERT_FALSE(res.has_value());
   ASSERT_EQ(res.error(), -1);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 11);
@@ -75,66 +75,66 @@ TEST(LlvmLibcStringConverterTest, UTF32To8) {
   LIBC_NAMESPACE::internal::StringConverter<char32_t> sc(
       reinterpret_cast<const char32_t *>(src), &state, SIZE_MAX);
 
-  auto res = sc.popUTF8();
+  auto res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xF0);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
 
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0x9F);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
 
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xA4);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
 
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xA1);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
 
   // end of clown emoji, sigma symbol begins
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xE2);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 2);
 
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0x88);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 2);
 
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0x91);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 2);
 
   // end of sigma symbol, y with diaeresis begins
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xC3);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 3);
 
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xBF);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 3);
 
   // end of y with diaeresis, letter A begins
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0x41);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 4);
 
   // null byte
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 5);
 
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_FALSE(res.has_value());
   ASSERT_EQ(res.error(), -1);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 5);
@@ -148,28 +148,28 @@ TEST(LlvmLibcStringConverterTest, UTF32To8PartialRead) {
   LIBC_NAMESPACE::internal::StringConverter<char32_t> sc(
       reinterpret_cast<const char32_t *>(src), &state, SIZE_MAX, 1);
 
-  auto res = sc.popUTF8();
+  auto res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xF0);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
 
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0x9F);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
 
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xA4);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
 
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xA1);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
 
   // can only read 1 character from source string, so error on next pop
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_FALSE(res.has_value());
   ASSERT_EQ(res.error(), -1);
 }
@@ -181,12 +181,12 @@ TEST(LlvmLibcStringConverterTest, UTF8To32PartialRead) {
   LIBC_NAMESPACE::internal::StringConverter<char8_t> sc(
       reinterpret_cast<const char8_t *>(src), &state, SIZE_MAX, 5);
 
-  auto res = sc.popUTF32();
+  auto res = sc.pop<char32_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0x1f921);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 4);
 
-  res = sc.popUTF32();
+  res = sc.pop<char32_t>();
   ASSERT_FALSE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.error()), -1);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 5);
@@ -200,27 +200,27 @@ TEST(LlvmLibcStringConverterTest, UTF32To8ErrorHandling) {
   LIBC_NAMESPACE::internal::StringConverter<char32_t> sc(
       reinterpret_cast<const char32_t *>(src), &state, SIZE_MAX);
 
-  auto res = sc.popUTF8();
+  auto res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xF0);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
 
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0x9F);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
 
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xA4);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
 
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xA1);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
 
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_FALSE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.error()), EILSEQ);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
@@ -234,12 +234,12 @@ TEST(LlvmLibcStringConverterTest, UTF8To32ErrorHandling) {
   LIBC_NAMESPACE::internal::StringConverter<char8_t> sc(
       reinterpret_cast<const char8_t *>(src), &state, SIZE_MAX);
 
-  auto res = sc.popUTF32();
+  auto res = sc.pop<char32_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0x1f921);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 4);
 
-  res = sc.popUTF32();
+  res = sc.pop<char32_t>();
   ASSERT_FALSE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.error()), EILSEQ);
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 4);
@@ -257,12 +257,12 @@ TEST(LlvmLibcStringConverterTest, InvalidCharacterOutsideBounds) {
   LIBC_NAMESPACE::internal::StringConverter<char8_t> sc1(
       reinterpret_cast<const char8_t *>(src1), &ps1, 1);
 
-  auto res1 = sc1.popUTF32();
+  auto res1 = sc1.pop<char32_t>();
   ASSERT_TRUE(res1.has_value());
   ASSERT_EQ(static_cast<int>(res1.value()), 0x1f921);
   ASSERT_EQ(static_cast<int>(sc1.getSourceIndex()), 4);
 
-  res1 = sc1.popUTF32();
+  res1 = sc1.pop<char32_t>();
   ASSERT_FALSE(res1.has_value());
   // no space to write error NOT invalid character error (EILSEQ)
   ASSERT_EQ(static_cast<int>(res1.error()), -1);
@@ -275,27 +275,27 @@ TEST(LlvmLibcStringConverterTest, InvalidCharacterOutsideBounds) {
   LIBC_NAMESPACE::internal::StringConverter<char32_t> sc2(
       reinterpret_cast<const char32_t *>(src2), &ps2, 4);
 
-  auto res2 = sc2.popUTF8();
+  auto res2 = sc2.pop<char8_t>();
   ASSERT_TRUE(res2.has_value());
   ASSERT_EQ(static_cast<int>(res2.value()), 0xF0);
   ASSERT_EQ(static_cast<int>(sc2.getSourceIndex()), 1);
 
-  res2 = sc2.popUTF8();
+  res2 = sc2.pop<char8_t>();
   ASSERT_TRUE(res2.has_value());
   ASSERT_EQ(static_cast<int>(res2.value()), 0x9F);
   ASSERT_EQ(static_cast<int>(sc2.getSourceIndex()), 1);
 
-  res2 = sc2.popUTF8();
+  res2 = sc2.pop<char8_t>();
   ASSERT_TRUE(res2.has_value());
   ASSERT_EQ(static_cast<int>(res2.value()), 0xA4);
   ASSERT_EQ(static_cast<int>(sc2.getSourceIndex()), 1);
 
-  res2 = sc2.popUTF8();
+  res2 = sc2.pop<char8_t>();
   ASSERT_TRUE(res2.has_value());
   ASSERT_EQ(static_cast<int>(res2.value()), 0xA1);
   ASSERT_EQ(static_cast<int>(sc2.getSourceIndex()), 1);
 
-  res2 = sc2.popUTF8();
+  res2 = sc2.pop<char8_t>();
   ASSERT_FALSE(res2.has_value());
   // no space to write error NOT invalid character error (EILSEQ)
   ASSERT_EQ(static_cast<int>(res2.error()), -1);
@@ -315,22 +315,22 @@ TEST(LlvmLibcStringConverterTest, MultipleStringConverters32To8) {
   LIBC_NAMESPACE::internal::StringConverter<char32_t> sc1(
       reinterpret_cast<const char32_t *>(src), &state, SIZE_MAX, 1);
 
-  auto res = sc1.popUTF8();
+  auto res = sc1.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xF0);
   ASSERT_EQ(static_cast<int>(sc1.getSourceIndex()), 1);
 
-  res = sc1.popUTF8();
+  res = sc1.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0x9F);
   ASSERT_EQ(static_cast<int>(sc1.getSourceIndex()), 1);
 
-  res = sc1.popUTF8();
+  res = sc1.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xA4);
   ASSERT_EQ(static_cast<int>(sc1.getSourceIndex()), 1);
 
-  res = sc1.popUTF8();
+  res = sc1.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xA1);
   ASSERT_EQ(static_cast<int>(sc1.getSourceIndex()), 1);
@@ -340,12 +340,12 @@ TEST(LlvmLibcStringConverterTest, MultipleStringConverters32To8) {
       reinterpret_cast<const char32_t *>(src) + sc1.getSourceIndex(), &state,
       SIZE_MAX, 1);
 
-  res = sc2.popUTF8();
+  res = sc2.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xC3);
   ASSERT_EQ(static_cast<int>(sc2.getSourceIndex()), 1);
 
-  res = sc2.popUTF8();
+  res = sc2.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0xBF);
   ASSERT_EQ(static_cast<int>(sc2.getSourceIndex()), 1);
@@ -357,7 +357,7 @@ TEST(LlvmLibcStringConverterTest, MultipleStringConverters8To32) {
   LIBC_NAMESPACE::internal::StringConverter<char8_t> sc1(
       reinterpret_cast<const char8_t *>(src), &state, SIZE_MAX, 2);
 
-  auto res = sc1.popUTF32();
+  auto res = sc1.pop<char32_t>();
   ASSERT_FALSE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.error()), -1);
   ASSERT_EQ(static_cast<int>(sc1.getSourceIndex()), 2);
@@ -367,12 +367,12 @@ TEST(LlvmLibcStringConverterTest, MultipleStringConverters8To32) {
       reinterpret_cast<const char8_t *>(src) + sc1.getSourceIndex(), &state,
       SIZE_MAX, 3);
 
-  res = sc2.popUTF32();
+  res = sc2.pop<char32_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0x1f921);
   ASSERT_EQ(static_cast<int>(sc2.getSourceIndex()), 2);
 
-  res = sc2.popUTF32();
+  res = sc2.pop<char32_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(res.value()), 0);
   ASSERT_EQ(static_cast<int>(sc2.getSourceIndex()), 3);
@@ -384,11 +384,11 @@ TEST(LlvmLibcStringConverterTest, DestLimitUTF8To32) {
   LIBC_NAMESPACE::internal::StringConverter<char8_t> sc(
       reinterpret_cast<const char8_t *>(src), &state, 1);
 
-  auto res = sc.popUTF32();
+  auto res = sc.pop<char32_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 4);
 
-  res = sc.popUTF32(); // no space to pop this into
+  res = sc.pop<char32_t>(); // no space to pop this into
   ASSERT_FALSE(res.has_value());
 }
 
@@ -399,23 +399,23 @@ TEST(LlvmLibcStringConverterTest, DestLimitUTF32To8) {
   LIBC_NAMESPACE::internal::StringConverter<char32_t> sc(
       reinterpret_cast<const char32_t *>(src), &state, 5);
 
-  auto res = sc.popUTF8();
+  auto res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
 
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
 
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
 
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_TRUE(res.has_value());
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
 
-  res = sc.popUTF8();
+  res = sc.pop<char8_t>();
   ASSERT_FALSE(res.has_value());
   ASSERT_EQ(static_cast<int>(sc.getSourceIndex()), 1);
 }

github-actions · 2025-08-05T20:53:26Z

✅ With the latest revision this PR passed the C/C++ code formatter.

libc/src/__support/wchar/string_converter.h

libc/src/__support/wchar/character_converter.h

michaelrj-google

LGTM, thanks for the cleanup!

llvm-ci · 2025-08-06T22:49:31Z

LLVM Buildbot has detected a new failure on builder libc-x86_64-debian-gcc-fullbuild-dbg running on libc-x86_64-debian-fullbuild while building libc at step 4 "annotate".

Full details are available at: https://lab.llvm.org/buildbot/#/builders/131/builds/27754

Here is the relevant piece of the build log for the reference

Step 4 (annotate) failure: 'python ../llvm-zorg/zorg/buildbot/builders/annotated/libc-linux.py ...' (failure)
...
-- For x86_64 builtins preferring x86_64/floatundisf.S to floatundisf.c
-- For x86_64 builtins preferring x86_64/floatdixf.c to floatdixf.c
-- For x86_64 builtins preferring x86_64/floatundixf.S to floatundixf.c
-- check-runtimes does nothing.
-- Configuring done
-- Generating done
-- Build files have been written to: /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/build
@@@BUILD_STEP build libc@@@
Running: ninja libc
[1/11] Building CXX object libc/src/wchar/CMakeFiles/libc.src.wchar.mbsinit.dir/mbsinit.cpp.o
FAILED: libc/src/wchar/CMakeFiles/libc.src.wchar.mbsinit.dir/mbsinit.cpp.o 
/usr/bin/g++ -DLIBC_NAMESPACE=__llvm_libc_20_0_0_git -D_DEBUG -I/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc -isystem /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/build/libc/include -fvisibility-inlines-hidden -Werror=date-time -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -Wimplicit-fallthrough -Wno-nonnull -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment -Wno-misleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -g -DLIBC_QSORT_IMPL=LIBC_QSORT_QUICK_SORT -DLIBC_ADD_NULL_CHECKS -DLIBC_ERRNO_MODE=LIBC_ERRNO_MODE_DEFAULT -DLIBC_THREAD_MODE=LIBC_THREAD_MODE_PLATFORM -fpie -ffreestanding -DLIBC_FULL_BUILD -isystem/usr/lib/gcc/x86_64-linux-gnu/12//include -nostdinc -idirafter/usr/include -fno-builtin -fno-exceptions -fno-lax-vector-conversions -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-rtti -ftrivial-auto-var-init=pattern -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Wall -Wextra -Werror -Wconversion -Wno-sign-conversion -Wdeprecated -fext-numeric-literals -Wno-pedantic -Wimplicit-fallthrough -Wwrite-strings -Wextra-semi -DLIBC_COPT_PUBLIC_PACKAGING -std=gnu++17 -MD -MT libc/src/wchar/CMakeFiles/libc.src.wchar.mbsinit.dir/mbsinit.cpp.o -MF libc/src/wchar/CMakeFiles/libc.src.wchar.mbsinit.dir/mbsinit.cpp.o.d -o libc/src/wchar/CMakeFiles/libc.src.wchar.mbsinit.dir/mbsinit.cpp.o -c /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/wchar/mbsinit.cpp
In file included from /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/wchar/mbsinit.cpp:14:
/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.h:36:13: error: explicit specialization in non-namespace scope ‘class __llvm_libc_20_0_0_git::internal::CharacterConverter’
   36 |   template <> size_t sizeAs<char8_t>() { return state->total_bytes; }
      |             ^
/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.h:36:22: error: template-id ‘sizeAs<char8_t>’ in declaration of primary template
   36 |   template <> size_t sizeAs<char8_t>() { return state->total_bytes; }
      |                      ^~~~~~~~~~~~~~~
/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.h:37:13: error: explicit specialization in non-namespace scope ‘class __llvm_libc_20_0_0_git::internal::CharacterConverter’
   37 |   template <> size_t sizeAs<char32_t>() { return 1; }
      |             ^
/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.h:37:22: error: template-id ‘sizeAs<char32_t>’ in declaration of primary template
   37 |   template <> size_t sizeAs<char32_t>() { return 1; }
      |                      ^~~~~~~~~~~~~~~~
/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.h:37:22: error: ‘size_t __llvm_libc_20_0_0_git::internal::CharacterConverter::sizeAs()’ cannot be overloaded with ‘size_t __llvm_libc_20_0_0_git::internal::CharacterConverter::sizeAs()’
/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.h:36:22: note: previous declaration ‘size_t __llvm_libc_20_0_0_git::internal::CharacterConverter::sizeAs()’
   36 |   template <> size_t sizeAs<char8_t>() { return state->total_bytes; }
      |                      ^~~~~~~~~~~~~~~
/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.h:45:13: error: explicit specialization in non-namespace scope ‘class __llvm_libc_20_0_0_git::internal::CharacterConverter’
   45 |   template <> ErrorOr<char8_t> pop() { return pop_utf8(); }
      |             ^
/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.h:46:13: error: explicit specialization in non-namespace scope ‘class __llvm_libc_20_0_0_git::internal::CharacterConverter’
   46 |   template <> ErrorOr<char32_t> pop() { return pop_utf32(); }
      |             ^
/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.h:46:33: error: ‘__llvm_libc_20_0_0_git::ErrorOr<char32_t> __llvm_libc_20_0_0_git::internal::CharacterConverter::pop()’ cannot be overloaded with ‘__llvm_libc_20_0_0_git::ErrorOr<unsigned char> __llvm_libc_20_0_0_git::internal::CharacterConverter::pop()’
   46 |   template <> ErrorOr<char32_t> pop() { return pop_utf32(); }
      |                                 ^~~
/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.h:45:32: note: previous declaration ‘__llvm_libc_20_0_0_git::ErrorOr<unsigned char> __llvm_libc_20_0_0_git::internal::CharacterConverter::pop()’
   45 |   template <> ErrorOr<char8_t> pop() { return pop_utf8(); }
      |                                ^~~
[2/11] Building CXX object libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.mbrtowc.dir/mbrtowc.cpp.o
FAILED: libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.mbrtowc.dir/mbrtowc.cpp.o 
/usr/bin/g++ -DLIBC_NAMESPACE=__llvm_libc_20_0_0_git -D_DEBUG -I/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc -isystem /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/build/libc/include -fvisibility-inlines-hidden -Werror=date-time -Wall -Wextra -Wno-unused-parameter -Wwrite-strings -Wcast-qual -Wno-missing-field-initializers -Wimplicit-fallthrough -Wno-nonnull -Wno-class-memaccess -Wno-redundant-move -Wno-pessimizing-move -Wno-noexcept-type -Wdelete-non-virtual-dtor -Wsuggest-override -Wno-comment -Wno-misleading-indentation -Wctad-maybe-unsupported -fdiagnostics-color -g -DLIBC_QSORT_IMPL=LIBC_QSORT_QUICK_SORT -DLIBC_ADD_NULL_CHECKS -DLIBC_ERRNO_MODE=LIBC_ERRNO_MODE_DEFAULT -DLIBC_THREAD_MODE=LIBC_THREAD_MODE_PLATFORM -fpie -ffreestanding -DLIBC_FULL_BUILD -isystem/usr/lib/gcc/x86_64-linux-gnu/12//include -nostdinc -idirafter/usr/include -fno-builtin -fno-exceptions -fno-lax-vector-conversions -fno-unwind-tables -fno-asynchronous-unwind-tables -fno-rtti -ftrivial-auto-var-init=pattern -fno-omit-frame-pointer -mno-omit-leaf-frame-pointer -Wall -Wextra -Werror -Wconversion -Wno-sign-conversion -Wdeprecated -fext-numeric-literals -Wno-pedantic -Wimplicit-fallthrough -Wwrite-strings -Wextra-semi -std=gnu++17 -MD -MT libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.mbrtowc.dir/mbrtowc.cpp.o -MF libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.mbrtowc.dir/mbrtowc.cpp.o.d -o libc/src/__support/wchar/CMakeFiles/libc.src.__support.wchar.mbrtowc.dir/mbrtowc.cpp.o -c /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/mbrtowc.cpp
In file included from /home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/mbrtowc.cpp:16:
/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.h:36:13: error: explicit specialization in non-namespace scope ‘class __llvm_libc_20_0_0_git::internal::CharacterConverter’
   36 |   template <> size_t sizeAs<char8_t>() { return state->total_bytes; }
      |             ^
/home/llvm-libc-buildbot/buildbot-worker/libc-x86_64-debian-fullbuild/libc-x86_64-debian-gcc-fullbuild-dbg/llvm-project/libc/src/__support/wchar/character_converter.h:36:22: error: template-id ‘sizeAs<char8_t>’ in declaration of primary template

Follow-up: [libc] Move CharacterConverter template specialization to cpp file (#152405). Fixes build errors caused by #152204.

uzairnawaz added 2 commits August 5, 2025 20:43

template characterconverter and stringconverter pop functions; update…

6a2a111

… tests to use templated functions

fix incorrect null terminator check

194b8e0

uzairnawaz requested a review from michaelrj-google August 5, 2025 20:51

llvmbot added the libc label Aug 5, 2025

uzairnawaz requested a review from sribee8 August 5, 2025 20:51

format

fdb8401

michaelrj-google reviewed Aug 5, 2025

View reviewed changes

libc/src/__support/wchar/string_converter.h Outdated Show resolved Hide resolved

template CharacterConverter size method

e9ba0ba

michaelrj-google reviewed Aug 6, 2025

View reviewed changes

libc/src/__support/wchar/character_converter.h Outdated Show resolved Hide resolved

libc/src/__support/wchar/character_converter.h Outdated Show resolved Hide resolved

template specialization

efad91f

michaelrj-google approved these changes Aug 6, 2025

View reviewed changes

uzairnawaz merged commit e83abd7 into llvm:main Aug 6, 2025
19 checks passed

uzairnawaz mentioned this pull request Aug 6, 2025

[libc] Move CharacterConverter template specialization to cpp file #152405

Merged

uzairnawaz added a commit that referenced this pull request Aug 6, 2025

[libc] Move CharacterConverter template specialization to cpp file (#…

c4846d2

…152405) Fixes build errors caused by #152204

Provide feedback

Saved searches

Use saved searches to filter your results more quickly

Uh oh!

[libc] Template StringConverter pop function to avoid duplicate code #152204

[libc] Template StringConverter pop function to avoid duplicate code #152204

Uh oh!

uzairnawaz commented Aug 5, 2025

Uh oh!

llvmbot commented Aug 5, 2025

Uh oh!

github-actions bot commented Aug 5, 2025 •

edited

Loading

Uh oh!

Uh oh!

Uh oh!

Uh oh!

michaelrj-google left a comment

Uh oh!

Uh oh!

llvm-ci commented Aug 6, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

[libc] Template StringConverter pop function to avoid duplicate code #152204

[libc] Template StringConverter pop function to avoid duplicate code #152204

Uh oh!

Conversation

uzairnawaz commented Aug 5, 2025

Uh oh!

llvmbot commented Aug 5, 2025

Uh oh!

github-actions bot commented Aug 5, 2025 • edited Loading Uh oh! There was an error while loading. Please reload this page.

Uh oh!

Uh oh!

Uh oh!

Uh oh!

Uh oh!

michaelrj-google left a comment

Choose a reason for hiding this comment

Uh oh!

Uh oh!

llvm-ci commented Aug 6, 2025

Uh oh!

Reviewers

Assignees

Labels

Projects

Milestone

Development

Uh oh!

4 participants

github-actions bot commented Aug 5, 2025 •

edited

Loading