From 59207f0e108ab95a8987b06d0c453d02846d6eb2 Mon Sep 17 00:00:00 2001 From: enh-google Date: Fri, 25 Jul 2025 13:16:35 -0400 Subject: [PATCH 1/5] [libc] Stop duplicating wcschr(). --- libc/src/wchar/wchar_utils.h | 11 ++++------- libc/src/wchar/wcschr.cpp | 7 ++----- libc/src/wchar/wcspbrk.cpp | 10 +--------- libc/src/wchar/wcstok.cpp | 13 +++---------- 4 files changed, 10 insertions(+), 31 deletions(-) diff --git a/libc/src/wchar/wchar_utils.h b/libc/src/wchar/wchar_utils.h index e0218c7d89b1f..d69638fa71912 100644 --- a/libc/src/wchar/wchar_utils.h +++ b/libc/src/wchar/wchar_utils.h @@ -17,13 +17,10 @@ namespace LIBC_NAMESPACE_DECL { namespace internal { -// returns true if the character exists in the string -LIBC_INLINE static bool wcschr(wchar_t c, const wchar_t *str) { - for (int n = 0; str[n]; ++n) { - if (str[n] == c) - return true; - } - return false; +LIBC_INLINE static wchar_t *wcschr(const wchar_t *s, wchar_t c) { + for (; *s && *s != c; ++s) + ; + return (*s == c) ? s : nullptr; } // bool should be true for wcscspn for complimentary span diff --git a/libc/src/wchar/wcschr.cpp b/libc/src/wchar/wcschr.cpp index defc2ce3c3b72..e53ec9a4c95ba 100644 --- a/libc/src/wchar/wcschr.cpp +++ b/libc/src/wchar/wcschr.cpp @@ -11,15 +11,12 @@ #include "hdr/types/wchar_t.h" #include "src/__support/common.h" #include "src/__support/macros/config.h" +#include "wchar_utils.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(const wchar_t *, wcschr, (const wchar_t *s, wchar_t c)) { - for (; *s && *s != c; ++s) - ; - if (*s == c) - return s; - return nullptr; + return internal::wcschr(s, c); } } // namespace LIBC_NAMESPACE_DECL diff --git a/libc/src/wchar/wcspbrk.cpp b/libc/src/wchar/wcspbrk.cpp index a00ba9979a489..9faa34fe5fd1a 100644 --- a/libc/src/wchar/wcspbrk.cpp +++ b/libc/src/wchar/wcspbrk.cpp @@ -14,14 +14,6 @@ namespace LIBC_NAMESPACE_DECL { -bool contains_char(const wchar_t *str, wchar_t target) { - for (; *str != L'\0'; str++) - if (*str == target) - return true; - - return false; -} - LLVM_LIBC_FUNCTION(const wchar_t *, wcspbrk, (const wchar_t *src, const wchar_t *breakset)) { LIBC_CRASH_ON_NULLPTR(src); @@ -29,7 +21,7 @@ LLVM_LIBC_FUNCTION(const wchar_t *, wcspbrk, // currently O(n * m), can be further optimized to O(n + m) with a hash set for (int src_idx = 0; src[src_idx] != 0; src_idx++) - if (contains_char(breakset, src[src_idx])) + if (internal::wcschr(breakset, src[src_idx])) return src + src_idx; return nullptr; diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp index 291efc15e158a..32a500bf302c7 100644 --- a/libc/src/wchar/wcstok.cpp +++ b/libc/src/wchar/wcstok.cpp @@ -13,15 +13,8 @@ namespace LIBC_NAMESPACE_DECL { -bool isADelimeter(wchar_t wc, const wchar_t *delimiters) { - for (const wchar_t *delim_ptr = delimiters; *delim_ptr != L'\0'; ++delim_ptr) - if (wc == *delim_ptr) - return true; - return false; -} - LLVM_LIBC_FUNCTION(wchar_t *, wcstok, - (wchar_t *__restrict str, const wchar_t *__restrict delim, + (wchar_t *__restrict str, const wchar_t *__restrict delims, wchar_t **__restrict context)) { if (str == nullptr) { if (*context == nullptr) @@ -31,11 +24,11 @@ LLVM_LIBC_FUNCTION(wchar_t *, wcstok, } wchar_t *tok_start, *tok_end; - for (tok_start = str; *tok_start != L'\0' && isADelimeter(*tok_start, delim); + for (tok_start = str; *tok_start != L'\0' && wcschr(delims, *tok_start); ++tok_start) ; - for (tok_end = tok_start; *tok_end != L'\0' && !isADelimeter(*tok_end, delim); + for (tok_end = tok_start; *tok_end != L'\0' && !wcschr(delims, *tok_end); ++tok_end) ; From bd309b55530fab119f2a05df1dbade75f048b5f9 Mon Sep 17 00:00:00 2001 From: enh-google Date: Fri, 25 Jul 2025 13:35:01 -0400 Subject: [PATCH 2/5] [libc] add missing includes and LIBC_CRASH_ON_NULLPTR(). --- libc/src/wchar/wcschr.cpp | 2 ++ libc/src/wchar/wcspbrk.cpp | 1 + libc/src/wchar/wcstok.cpp | 1 + 3 files changed, 4 insertions(+) diff --git a/libc/src/wchar/wcschr.cpp b/libc/src/wchar/wcschr.cpp index e53ec9a4c95ba..8ac491602a65c 100644 --- a/libc/src/wchar/wcschr.cpp +++ b/libc/src/wchar/wcschr.cpp @@ -11,11 +11,13 @@ #include "hdr/types/wchar_t.h" #include "src/__support/common.h" #include "src/__support/macros/config.h" +#include "src/__support/macros/null_check.h" #include "wchar_utils.h" namespace LIBC_NAMESPACE_DECL { LLVM_LIBC_FUNCTION(const wchar_t *, wcschr, (const wchar_t *s, wchar_t c)) { + LIBC_CRASH_ON_NULLPTR(s); return internal::wcschr(s, c); } diff --git a/libc/src/wchar/wcspbrk.cpp b/libc/src/wchar/wcspbrk.cpp index 9faa34fe5fd1a..5d86a494bdf39 100644 --- a/libc/src/wchar/wcspbrk.cpp +++ b/libc/src/wchar/wcspbrk.cpp @@ -11,6 +11,7 @@ #include "hdr/types/wchar_t.h" #include "src/__support/common.h" #include "src/__support/macros/null_check.h" +#include "wchar_utils.h" namespace LIBC_NAMESPACE_DECL { diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp index 32a500bf302c7..472df33558e86 100644 --- a/libc/src/wchar/wcstok.cpp +++ b/libc/src/wchar/wcstok.cpp @@ -10,6 +10,7 @@ #include "hdr/types/wchar_t.h" #include "src/__support/common.h" +#include "wchar_utils.h" namespace LIBC_NAMESPACE_DECL { From b0492ab2b7f360ac9dd97140dc12d1904031d7ce Mon Sep 17 00:00:00 2001 From: enh-google Date: Fri, 25 Jul 2025 13:49:28 -0400 Subject: [PATCH 3/5] [libc] Fix internal::wcschr() return type. The exported function is using a similar white lie for convenience, so the internal function may as well rather than adding const casts. --- libc/src/wchar/wchar_utils.h | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libc/src/wchar/wchar_utils.h b/libc/src/wchar/wchar_utils.h index d69638fa71912..55a3cee99190f 100644 --- a/libc/src/wchar/wchar_utils.h +++ b/libc/src/wchar/wchar_utils.h @@ -17,7 +17,7 @@ namespace LIBC_NAMESPACE_DECL { namespace internal { -LIBC_INLINE static wchar_t *wcschr(const wchar_t *s, wchar_t c) { +LIBC_INLINE static const wchar_t *wcschr(const wchar_t *s, wchar_t c) { for (; *s && *s != c; ++s) ; return (*s == c) ? s : nullptr; @@ -29,7 +29,7 @@ LIBC_INLINE static size_t wcsspn(const wchar_t *s1, const wchar_t *s2, bool not_match_set) { size_t i = 0; for (; s1[i]; ++i) { - bool in_set = wcschr(s1[i], s2); + bool in_set = internal::wcschr(s2, s1[i]); if (in_set == not_match_set) return i; } From 3adedaeee4f510390c7256945c46446c60950f61 Mon Sep 17 00:00:00 2001 From: enh-google Date: Fri, 25 Jul 2025 14:22:11 -0400 Subject: [PATCH 4/5] [libc] wcstok(): explicitly say internal::wcschr(). --- libc/src/wchar/wcstok.cpp | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp index 472df33558e86..bf680dbf877ee 100644 --- a/libc/src/wchar/wcstok.cpp +++ b/libc/src/wchar/wcstok.cpp @@ -25,11 +25,11 @@ LLVM_LIBC_FUNCTION(wchar_t *, wcstok, } wchar_t *tok_start, *tok_end; - for (tok_start = str; *tok_start != L'\0' && wcschr(delims, *tok_start); + for (tok_start = str; *tok_start != L'\0' && internal::wcschr(delims, *tok_start); ++tok_start) ; - for (tok_end = tok_start; *tok_end != L'\0' && !wcschr(delims, *tok_end); + for (tok_end = tok_start; *tok_end != L'\0' && !internal::wcschr(delims, *tok_end); ++tok_end) ; From f0698b3f55bbb1b10a1fd5875c570a0c4c5836fd Mon Sep 17 00:00:00 2001 From: enh-google Date: Fri, 25 Jul 2025 14:52:10 -0400 Subject: [PATCH 5/5] [libc] fix wcstok() formatting. --- libc/src/wchar/wcstok.cpp | 15 +++++++-------- 1 file changed, 7 insertions(+), 8 deletions(-) diff --git a/libc/src/wchar/wcstok.cpp b/libc/src/wchar/wcstok.cpp index bf680dbf877ee..ed4f0aad08ea5 100644 --- a/libc/src/wchar/wcstok.cpp +++ b/libc/src/wchar/wcstok.cpp @@ -24,14 +24,13 @@ LLVM_LIBC_FUNCTION(wchar_t *, wcstok, str = *context; } - wchar_t *tok_start, *tok_end; - for (tok_start = str; *tok_start != L'\0' && internal::wcschr(delims, *tok_start); - ++tok_start) - ; - - for (tok_end = tok_start; *tok_end != L'\0' && !internal::wcschr(delims, *tok_end); - ++tok_end) - ; + wchar_t *tok_start = str; + while (*tok_start != L'\0' && internal::wcschr(delims, *tok_start)) + ++tok_start; + + wchar_t *tok_end = tok_start; + while (*tok_end != L'\0' && !internal::wcschr(delims, *tok_end)) + ++tok_end; if (*tok_end != L'\0') { *tok_end = L'\0';