Skip to content

Conversation

enh-google
Copy link
Contributor

These functions turned out to have the same bug that was in wcstok() (fixed by 4fc9801), so add the missing tests and fix the code in a way that matches wcstok().

Also fix incorrect test expectations in existing tests.

Also update the BUILD.bazel files to actually build the strsep() test.

These functions turned out to have the same bug that was in wcstok()
(fixed by 4fc9801), so add the missing tests and fix the code in a way
that matches wcstok().

Also fix incorrect test expectations in existing tests.

Also update the BUILD.bazel files to actually build the strsep() test.
@llvmbot llvmbot added libc bazel "Peripheral" support tier build system: utils/bazel labels Aug 19, 2025
@llvmbot
Copy link
Member

llvmbot commented Aug 19, 2025

@llvm/pr-subscribers-libc

Author: None (enh-google)

Changes

These functions turned out to have the same bug that was in wcstok() (fixed by 4fc9801), so add the missing tests and fix the code in a way that matches wcstok().

Also fix incorrect test expectations in existing tests.

Also update the BUILD.bazel files to actually build the strsep() test.


Full diff: https://github.com/llvm/llvm-project/pull/154370.diff

6 Files Affected:

  • (modified) libc/src/string/string_utils.h (+21-18)
  • (modified) libc/test/src/string/strsep_test.cpp (+12-4)
  • (modified) libc/test/src/string/strtok_r_test.cpp (+9)
  • (modified) libc/test/src/string/strtok_test.cpp (+7)
  • (modified) utils/bazel/llvm-project-overlay/libc/BUILD.bazel (+13)
  • (modified) utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel (+9)
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 80e5783c7890b..a47cd631e9455 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -184,33 +184,36 @@ LIBC_INLINE size_t complementary_span(const char *src, const char *segment) {
 template <bool SkipDelim = true>
 LIBC_INLINE char *string_token(char *__restrict src,
                                const char *__restrict delimiter_string,
-                               char **__restrict saveptr) {
-  // Return nullptr immediately if both src AND saveptr are nullptr
-  if (LIBC_UNLIKELY(src == nullptr && ((src = *saveptr) == nullptr)))
+                               char **__restrict context) {
+  // Return nullptr immediately if both src AND context are nullptr
+  if (LIBC_UNLIKELY(src == nullptr && ((src = *context) == nullptr)))
     return nullptr;
 
   static_assert(CHAR_BIT == 8, "bitset of 256 assumes char is 8 bits");
-  cpp::bitset<256> delimiter_set;
+  cpp::bitset<256> delims;
   for (; *delimiter_string != '\0'; ++delimiter_string)
-    delimiter_set.set(static_cast<size_t>(*delimiter_string));
+    delims.set(static_cast<size_t>(*delimiter_string));
 
+  char *tok_start = src;
   if constexpr (SkipDelim)
-    for (; *src != '\0' && delimiter_set.test(static_cast<size_t>(*src)); ++src)
-      ;
-  if (*src == '\0') {
-    *saveptr = src;
+    while (*tok_start != '\0' && delims.test(static_cast<size_t>(*tok_start)))
+      ++tok_start;
+  if (*tok_start == '\0' && SkipDelim) {
+    *context = nullptr;
     return nullptr;
   }
-  char *token = src;
-  for (; *src != '\0'; ++src) {
-    if (delimiter_set.test(static_cast<size_t>(*src))) {
-      *src = '\0';
-      ++src;
-      break;
-    }
+
+  char *tok_end = tok_start;
+  while (*tok_end != '\0' && !delims.test(static_cast<size_t>(*tok_end)))
+    ++tok_end;
+
+  if (*tok_end == '\0') {
+    *context = nullptr;
+  } else {
+    *tok_end = '\0';
+    *context = tok_end + 1;
   }
-  *saveptr = src;
-  return token;
+  return tok_start;
 }
 
 LIBC_INLINE size_t strlcpy(char *__restrict dst, const char *__restrict src,
diff --git a/libc/test/src/string/strsep_test.cpp b/libc/test/src/string/strsep_test.cpp
index 06318dea4cb68..e2a5d52bbeddb 100644
--- a/libc/test/src/string/strsep_test.cpp
+++ b/libc/test/src/string/strsep_test.cpp
@@ -18,21 +18,21 @@ TEST(LlvmLibcStrsepTest, NullSrc) {
 TEST(LlvmLibcStrsepTest, NoTokenFound) {
   {
     char s[] = "";
-    char *string = s, *orig = s;
+    char *string = s;
     EXPECT_STREQ(LIBC_NAMESPACE::strsep(&string, ""), nullptr);
-    EXPECT_EQ(orig, string);
+    EXPECT_TRUE(string == nullptr);
   }
   {
     char s[] = "abcde";
     char *string = s, *orig = s;
     EXPECT_STREQ(LIBC_NAMESPACE::strsep(&string, ""), orig);
-    EXPECT_EQ(string, orig + 5);
+    EXPECT_TRUE(string == nullptr);
   }
   {
     char s[] = "abcde";
     char *string = s, *orig = s;
     EXPECT_STREQ(LIBC_NAMESPACE::strsep(&string, "fghijk"), orig);
-    EXPECT_EQ(string, orig + 5);
+    EXPECT_TRUE(string == nullptr);
   }
 }
 
@@ -53,6 +53,14 @@ TEST(LlvmLibcStrsepTest, DelimitersShouldNotBeIncludedInToken) {
   }
 }
 
+TEST(LlvmLibcStrsepTest, SubsequentSearchesReturnNull) {
+  char s[] = "a";
+  char *string = s;
+  ASSERT_STREQ(LIBC_NAMESPACE::strsep(&string, ":"), "a");
+  ASSERT_EQ(LIBC_NAMESPACE::strsep(&string, ":"), nullptr);
+  ASSERT_EQ(LIBC_NAMESPACE::strsep(&string, ":"), nullptr);
+}
+
 #if defined(LIBC_ADD_NULL_CHECKS)
 
 TEST(LlvmLibcStrsepTest, CrashOnNullPtr) {
diff --git a/libc/test/src/string/strtok_r_test.cpp b/libc/test/src/string/strtok_r_test.cpp
index fdc27bae23c97..a19390d0b0c2d 100644
--- a/libc/test/src/string/strtok_r_test.cpp
+++ b/libc/test/src/string/strtok_r_test.cpp
@@ -122,3 +122,12 @@ TEST(LlvmLibcStrTokReentrantTest, DelimitersShouldNotBeIncludedInToken) {
   token = LIBC_NAMESPACE::strtok_r(nullptr, "_:,_", &reserve);
   ASSERT_STREQ(token, nullptr);
 }
+
+TEST(LlvmLibcStrTokReentrantTest, SubsequentSearchesReturnNull) {
+  char src[] = "a";
+  char *reserve = nullptr;
+  char *token = LIBC_NAMESPACE::strtok_r(src, ":", &reserve);
+  ASSERT_STREQ(token, "a");
+  ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr);
+  ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr);
+}
diff --git a/libc/test/src/string/strtok_test.cpp b/libc/test/src/string/strtok_test.cpp
index b82065309e00c..76efeddda6f4a 100644
--- a/libc/test/src/string/strtok_test.cpp
+++ b/libc/test/src/string/strtok_test.cpp
@@ -76,3 +76,10 @@ TEST(LlvmLibcStrTokTest, DelimitersShouldNotBeIncludedInToken) {
   token = LIBC_NAMESPACE::strtok(nullptr, "_:,_");
   ASSERT_STREQ(token, nullptr);
 }
+
+TEST(LlvmLibcStrTokTest, SubsequentSearchesReturnNull) {
+  char src[] = "a";
+  ASSERT_STREQ("a", LIBC_NAMESPACE::strtok(src, ":"));
+  ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr);
+  ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr);
+}
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index b0ca27ae470a3..ad072cf1e492e 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -5137,6 +5137,19 @@ libc_function(
     ],
 )
 
+libc_function(
+    name = "strsep",
+    srcs = ["src/string/strsep.cpp"],
+    hdrs = ["src/string/strsep.h"],
+    deps = [
+        ":__support_common",
+        ":__support_macros_null_check",
+        ":hdr_signal_macros",
+        ":string_memory_utils",
+        ":string_utils",
+    ],
+)
+
 libc_function(
     name = "strstr",
     srcs = ["src/string/strstr.cpp"],
diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel
index 5f50b10516fb5..d90992417a721 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel
@@ -59,6 +59,15 @@ libc_test(
     ],
 )
 
+libc_test(
+    name = "strsep_test",
+    srcs = ["strsep_test.cpp"],
+    deps = [
+        "//libc:hdr_signal_macros",
+        "//libc:strsep",
+    ],
+)
+
 libc_test(
     name = "strstr_test",
     srcs = ["strstr_test.cpp"],

@lntue lntue requested a review from michaelrj-google August 19, 2025 16:07
@enh-google enh-google requested a review from lntue August 20, 2025 18:31
@enh-google enh-google merged commit 71dd4e1 into llvm:main Aug 21, 2025
23 checks passed
@enh-google enh-google deleted the strsep branch August 21, 2025 15:04
Sign up for free to join this conversation on GitHub. Already have an account? Sign in to comment
Labels
bazel "Peripheral" support tier build system: utils/bazel libc
Projects
None yet
Development

Successfully merging this pull request may close these issues.

3 participants