-
Notifications
You must be signed in to change notification settings - Fork 14.9k
[libc] fix strsep()/strtok()/strtok_r() "subsequent searches" behavior. #154370
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
These functions turned out to have the same bug that was in wcstok() (fixed by 4fc9801), so add the missing tests and fix the code in a way that matches wcstok(). Also fix incorrect test expectations in existing tests. Also update the BUILD.bazel files to actually build the strsep() test.
@llvm/pr-subscribers-libc Author: None (enh-google) ChangesThese functions turned out to have the same bug that was in wcstok() (fixed by 4fc9801), so add the missing tests and fix the code in a way that matches wcstok(). Also fix incorrect test expectations in existing tests. Also update the BUILD.bazel files to actually build the strsep() test. Full diff: https://github.com/llvm/llvm-project/pull/154370.diff 6 Files Affected:
diff --git a/libc/src/string/string_utils.h b/libc/src/string/string_utils.h
index 80e5783c7890b..a47cd631e9455 100644
--- a/libc/src/string/string_utils.h
+++ b/libc/src/string/string_utils.h
@@ -184,33 +184,36 @@ LIBC_INLINE size_t complementary_span(const char *src, const char *segment) {
template <bool SkipDelim = true>
LIBC_INLINE char *string_token(char *__restrict src,
const char *__restrict delimiter_string,
- char **__restrict saveptr) {
- // Return nullptr immediately if both src AND saveptr are nullptr
- if (LIBC_UNLIKELY(src == nullptr && ((src = *saveptr) == nullptr)))
+ char **__restrict context) {
+ // Return nullptr immediately if both src AND context are nullptr
+ if (LIBC_UNLIKELY(src == nullptr && ((src = *context) == nullptr)))
return nullptr;
static_assert(CHAR_BIT == 8, "bitset of 256 assumes char is 8 bits");
- cpp::bitset<256> delimiter_set;
+ cpp::bitset<256> delims;
for (; *delimiter_string != '\0'; ++delimiter_string)
- delimiter_set.set(static_cast<size_t>(*delimiter_string));
+ delims.set(static_cast<size_t>(*delimiter_string));
+ char *tok_start = src;
if constexpr (SkipDelim)
- for (; *src != '\0' && delimiter_set.test(static_cast<size_t>(*src)); ++src)
- ;
- if (*src == '\0') {
- *saveptr = src;
+ while (*tok_start != '\0' && delims.test(static_cast<size_t>(*tok_start)))
+ ++tok_start;
+ if (*tok_start == '\0' && SkipDelim) {
+ *context = nullptr;
return nullptr;
}
- char *token = src;
- for (; *src != '\0'; ++src) {
- if (delimiter_set.test(static_cast<size_t>(*src))) {
- *src = '\0';
- ++src;
- break;
- }
+
+ char *tok_end = tok_start;
+ while (*tok_end != '\0' && !delims.test(static_cast<size_t>(*tok_end)))
+ ++tok_end;
+
+ if (*tok_end == '\0') {
+ *context = nullptr;
+ } else {
+ *tok_end = '\0';
+ *context = tok_end + 1;
}
- *saveptr = src;
- return token;
+ return tok_start;
}
LIBC_INLINE size_t strlcpy(char *__restrict dst, const char *__restrict src,
diff --git a/libc/test/src/string/strsep_test.cpp b/libc/test/src/string/strsep_test.cpp
index 06318dea4cb68..e2a5d52bbeddb 100644
--- a/libc/test/src/string/strsep_test.cpp
+++ b/libc/test/src/string/strsep_test.cpp
@@ -18,21 +18,21 @@ TEST(LlvmLibcStrsepTest, NullSrc) {
TEST(LlvmLibcStrsepTest, NoTokenFound) {
{
char s[] = "";
- char *string = s, *orig = s;
+ char *string = s;
EXPECT_STREQ(LIBC_NAMESPACE::strsep(&string, ""), nullptr);
- EXPECT_EQ(orig, string);
+ EXPECT_TRUE(string == nullptr);
}
{
char s[] = "abcde";
char *string = s, *orig = s;
EXPECT_STREQ(LIBC_NAMESPACE::strsep(&string, ""), orig);
- EXPECT_EQ(string, orig + 5);
+ EXPECT_TRUE(string == nullptr);
}
{
char s[] = "abcde";
char *string = s, *orig = s;
EXPECT_STREQ(LIBC_NAMESPACE::strsep(&string, "fghijk"), orig);
- EXPECT_EQ(string, orig + 5);
+ EXPECT_TRUE(string == nullptr);
}
}
@@ -53,6 +53,14 @@ TEST(LlvmLibcStrsepTest, DelimitersShouldNotBeIncludedInToken) {
}
}
+TEST(LlvmLibcStrsepTest, SubsequentSearchesReturnNull) {
+ char s[] = "a";
+ char *string = s;
+ ASSERT_STREQ(LIBC_NAMESPACE::strsep(&string, ":"), "a");
+ ASSERT_EQ(LIBC_NAMESPACE::strsep(&string, ":"), nullptr);
+ ASSERT_EQ(LIBC_NAMESPACE::strsep(&string, ":"), nullptr);
+}
+
#if defined(LIBC_ADD_NULL_CHECKS)
TEST(LlvmLibcStrsepTest, CrashOnNullPtr) {
diff --git a/libc/test/src/string/strtok_r_test.cpp b/libc/test/src/string/strtok_r_test.cpp
index fdc27bae23c97..a19390d0b0c2d 100644
--- a/libc/test/src/string/strtok_r_test.cpp
+++ b/libc/test/src/string/strtok_r_test.cpp
@@ -122,3 +122,12 @@ TEST(LlvmLibcStrTokReentrantTest, DelimitersShouldNotBeIncludedInToken) {
token = LIBC_NAMESPACE::strtok_r(nullptr, "_:,_", &reserve);
ASSERT_STREQ(token, nullptr);
}
+
+TEST(LlvmLibcStrTokReentrantTest, SubsequentSearchesReturnNull) {
+ char src[] = "a";
+ char *reserve = nullptr;
+ char *token = LIBC_NAMESPACE::strtok_r(src, ":", &reserve);
+ ASSERT_STREQ(token, "a");
+ ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr);
+ ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr);
+}
diff --git a/libc/test/src/string/strtok_test.cpp b/libc/test/src/string/strtok_test.cpp
index b82065309e00c..76efeddda6f4a 100644
--- a/libc/test/src/string/strtok_test.cpp
+++ b/libc/test/src/string/strtok_test.cpp
@@ -76,3 +76,10 @@ TEST(LlvmLibcStrTokTest, DelimitersShouldNotBeIncludedInToken) {
token = LIBC_NAMESPACE::strtok(nullptr, "_:,_");
ASSERT_STREQ(token, nullptr);
}
+
+TEST(LlvmLibcStrTokTest, SubsequentSearchesReturnNull) {
+ char src[] = "a";
+ ASSERT_STREQ("a", LIBC_NAMESPACE::strtok(src, ":"));
+ ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr);
+ ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr);
+}
diff --git a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
index b0ca27ae470a3..ad072cf1e492e 100644
--- a/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/BUILD.bazel
@@ -5137,6 +5137,19 @@ libc_function(
],
)
+libc_function(
+ name = "strsep",
+ srcs = ["src/string/strsep.cpp"],
+ hdrs = ["src/string/strsep.h"],
+ deps = [
+ ":__support_common",
+ ":__support_macros_null_check",
+ ":hdr_signal_macros",
+ ":string_memory_utils",
+ ":string_utils",
+ ],
+)
+
libc_function(
name = "strstr",
srcs = ["src/string/strstr.cpp"],
diff --git a/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel b/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel
index 5f50b10516fb5..d90992417a721 100644
--- a/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel
+++ b/utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel
@@ -59,6 +59,15 @@ libc_test(
],
)
+libc_test(
+ name = "strsep_test",
+ srcs = ["strsep_test.cpp"],
+ deps = [
+ "//libc:hdr_signal_macros",
+ "//libc:strsep",
+ ],
+)
+
libc_test(
name = "strstr_test",
srcs = ["strstr_test.cpp"],
|
These functions turned out to have the same bug that was in wcstok() (fixed by 4fc9801), so add the missing tests and fix the code in a way that matches wcstok().
Also fix incorrect test expectations in existing tests.
Also update the BUILD.bazel files to actually build the strsep() test.