Skip to content

Commit 71dd4e1

Browse files
authored
[libc] fix strsep()/strtok()/strtok_r() "subsequent searches" behavior. (#154370)
These functions turned out to have the same bug that was in wcstok() (fixed by 4fc9801), so add the missing tests and fix the code in a way that matches wcstok(). Also fix incorrect test expectations in existing tests. Also update the BUILD.bazel files to actually build the strsep() test.
1 parent e90ce51 commit 71dd4e1

File tree

6 files changed

+71
-22
lines changed

6 files changed

+71
-22
lines changed

libc/src/string/string_utils.h

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -202,33 +202,36 @@ LIBC_INLINE size_t complementary_span(const char *src, const char *segment) {
202202
template <bool SkipDelim = true>
203203
LIBC_INLINE char *string_token(char *__restrict src,
204204
const char *__restrict delimiter_string,
205-
char **__restrict saveptr) {
206-
// Return nullptr immediately if both src AND saveptr are nullptr
207-
if (LIBC_UNLIKELY(src == nullptr && ((src = *saveptr) == nullptr)))
205+
char **__restrict context) {
206+
// Return nullptr immediately if both src AND context are nullptr
207+
if (LIBC_UNLIKELY(src == nullptr && ((src = *context) == nullptr)))
208208
return nullptr;
209209

210210
static_assert(CHAR_BIT == 8, "bitset of 256 assumes char is 8 bits");
211-
cpp::bitset<256> delimiter_set;
211+
cpp::bitset<256> delims;
212212
for (; *delimiter_string != '\0'; ++delimiter_string)
213-
delimiter_set.set(static_cast<size_t>(*delimiter_string));
213+
delims.set(static_cast<size_t>(*delimiter_string));
214214

215+
char *tok_start = src;
215216
if constexpr (SkipDelim)
216-
for (; *src != '\0' && delimiter_set.test(static_cast<size_t>(*src)); ++src)
217-
;
218-
if (*src == '\0') {
219-
*saveptr = src;
217+
while (*tok_start != '\0' && delims.test(static_cast<size_t>(*tok_start)))
218+
++tok_start;
219+
if (*tok_start == '\0' && SkipDelim) {
220+
*context = nullptr;
220221
return nullptr;
221222
}
222-
char *token = src;
223-
for (; *src != '\0'; ++src) {
224-
if (delimiter_set.test(static_cast<size_t>(*src))) {
225-
*src = '\0';
226-
++src;
227-
break;
228-
}
223+
224+
char *tok_end = tok_start;
225+
while (*tok_end != '\0' && !delims.test(static_cast<size_t>(*tok_end)))
226+
++tok_end;
227+
228+
if (*tok_end == '\0') {
229+
*context = nullptr;
230+
} else {
231+
*tok_end = '\0';
232+
*context = tok_end + 1;
229233
}
230-
*saveptr = src;
231-
return token;
234+
return tok_start;
232235
}
233236

234237
LIBC_INLINE size_t strlcpy(char *__restrict dst, const char *__restrict src,

libc/test/src/string/strsep_test.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,21 @@ TEST(LlvmLibcStrsepTest, NullSrc) {
1818
TEST(LlvmLibcStrsepTest, NoTokenFound) {
1919
{
2020
char s[] = "";
21-
char *string = s, *orig = s;
21+
char *string = s;
2222
EXPECT_STREQ(LIBC_NAMESPACE::strsep(&string, ""), nullptr);
23-
EXPECT_EQ(orig, string);
23+
EXPECT_TRUE(string == nullptr);
2424
}
2525
{
2626
char s[] = "abcde";
2727
char *string = s, *orig = s;
2828
EXPECT_STREQ(LIBC_NAMESPACE::strsep(&string, ""), orig);
29-
EXPECT_EQ(string, orig + 5);
29+
EXPECT_TRUE(string == nullptr);
3030
}
3131
{
3232
char s[] = "abcde";
3333
char *string = s, *orig = s;
3434
EXPECT_STREQ(LIBC_NAMESPACE::strsep(&string, "fghijk"), orig);
35-
EXPECT_EQ(string, orig + 5);
35+
EXPECT_TRUE(string == nullptr);
3636
}
3737
}
3838

@@ -53,6 +53,14 @@ TEST(LlvmLibcStrsepTest, DelimitersShouldNotBeIncludedInToken) {
5353
}
5454
}
5555

56+
TEST(LlvmLibcStrsepTest, SubsequentSearchesReturnNull) {
57+
char s[] = "a";
58+
char *string = s;
59+
ASSERT_STREQ(LIBC_NAMESPACE::strsep(&string, ":"), "a");
60+
ASSERT_EQ(LIBC_NAMESPACE::strsep(&string, ":"), nullptr);
61+
ASSERT_EQ(LIBC_NAMESPACE::strsep(&string, ":"), nullptr);
62+
}
63+
5664
#if defined(LIBC_ADD_NULL_CHECKS)
5765

5866
TEST(LlvmLibcStrsepTest, CrashOnNullPtr) {

libc/test/src/string/strtok_r_test.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,3 +122,12 @@ TEST(LlvmLibcStrTokReentrantTest, DelimitersShouldNotBeIncludedInToken) {
122122
token = LIBC_NAMESPACE::strtok_r(nullptr, "_:,_", &reserve);
123123
ASSERT_STREQ(token, nullptr);
124124
}
125+
126+
TEST(LlvmLibcStrTokReentrantTest, SubsequentSearchesReturnNull) {
127+
char src[] = "a";
128+
char *reserve = nullptr;
129+
char *token = LIBC_NAMESPACE::strtok_r(src, ":", &reserve);
130+
ASSERT_STREQ(token, "a");
131+
ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr);
132+
ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr);
133+
}

libc/test/src/string/strtok_test.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,10 @@ TEST(LlvmLibcStrTokTest, DelimitersShouldNotBeIncludedInToken) {
7676
token = LIBC_NAMESPACE::strtok(nullptr, "_:,_");
7777
ASSERT_STREQ(token, nullptr);
7878
}
79+
80+
TEST(LlvmLibcStrTokTest, SubsequentSearchesReturnNull) {
81+
char src[] = "a";
82+
ASSERT_STREQ("a", LIBC_NAMESPACE::strtok(src, ":"));
83+
ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr);
84+
ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr);
85+
}

utils/bazel/llvm-project-overlay/libc/BUILD.bazel

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5140,6 +5140,19 @@ libc_function(
51405140
],
51415141
)
51425142

5143+
libc_function(
5144+
name = "strsep",
5145+
srcs = ["src/string/strsep.cpp"],
5146+
hdrs = ["src/string/strsep.h"],
5147+
deps = [
5148+
":__support_common",
5149+
":__support_macros_null_check",
5150+
":hdr_signal_macros",
5151+
":string_memory_utils",
5152+
":string_utils",
5153+
],
5154+
)
5155+
51435156
libc_function(
51445157
name = "strstr",
51455158
srcs = ["src/string/strstr.cpp"],

utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,15 @@ libc_test(
5959
],
6060
)
6161

62+
libc_test(
63+
name = "strsep_test",
64+
srcs = ["strsep_test.cpp"],
65+
deps = [
66+
"//libc:hdr_signal_macros",
67+
"//libc:strsep",
68+
],
69+
)
70+
6271
libc_test(
6372
name = "strstr_test",
6473
srcs = ["strstr_test.cpp"],

0 commit comments

Comments
 (0)