Skip to content

Commit 06f5b8f

Browse files
committed
[libc] fix strsep()/strtok()/strtok_r() "subsequent searches" behavior.
These functions turned out to have the same bug that was in wcstok() (fixed by 4fc9801), so add the missing tests and fix the code in a way that matches wcstok(). Also fix incorrect test expectations in existing tests. Also update the BUILD.bazel files to actually build the strsep() test.
1 parent 5868580 commit 06f5b8f

File tree

6 files changed

+71
-22
lines changed

6 files changed

+71
-22
lines changed

libc/src/string/string_utils.h

Lines changed: 21 additions & 18 deletions
Original file line numberDiff line numberDiff line change
@@ -184,33 +184,36 @@ LIBC_INLINE size_t complementary_span(const char *src, const char *segment) {
184184
template <bool SkipDelim = true>
185185
LIBC_INLINE char *string_token(char *__restrict src,
186186
const char *__restrict delimiter_string,
187-
char **__restrict saveptr) {
188-
// Return nullptr immediately if both src AND saveptr are nullptr
189-
if (LIBC_UNLIKELY(src == nullptr && ((src = *saveptr) == nullptr)))
187+
char **__restrict context) {
188+
// Return nullptr immediately if both src AND context are nullptr
189+
if (LIBC_UNLIKELY(src == nullptr && ((src = *context) == nullptr)))
190190
return nullptr;
191191

192192
static_assert(CHAR_BIT == 8, "bitset of 256 assumes char is 8 bits");
193-
cpp::bitset<256> delimiter_set;
193+
cpp::bitset<256> delims;
194194
for (; *delimiter_string != '\0'; ++delimiter_string)
195-
delimiter_set.set(static_cast<size_t>(*delimiter_string));
195+
delims.set(static_cast<size_t>(*delimiter_string));
196196

197+
char *tok_start = src;
197198
if constexpr (SkipDelim)
198-
for (; *src != '\0' && delimiter_set.test(static_cast<size_t>(*src)); ++src)
199-
;
200-
if (*src == '\0') {
201-
*saveptr = src;
199+
while (*tok_start != '\0' && delims.test(static_cast<size_t>(*tok_start)))
200+
++tok_start;
201+
if (*tok_start == '\0' && SkipDelim) {
202+
*context = nullptr;
202203
return nullptr;
203204
}
204-
char *token = src;
205-
for (; *src != '\0'; ++src) {
206-
if (delimiter_set.test(static_cast<size_t>(*src))) {
207-
*src = '\0';
208-
++src;
209-
break;
210-
}
205+
206+
char *tok_end = tok_start;
207+
while (*tok_end != '\0' && !delims.test(static_cast<size_t>(*tok_end)))
208+
++tok_end;
209+
210+
if (*tok_end == '\0') {
211+
*context = nullptr;
212+
} else {
213+
*tok_end = '\0';
214+
*context = tok_end + 1;
211215
}
212-
*saveptr = src;
213-
return token;
216+
return tok_start;
214217
}
215218

216219
LIBC_INLINE size_t strlcpy(char *__restrict dst, const char *__restrict src,

libc/test/src/string/strsep_test.cpp

Lines changed: 12 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -18,21 +18,21 @@ TEST(LlvmLibcStrsepTest, NullSrc) {
1818
TEST(LlvmLibcStrsepTest, NoTokenFound) {
1919
{
2020
char s[] = "";
21-
char *string = s, *orig = s;
21+
char *string = s;
2222
EXPECT_STREQ(LIBC_NAMESPACE::strsep(&string, ""), nullptr);
23-
EXPECT_EQ(orig, string);
23+
EXPECT_TRUE(string == nullptr);
2424
}
2525
{
2626
char s[] = "abcde";
2727
char *string = s, *orig = s;
2828
EXPECT_STREQ(LIBC_NAMESPACE::strsep(&string, ""), orig);
29-
EXPECT_EQ(string, orig + 5);
29+
EXPECT_TRUE(string == nullptr);
3030
}
3131
{
3232
char s[] = "abcde";
3333
char *string = s, *orig = s;
3434
EXPECT_STREQ(LIBC_NAMESPACE::strsep(&string, "fghijk"), orig);
35-
EXPECT_EQ(string, orig + 5);
35+
EXPECT_TRUE(string == nullptr);
3636
}
3737
}
3838

@@ -53,6 +53,14 @@ TEST(LlvmLibcStrsepTest, DelimitersShouldNotBeIncludedInToken) {
5353
}
5454
}
5555

56+
TEST(LlvmLibcStrsepTest, SubsequentSearchesReturnNull) {
57+
char s[] = "a";
58+
char *string = s;
59+
ASSERT_STREQ(LIBC_NAMESPACE::strsep(&string, ":"), "a");
60+
ASSERT_EQ(LIBC_NAMESPACE::strsep(&string, ":"), nullptr);
61+
ASSERT_EQ(LIBC_NAMESPACE::strsep(&string, ":"), nullptr);
62+
}
63+
5664
#if defined(LIBC_ADD_NULL_CHECKS)
5765

5866
TEST(LlvmLibcStrsepTest, CrashOnNullPtr) {

libc/test/src/string/strtok_r_test.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -122,3 +122,12 @@ TEST(LlvmLibcStrTokReentrantTest, DelimitersShouldNotBeIncludedInToken) {
122122
token = LIBC_NAMESPACE::strtok_r(nullptr, "_:,_", &reserve);
123123
ASSERT_STREQ(token, nullptr);
124124
}
125+
126+
TEST(LlvmLibcStrTokReentrantTest, SubsequentSearchesReturnNull) {
127+
char src[] = "a";
128+
char *reserve = nullptr;
129+
char *token = LIBC_NAMESPACE::strtok_r(src, ":", &reserve);
130+
ASSERT_STREQ(token, "a");
131+
ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr);
132+
ASSERT_EQ(LIBC_NAMESPACE::strtok_r(nullptr, ":", &reserve), nullptr);
133+
}

libc/test/src/string/strtok_test.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -76,3 +76,10 @@ TEST(LlvmLibcStrTokTest, DelimitersShouldNotBeIncludedInToken) {
7676
token = LIBC_NAMESPACE::strtok(nullptr, "_:,_");
7777
ASSERT_STREQ(token, nullptr);
7878
}
79+
80+
TEST(LlvmLibcStrTokTest, SubsequentSearchesReturnNull) {
81+
char src[] = "a";
82+
ASSERT_STREQ("a", LIBC_NAMESPACE::strtok(src, ":"));
83+
ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr);
84+
ASSERT_EQ(LIBC_NAMESPACE::strtok(nullptr, ":"), nullptr);
85+
}

utils/bazel/llvm-project-overlay/libc/BUILD.bazel

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5137,6 +5137,19 @@ libc_function(
51375137
],
51385138
)
51395139

5140+
libc_function(
5141+
name = "strsep",
5142+
srcs = ["src/string/strsep.cpp"],
5143+
hdrs = ["src/string/strsep.h"],
5144+
deps = [
5145+
":__support_common",
5146+
":__support_macros_null_check",
5147+
":hdr_signal_macros",
5148+
":string_memory_utils",
5149+
":string_utils",
5150+
],
5151+
)
5152+
51405153
libc_function(
51415154
name = "strstr",
51425155
srcs = ["src/string/strstr.cpp"],

utils/bazel/llvm-project-overlay/libc/test/src/string/BUILD.bazel

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,15 @@ libc_test(
5959
],
6060
)
6161

62+
libc_test(
63+
name = "strsep_test",
64+
srcs = ["strsep_test.cpp"],
65+
deps = [
66+
"//libc:hdr_signal_macros",
67+
"//libc:strsep",
68+
],
69+
)
70+
6271
libc_test(
6372
name = "strstr_test",
6473
srcs = ["strstr_test.cpp"],

0 commit comments

Comments
 (0)