Skip to content

Commit a562d1c

Browse files
committed
refactored and replaced tests to mimic strtok_r
1 parent 17f3360 commit a562d1c

File tree

3 files changed

+149
-109
lines changed

3 files changed

+149
-109
lines changed

libc/src/wchar/wcstok.cpp

Lines changed: 28 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -13,34 +13,38 @@
1313

1414
namespace LIBC_NAMESPACE_DECL {
1515

16+
// Reports whether `wc` occurs in the null-terminated set `delimiters`.
// The terminating L'\0' is never treated as a delimiter, so an empty set
// matches nothing. Declared static: this is a file-local helper for wcstok
// and must not export a symbol from the libc namespace.
static bool isADelimeter(wchar_t wc, const wchar_t *delimiters) {
  for (const wchar_t *delim_ptr = delimiters; *delim_ptr != L'\0'; ++delim_ptr)
    if (wc == *delim_ptr)
      return true;
  return false;
}
22+
1623
LLVM_LIBC_FUNCTION(wchar_t *, wcstok,
1724
(wchar_t *__restrict str, const wchar_t *__restrict delim,
18-
wchar_t **__restrict ptr)) {
19-
if (str == nullptr)
20-
str = *ptr;
21-
22-
bool foundTokenStart = false;
23-
wchar_t *out = nullptr;
24-
wchar_t *str_ptr;
25-
for (str_ptr = str; *str_ptr != L'\0'; str_ptr++) {
26-
bool inDelim = false;
27-
for (const wchar_t *delim_ptr = delim; *delim_ptr != L'\0' && !inDelim;
28-
delim_ptr++)
29-
if (*str_ptr == *delim_ptr)
30-
inDelim = true;
31-
32-
if (!inDelim && !foundTokenStart) {
33-
foundTokenStart = true;
34-
out = str_ptr;
35-
} else if (inDelim && foundTokenStart) {
36-
*str_ptr = L'\0';
37-
*ptr = str_ptr + 1;
38-
return out;
39-
}
25+
wchar_t **__restrict context)) {
26+
if (str == nullptr) {
27+
if (*context == nullptr)
28+
return nullptr;
29+
30+
str = *context;
4031
}
4132

42-
*ptr = str_ptr;
43-
return out;
33+
wchar_t *tok_start, *tok_end;
34+
for (tok_start = str; *tok_start != L'\0' && isADelimeter(*tok_start, delim);
35+
tok_start++)
36+
;
37+
38+
for (tok_end = tok_start; *tok_end != L'\0' && !isADelimeter(*tok_end, delim);
39+
tok_end++)
40+
;
41+
42+
if (*tok_end != L'\0') {
43+
*tok_end = L'\0';
44+
tok_end++;
45+
}
46+
*context = tok_end;
47+
return *tok_start == L'\0' ? nullptr : tok_start;
4448
}
4549

4650
} // namespace LIBC_NAMESPACE_DECL

libc/src/wchar/wcstok.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
namespace LIBC_NAMESPACE_DECL {
1616

1717
wchar_t *wcstok(wchar_t *__restrict str, const wchar_t *__restrict delim,
18-
wchar_t **__restrict ptr);
18+
wchar_t **__restrict context);
1919

2020
} // namespace LIBC_NAMESPACE_DECL
2121

libc/test/src/wchar/wcstok_test.cpp

Lines changed: 120 additions & 84 deletions
Original file line numberDiff line numberDiff line change
@@ -11,135 +11,171 @@
1111
#include "src/wchar/wcstok.h"
1212
#include "test/UnitTest/Test.h"
1313

14-
TEST(LlvmLibcStrTokTest, NoTokenFound) {
15-
wchar_t empty[] = L"";
16-
wchar_t *buf;
17-
ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &buf), nullptr);
18-
ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &buf), nullptr);
19-
20-
wchar_t single[] = L"_";
21-
wchar_t *token = LIBC_NAMESPACE::wcstok(single, L"", &buf);
22-
ASSERT_TRUE(token[0] == L'_');
23-
ASSERT_TRUE(token[1] == L'\0');
24-
25-
wchar_t multiple[] = L"1,2";
26-
token = LIBC_NAMESPACE::wcstok(multiple, L":", &buf);
27-
ASSERT_TRUE(multiple[0] == L'1');
28-
ASSERT_TRUE(multiple[1] == L',');
29-
ASSERT_TRUE(multiple[2] == L'2');
30-
ASSERT_TRUE(multiple[3] == L'\0');
14+
TEST(LlvmLibcWCSTokReentrantTest, NoTokenFound) {
15+
{ // Empty source and delimiter string.
16+
wchar_t empty[] = L"";
17+
wchar_t *reserve = nullptr;
18+
ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &reserve), nullptr);
19+
// Another call to ensure that 'reserve' is not in a bad state.
20+
ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"", &reserve), nullptr);
21+
ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"", &reserve), nullptr);
22+
}
23+
{ // Empty source and single character delimiter string.
24+
wchar_t empty[] = L"";
25+
wchar_t *reserve = nullptr;
26+
ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr);
27+
// Another call to ensure that 'reserve' is not in a bad state.
28+
ASSERT_EQ(LIBC_NAMESPACE::wcstok(empty, L"_", &reserve), nullptr);
29+
ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
30+
}
31+
{ // Same character source and delimiter string.
32+
wchar_t single[] = L"_";
33+
wchar_t *reserve = nullptr;
34+
ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr);
35+
// Another call to ensure that 'reserve' is not in a bad state.
36+
ASSERT_EQ(LIBC_NAMESPACE::wcstok(single, L"_", &reserve), nullptr);
37+
ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L"_", &reserve), nullptr);
38+
}
39+
{ // Multiple character source and single character delimiter string.
40+
wchar_t multiple[] = L"1,2";
41+
wchar_t *reserve = nullptr;
42+
wchar_t *tok = LIBC_NAMESPACE::wcstok(multiple, L":", &reserve);
43+
ASSERT_TRUE(tok[0] == L'1');
44+
ASSERT_TRUE(tok[1] == L',');
45+
ASSERT_TRUE(tok[2] == L'2');
46+
ASSERT_TRUE(tok[3] == L'\0');
47+
// Another call to ensure that 'reserve' is not in a bad state.
48+
tok = LIBC_NAMESPACE::wcstok(multiple, L":", &reserve);
49+
ASSERT_TRUE(tok[0] == L'1');
50+
ASSERT_TRUE(tok[1] == L',');
51+
ASSERT_TRUE(tok[2] == L'2');
52+
ASSERT_TRUE(tok[3] == L'\0');
53+
ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr);
54+
}
3155
}
3256

33-
TEST(LlvmLibcStrTokTest, DelimiterAsFirstCharacterShouldBeIgnored) {
34-
wchar_t *buf;
57+
TEST(LlvmLibcWCSTokReentrantTest, DelimiterAsFirstCharacterShouldBeIgnored) {
3558
wchar_t src[] = L".123";
36-
wchar_t *token = LIBC_NAMESPACE::wcstok(src, L".", &buf);
37-
ASSERT_TRUE(token[0] == L'1');
38-
ASSERT_TRUE(token[1] == L'2');
39-
ASSERT_TRUE(token[2] == L'3');
40-
ASSERT_TRUE(token[3] == L'\0');
59+
wchar_t *reserve = nullptr;
60+
wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L".", &reserve);
61+
ASSERT_TRUE(tok[0] == L'1');
62+
ASSERT_TRUE(tok[1] == L'2');
63+
ASSERT_TRUE(tok[2] == L'3');
64+
ASSERT_TRUE(tok[3] == L'\0');
65+
// Another call to ensure that 'reserve' is not in a bad state.
66+
tok = LIBC_NAMESPACE::wcstok(src, L".", &reserve);
67+
ASSERT_TRUE(tok[0] == L'1');
68+
ASSERT_TRUE(tok[1] == L'2');
69+
ASSERT_TRUE(tok[2] == L'3');
70+
ASSERT_TRUE(tok[3] == L'\0');
71+
ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L".", &reserve), nullptr);
4172
}
4273

43-
TEST(LlvmLibcStrTokTest, DelimiterIsMiddleCharacter) {
74+
TEST(LlvmLibcWCSTokReentrantTest, DelimiterIsMiddleCharacter) {
4475
wchar_t src[] = L"12,34";
45-
wchar_t *buf;
46-
wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",", &buf);
47-
ASSERT_TRUE(token[0] == L'1');
48-
ASSERT_TRUE(token[1] == L'2');
49-
ASSERT_TRUE(token[2] == L'\0');
76+
wchar_t *reserve = nullptr;
77+
wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
78+
ASSERT_TRUE(tok[0] == L'1');
79+
ASSERT_TRUE(tok[1] == L'2');
80+
ASSERT_TRUE(tok[2] == L'\0');
81+
// Another call to ensure that 'reserve' is not in a bad state.
82+
tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
83+
ASSERT_TRUE(tok[0] == L'1');
84+
ASSERT_TRUE(tok[1] == L'2');
85+
ASSERT_TRUE(tok[2] == L'\0');
86+
ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L",", &reserve), nullptr);
5087
}
5188

52-
TEST(LlvmLibcStrTokTest, DelimiterAsLastCharacterShouldBeIgnored) {
89+
TEST(LlvmLibcWCSTokReentrantTest, DelimiterAsLastCharacterShouldBeIgnored) {
5390
wchar_t src[] = L"1234:";
54-
wchar_t *buf;
55-
wchar_t *token = LIBC_NAMESPACE::wcstok(src, L":", &buf);
56-
ASSERT_TRUE(token[0] == L'1');
57-
ASSERT_TRUE(token[1] == L'2');
58-
ASSERT_TRUE(token[2] == L'3');
59-
ASSERT_TRUE(token[3] == L'4');
60-
ASSERT_TRUE(token[4] == L'\0');
91+
wchar_t *reserve = nullptr;
92+
wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L":", &reserve);
93+
ASSERT_TRUE(tok[0] == L'1');
94+
ASSERT_TRUE(tok[1] == L'2');
95+
ASSERT_TRUE(tok[2] == L'3');
96+
ASSERT_TRUE(tok[3] == L'4');
97+
ASSERT_TRUE(tok[4] == L'\0');
98+
// Another call to ensure that 'reserve' is not in a bad state.
99+
tok = LIBC_NAMESPACE::wcstok(src, L":", &reserve);
100+
ASSERT_TRUE(tok[0] == L'1');
101+
ASSERT_TRUE(tok[1] == L'2');
102+
ASSERT_TRUE(tok[2] == L'3');
103+
ASSERT_TRUE(tok[3] == L'4');
104+
ASSERT_TRUE(tok[4] == L'\0');
105+
ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L":", &reserve), nullptr);
61106
}
62107

63-
TEST(LlvmLibcStrTokTest, MultipleDelimiters) {
64-
wchar_t src[] = L"12,.34";
65-
wchar_t *buf;
66-
wchar_t *token;
67-
68-
token = LIBC_NAMESPACE::wcstok(src, L".", &buf);
69-
ASSERT_TRUE(token[0] == L'1');
70-
ASSERT_TRUE(token[1] == L'2');
71-
ASSERT_TRUE(token[2] == L',');
72-
ASSERT_TRUE(token[3] == L'\0');
73-
74-
token = LIBC_NAMESPACE::wcstok(src, L".,", &buf);
75-
ASSERT_TRUE(token[0] == L'1');
76-
ASSERT_TRUE(token[1] == L'2');
77-
ASSERT_TRUE(token[2] == L'\0');
78-
79-
token = LIBC_NAMESPACE::wcstok(src, L",.", &buf);
80-
ASSERT_TRUE(token[0] == L'1');
81-
ASSERT_TRUE(token[1] == L'2');
82-
ASSERT_TRUE(token[2] == L'\0');
83-
84-
token = LIBC_NAMESPACE::wcstok(src, L":,.", &buf);
85-
ASSERT_TRUE(token[0] == L'1');
86-
ASSERT_TRUE(token[1] == L'2');
87-
ASSERT_TRUE(token[2] == L'\0');
108+
TEST(LlvmLibcWCSTokReentrantTest, ShouldNotGoPastNullTerminator) {
109+
wchar_t src[] = {L'1', L'2', L'\0', L',', L'3'};
110+
wchar_t *reserve = nullptr;
111+
wchar_t *tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
112+
ASSERT_TRUE(tok[0] == L'1');
113+
ASSERT_TRUE(tok[1] == L'2');
114+
ASSERT_TRUE(tok[2] == L'\0');
115+
// Another call to ensure that 'reserve' is not in a bad state.
116+
tok = LIBC_NAMESPACE::wcstok(src, L",", &reserve);
117+
ASSERT_TRUE(tok[0] == L'1');
118+
ASSERT_TRUE(tok[1] == L'2');
119+
ASSERT_TRUE(tok[2] == L'\0');
120+
ASSERT_EQ(LIBC_NAMESPACE::wcstok(nullptr, L",", &reserve), nullptr);
88121
}
89122

90-
TEST(LlvmLibcStrTokTest, ShouldNotGoPastNullTerminator) {
91-
wchar_t src[] = {L'1', L'2', L'\0', L',', L'3'};
92-
wchar_t *buf;
93-
wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",", &buf);
94-
ASSERT_TRUE(token[0] == L'1');
95-
ASSERT_TRUE(token[1] == L'2');
96-
ASSERT_TRUE(token[2] == L'\0');
123+
TEST(LlvmLibcWCSTokReentrantTest,
124+
ShouldReturnNullptrWhenBothSrcAndSaveptrAreNull) {
125+
wchar_t *src = nullptr;
126+
wchar_t *reserve = nullptr;
127+
// Ensure that instead of crashing if src and reserve are null, nullptr is
128+
// returned
129+
ASSERT_EQ(LIBC_NAMESPACE::wcstok(src, L",", &reserve), nullptr);
130+
// And that neither src nor reserve are changed when that happens
131+
ASSERT_EQ(src, nullptr);
132+
ASSERT_EQ(reserve, nullptr);
97133
}
98134

99-
TEST(LlvmLibcStrTokTest, SubsequentCallsShouldFindFollowingDelimiters) {
135+
TEST(LlvmLibcWCSTokReentrantTest,
136+
SubsequentCallsShouldFindFollowingDelimiters) {
100137
wchar_t src[] = L"12,34.56";
101-
wchar_t *buf;
102-
wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",.", &buf);
138+
wchar_t *reserve = nullptr;
139+
wchar_t *token = LIBC_NAMESPACE::wcstok(src, L",.", &reserve);
103140
ASSERT_TRUE(token[0] == L'1');
104141
ASSERT_TRUE(token[1] == L'2');
105142
ASSERT_TRUE(token[2] == L'\0');
106143

107-
token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &buf);
144+
token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &reserve);
108145
ASSERT_TRUE(token[0] == L'3');
109146
ASSERT_TRUE(token[1] == L'4');
110147
ASSERT_TRUE(token[2] == L'\0');
111148

112-
token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &buf);
149+
token = LIBC_NAMESPACE::wcstok(nullptr, L",.", &reserve);
113150
ASSERT_TRUE(token[0] == L'5');
114151
ASSERT_TRUE(token[1] == L'6');
115152
ASSERT_TRUE(token[2] == L'\0');
116-
117-
token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &buf);
153+
token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
118154
ASSERT_EQ(token, nullptr);
119155
// Subsequent calls after hitting the end of the string should also return
120156
// nullptr.
121-
token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &buf);
157+
token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
122158
ASSERT_EQ(token, nullptr);
123159
}
124160

125-
TEST(LlvmLibcStrTokTest, DelimitersShouldNotBeIncludedInToken) {
126-
wchar_t *buf;
161+
TEST(LlvmLibcWCSTokReentrantTest, DelimitersShouldNotBeIncludedInToken) {
127162
wchar_t src[] = L"__ab__:_cd__:__ef__:__";
128-
wchar_t *token = LIBC_NAMESPACE::wcstok(src, L"_:", &buf);
163+
wchar_t *reserve = nullptr;
164+
wchar_t *token = LIBC_NAMESPACE::wcstok(src, L"_:", &reserve);
129165
ASSERT_TRUE(token[0] == L'a');
130166
ASSERT_TRUE(token[1] == L'b');
131167
ASSERT_TRUE(token[2] == L'\0');
132168

133-
token = LIBC_NAMESPACE::wcstok(nullptr, L":_", &buf);
169+
token = LIBC_NAMESPACE::wcstok(nullptr, L":_", &reserve);
134170
ASSERT_TRUE(token[0] == L'c');
135171
ASSERT_TRUE(token[1] == L'd');
136172
ASSERT_TRUE(token[2] == L'\0');
137173

138-
token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,", &buf);
174+
token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,", &reserve);
139175
ASSERT_TRUE(token[0] == L'e');
140176
ASSERT_TRUE(token[1] == L'f');
141177
ASSERT_TRUE(token[2] == L'\0');
142178

143-
token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &buf);
179+
token = LIBC_NAMESPACE::wcstok(nullptr, L"_:,_", &reserve);
144180
ASSERT_EQ(token, nullptr);
145181
}

0 commit comments

Comments
 (0)