Skip to content

Commit 69ed44c

Browse files
committed
public string functions + tests
1 parent 843c79a commit 69ed44c

File tree

9 files changed

+218
-108
lines changed

9 files changed

+218
-108
lines changed

libc/src/wchar/CMakeLists.txt

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -168,7 +168,7 @@ add_entrypoint_object(
168168
DEPENDS
169169
libc.hdr.types.wchar_t
170170
libc.src.__support.wchar.mbstate
171-
libc.src.__support.wchar.string_converter
171+
libc.src.__support.wchar.wcsnrtombs
172172
libc.src.__support.libc_errno
173173
)
174174

@@ -182,7 +182,7 @@ add_entrypoint_object(
182182
libc.hdr.types.wchar_t
183183
libc.hdr.types.mbstate_t
184184
libc.src.__support.wchar.mbstate
185-
libc.src.__support.wchar.string_converter
185+
libc.src.__support.wchar.wcsnrtombs
186186
libc.src.__support.libc_errno
187187
)
188188

@@ -196,7 +196,7 @@ add_entrypoint_object(
196196
libc.hdr.types.wchar_t
197197
libc.hdr.types.mbstate_t
198198
libc.src.__support.wchar.mbstate
199-
libc.src.__support.wchar.string_converter
199+
libc.src.__support.wchar.wcsnrtombs
200200
libc.src.__support.libc_errno
201201
)
202202

libc/src/wchar/wcsnrtombs.cpp

Lines changed: 9 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -6,7 +6,7 @@
66
//
77
//===----------------------------------------------------------------------===//
88

9-
#include "src/wchar/wcsrtombs.h"
9+
#include "src/wchar/wcsnrtombs.h"
1010

1111
#include "hdr/types/char32_t.h"
1212
#include "hdr/types/mbstate_t.h"
@@ -16,35 +16,24 @@
1616
#include "src/__support/libc_errno.h"
1717
#include "src/__support/macros/config.h"
1818
#include "src/__support/wchar/mbstate.h"
19-
#include "src/__support/wchar/string_converter.h"
19+
#include "src/__support/wchar/wcsnrtombs.h"
2020

2121
namespace LIBC_NAMESPACE_DECL {
2222

2323
LLVM_LIBC_FUNCTION(size_t, wcsnrtombs,
2424
(char *__restrict s, const wchar_t **__restrict pwcs,
2525
size_t nwc, size_t len, mbstate_t *ps)) {
2626
static internal::mbstate internal_mbstate;
27-
internal::StringConverter<char32_t> str_conv(
28-
reinterpret_cast<const char32_t *>(pwcs),
27+
auto result = internal::wcsnrtombs(
28+
s, pwcs, nwc, len,
2929
ps == nullptr ? &internal_mbstate
30-
: reinterpret_cast<internal::mbstate *>(ps),
31-
len, nwc);
32-
33-
int dst_idx = 0;
34-
ErrorOr<char8_t> converted = str_conv.popUTF8();
35-
while (converted.has_value()) {
36-
if (s != nullptr)
37-
s[dst_idx] = converted.value();
38-
dst_idx++;
39-
converted = str_conv.popUTF8();
30+
: reinterpret_cast<internal::mbstate *>(ps));
31+
if (!result.has_value()) {
32+
libc_errno = result.error();
33+
return -1;
4034
}
4135

42-
pwcs += str_conv.getSourceIndex();
43-
if (converted.error() == -1) // if we hit conversion limit
44-
return dst_idx;
45-
46-
libc_errno = converted.error();
47-
return -1;
36+
return result.value();
4837
}
4938

5039
} // namespace LIBC_NAMESPACE_DECL

libc/src/wchar/wcsrtombs.cpp

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -16,35 +16,24 @@
1616
#include "src/__support/libc_errno.h"
1717
#include "src/__support/macros/config.h"
1818
#include "src/__support/wchar/mbstate.h"
19-
#include "src/__support/wchar/string_converter.h"
19+
#include "src/__support/wchar/wcsnrtombs.h"
2020

2121
namespace LIBC_NAMESPACE_DECL {
2222

2323
LLVM_LIBC_FUNCTION(size_t, wcsrtombs,
2424
(char *__restrict s, const wchar_t **__restrict pwcs,
2525
size_t n, mbstate_t *ps)) {
2626
static internal::mbstate internal_mbstate;
27-
internal::StringConverter<char32_t> str_conv(
28-
reinterpret_cast<const char32_t *>(*pwcs),
27+
auto result = internal::wcsnrtombs(
28+
s, pwcs, SIZE_MAX, n,
2929
ps == nullptr ? &internal_mbstate
30-
: reinterpret_cast<internal::mbstate *>(ps),
31-
n);
32-
33-
int dst_idx = 0;
34-
ErrorOr<char8_t> converted = str_conv.popUTF8();
35-
while (converted.has_value()) {
36-
if (s != nullptr)
37-
s[dst_idx] = converted.value();
38-
dst_idx++;
39-
converted = str_conv.popUTF8();
30+
: reinterpret_cast<internal::mbstate *>(ps));
31+
if (!result.has_value()) {
32+
libc_errno = result.error();
33+
return -1;
4034
}
41-
42-
*pwcs += str_conv.getSourceIndex();
43-
if (converted.error() == -1) // if we hit conversion limit
44-
return dst_idx;
4535

46-
libc_errno = converted.error();
47-
return -1;
36+
return result.value();
4837
}
4938

5039
} // namespace LIBC_NAMESPACE_DECL

libc/src/wchar/wcstombs.cpp

Lines changed: 9 additions & 24 deletions
Original file line numberDiff line numberDiff line change
@@ -15,38 +15,23 @@
1515
#include "src/__support/libc_errno.h"
1616
#include "src/__support/macros/config.h"
1717
#include "src/__support/wchar/mbstate.h"
18-
#include "src/__support/wchar/string_converter.h"
18+
#include "src/__support/wchar/wcsnrtombs.h"
1919

2020
namespace LIBC_NAMESPACE_DECL {
2121

2222
LLVM_LIBC_FUNCTION(size_t, wcstombs,
23-
(char *__restrict s, const wchar_t *__restrict pwcs,
23+
(char *__restrict s, const wchar_t *__restrict wcs,
2424
size_t n)) {
2525
static internal::mbstate internal_mbstate;
26-
27-
if (s == nullptr)
28-
n = SIZE_MAX;
29-
30-
internal::StringConverter<char32_t> str_conv(
31-
reinterpret_cast<const char32_t *>(pwcs), &internal_mbstate, n);
32-
33-
int dst_idx = 0;
34-
ErrorOr<char8_t> converted = str_conv.popUTF8();
35-
while (converted.has_value()) {
36-
if (s != nullptr)
37-
s[dst_idx] = converted.value();
38-
39-
if (converted.value() != '\0')
40-
dst_idx++;
41-
42-
converted = str_conv.popUTF8();
26+
const wchar_t *wcs_ptr_copy = wcs;
27+
auto result =
28+
internal::wcsnrtombs(s, &wcs_ptr_copy, SIZE_MAX, n, &internal_mbstate);
29+
if (!result.has_value()) {
30+
libc_errno = result.error();
31+
return -1;
4332
}
4433

45-
if (converted.error() == -1) // if we hit conversion limit
46-
return dst_idx;
47-
48-
libc_errno = converted.error();
49-
return -1;
34+
return result.value();
5035
}
5136

5237
} // namespace LIBC_NAMESPACE_DECL

libc/test/src/__support/wchar/wcsnrtombs_test.cpp

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -151,6 +151,7 @@ TEST(LlvmLibcWcsnrtombs, NullDest) {
151151
}
152152

153153
TEST(LlvmLibcWcsnrtombs, InvalidState) {
154+
// this is more thoroughly tested by CharacterConverter
154155
LIBC_NAMESPACE::internal::mbstate state;
155156
state.total_bytes = 100;
156157

libc/test/src/wchar/CMakeLists.txt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -127,6 +127,20 @@ add_libc_test(
127127
libc.hdr.types.mbstate_t
128128
)
129129

130+
add_libc_test(
131+
wcsnrtombs_test
132+
SUITE
133+
libc_wchar_unittests
134+
SRCS
135+
wcsnrtombs_test.cpp
136+
DEPENDS
137+
libc.src.wchar.wcsnrtombs
138+
libc.test.UnitTest.ErrnoCheckingTest
139+
libc.hdr.types.wchar_t
140+
libc.src.string.memset
141+
libc.hdr.types.mbstate_t
142+
)
143+
130144
add_libc_test(
131145
wmemset_test
132146
SUITE
Lines changed: 156 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,156 @@
1+
//===-- Unittests for wcsnrtombs ------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "hdr/types/mbstate_t.h"
10+
#include "src/string/memset.h"
11+
#include "src/wchar/wcsnrtombs.h"
12+
#include "test/UnitTest/ErrnoCheckingTest.h"
13+
#include "test/UnitTest/Test.h"
14+
15+
using LlvmLibcWcsnrtombs = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
16+
17+
// These tests are intentionally simple: this function just calls into the
18+
// internal wcsnrtombs, which is tested more thoroughly.
19+
20+
// Converts one wide character of each UTF-8 encoded length (4, 3, 2 and 1
// bytes) followed by the terminator, with enough room in the destination for
// the whole result.
TEST_F(LlvmLibcWcsnrtombs, AllMultibyteLengths) {
  mbstate_t state;
  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));

  /// clown emoji, sigma symbol, y with diaeresis, letter A
  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
                         static_cast<wchar_t>(0x2211),
                         static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
                         static_cast<wchar_t>(0x0)};
  const wchar_t *cur = src;
  char mbs[11];

  // Expected output, one row per source character.
  const char expected[11] = {
      '\xF0', '\x9F', '\xA4', '\xA1', // clown
      '\xE2', '\x88', '\x91',         // sigma
      '\xC3', '\xBF',                 // y diaeresis
      '\x41',                         // A
      '\0',                           // null terminator
  };

  ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 11, &state),
            static_cast<size_t>(10));
  ASSERT_ERRNO_SUCCESS();
  // The whole string, including the terminator, was consumed.
  ASSERT_EQ(cur, nullptr);

  for (size_t i = 0; i < 11; ++i)
    ASSERT_EQ(mbs[i], expected[i]);
}
48+
49+
// Checks that conversion stops on a character boundary once the destination
// limit is reached: with a limit of 4 the first character fits exactly, and
// with a limit of 6 there are not enough bytes left for the second character,
// so in both cases only the first character is converted.
TEST_F(LlvmLibcWcsnrtombs, DestLimit) {
  /// clown emoji, sigma symbol, y with diaeresis, letter A
  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
                         static_cast<wchar_t>(0x2211),
                         static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
                         static_cast<wchar_t>(0x0)};
  const char clown_utf8[4] = {'\xF0', '\x9F', '\xA4', '\xA1'};
  const size_t dest_limits[] = {4, 6};

  for (size_t limit : dest_limits) {
    // Start every round from a clean state, source position and buffer.
    mbstate_t state;
    LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
    const wchar_t *cur = src;

    char mbs[11];
    for (char &byte : mbs)
      byte = '\x01'; // dummy initial values

    ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, limit, &state),
              static_cast<size_t>(4));
    ASSERT_ERRNO_SUCCESS();
    ASSERT_EQ(cur, src + 1);

    for (size_t i = 0; i < 4; ++i)
      ASSERT_EQ(mbs[i], clown_utf8[i]);
    ASSERT_EQ(mbs[4], '\x01'); // didn't write more than 4 bytes
  }
}
90+
91+
// Checks that conversion stops after nwc source characters and that a second
// call continues from the updated source pointer until the terminator.
//
// Declared with TEST_F so it runs under the same errno-checking fixture as the
// other tests in this file; it asserts on errno and should not depend on
// whatever value an earlier test left behind.
TEST_F(LlvmLibcWcsnrtombs, SrcLimit) {
  mbstate_t state;
  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));

  /// clown emoji, sigma symbol, y with diaeresis, letter A
  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
                         static_cast<wchar_t>(0x2211),
                         static_cast<wchar_t>(0xff), static_cast<wchar_t>(0x41),
                         static_cast<wchar_t>(0x0)};
  const wchar_t *cur = src;

  char mbs[11];
  for (int i = 0; i < 11; ++i)
    mbs[i] = '\x01'; // dummy initial values

  // Source limit of 2: only the clown and the sigma are converted.
  auto res = LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 2, 11, &state);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(res, static_cast<size_t>(7));
  ASSERT_EQ(cur, src + 2);
  ASSERT_EQ(mbs[0], '\xF0'); // clown begin
  ASSERT_EQ(mbs[1], '\x9F');
  ASSERT_EQ(mbs[2], '\xA4');
  ASSERT_EQ(mbs[3], '\xA1');
  ASSERT_EQ(mbs[4], '\xE2'); // sigma begin
  ASSERT_EQ(mbs[5], '\x88');
  ASSERT_EQ(mbs[6], '\x91');
  ASSERT_EQ(mbs[7], '\x01'); // nothing written past the second character

  // Continue right after the bytes already written. Only the space that is
  // actually left in mbs is offered as the destination length.
  res = LIBC_NAMESPACE::wcsnrtombs(mbs + res, &cur, 100, sizeof(mbs) - res,
                                   &state);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(res, static_cast<size_t>(3));
  ASSERT_EQ(cur, nullptr);
  ASSERT_EQ(mbs[0], '\xF0'); // clown begin
  ASSERT_EQ(mbs[1], '\x9F');
  ASSERT_EQ(mbs[2], '\xA4');
  ASSERT_EQ(mbs[3], '\xA1');
  ASSERT_EQ(mbs[4], '\xE2'); // sigma begin
  ASSERT_EQ(mbs[5], '\x88');
  ASSERT_EQ(mbs[6], '\x91');
  ASSERT_EQ(mbs[7], '\xC3'); // y diaeresis begin
  ASSERT_EQ(mbs[8], '\xBF');
  ASSERT_EQ(mbs[9], '\x41'); // A begin
  ASSERT_EQ(mbs[10], '\0');  // null terminator
}
135+
136+
// Verifies that an invalid wide character makes wcsnrtombs return (size_t)-1
// and set errno to EILSEQ, while a call that stops before reaching it still
// succeeds.
TEST_F(LlvmLibcWcsnrtombs, ErrnoTest) {
  mbstate_t state;
  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));

  const wchar_t src[] = {static_cast<wchar_t>(0x1f921),
                         static_cast<wchar_t>(0x2211),
                         static_cast<wchar_t>(0x12ffff), // invalid widechar
                         static_cast<wchar_t>(0x0)};
  const wchar_t *cur = src;
  char mbs[11];

  // The two valid characters encode to 4 + 3 = 7 bytes, so a destination
  // limit of 7 is used up before the invalid character is reached and the
  // call succeeds.
  ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, 7, &state),
            static_cast<size_t>(7));
  ASSERT_ERRNO_SUCCESS();

  // cur is expected to have advanced to the invalid character, so this call
  // fails. The destination length matches the real size of mbs.
  LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
  ASSERT_EQ(LIBC_NAMESPACE::wcsnrtombs(mbs, &cur, 5, sizeof(mbs), &state),
            static_cast<size_t>(-1));
  ASSERT_ERRNO_EQ(EILSEQ);
}

0 commit comments

Comments
 (0)