Skip to content

Commit 23d9216

Browse files
committed
fixed edge case with null dest; added tests
1 parent 0de8381 commit 23d9216

File tree

3 files changed

+195
-9
lines changed

3 files changed

+195
-9
lines changed

libc/src/__support/wchar/wcsrtombs.cpp

Lines changed: 9 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -25,29 +25,32 @@ ErrorOr<size_t> wcsrtombs(char *__restrict dst, const wchar_t **__restrict src,
2525
size_t len, mbstate *__restrict ps) {
2626
static_assert(sizeof(wchar_t) == 4);
2727

28-
if (src == nullptr || dst == nullptr)
28+
if (src == nullptr)
2929
return Error(-1);
30+
31+
// ignore the len parameter when there's no destination string
32+
if (dst == nullptr)
33+
len = SIZE_MAX;
3034

3135
size_t bytes_written = 0;
32-
const wchar_t *wc_ptr = *src;
3336
while (bytes_written < len) {
3437
char buf[4];
35-
auto result = internal::wcrtomb(dst + bytes_written, *wc_ptr, ps,
36-
len - bytes_written);
38+
auto result =
39+
internal::wcrtomb(dst + bytes_written, **src, ps, len - bytes_written);
3740
if (!result.has_value())
3841
return result; // forward the error
3942

4043
if (result.value() == -1) // couldn't complete the conversion
4144
return len;
4245

4346
// terminate the loop after converting the null wide character
44-
if (*wc_ptr == L'\0') {
47+
if (**src == L'\0') {
4548
*src = '\0';
4649
return bytes_written;
4750
}
4851

4952
bytes_written += result.value();
50-
wc_ptr++;
53+
(*src)++;
5154
}
5255

5356
return bytes_written;

libc/src/wchar/wcsrtombs.cpp

Lines changed: 0 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,6 @@ LLVM_LIBC_FUNCTION(size_t, wcsrtombs,
2525
static internal::mbstate internal_mbstate;
2626

2727
LIBC_CRASH_ON_NULLPTR(src);
28-
char buf[len];
29-
if (dst == nullptr)
30-
dst = buf;
3128

3229
auto result = internal::wcsrtombs(
3330
dst, src, len,
Lines changed: 186 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,186 @@
1+
//===-- Unittests for wcsrtombs ------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "hdr/types/mbstate_t.h"
10+
#include "hdr/types/wchar_t.h"
11+
#include "src/__support/libc_errno.h"
12+
#include "src/string/memset.h"
13+
#include "src/wchar/wcsrtombs.h"
14+
#include "test/UnitTest/Test.h"
15+
16+
TEST(LlvmLibcWCSRToMBSTest, SingleCharacterOneByte) {
17+
mbstate_t state;
18+
LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
19+
const wchar_t *wcs = L"U";
20+
const wchar_t *wcs_start = wcs;
21+
char mbs[] = {0, 0};
22+
size_t cnt = LIBC_NAMESPACE::wcsrtombs(mbs, &wcs, 2, &state);
23+
ASSERT_EQ(cnt, static_cast<size_t>(1));
24+
ASSERT_EQ(mbs[0], 'U');
25+
ASSERT_EQ(mbs[1], '\0');
26+
ASSERT_EQ(wcs, wcs_start + 1);
27+
}
28+
29+
TEST(LlvmLibcWCSRToMBSTest, MultipleCompleteConversions) {
30+
mbstate_t state;
31+
LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
32+
33+
// utf32: 0xff -> utf8: 0xc3 0xbf
34+
// utf32: 0xac15 -> utf8: 0xea 0xb0 0x95
35+
const wchar_t *wcs = L"\xFF\xAC15";
36+
const wchar_t *wcs_start = wcs;
37+
38+
// init with dummy value of 1 so that we can check when the null byte is written
39+
char mbs[7] = {1, 1, 1, 1, 1, 1, 1};
40+
char expected[6] = {0xC3, 0xBF, 0xEA, 0xB0, 0x95, 0x00};
41+
42+
size_t cnt1 = LIBC_NAMESPACE::wcsrtombs(mbs, &wcs, 2, &state);
43+
ASSERT_EQ(cnt1, static_cast<size_t>(2));
44+
ASSERT_EQ(wcs, wcs_start + 1);
45+
ASSERT_EQ(mbs[0], expected[0]);
46+
ASSERT_EQ(mbs[1], expected[1]);
47+
ASSERT_EQ(mbs[2], '\x01'); // not modified
48+
49+
size_t cnt2 = LIBC_NAMESPACE::wcsrtombs(mbs + cnt1, &wcs, 3, &state);
50+
ASSERT_EQ(cnt2, static_cast<size_t>(3));
51+
ASSERT_EQ(wcs, wcs_start + 2);
52+
ASSERT_EQ(mbs[0], expected[0]);
53+
ASSERT_EQ(mbs[1], expected[1]);
54+
ASSERT_EQ(mbs[2], expected[2]);
55+
ASSERT_EQ(mbs[3], expected[3]);
56+
ASSERT_EQ(mbs[4], expected[4]);
57+
ASSERT_EQ(mbs[5], '\x01'); // null byte not yet written
58+
59+
// all that is left in the string is the null terminator
60+
size_t cnt3 = LIBC_NAMESPACE::wcsrtombs(mbs + cnt1 + cnt2, &wcs, 50, &state);
61+
ASSERT_EQ(cnt3, static_cast<size_t>(0));
62+
ASSERT_EQ(wcs, nullptr);
63+
ASSERT_EQ(mbs[0], expected[0]);
64+
ASSERT_EQ(mbs[1], expected[1]);
65+
ASSERT_EQ(mbs[2], expected[2]);
66+
ASSERT_EQ(mbs[3], expected[3]);
67+
ASSERT_EQ(mbs[4], expected[4]);
68+
ASSERT_EQ(mbs[5], expected[5]);
69+
ASSERT_EQ(mbs[6], '\x01'); // should not write beyond null terminator
70+
}
71+
72+
TEST(LlvmLibcWCSRToMBSTest, MultiplePartialConversions) {
73+
mbstate_t state;
74+
LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
75+
76+
// utf32: 0xff -> utf8: 0xc3 0xbf
77+
// utf32: 0xac15 -> utf8: 0xea 0xb0 0x95
78+
const wchar_t *wcs = L"\xFF\xAC15";
79+
const wchar_t *wcs_start = wcs;
80+
81+
// init with dummy value of 1 so that we can check when the null byte is written
82+
char mbs[7] = {1, 1, 1, 1, 1, 1, 1};
83+
char expected[6] = {0xC3, 0xBF, 0xEA, 0xB0, 0x95, 0x00};
84+
size_t written = 0;
85+
size_t count = 0;
86+
87+
count = LIBC_NAMESPACE::wcsrtombs(mbs, &wcs, 1, &state);
88+
written += count;
89+
ASSERT_EQ(count, static_cast<size_t>(1));
90+
ASSERT_EQ(wcs, wcs_start);
91+
ASSERT_EQ(mbs[0], expected[0]);
92+
ASSERT_EQ(mbs[1], '\x01');
93+
94+
count = LIBC_NAMESPACE::wcsrtombs(mbs + written, &wcs, 2, &state);
95+
written += count;
96+
ASSERT_EQ(count, static_cast<size_t>(2));
97+
ASSERT_EQ(wcs, wcs_start + 1);
98+
ASSERT_EQ(mbs[0], expected[0]);
99+
ASSERT_EQ(mbs[1], expected[1]);
100+
ASSERT_EQ(mbs[2], expected[2]);
101+
ASSERT_EQ(mbs[3], '\x01');
102+
103+
count = LIBC_NAMESPACE::wcsrtombs(mbs + written, &wcs, 3, &state);
104+
written += count;
105+
ASSERT_EQ(count, static_cast<size_t>(2));
106+
ASSERT_EQ(wcs, nullptr);
107+
ASSERT_EQ(mbs[0], expected[0]);
108+
ASSERT_EQ(mbs[1], expected[1]);
109+
ASSERT_EQ(mbs[2], expected[2]);
110+
ASSERT_EQ(mbs[3], expected[3]);
111+
ASSERT_EQ(mbs[4], expected[4]);
112+
ASSERT_EQ(mbs[5], expected[5]);
113+
ASSERT_EQ(mbs[6], '\x01');
114+
}
115+
116+
TEST(LlvmLibcWCSRToMBSTest, NullDestination) {
117+
mbstate_t state;
118+
LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
119+
120+
// utf32: 0x1f921 -> utf8: 0xf0 0x9f 0xa4 0xa1
121+
// utf32: 0xac15 -> utf8: 0xea 0xb0 0x95
122+
const wchar_t *wcs = L"\x1F921\xAC15";
123+
124+
// null destination means the conversion isn't stored, but all the side effects
125+
// still occur. The len parameter is also ignored.
126+
size_t count = LIBC_NAMESPACE::wcsrtombs(nullptr, &wcs, 3, &state);
127+
ASSERT_EQ(count, static_cast<size_t>(7));
128+
ASSERT_EQ(wcs, nullptr);
129+
}
130+
131+
TEST(LlvmLibcWCSRToMBSTest, NullState) {
132+
// same as MultiplePartialConversions test except without an explicit
133+
// mbstate_t
134+
135+
const wchar_t *wcs = L"\xFF\xAC15";
136+
const wchar_t *wcs_start = wcs;
137+
138+
// init with dummy value of 1 so that we can check when the null byte is written
139+
char mbs[7] = {1, 1, 1, 1, 1, 1, 1};
140+
char expected[6] = {0xC3, 0xBF, 0xEA, 0xB0, 0x95, 0x00};
141+
size_t written = 0;
142+
size_t count = 0;
143+
144+
count = LIBC_NAMESPACE::wcsrtombs(mbs, &wcs, 1, nullptr);
145+
written += count;
146+
ASSERT_EQ(count, static_cast<size_t>(1));
147+
ASSERT_EQ(wcs, wcs_start);
148+
ASSERT_EQ(mbs[0], expected[0]);
149+
ASSERT_EQ(mbs[1], '\x01');
150+
151+
count = LIBC_NAMESPACE::wcsrtombs(mbs + written, &wcs, 2, nullptr);
152+
written += count;
153+
ASSERT_EQ(count, static_cast<size_t>(2));
154+
ASSERT_EQ(wcs, wcs_start + 1);
155+
ASSERT_EQ(mbs[0], expected[0]);
156+
ASSERT_EQ(mbs[1], expected[1]);
157+
ASSERT_EQ(mbs[2], expected[2]);
158+
ASSERT_EQ(mbs[3], '\x01');
159+
160+
count = LIBC_NAMESPACE::wcsrtombs(mbs + written, &wcs, 3, nullptr);
161+
written += count;
162+
ASSERT_EQ(count, static_cast<size_t>(2));
163+
ASSERT_EQ(wcs, nullptr);
164+
ASSERT_EQ(mbs[0], expected[0]);
165+
ASSERT_EQ(mbs[1], expected[1]);
166+
ASSERT_EQ(mbs[2], expected[2]);
167+
ASSERT_EQ(mbs[3], expected[3]);
168+
ASSERT_EQ(mbs[4], expected[4]);
169+
ASSERT_EQ(mbs[5], expected[5]);
170+
ASSERT_EQ(mbs[6], '\x01');
171+
}
172+
173+
TEST(LlvmLibcWCSRToMBSTest, InvalidWchar) {
174+
mbstate_t state;
175+
LIBC_NAMESPACE::memset(&state, 0, sizeof(mbstate_t));
176+
177+
const wchar_t *wcs = L"\xFF\xAC15\x12FFFF";
178+
char mbs[15];
179+
// convert the valid wchar
180+
size_t count = LIBC_NAMESPACE::wcsrtombs(mbs, &wcs, 7, &state);
181+
ASSERT_EQ(count, static_cast<size_t>(7));
182+
183+
count = LIBC_NAMESPACE::wcsrtombs(mbs + count, &wcs, 7, &state); // invalid
184+
ASSERT_EQ(count, static_cast<size_t>(-1));
185+
ASSERT_EQ(static_cast<int>(libc_errno), EILSEQ);
186+
}

0 commit comments

Comments
 (0)