Skip to content

Commit 0694e91

Browse files
author
Sriya Pratipati
committed
added tests for mbstowcs
1 parent 26a6741 commit 0694e91

File tree

6 files changed

+187
-0
lines changed

6 files changed

+187
-0
lines changed

libc/config/linux/x86_64/entrypoints.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1261,6 +1261,8 @@ if(LLVM_LIBC_FULL_BUILD)
12611261
# wchar.h entrypoints
12621262
libc.src.wchar.mbrtowc
12631263
libc.src.wchar.mbtowc
1264+
libc.src.wchar.mbstowcs
1265+
libc.src.wchar.mbsrtowcs
12641266
libc.src.wchar.wcrtomb
12651267
libc.src.wchar.wctomb
12661268
)

libc/include/wchar.yaml

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,23 @@ functions:
5353
- type: wchar_t *__restrict
5454
- type: const char *__restrict
5555
- type: size_t
56+
# mbstowcs (stdc): returns size_t; takes a restrict-qualified wchar_t
# destination, a restrict-qualified const char source, and a size_t.
- name: mbstowcs
  standards:
    - stdc
  return_type: size_t
  arguments:
    - type: wchar_t *__restrict
    - type: const char *__restrict
    - type: size_t
64+
# mbsrtowcs (stdc): returns size_t; takes a restrict-qualified wchar_t
# destination, a restrict-qualified pointer to the const char source pointer,
# a size_t, and a restrict-qualified mbstate_t pointer.
- name: mbsrtowcs
  standards:
    - stdc
  return_type: size_t
  arguments:
    - type: wchar_t *__restrict
    - type: const char **__restrict
    - type: size_t
    - type: mbstate_t *__restrict
5673
- name: wmemset
5774
standards:
5875
- stdc

libc/src/wchar/CMakeLists.txt

Lines changed: 32 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -159,6 +159,38 @@ add_entrypoint_object(
159159
libc.src.__support.wchar.mbstate
160160
)
161161

162+
# Entrypoint object for mbstowcs, built from mbstowcs.cpp with the public
# declaration in mbstowcs.h.
add_entrypoint_object(
  mbstowcs
  SRCS
    mbstowcs.cpp
  HDRS
    mbstowcs.h
  DEPENDS
    libc.hdr.types.size_t
    libc.hdr.types.wchar_t
    libc.src.__support.common
    libc.src.__support.macros.config
    libc.src.__support.libc_errno
    libc.src.__support.wchar.string_converter
    libc.src.__support.wchar.mbstate
)
177+
178+
# Entrypoint object for mbsrtowcs, built from mbsrtowcs.cpp with the public
# declaration in mbsrtowcs.h. Its dependency list is the same as the one
# given for mbstowcs.
add_entrypoint_object(
  mbsrtowcs
  SRCS
    mbsrtowcs.cpp
  HDRS
    mbsrtowcs.h
  DEPENDS
    libc.hdr.types.size_t
    libc.hdr.types.wchar_t
    libc.src.__support.common
    libc.src.__support.macros.config
    libc.src.__support.libc_errno
    libc.src.__support.wchar.string_converter
    libc.src.__support.wchar.mbstate
)
193+
162194
add_entrypoint_object(
163195
wmemset
164196
SRCS

libc/src/wchar/mbstowcs.cpp

Lines changed: 6 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -21,14 +21,20 @@ namespace LIBC_NAMESPACE_DECL {
2121
LLVM_LIBC_FUNCTION(size_t, mbstowcs,
2222
(wchar_t *__restrict pwcs, const char *__restrict s,
2323
size_t n)) {
24+
n = pwcs == nullptr ? SIZE_MAX : n;
2425
static internal::mbstate internal_mbstate;
2526
internal::StringConverter<char8_t> str_conv(
2627
reinterpret_cast<const char8_t *>(s), &internal_mbstate, n);
2728
int dst_idx = 0;
29+
2830
ErrorOr<char32_t> converted = str_conv.popUTF32();
31+
2932
while (converted.has_value()) {
3033
if (pwcs != nullptr)
3134
pwcs[dst_idx] = converted.value();
35+
// if it is null terminator, do not count in return value
36+
if (converted.value() == L'\0')
37+
return dst_idx;
3238
dst_idx++;
3339
converted = str_conv.popUTF32();
3440
}

libc/test/src/wchar/CMakeLists.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,19 @@ add_libc_test(
6464
libc.test.UnitTest.ErrnoCheckingTest
6565
)
6666

67+
# Unit test target for mbstowcs; it belongs to the libc_wchar_unittests suite
# and is compiled from mbstowcs_test.cpp.
add_libc_test(
  mbstowcs_test
  SUITE
    libc_wchar_unittests
  SRCS
    mbstowcs_test.cpp
  DEPENDS
    libc.src.__support.libc_errno
    libc.src.wchar.mbstowcs
    libc.hdr.types.wchar_t
    libc.test.UnitTest.ErrnoCheckingTest
)
79+
6780
add_libc_test(
6881
wctob_test
6982
SUITE
libc/test/src/wchar/mbstowcs_test.cpp

Lines changed: 117 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,117 @@
1+
//===-- Unittests for mbstowcs --------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "hdr/types/wchar_t.h"
10+
#include "src/__support/libc_errno.h"
11+
#include "src/wchar/mbstowcs.h"
12+
#include "test/UnitTest/ErrnoCheckingTest.h"
13+
#include "test/UnitTest/Test.h"
14+
15+
// Fixture name used by every TEST_F in this file; it is an alias for the
// ErrnoCheckingTest fixture declared in test/UnitTest/ErrnoCheckingTest.h.
using LlvmLibcMBSToWCSTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
16+
17+
// Converts the string "A" in two separate calls: the character first, then
// the terminator on its own.
TEST_F(LlvmLibcMBSToWCSTest, OneByteOneChar) {
  const char *input = "A";
  wchar_t output[2];

  // First call: a limit of one wide character, expected to yield 'A'.
  size_t converted = LIBC_NAMESPACE::mbstowcs(&output[0], input, 1);
  ASSERT_EQ(static_cast<char>(output[0]), 'A');
  ASSERT_EQ(static_cast<int>(converted), 1);
  ASSERT_ERRNO_SUCCESS();

  // Second call starts at the terminator: it is expected to be stored in
  // output[1] but left out of the returned count.
  converted = LIBC_NAMESPACE::mbstowcs(&output[1], &input[1], 1);
  ASSERT_EQ(static_cast<char>(output[1]), '\0');
  ASSERT_EQ(static_cast<int>(converted), 0);
  ASSERT_ERRNO_SUCCESS();
}
31+
32+
// A single four-byte sequence is expected to become one wide character
// followed by the terminator.
TEST_F(LlvmLibcMBSToWCSTest, FourByteOneChar) {
  // Laughing cat emoji 😹, whose code point is 128569 (U+1F639).
  const char *utf8 = "\xf0\x9f\x98\xb9";
  wchar_t wide[2];

  size_t count = LIBC_NAMESPACE::mbstowcs(wide, utf8, 2);

  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(static_cast<int>(wide[0]), 128569);
  ASSERT_TRUE(wide[1] == L'\0');
  // The terminator is written but is not part of the returned count.
  ASSERT_EQ(static_cast<int>(count), 1);
}
42+
43+
// Two consecutive four-byte sequences are expected to become two wide
// characters followed by the terminator.
TEST_F(LlvmLibcMBSToWCSTest, MultiByteTwoCharacters) {
  // "😹😹": the laughing cat emoji (code point 128569) twice.
  const char *utf8 = "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  wchar_t wide[3];

  size_t count = LIBC_NAMESPACE::mbstowcs(wide, utf8, 3);

  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(static_cast<int>(wide[0]), 128569);
  ASSERT_EQ(static_cast<int>(wide[1]), 128569);
  ASSERT_TRUE(wide[2] == L'\0');
  // The terminator is written but is not part of the returned count.
  ASSERT_EQ(static_cast<int>(count), 2);
}
55+
56+
// Mixes one-, two-, three- and four-byte sequences in a single source string.
TEST_F(LlvmLibcMBSToWCSTest, MixedNumberOfBytes) {
  // In order: 'A' (1 byte), sigma 'Σ' (2 bytes, 931), recycling symbol '♻'
  // (3 bytes, 9851), laughing cat emoji '😹' (4 bytes, 128569).
  const char *utf8 = "A\xce\xa3\xe2\x99\xbb\xf0\x9f\x98\xb9";
  wchar_t wide[5];

  size_t count = LIBC_NAMESPACE::mbstowcs(wide, utf8, 5);

  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(static_cast<char>(wide[0]), 'A');
  ASSERT_EQ(static_cast<int>(wide[1]), 931);
  ASSERT_EQ(static_cast<int>(wide[2]), 9851);
  ASSERT_EQ(static_cast<int>(wide[3]), 128569);
  ASSERT_TRUE(wide[4] == L'\0');
  // The terminator is written but is not part of the returned count.
  ASSERT_EQ(static_cast<int>(count), 4);
}
70+
71+
// With a limit smaller than the source length, only that many wide characters
// are expected to be written; the rest of the destination must stay untouched.
TEST_F(LlvmLibcMBSToWCSTest, ReadLessThanStringLength) {
  // "😹😹😹😹": four laughing cat emojis (code point 128569 each).
  const char *utf8 =
      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  // Pre-filled with sentinels so unexpected writes can be detected.
  wchar_t wide[5] = {L'a', L'b', L'c', L'd', L'e'};

  size_t count = LIBC_NAMESPACE::mbstowcs(wide, utf8, 3);

  ASSERT_ERRNO_SUCCESS();
  // Exactly three emojis should have been converted.
  ASSERT_EQ(static_cast<int>(count), 3);
  ASSERT_EQ(static_cast<int>(wide[0]), 128569);
  ASSERT_EQ(static_cast<int>(wide[1]), 128569);
  ASSERT_EQ(static_cast<int>(wide[2]), 128569);
  // Sentinels beyond the limit are expected to be unchanged.
  ASSERT_TRUE(wide[3] == L'd');
  ASSERT_TRUE(wide[4] == L'e');
}
86+
87+
// A source string whose very first byte is invalid is expected to fail.
TEST_F(LlvmLibcMBSToWCSTest, InvalidFirstByte) {
  // 0x80 is not a valid first byte of a multibyte character.
  const char *bad_utf8 =
      "\x80\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  wchar_t wide[3];

  size_t result = LIBC_NAMESPACE::mbstowcs(wide, bad_utf8, 3);

  // Expect the error return (-1 once narrowed to int) and errno == EILSEQ.
  ASSERT_EQ(static_cast<int>(result), -1);
  ASSERT_ERRNO_EQ(EILSEQ);
}
97+
98+
// An invalid byte in the middle of the second character is expected to make
// the whole conversion fail with EILSEQ.
TEST_F(LlvmLibcMBSToWCSTest, InvalidMiddleByte) {
  // The 7th byte is invalid for a 4 byte character
  const char *src =
      "\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  // The destination is sized to the limit passed to mbstowcs: the function
  // may write up to that many wide characters, so a smaller array would only
  // be safe for as long as the conversion happens to fail early.
  constexpr size_t DEST_LEN = 5;
  wchar_t dest[DEST_LEN];
  size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, DEST_LEN);
  // Should return error and set errno
  ASSERT_EQ(static_cast<int>(n), -1);
  ASSERT_ERRNO_EQ(EILSEQ);
}
108+
109+
// With a null destination the length argument is expected to be ignored and
// the whole source string counted.
TEST_F(LlvmLibcMBSToWCSTest, NullDestination) {
  // "😹😹😹😹": four laughing cat emojis.
  const char *utf8 =
      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";

  size_t count = LIBC_NAMESPACE::mbstowcs(nullptr, utf8, 2);

  ASSERT_ERRNO_SUCCESS();
  // All four characters are counted even though the limit passed was 2.
  ASSERT_EQ(static_cast<int>(count), 4);
}

0 commit comments

Comments
 (0)