Skip to content

Commit 6bc7497

Browse files
author
Sriya Pratipati
committed
added internal mbsnrtowcs
1 parent 1921356 commit 6bc7497

File tree

8 files changed

+275
-22
lines changed

8 files changed

+275
-22
lines changed

libc/src/__support/wchar/CMakeLists.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -68,3 +68,21 @@ add_object_library(
6868
.character_converter
6969
.mbstate
7070
)
71+
72+
add_object_library(
73+
mbsnrtowcs
74+
HDRS
75+
mbsnrtowcs.h
76+
SRCS
77+
mbsnrtowcs.cpp
78+
DEPENDS
79+
libc.hdr.errno_macros
80+
libc.hdr.types.wchar_t
81+
libc.hdr.types.size_t
82+
libc.src.__support.common
83+
libc.src.__support.error_or
84+
libc.src.__support.macros.config
85+
.character_converter
86+
.mbstate
87+
.string_converter
88+
)
Lines changed: 57 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,57 @@
1+
//===-- Implementation for mbsnrtowcs function ------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/__support/wchar/mbsnrtowcs.h"
10+
#include "hdr/errno_macros.h"
11+
#include "hdr/types/size_t.h"
12+
#include "hdr/types/wchar_t.h"
13+
#include "src/__support/common.h"
14+
#include "src/__support/error_or.h"
15+
#include "src/__support/macros/config.h"
16+
#include "src/__support/wchar/character_converter.h"
17+
#include "src/__support/wchar/mbstate.h"
18+
#include "src/__support/wchar/string_converter.h"
19+
20+
namespace LIBC_NAMESPACE_DECL {
21+
namespace internal {
22+
23+
// Shared worker for the multibyte-string -> wide-string conversion functions
// (it is called by the mbstowcs entrypoint).
//
// Decodes the multibyte string at *src through a StringConverter built with
// the limits `len` and `nmc`, storing each decoded character in dst when dst
// is non-null.
//
// Returns:
//   - 0 if *src is null;
//   - Error(EINVAL) if `ps` does not hold a valid conversion state;
//   - the number of wide characters produced, not counting the null
//     terminator, when the terminator is reached or a conversion limit is hit;
//   - Error(code) with the converter's error code for any other failure.
//
// Side effects on *src (only when dst is non-null, matching the standard
// mbsrtowcs contract that *src is untouched for a null destination):
//   - set to nullptr when the terminating null character was converted;
//   - otherwise advanced by the number of source bytes the converter consumed.
ErrorOr<size_t> mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                           size_t nmc, size_t len, mbstate *__restrict ps) {
  if (*src == nullptr)
    return 0;
  // Checking if mbstate is valid
  CharacterConverter char_conv(ps);
  if (!char_conv.isValidState())
    return Error(EINVAL);

  StringConverter<char8_t> str_conv(reinterpret_cast<const char8_t *>(*src), ps,
                                    len, nmc);
  size_t dst_idx = 0;
  ErrorOr<char32_t> converted = str_conv.popUTF32();
  while (converted.has_value()) {
    if (dst != nullptr)
      dst[dst_idx] = converted.value();
    // null terminator should not be counted in return value
    if (converted.value() == L'\0') {
      // Report "whole string consumed" through the caller's pointer. Writing
      // to `src` itself would only change this function's local copy.
      if (dst != nullptr)
        *src = nullptr;
      return dst_idx;
    }
    dst_idx++;
    converted = str_conv.popUTF32();
  }

  // Conversion stopped before the terminator: tell the caller how far we got.
  if (dst != nullptr)
    *src += str_conv.getSourceIndex();
  if (converted.error() == -1) // if we hit conversion limit
    return dst_idx;

  return Error(converted.error());
}
54+
55+
} // namespace internal
56+
57+
} // namespace LIBC_NAMESPACE_DECL
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//===-- Implementation header for mbsnrtowcs function -----------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS
10+
#define LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS
11+
12+
#include "hdr/types/size_t.h"
13+
#include "hdr/types/wchar_t.h"
14+
#include "src/__support/common.h"
15+
#include "src/__support/error_or.h"
16+
#include "src/__support/macros/config.h"
17+
#include "src/__support/wchar/mbstate.h"
18+
19+
namespace LIBC_NAMESPACE_DECL {
20+
namespace internal {
21+
22+
// Internal multibyte-string to wide-string conversion routine; the definition
// lives in mbsnrtowcs.cpp.
//
//   dst - destination buffer for the wide characters; may be null, in which
//         case nothing is stored and the characters are only counted.
//   src - address of the pointer to the multibyte input; if *src is null the
//         function returns 0 immediately.
//   nmc, len - limits forwarded to the underlying StringConverter
//         (presumably nmc bounds the source bytes read and len the wide
//         characters produced, as in POSIX mbsnrtowcs — confirm against
//         StringConverter's constructor).
//   ps  - conversion state; an invalid state yields Error(EINVAL).
//
// Returns the number of wide characters converted, not counting the null
// terminator, or an Error carrying the errno-style code of the failure.
ErrorOr<size_t> mbsnrtowcs(wchar_t *__restrict dst, const char **__restrict src,
23+
size_t nmc, size_t len, mbstate *__restrict ps);
24+
25+
} // namespace internal
26+
27+
} // namespace LIBC_NAMESPACE_DECL
28+
29+
#endif // LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSNRTOWCS

libc/src/wchar/CMakeLists.txt

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -171,8 +171,8 @@ add_entrypoint_object(
171171
libc.src.__support.common
172172
libc.src.__support.macros.config
173173
libc.src.__support.libc_errno
174-
libc.src.__support.wchar.string_converter
175174
libc.src.__support.wchar.mbstate
175+
libc.src.__support.wchar.mbsnrtowcs
176176
)
177177

178178
add_entrypoint_object(

libc/src/wchar/mbsrtowcs.cpp

Lines changed: 7 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,7 @@ LLVM_LIBC_FUNCTION(size_t, mbsrtowcs,
2323
size_t len, mbstate_t *__restrict ps)) {
2424
static internal::mbstate internal_mbstate;
2525
internal::StringConverter<char8_t> str_conv(
26-
reinterpret_cast<const char8_t *>(src),
26+
reinterpret_cast<const char8_t *>(*src),
2727
ps == nullptr ? &internal_mbstate
2828
: reinterpret_cast<internal::mbstate *>(ps),
2929
len);
@@ -32,11 +32,16 @@ LLVM_LIBC_FUNCTION(size_t, mbsrtowcs,
3232
ErrorOr<char32_t> converted = str_conv.popUTF32();
3333
while (converted.has_value()) {
3434
dst[dst_idx] = converted.value();
35+
// null terminator should not be counted in return value
36+
if (converted.value() == L'\0') {
37+
src = nullptr;
38+
return dst_idx;
39+
}
3540
dst_idx++;
3641
converted = str_conv.popUTF32();
3742
}
3843

39-
src += str_conv.getSourceIndex();
44+
*src += str_conv.getSourceIndex();
4045
if (converted.error() == -1) // if we hit conversion limit
4146
return dst_idx;
4247

libc/src/wchar/mbstowcs.cpp

Lines changed: 8 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -13,8 +13,8 @@
1313
#include "src/__support/common.h"
1414
#include "src/__support/libc_errno.h"
1515
#include "src/__support/macros/config.h"
16+
#include "src/__support/wchar/mbsnrtowcs.h"
1617
#include "src/__support/wchar/mbstate.h"
17-
#include "src/__support/wchar/string_converter.h"
1818

1919
namespace LIBC_NAMESPACE_DECL {
2020

@@ -23,26 +23,15 @@ LLVM_LIBC_FUNCTION(size_t, mbstowcs,
2323
size_t n)) {
2424
n = pwcs == nullptr ? SIZE_MAX : n;
2525
static internal::mbstate internal_mbstate;
26-
internal::StringConverter<char8_t> str_conv(
27-
reinterpret_cast<const char8_t *>(s), &internal_mbstate, n);
28-
int dst_idx = 0;
26+
const char *temp = s;
27+
auto ret = internal::mbsnrtowcs(pwcs, &temp, SIZE_MAX, n, &internal_mbstate);
2928

30-
ErrorOr<char32_t> converted = str_conv.popUTF32();
31-
32-
while (converted.has_value()) {
33-
if (pwcs != nullptr)
34-
pwcs[dst_idx] = converted.value();
35-
// if it is null terminator, do not count in return value
36-
if (converted.value() == L'\0')
37-
return dst_idx;
38-
dst_idx++;
39-
converted = str_conv.popUTF32();
29+
if (!ret.has_value()) {
30+
// Encoding failure
31+
libc_errno = ret.error();
32+
return -1;
4033
}
41-
if (converted.error() == -1) // if we hit conversion limit
42-
return dst_idx;
43-
44-
libc_errno = converted.error();
45-
return -1;
34+
return ret.value();
4635
}
4736

4837
} // namespace LIBC_NAMESPACE_DECL

libc/test/src/wchar/CMakeLists.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -77,6 +77,22 @@ add_libc_test(
7777
libc.test.UnitTest.ErrnoCheckingTest
7878
)
7979

80+
add_libc_test(
81+
mbsrtowcs_test
82+
SUITE
83+
libc_wchar_unittests
84+
SRCS
85+
mbsrtowcs_test.cpp
86+
DEPENDS
87+
libc.src.__support.libc_errno
88+
libc.src.__support.wchar.mbstate
89+
libc.src.string.memset
90+
libc.src.wchar.mbsrtowcs
91+
libc.hdr.types.mbstate_t
92+
libc.hdr.types.wchar_t
93+
libc.test.UnitTest.ErrnoCheckingTest
94+
)
95+
8096
add_libc_test(
8197
wctob_test
8298
SUITE
Lines changed: 139 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,139 @@
1+
//===-- Unittests for mbsrtowcs -------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "hdr/types/mbstate_t.h"
10+
#include "hdr/types/wchar_t.h"
11+
#include "src/__support/libc_errno.h"
12+
#include "src/__support/wchar/mbstate.h"
13+
#include "src/string/memset.h"
14+
#include "src/wchar/mbsrtowcs.h"
15+
#include "test/UnitTest/ErrnoCheckingTest.h"
16+
#include "test/UnitTest/Test.h"
17+
18+
using LlvmLibcMBSRToWCSTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
19+
20+
// Converts the single-character string "A" in two calls with a limit of one
// wide character each: the first call must produce 'A' and return 1, the
// second must produce the null terminator and return 0 (the terminator is not
// counted).
TEST_F(LlvmLibcMBSRToWCSTest, OneByteOneChar) {
  const char *ch = "A";
  wchar_t dest[2];
  // The conversion state has to be a real, zero-initialised object. An
  // uninitialised `mbstate_t *` would make memset overwrite the pointer itself
  // and hand mbsrtowcs a bogus state pointer.
  mbstate_t mb;
  LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
  size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &ch, 1, &mb);
  ASSERT_EQ(static_cast<char>(*dest), 'A');
  ASSERT_EQ(static_cast<int>(n), 1);
  ASSERT_ERRNO_SUCCESS();

  // Second call resumes from the updated source pointer and hits the
  // terminator.
  n = LIBC_NAMESPACE::mbsrtowcs(dest + 1, &ch, 1, &mb);
  ASSERT_EQ(static_cast<char>(dest[1]), '\0');
  // Should not include null terminator
  ASSERT_EQ(static_cast<int>(n), 0);
  ASSERT_ERRNO_SUCCESS();
}
36+
37+
// TEST_F(LlvmLibcMBSRToWCSTest, FourByteOneChar) {
38+
// const char *src = "\xf0\x9f\x98\xb9"; // laughing cat emoji 😹
39+
// wchar_t dest[2];
40+
// size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 2);
41+
// ASSERT_ERRNO_SUCCESS();
42+
// ASSERT_EQ(static_cast<int>(dest[0]), 128569);
43+
// ASSERT_TRUE(dest[1] == L'\0');
44+
// // Should not count null terminator in number
45+
// ASSERT_EQ(static_cast<int>(n), 1);
46+
// }
47+
48+
// TEST_F(LlvmLibcMBSRToWCSTest, MultiByteTwoCharacters) {
49+
// // Two laughing cat emojis "😹😹"
50+
// const char *src = "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
51+
// wchar_t dest[3];
52+
// size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3);
53+
// ASSERT_ERRNO_SUCCESS();
54+
// ASSERT_EQ(static_cast<int>(dest[0]), 128569);
55+
// ASSERT_EQ(static_cast<int>(dest[1]), 128569);
56+
// ASSERT_TRUE(dest[2] == L'\0');
57+
// // Should not count null terminator in number
58+
// ASSERT_EQ(static_cast<int>(n), 2);
59+
// }
60+
61+
// TEST_F(LlvmLibcMBSRToWCSTest, MixedNumberOfBytes) {
62+
// // 'A', sigma symbol 'Σ', recycling symbol '♻', laughing cat emoji '😹'
63+
// const char *src = "A\xce\xa3\xe2\x99\xbb\xf0\x9f\x98\xb9";
64+
// wchar_t dest[5];
65+
// size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 5);
66+
// ASSERT_ERRNO_SUCCESS();
67+
// ASSERT_EQ(static_cast<char>(dest[0]), 'A');
68+
// ASSERT_EQ(static_cast<int>(dest[1]), 931);
69+
// ASSERT_EQ(static_cast<int>(dest[2]), 9851);
70+
// ASSERT_EQ(static_cast<int>(dest[3]), 128569);
71+
// ASSERT_TRUE(dest[4] == L'\0');
72+
// // Should not count null terminator in number
73+
// ASSERT_EQ(static_cast<int>(n), 4);
74+
// }
75+
76+
// TEST_F(LlvmLibcMBSRToWCSTest, ReadLessThanStringLength) {
77+
// // Four laughing cat emojis "😹😹😹😹"
78+
// const char *src =
79+
// "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
80+
// wchar_t dest[5] = {L'a', L'b', L'c', L'd', L'e'};
81+
// size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3);
82+
// ASSERT_ERRNO_SUCCESS();
83+
// // Should have read 3 emojis
84+
// ASSERT_EQ(static_cast<int>(n), 3);
85+
// ASSERT_EQ(static_cast<int>(dest[0]), 128569);
86+
// ASSERT_EQ(static_cast<int>(dest[1]), 128569);
87+
// ASSERT_EQ(static_cast<int>(dest[2]), 128569);
88+
// ASSERT_TRUE(dest[3] == L'd');
89+
// ASSERT_TRUE(dest[4] == L'e');
90+
// }
91+
92+
// TEST_F(LlvmLibcMBSRToWCSTest, InvalidFirstByte) {
93+
// // 0x80 is invalid first byte of mb character
94+
// const char *src =
95+
// "\x80\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
96+
// wchar_t dest[3];
97+
// size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 3);
98+
// // Should return error and set errno
99+
// ASSERT_EQ(static_cast<int>(n), -1);
100+
// ASSERT_ERRNO_EQ(EILSEQ);
101+
// }
102+
103+
// TEST_F(LlvmLibcMBSRToWCSTest, InvalidMiddleByte) {
104+
// // The 7th byte is invalid for a 4 byte character
105+
// const char *src =
106+
// "\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
107+
// wchar_t dest[3];
108+
// size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 5);
109+
// // Should return error and set errno
110+
// ASSERT_EQ(static_cast<int>(n), -1);
111+
// ASSERT_ERRNO_EQ(EILSEQ);
112+
// }
113+
114+
// TEST_F(LlvmLibcMBSRToWCSTest, NullDestination) {
115+
// // Four laughing cat emojis "😹😹😹😹"
116+
// const char *src =
117+
// "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
118+
// size_t n = LIBC_NAMESPACE::mbstowcs(nullptr, src, 2);
119+
// ASSERT_ERRNO_SUCCESS();
120+
// // Null destination should ignore len and read till end of string
121+
// ASSERT_EQ(static_cast<int>(n), 4);
122+
// }
123+
124+
// TEST_F(LlvmLibcMBSRToWCSTest, ErrnoChecks) {
125+
// // Two laughing cat emojis and invalid 3rd mb char (3rd byte of it)
126+
// const char *src =
127+
// "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9";
128+
// wchar_t dest[5];
129+
// // First two bytes are valid --> should not set errno
130+
// size_t n = LIBC_NAMESPACE::mbstowcs(dest, src, 2);
131+
// ASSERT_ERRNO_SUCCESS();
132+
// ASSERT_EQ(static_cast<int>(n), 2);
133+
// ASSERT_EQ(static_cast<int>(dest[0]), 128569);
134+
// ASSERT_EQ(static_cast<int>(dest[1]), 128569);
135+
// // Trying to read the 3rd byte should set errno
136+
// n = LIBC_NAMESPACE::mbstowcs(dest, src, 2);
137+
// ASSERT_ERRNO_EQ(EILSEQ);
138+
// ASSERT_EQ(static_cast<int>(n), -1);
139+
// }

0 commit comments

Comments
 (0)