Skip to content

Commit 8dacb64

Browse files
sribee8Sriya Pratipati
authored and committed
[libc] mbtowc implementation (llvm#145405)
Implemented mbtowc and tests for the function. --------- Co-authored-by: Sriya Pratipati <[email protected]>
1 parent e69297d commit 8dacb64

File tree

7 files changed

+254
-0
lines changed

7 files changed

+254
-0
lines changed

libc/config/linux/x86_64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1248,6 +1248,7 @@ if(LLVM_LIBC_FULL_BUILD)
12481248

12491249
# wchar.h entrypoints
12501250
libc.src.wchar.mbrtowc
1251+
libc.src.wchar.mbtowc
12511252
libc.src.wchar.wcrtomb
12521253
)
12531254
endif()

libc/include/wchar.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,14 @@ functions:
3838
- type: const char *__restrict
3939
- type: size_t
4040
- type: mbstate_t *__restrict
41+
# Public declaration of mbtowc: a stdc function returning int and taking
# (wchar_t *__restrict, const char *__restrict, size_t).
- name: mbtowc
42+
standards:
43+
- stdc
44+
return_type: int
45+
arguments:
46+
- type: wchar_t *__restrict
47+
- type: const char *__restrict
48+
- type: size_t
4149
- name: wmemset
4250
standards:
4351
- stdc

libc/src/wchar/CMakeLists.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,22 @@ add_entrypoint_object(
6565
libc.src.__support.wchar.mbstate
6666
)
6767

68+
# Entrypoint target for the public mbtowc function. It is built from
# mbtowc.cpp/mbtowc.h and depends on the size_t and wchar_t type headers, the
# common/config support headers, libc_errno, and the internal wchar
# mbrtowc/mbstate helpers that the implementation includes.
add_entrypoint_object(
69+
mbtowc
70+
SRCS
71+
mbtowc.cpp
72+
HDRS
73+
mbtowc.h
74+
DEPENDS
75+
libc.hdr.types.size_t
76+
libc.hdr.types.wchar_t
77+
libc.src.__support.common
78+
libc.src.__support.macros.config
79+
libc.src.__support.libc_errno
80+
libc.src.__support.wchar.mbrtowc
81+
libc.src.__support.wchar.mbstate
82+
)
83+
6884
add_entrypoint_object(
6985
wmemset
7086
SRCS

libc/src/wchar/mbtowc.cpp

Lines changed: 40 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,40 @@
1+
//===-- Implementation of mbtowc -----------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/wchar/mbtowc.h"
10+
11+
#include "hdr/types/size_t.h"
12+
#include "hdr/types/wchar_t.h"
13+
#include "src/__support/common.h"
14+
#include "src/__support/libc_errno.h"
15+
#include "src/__support/macros/config.h"
16+
#include "src/__support/wchar/mbrtowc.h"
17+
#include "src/__support/wchar/mbstate.h"
18+
19+
namespace LIBC_NAMESPACE_DECL {
20+
21+
LLVM_LIBC_FUNCTION(int, mbtowc,
22+
(wchar_t *__restrict pwc, const char *__restrict s,
23+
size_t n)) {
24+
// returns 0 since UTF-8 encoding is not state-dependent
25+
if (s == nullptr)
26+
return 0;
27+
internal::mbstate internal_mbstate;
28+
// temp ptr to use if pwc is nullptr
29+
wchar_t buf[1];
30+
auto ret =
31+
internal::mbrtowc(pwc == nullptr ? buf : pwc, s, n, &internal_mbstate);
32+
if (!ret.has_value() || static_cast<int>(ret.value()) == -2) {
33+
// Encoding failure
34+
libc_errno = EILSEQ;
35+
return -1;
36+
}
37+
return static_cast<int>(ret.value());
38+
}
39+
40+
} // namespace LIBC_NAMESPACE_DECL

libc/src/wchar/mbtowc.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//===-- Implementation header for mbtowc ---------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_WCHAR_MBTOWC_H
10+
#define LLVM_LIBC_SRC_WCHAR_MBTOWC_H
11+
12+
#include "hdr/types/size_t.h"
13+
#include "hdr/types/wchar_t.h"
14+
#include "src/__support/macros/config.h"
15+
16+
namespace LIBC_NAMESPACE_DECL {
17+
18+
// Converts the multibyte character that starts at `s`, examining at most `n`
// bytes, into a wide character; the result is stored through `pwc` when `pwc`
// is non-null.
// Returns 0 when `s` is null, -1 with errno set to EILSEQ when no complete,
// valid character can be decoded, and otherwise the count produced by the
// internal mbrtowc helper (the unit tests expect 0 for the null character and
// the number of bytes consumed for any other character).
int mbtowc(wchar_t *__restrict pwc, const char *__restrict s, size_t n);
19+
20+
} // namespace LIBC_NAMESPACE_DECL
21+
22+
#endif // LLVM_LIBC_SRC_WCHAR_MBTOWC_H

libc/test/src/wchar/CMakeLists.txt

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,19 @@ add_libc_test(
3737
libc.hdr.types.wchar_t
3838
)
3939

40+
# Unit test target for mbtowc, registered in the libc_wchar_unittests suite.
# Depends on the entrypoint under test, libc_errno and the ErrnoCheckingTest
# fixture (the test asserts on errno), and the wchar_t type header.
add_libc_test(
41+
mbtowc_test
42+
SUITE
43+
libc_wchar_unittests
44+
SRCS
45+
mbtowc_test.cpp
46+
DEPENDS
47+
libc.src.__support.libc_errno
48+
libc.src.wchar.mbtowc
49+
libc.hdr.types.wchar_t
50+
libc.test.UnitTest.ErrnoCheckingTest
51+
)
52+
4053
add_libc_test(
4154
wctob_test
4255
SUITE

libc/test/src/wchar/mbtowc_test.cpp

Lines changed: 154 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,154 @@
1+
//===-- Unittests for mbtowc ---------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "hdr/types/wchar_t.h"
10+
#include "src/__support/libc_errno.h"
11+
#include "src/wchar/mbtowc.h"
12+
#include "test/UnitTest/ErrnoCheckingTest.h"
13+
#include "test/UnitTest/Test.h"
14+
15+
// Fixture used by every TEST_F in this file. It aliases the shared
// ErrnoCheckingTest fixture, which the tests rely on for ASSERT_ERRNO_EQ
// (NOTE(review): presumably it also resets/validates errno around each test --
// confirm in test/UnitTest/ErrnoCheckingTest.h).
using LlvmLibcMBToWCTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
16+
17+
// A single ASCII byte converts to the matching wide character and reports one
// byte consumed; a zero-byte budget is rejected.
TEST_F(LlvmLibcMBToWCTest, OneByte) {
  const char *src = "A";
  wchar_t out[2];

  int rc = LIBC_NAMESPACE::mbtowc(out, src, 1);
  ASSERT_EQ(static_cast<char>(out[0]), 'A');
  ASSERT_EQ(rc, 1);

  // No bytes may be examined, so no character can be produced.
  rc = LIBC_NAMESPACE::mbtowc(out, src, 0);
  ASSERT_EQ(rc, -1);
  ASSERT_ERRNO_EQ(EILSEQ);
}
29+
30+
// The two-byte sequence C2 8E converts to code point 142 (U+008E); neither
// byte is accepted on its own.
TEST_F(LlvmLibcMBToWCTest, TwoByte) {
  const char bytes[2] = {static_cast<char>(0xC2), static_cast<char>(0x8E)};
  wchar_t out[2];

  int rc = LIBC_NAMESPACE::mbtowc(out, bytes, 2);
  ASSERT_EQ(static_cast<int>(out[0]), 142);
  ASSERT_EQ(rc, 2);

  // Only the lead byte is visible, so the character cannot be completed.
  rc = LIBC_NAMESPACE::mbtowc(out, bytes, 1);
  ASSERT_EQ(rc, -1);
  // Starting at the second byte must fail as well.
  rc = LIBC_NAMESPACE::mbtowc(out, &bytes[1], 1);
  ASSERT_EQ(rc, -1);
  ASSERT_ERRNO_EQ(EILSEQ);
}
46+
47+
// The three-byte sequence E2 88 91 (the ∑ summation sign) converts to code
// point 8721; a two-byte prefix of it is rejected.
TEST_F(LlvmLibcMBToWCTest, ThreeByte) {
  const char bytes[3] = {static_cast<char>(0xE2), static_cast<char>(0x88),
                         static_cast<char>(0x91)};
  wchar_t out[2];

  int rc = LIBC_NAMESPACE::mbtowc(out, bytes, 3);
  ASSERT_EQ(static_cast<int>(out[0]), 8721);
  ASSERT_EQ(rc, 3);

  // Two of the three bytes are not enough to form the character.
  rc = LIBC_NAMESPACE::mbtowc(out, bytes, 2);
  ASSERT_EQ(rc, -1);
  ASSERT_ERRNO_EQ(EILSEQ);
}
60+
61+
// The four-byte sequence F0 9F A4 A1 (the 🤡 clown emoji) converts to code
// point 129313; a two-byte prefix of it is rejected.
TEST_F(LlvmLibcMBToWCTest, FourByte) {
  const char bytes[4] = {static_cast<char>(0xF0), static_cast<char>(0x9F),
                         static_cast<char>(0xA4), static_cast<char>(0xA1)};
  wchar_t out[2];

  int rc = LIBC_NAMESPACE::mbtowc(out, bytes, 4);
  ASSERT_EQ(static_cast<int>(out[0]), 129313);
  ASSERT_EQ(rc, 4);

  // Half of the sequence is not enough to form the character.
  rc = LIBC_NAMESPACE::mbtowc(out, bytes, 2);
  ASSERT_EQ(rc, -1);
  ASSERT_ERRNO_EQ(EILSEQ);
}
75+
76+
// A lone 0x80 byte is rejected with EILSEQ.
TEST_F(LlvmLibcMBToWCTest, InvalidByte) {
  const char bytes[1] = {static_cast<char>(0x80)};
  wchar_t out[2];

  int rc = LIBC_NAMESPACE::mbtowc(out, bytes, 1);
  ASSERT_EQ(rc, -1);
  ASSERT_ERRNO_EQ(EILSEQ);
}
83+
84+
// An input that begins with 0x80 is rejected, while starting one byte later
// (at the 0x00 byte) yields the null wide character.
TEST_F(LlvmLibcMBToWCTest, InvalidMultiByte) {
  // Invalid sequence of bytes.
  const char bytes[4] = {static_cast<char>(0x80), static_cast<char>(0x00),
                         static_cast<char>(0x80), static_cast<char>(0x00)};
  wchar_t out[2];

  // Offering all four bytes must fail.
  int rc = LIBC_NAMESPACE::mbtowc(out, bytes, 4);
  ASSERT_EQ(rc, -1);
  ASSERT_ERRNO_EQ(EILSEQ);

  // Offering the second and third bytes decodes the leading 0x00, which is
  // reported as 0 with a null wide character stored.
  rc = LIBC_NAMESPACE::mbtowc(out, &bytes[1], 2);
  ASSERT_EQ(rc, 0);
  ASSERT_TRUE(out[0] == L'\0');
}
99+
100+
// A four-byte sequence whose final byte is not a continuation byte is
// rejected: 0xC0 is 11000000, but a trailing byte must look like 10xxxxxx.
TEST_F(LlvmLibcMBToWCTest, InvalidLastByte) {
  const char bytes[4] = {static_cast<char>(0xF1), static_cast<char>(0x80),
                         static_cast<char>(0x80), static_cast<char>(0xC0)};
  wchar_t out[2];

  // Offering all four bytes must fail.
  int rc = LIBC_NAMESPACE::mbtowc(out, bytes, 4);
  ASSERT_EQ(rc, -1);
  ASSERT_ERRNO_EQ(EILSEQ);
}
111+
112+
// When more bytes are offered than the character needs, only the character's
// own two bytes are consumed.
TEST_F(LlvmLibcMBToWCTest, ValidTwoByteWithExtraRead) {
  const char bytes[3] = {static_cast<char>(0xC2), static_cast<char>(0x8E),
                         static_cast<char>(0x80)};
  wchar_t out[2];

  // A budget of three bytes still yields the valid two-byte character.
  int rc = LIBC_NAMESPACE::mbtowc(out, bytes, 3);
  ASSERT_EQ(rc, 2);
  ASSERT_EQ(static_cast<int>(out[0]), 142);
}
121+
122+
// Two back-to-back two-byte characters are converted by two separate calls,
// each writing into its own destination slot.
TEST_F(LlvmLibcMBToWCTest, TwoValidTwoBytes) {
  const char bytes[4] = {static_cast<char>(0xC2), static_cast<char>(0x8E),
                         static_cast<char>(0xC7), static_cast<char>(0x8C)};
  wchar_t out[2];

  // First character: C2 8E -> 142.
  int rc = LIBC_NAMESPACE::mbtowc(&out[0], &bytes[0], 2);
  ASSERT_EQ(rc, 2);
  ASSERT_EQ(static_cast<int>(out[0]), 142);

  // Second character: C7 8C -> 460.
  rc = LIBC_NAMESPACE::mbtowc(&out[1], &bytes[2], 2);
  ASSERT_EQ(rc, 2);
  ASSERT_EQ(static_cast<int>(out[1]), 460);
}
133+
134+
// A null source pointer returns 0 and leaves the destination untouched; a
// source that is just a null terminator also returns 0.
TEST_F(LlvmLibcMBToWCTest, NullString) {
  wchar_t out[2] = {L'O', L'K'};

  // Null source: result is 0 and nothing is written.
  int rc = LIBC_NAMESPACE::mbtowc(out, nullptr, 2);
  ASSERT_EQ(rc, 0);
  ASSERT_TRUE(out[0] == L'O');

  // Source holding only the null terminator: result is 0.
  const char *src = "\0";
  rc = LIBC_NAMESPACE::mbtowc(out, src, 1);
  ASSERT_EQ(rc, 0);
}
145+
146+
// With no destination supplied, the call still reports how many bytes the
// character occupies.
TEST_F(LlvmLibcMBToWCTest, NullWCPtr) {
  const char bytes[2] = {static_cast<char>(0xC2), static_cast<char>(0x8E)};

  int rc = LIBC_NAMESPACE::mbtowc(nullptr, bytes, 2);
  ASSERT_EQ(rc, 2);
}

0 commit comments

Comments
 (0)