Skip to content

Commit 4921132

Browse files
author
Sriya Pratipati
committed
[libc] mbtowc implementation
Implemented mbtowc and tests for the function.
1 parent 179d724 commit 4921132

File tree

7 files changed

+224
-0
lines changed

7 files changed

+224
-0
lines changed

libc/config/linux/x86_64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1248,6 +1248,7 @@ if(LLVM_LIBC_FULL_BUILD)
12481248

12491249
# wchar.h entrypoints
12501250
libc.src.wchar.mbrtowc
1251+
libc.src.wchar.mbtowc
12511252
libc.src.wchar.wcrtomb
12521253
)
12531254
endif()

libc/include/wchar.yaml

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,14 @@ functions:
3838
- type: const char *__restrict
3939
- type: size_t
4040
- type: mbstate_t *__restrict
41+
- name: mbtowc
42+
standards:
43+
- stdc
44+
return_type: int
45+
arguments:
46+
- type: wchar_t *__restrict
47+
- type: const char *__restrict
48+
- type: size_t
4149
- name: wmemset
4250
standards:
4351
- stdc

libc/src/wchar/CMakeLists.txt

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,21 @@ add_entrypoint_object(
6565
libc.src.__support.wchar.mbstate
6666
)
6767

68+
add_entrypoint_object(
69+
mbtowc
70+
SRCS
71+
mbtowc.cpp
72+
HDRS
73+
mbtowc.h
74+
DEPENDS
75+
libc.hdr.types.size_t
76+
libc.hdr.types.wchar_t
77+
libc.src.__support.common
78+
libc.src.__support.macros.config
79+
libc.src.__support.wchar.mbrtowc
80+
libc.src.__support.wchar.mbstate
81+
)
82+
6883
add_entrypoint_object(
6984
wmemset
7085
SRCS

libc/src/wchar/mbtowc.cpp

Lines changed: 34 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,34 @@
1+
//===-- Implementation of mbtowc -----------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/wchar/mbtowc.h"
10+
11+
#include "hdr/types/size_t.h"
12+
#include "hdr/types/wchar_t.h"
13+
#include "src/__support/common.h"
14+
#include "src/__support/macros/config.h"
15+
#include "src/__support/wchar/mbrtowc.h"
16+
#include "src/__support/wchar/mbstate.h"
17+
18+
namespace LIBC_NAMESPACE_DECL {
19+
20+
LLVM_LIBC_FUNCTION(int, mbtowc,
21+
(wchar_t *__restrict pwc, const char *__restrict s,
22+
size_t n)) {
23+
if (s == nullptr)
24+
return 0;
25+
internal::mbstate internal_mbstate;
26+
auto ret = internal::mbrtowc(pwc, s, n, &internal_mbstate);
27+
if (!ret.has_value() || static_cast<int>(ret.value()) == -2) {
28+
// Encoding failure
29+
return -1;
30+
}
31+
return static_cast<int>(ret.value());
32+
}
33+
34+
} // namespace LIBC_NAMESPACE_DECL

libc/src/wchar/mbtowc.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//===-- Implementation header for mbtowc ---------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_WCHAR_MBTOWC_H
10+
#define LLVM_LIBC_SRC_WCHAR_MBTOWC_H
11+
12+
#include "hdr/types/size_t.h"
13+
#include "hdr/types/wchar_t.h"
14+
#include "src/__support/macros/config.h"
15+
16+
namespace LIBC_NAMESPACE_DECL {
17+
18+
// Converts the multibyte character starting at `s` (examining at most `n`
// bytes) into a wide character stored through `pwc`.
// Returns 0 when `s` is null or the converted character is the null wide
// character, -1 when the bytes are invalid or incomplete, and otherwise the
// value reported by the internal conversion routine (the unit tests expect
// the number of bytes consumed).
int mbtowc(wchar_t *__restrict pwc, const char *__restrict s, size_t n);
19+
20+
} // namespace LIBC_NAMESPACE_DECL
21+
22+
#endif // LLVM_LIBC_SRC_WCHAR_MBTOWC_H

libc/test/src/wchar/CMakeLists.txt

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,18 @@ add_libc_test(
3737
libc.hdr.types.wchar_t
3838
)
3939

40+
add_libc_test(
41+
mbtowc_test
42+
SUITE
43+
libc_wchar_unittests
44+
SRCS
45+
mbtowc_test.cpp
46+
DEPENDS
47+
libc.src.__support.libc_errno
48+
libc.src.wchar.mbtowc
49+
libc.hdr.types.wchar_t
50+
)
51+
4052
add_libc_test(
4153
wctob_test
4254
SUITE
Lines changed: 132 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,132 @@
1+
//===-- Unittests for mbtowc ---------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "hdr/types/wchar_t.h"
10+
#include "src/wchar/mbtowc.h"
11+
#include "test/UnitTest/Test.h"
12+
13+
// A lone ASCII byte converts to the same character and reports one byte
// consumed; a zero-length read is rejected.
TEST(LlvmLibcMBToWC, OneByte) {
  const char *input = "A";
  wchar_t wide[2];
  int result = LIBC_NAMESPACE::mbtowc(wide, input, 1);
  ASSERT_EQ(static_cast<char>(wide[0]), 'A');
  ASSERT_EQ(result, 1);

  // With n == 0 there are no bytes to examine, so the call must report -1.
  result = LIBC_NAMESPACE::mbtowc(wide, input, 0);
  ASSERT_EQ(result, -1);
}
24+
25+
// A complete two-byte sequence decodes to one wide character; either half on
// its own is rejected.
TEST(LlvmLibcMBToWC, TwoByte) {
  // 0xC2 0x8E encodes code point U+008E (decimal 142).
  const char bytes[2] = {static_cast<char>(0xC2), static_cast<char>(0x8E)};
  wchar_t wide[2];
  int result = LIBC_NAMESPACE::mbtowc(wide, bytes, 2);
  ASSERT_EQ(static_cast<int>(wide[0]), 142);
  ASSERT_EQ(result, 2);

  // Only the lead byte is available: the sequence is incomplete.
  result = LIBC_NAMESPACE::mbtowc(wide, bytes, 1);
  ASSERT_EQ(result, -1);
  // Starting at the second byte on its own must fail as well.
  result = LIBC_NAMESPACE::mbtowc(wide, &bytes[1], 1);
  ASSERT_EQ(result, -1);
}
40+
41+
// A complete three-byte sequence decodes to one wide character; a truncated
// read is rejected.
TEST(LlvmLibcMBToWC, ThreeByte) {
  // 0xE2 0x88 0x91 encodes code point 8721 (the summation sign).
  const char bytes[3] = {static_cast<char>(0xE2), static_cast<char>(0x88),
                         static_cast<char>(0x91)};
  wchar_t wide[2];
  int result = LIBC_NAMESPACE::mbtowc(wide, bytes, 3);
  ASSERT_EQ(static_cast<int>(wide[0]), 8721);
  ASSERT_EQ(result, 3);

  // Two of the three bytes are not enough to form the character.
  result = LIBC_NAMESPACE::mbtowc(wide, bytes, 2);
  ASSERT_EQ(result, -1);
}
53+
54+
// A complete four-byte sequence decodes to one wide character; a truncated
// read is rejected.
TEST(LlvmLibcMBToWC, FourByte) {
  // 0xF0 0x9F 0xA4 0xA1 encodes code point 129313 (clown face emoji).
  const char bytes[4] = {static_cast<char>(0xF0), static_cast<char>(0x9F),
                         static_cast<char>(0xA4), static_cast<char>(0xA1)};
  wchar_t wide[2];
  int result = LIBC_NAMESPACE::mbtowc(wide, bytes, 4);
  ASSERT_EQ(static_cast<int>(wide[0]), 129313);
  ASSERT_EQ(result, 4);

  // Only half of the sequence is readable, so conversion cannot complete.
  result = LIBC_NAMESPACE::mbtowc(wide, bytes, 2);
  ASSERT_EQ(result, -1);
}
67+
68+
// 0x80 is expected to be rejected when it is the first byte of a character.
TEST(LlvmLibcMBToWC, InvalidByte) {
  const char bytes[1] = {static_cast<char>(0x80)};
  wchar_t wide[2];
  int result = LIBC_NAMESPACE::mbtowc(wide, bytes, 1);
  ASSERT_EQ(result, -1);
}
74+
75+
// An invalid leading byte fails the conversion, while starting at an embedded
// zero byte yields the null wide character.
TEST(LlvmLibcMBToWC, InvalidMultiByte) {
  // Not a valid sequence: 0x80 cannot begin a character.
  const char bytes[4] = {static_cast<char>(0x80), static_cast<char>(0x00),
                         static_cast<char>(0x80), static_cast<char>(0x00)};
  wchar_t wide[2];
  // Feeding all four bytes starts at the invalid byte and must fail.
  int result = LIBC_NAMESPACE::mbtowc(wide, bytes, 4);
  ASSERT_EQ(result, -1);
  // Starting at the second byte (0x00) produces the null wide character.
  result = LIBC_NAMESPACE::mbtowc(wide, &bytes[1], 2);
  ASSERT_EQ(result, 0);
  ASSERT_TRUE(wide[0] == L'\0');
}
88+
89+
// A four-byte sequence whose final byte is not a continuation byte is
// rejected.
TEST(LlvmLibcMBToWC, InvalidLastByte) {
  // The trailing 0xC0 is 11000000 in binary, but a continuation byte must
  // have the form 10xxxxxx.
  const char bytes[4] = {static_cast<char>(0xF1), static_cast<char>(0x80),
                         static_cast<char>(0x80), static_cast<char>(0xC0)};
  wchar_t wide[2];
  // Consuming all four bytes reaches the bad byte and must fail.
  int result = LIBC_NAMESPACE::mbtowc(wide, bytes, 4);
  ASSERT_EQ(result, -1);
}
99+
100+
// Allowing more bytes than the character needs still converts only the first
// complete character.
TEST(LlvmLibcMBToWC, ValidTwoByteWithExtraRead) {
  const char bytes[3] = {static_cast<char>(0xC2), static_cast<char>(0x8E),
                         static_cast<char>(0x80)};
  wchar_t wide[2];
  // n covers all three bytes, yet only the two-byte character is consumed.
  int result = LIBC_NAMESPACE::mbtowc(wide, bytes, 3);
  ASSERT_EQ(result, 2);
  ASSERT_EQ(static_cast<int>(wide[0]), 142);
}
109+
110+
// Two back-to-back two-byte characters are converted by two independent
// calls.
TEST(LlvmLibcMBToWC, TwoValidTwoBytes) {
  const char bytes[4] = {static_cast<char>(0xC2), static_cast<char>(0x8E),
                         static_cast<char>(0xC7), static_cast<char>(0x8C)};
  wchar_t wide[2];
  // First character: bytes 0-1 decode to 142.
  int result = LIBC_NAMESPACE::mbtowc(&wide[0], bytes, 2);
  ASSERT_EQ(result, 2);
  ASSERT_EQ(static_cast<int>(wide[0]), 142);
  // Second character: bytes 2-3 decode to 460.
  result = LIBC_NAMESPACE::mbtowc(&wide[1], &bytes[2], 2);
  ASSERT_EQ(result, 2);
  ASSERT_EQ(static_cast<int>(wide[1]), 460);
}
121+
122+
// A null source pointer and a source holding only the terminator both
// produce a return value of 0.
TEST(LlvmLibcMBToWC, NullString) {
  wchar_t wide[2] = {L'O', L'K'};
  // A null source returns 0 and must leave the destination untouched.
  int result = LIBC_NAMESPACE::mbtowc(wide, nullptr, 2);
  ASSERT_EQ(result, 0);
  ASSERT_TRUE(wide[0] == L'O');
  // Converting the null terminator itself also returns 0.
  const char *input = "\0";
  result = LIBC_NAMESPACE::mbtowc(wide, input, 1);
  ASSERT_EQ(result, 0);
}

0 commit comments

Comments
 (0)