Skip to content

Commit 4f2686e

Browse files
sribee8 and Sriya Pratipati authored
[libc] Implemented mblen functions (#150141)
Implemented mblen and mbrlen as well as tests --------- Co-authored-by: Sriya Pratipati <[email protected]>
1 parent 1a0f482 commit 4f2686e

File tree

10 files changed

+433
-0
lines changed

10 files changed

+433
-0
lines changed

libc/config/linux/x86_64/entrypoints.txt

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1261,6 +1261,8 @@ if(LLVM_LIBC_FULL_BUILD)
12611261
libc.src.sys.socket.recvmsg
12621262

12631263
# wchar.h entrypoints
1264+
libc.src.wchar.mblen
1265+
libc.src.wchar.mbrlen
12641266
libc.src.wchar.mbrtowc
12651267
libc.src.wchar.mbtowc
12661268
libc.src.wchar.wcrtomb

libc/include/wchar.yaml

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,21 @@ functions:
5353
- type: wchar_t *__restrict
5454
- type: const char *__restrict
5555
- type: size_t
56+
# mblen: declared as int mblen(const char *, size_t) under the stdc standard.
- name: mblen
57+
standards:
58+
- stdc
59+
return_type: int
60+
arguments:
61+
- type: const char *
62+
- type: size_t
63+
# mbrlen: declared as
#   size_t mbrlen(const char *__restrict, size_t, mbstate_t *__restrict)
# under the stdc standard.
- name: mbrlen
64+
standards:
65+
- stdc
66+
return_type: size_t
67+
arguments:
68+
- type: const char *__restrict
69+
- type: size_t
70+
- type: mbstate_t *__restrict
5671
- name: wmemset
5772
standards:
5873
- stdc

libc/src/wchar/CMakeLists.txt

Lines changed: 31 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -169,6 +169,37 @@ add_entrypoint_object(
169169
libc.src.__support.wchar.mbstate
170170
)
171171

172+
# Entrypoint object for mblen, built from mblen.cpp with public header mblen.h.
# DEPENDS lists the size_t type header, the common/config/errno support
# targets, and the internal mbrtowc and mbstate wchar support targets.
add_entrypoint_object(
173+
mblen
174+
SRCS
175+
mblen.cpp
176+
HDRS
177+
mblen.h
178+
DEPENDS
179+
libc.hdr.types.size_t
180+
libc.src.__support.common
181+
libc.src.__support.macros.config
182+
libc.src.__support.libc_errno
183+
libc.src.__support.wchar.mbrtowc
184+
libc.src.__support.wchar.mbstate
185+
)
186+
187+
# Entrypoint object for mbrlen, built from mbrlen.cpp with public header
# mbrlen.h. Compared with the mblen target it additionally depends on the
# mbstate_t type header, because mbrlen takes an mbstate_t pointer argument.
add_entrypoint_object(
188+
mbrlen
189+
SRCS
190+
mbrlen.cpp
191+
HDRS
192+
mbrlen.h
193+
DEPENDS
194+
libc.hdr.types.size_t
195+
libc.hdr.types.mbstate_t
196+
libc.src.__support.common
197+
libc.src.__support.macros.config
198+
libc.src.__support.wchar.mbrtowc
199+
libc.src.__support.libc_errno
200+
libc.src.__support.wchar.mbstate
201+
)
202+
172203
add_entrypoint_object(
173204
wmemset
174205
SRCS

libc/src/wchar/mblen.cpp

Lines changed: 35 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,35 @@
1+
//===-- Implementation of mblen -------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/wchar/mblen.h"
10+
11+
#include "hdr/types/size_t.h"
12+
#include "src/__support/common.h"
13+
#include "src/__support/libc_errno.h"
14+
#include "src/__support/macros/config.h"
15+
#include "src/__support/wchar/mbrtowc.h"
16+
#include "src/__support/wchar/mbstate.h"
17+
18+
namespace LIBC_NAMESPACE_DECL {
19+
20+
LLVM_LIBC_FUNCTION(int, mblen, (const char *s, size_t n)) {
21+
// returns 0 since UTF-8 encoding is not state-dependent
22+
if (s == nullptr)
23+
return 0;
24+
internal::mbstate internal_mbstate;
25+
auto ret = internal::mbrtowc(nullptr, s, n, &internal_mbstate);
26+
if (!ret.has_value() || static_cast<int>(ret.value()) == -2) {
27+
// Encoding failure
28+
if (!ret.has_value())
29+
libc_errno = EILSEQ;
30+
return -1;
31+
}
32+
return static_cast<int>(ret.value());
33+
}
34+
35+
} // namespace LIBC_NAMESPACE_DECL

libc/src/wchar/mblen.h

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,21 @@
1+
//===-- Implementation header for mblen -----------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_WCHAR_MBLEN_H
10+
#define LLVM_LIBC_SRC_WCHAR_MBLEN_H
11+
12+
#include "hdr/types/size_t.h"
13+
#include "src/__support/macros/config.h"
14+
15+
namespace LIBC_NAMESPACE_DECL {
16+
17+
// Declaration of the mblen entrypoint: takes a pointer to multibyte input and
// a byte count `n`, and returns an int. The definition lives in mblen.cpp.
int mblen(const char *s, size_t n);
18+
19+
} // namespace LIBC_NAMESPACE_DECL
20+
21+
#endif // LLVM_LIBC_SRC_WCHAR_MBLEN_H

libc/src/wchar/mbrlen.cpp

Lines changed: 37 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,37 @@
1+
//===-- Implementation of mbrlen ------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/wchar/mbrlen.h"
10+
11+
#include "hdr/types/mbstate_t.h"
12+
#include "hdr/types/size_t.h"
13+
#include "src/__support/common.h"
14+
#include "src/__support/libc_errno.h"
15+
#include "src/__support/macros/config.h"
16+
#include "src/__support/wchar/mbrtowc.h"
17+
#include "src/__support/wchar/mbstate.h"
18+
19+
namespace LIBC_NAMESPACE_DECL {
20+
21+
LLVM_LIBC_FUNCTION(size_t, mbrlen,
22+
(const char *__restrict s, size_t n,
23+
mbstate_t *__restrict ps)) {
24+
static internal::mbstate internal_mbstate;
25+
auto ret = internal::mbrtowc(nullptr, s, n,
26+
ps == nullptr
27+
? &internal_mbstate
28+
: reinterpret_cast<internal::mbstate *>(ps));
29+
if (!ret.has_value()) {
30+
// Encoding failure
31+
libc_errno = ret.error();
32+
return -1;
33+
}
34+
return ret.value();
35+
}
36+
37+
} // namespace LIBC_NAMESPACE_DECL

libc/src/wchar/mbrlen.h

Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
//===-- Implementation header for mbrlen ----------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_WCHAR_MBRLEN_H
10+
#define LLVM_LIBC_SRC_WCHAR_MBRLEN_H
11+
12+
#include "hdr/types/mbstate_t.h"
13+
#include "hdr/types/size_t.h"
14+
#include "src/__support/macros/config.h"
15+
16+
namespace LIBC_NAMESPACE_DECL {
17+
18+
// Declaration of the mbrlen entrypoint: takes a pointer to multibyte input, a
// byte count `n`, and a conversion-state pointer `ps`, and returns a size_t.
// The definition lives in mbrlen.cpp.
size_t mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps);
19+
20+
} // namespace LIBC_NAMESPACE_DECL
21+
22+
#endif // LLVM_LIBC_SRC_WCHAR_MBRLEN_H

libc/test/src/wchar/CMakeLists.txt

Lines changed: 27 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,33 @@ add_libc_test(
6464
libc.test.UnitTest.ErrnoCheckingTest
6565
)
6666

67+
# Unit test target for mblen (mblen_test.cpp), registered in the
# libc_wchar_unittests suite. Depends on the mblen entrypoint plus the errno
# support and ErrnoCheckingTest fixture the test file includes.
add_libc_test(
68+
mblen_test
69+
SUITE
70+
libc_wchar_unittests
71+
SRCS
72+
mblen_test.cpp
73+
DEPENDS
74+
libc.src.__support.libc_errno
75+
libc.src.wchar.mblen
76+
libc.test.UnitTest.ErrnoCheckingTest
77+
)
78+
79+
# Unit test target for mbrlen (mbrlen_test.cpp), registered in the
# libc_wchar_unittests suite. Besides the mbrlen entrypoint and the errno /
# ErrnoCheckingTest support, it depends on the mbstate and mbstate_t targets
# and on memset.
add_libc_test(
80+
mbrlen_test
81+
SUITE
82+
libc_wchar_unittests
83+
SRCS
84+
mbrlen_test.cpp
85+
DEPENDS
86+
libc.src.__support.libc_errno
87+
libc.src.__support.wchar.mbstate
88+
libc.src.string.memset
89+
libc.src.wchar.mbrlen
90+
libc.hdr.types.mbstate_t
91+
libc.test.UnitTest.ErrnoCheckingTest
92+
)
93+
6794
add_libc_test(
6895
wctob_test
6996
SUITE

libc/test/src/wchar/mblen_test.cpp

Lines changed: 104 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,104 @@
1+
//===-- Unittests for mblen -----------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/__support/libc_errno.h"
10+
#include "src/wchar/mblen.h"
11+
#include "test/UnitTest/ErrnoCheckingTest.h"
12+
#include "test/UnitTest/Test.h"
13+
14+
using LlvmLibcMBLenTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
15+
16+
// A single ASCII byte: accepted with a one-byte budget, rejected (without
// touching errno) when the budget is zero.
TEST_F(LlvmLibcMBLenTest, OneByte) {
  const char *ascii = "A";

  const int with_one_byte = LIBC_NAMESPACE::mblen(ascii, 1);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(with_one_byte, 1);

  // A zero-byte budget is not enough to read anything, so -1 is expected.
  const int with_no_bytes = LIBC_NAMESPACE::mblen(ascii, 0);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(with_no_bytes, -1);
}
27+
28+
// A two-byte sequence: accepted when both bytes are offered, rejected when
// only the lead byte is offered, and rejected with EILSEQ when decoding
// starts at the continuation byte.
TEST_F(LlvmLibcMBLenTest, TwoByte) {
  // 0xC2 0x8E is the two-byte UTF-8 encoding of U+008E.
  const char ch[2] = {static_cast<char>(0xC2), static_cast<char>(0x8E)};

  // The byte budget is tied to the array size so the call is never told that
  // more bytes are available than the buffer actually holds.
  int n = LIBC_NAMESPACE::mblen(ch, sizeof(ch));
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(n, 2);

  // Should fail since we have not read enough
  n = LIBC_NAMESPACE::mblen(ch, 1);
  ASSERT_EQ(n, -1);
  ASSERT_ERRNO_SUCCESS();

  // Should fail after trying to read next byte too
  n = LIBC_NAMESPACE::mblen(ch + 1, 1);
  ASSERT_EQ(n, -1);
  // This one should be an invalid starting byte so should set errno
  ASSERT_ERRNO_EQ(EILSEQ);
}
45+
46+
// A three-byte sequence: accepted when all three bytes are offered, rejected
// (without touching errno) when only the first two are offered.
TEST_F(LlvmLibcMBLenTest, ThreeByte) {
  // 0xE2 0x88 0x91 is the UTF-8 encoding of U+2211 (the summation sign).
  const char bytes[3] = {static_cast<char>(0xE2), static_cast<char>(0x88),
                         static_cast<char>(0x91)};

  const int complete = LIBC_NAMESPACE::mblen(bytes, 3);
  ASSERT_EQ(complete, 3);
  ASSERT_ERRNO_SUCCESS();

  // Two bytes are not enough to finish the character.
  const int truncated = LIBC_NAMESPACE::mblen(bytes, 2);
  ASSERT_EQ(truncated, -1);
  ASSERT_ERRNO_SUCCESS();
}
58+
59+
// A four-byte sequence: accepted when all four bytes are offered, rejected
// (without touching errno) when only the first two are offered.
TEST_F(LlvmLibcMBLenTest, FourByte) {
  // 0xF0 0x9F 0xA4 0xA1 is the UTF-8 encoding of U+1F921 (clown face emoji).
  const char bytes[4] = {static_cast<char>(0xF0), static_cast<char>(0x9F),
                         static_cast<char>(0xA4), static_cast<char>(0xA1)};

  const int complete = LIBC_NAMESPACE::mblen(bytes, 4);
  ASSERT_EQ(complete, 4);
  ASSERT_ERRNO_SUCCESS();

  // Two bytes are not enough to finish the character.
  const int truncated = LIBC_NAMESPACE::mblen(bytes, 2);
  ASSERT_EQ(truncated, -1);
  ASSERT_ERRNO_SUCCESS();
}
72+
73+
// A lone 0x80 byte must be rejected with -1 and errno set to EILSEQ.
TEST_F(LlvmLibcMBLenTest, InvalidByte) {
  const char lone_byte[1] = {static_cast<char>(0x80)};

  const int result = LIBC_NAMESPACE::mblen(lone_byte, 1);
  ASSERT_EQ(result, -1);
  ASSERT_ERRNO_EQ(EILSEQ);
}
79+
80+
// An invalid byte sequence is rejected from its start, while decoding from
// its second byte (a zero byte) yields 0.
TEST_F(LlvmLibcMBLenTest, InvalidMultiByte) {
  // invalid sequence of bytes
  const char bytes[4] = {static_cast<char>(0x80), static_cast<char>(0x00),
                         static_cast<char>(0x80), static_cast<char>(0x00)};

  // Offering all four bytes should produce an error.
  const int from_start = LIBC_NAMESPACE::mblen(bytes, 4);
  ASSERT_EQ(from_start, -1);
  ASSERT_ERRNO_EQ(EILSEQ);

  // Offering the second and third bytes should correspond to the null
  // character, for which 0 is expected.
  const int from_second = LIBC_NAMESPACE::mblen(bytes + 1, 2);
  ASSERT_EQ(from_second, 0);
  ASSERT_ERRNO_SUCCESS();
}
94+
95+
// Both a null pointer and a string that starts with the null terminator are
// expected to yield 0.
TEST_F(LlvmLibcMBLenTest, NullString) {
  // A null pointer argument should return 0 and leave errno clear.
  const int from_null = LIBC_NAMESPACE::mblen(nullptr, 2);
  ASSERT_EQ(from_null, 0);
  ASSERT_ERRNO_SUCCESS();

  // Reading a null terminator should return 0 as well.
  const char *terminator = "\0";
  const int from_terminator = LIBC_NAMESPACE::mblen(terminator, 1);
  ASSERT_EQ(from_terminator, 0);
}

0 commit comments

Comments
 (0)