Skip to content

Commit bf0440f

Browse files
author
Sriya Pratipati
committed
finished most of the implementation except for mbstate checking
1 parent 70bedc7 commit bf0440f

File tree

10 files changed

+307
-0
lines changed

10 files changed

+307
-0
lines changed

libc/config/linux/x86_64/entrypoints.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -1249,6 +1249,7 @@ if(LLVM_LIBC_FULL_BUILD)
12491249

12501250
# wchar.h entrypoints
12511251
libc.src.wchar.mbrtowc
1252+
libc.src.wchar.mbsrtowcs
12521253
libc.src.wchar.mbtowc
12531254
libc.src.wchar.wcrtomb
12541255
)

libc/include/wchar.yaml

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -38,6 +38,15 @@ functions:
3838
- type: const char *__restrict
3939
- type: size_t
4040
- type: mbstate_t *__restrict
41+
- name: mbsrtowcs
42+
standards:
43+
- stdc
44+
return_type: size_t
45+
arguments:
46+
- type: wchar_t *__restrict
47+
- type: const char **__restrict
48+
- type: size_t
49+
- type: mbstate_t *__restrict
4150
- name: mbtowc
4251
standards:
4352
- stdc

libc/src/__support/wchar/CMakeLists.txt

Lines changed: 16 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -51,3 +51,19 @@ add_object_library(
5151
.character_converter
5252
.mbstate
5353
)
54+
55+
add_object_library(
56+
mbsrtowcs
57+
HDRS
58+
mbsrtowcs.h
59+
SRCS
60+
mbsrtowcs.cpp
61+
DEPENDS
62+
libc.hdr.types.wchar_t
63+
libc.hdr.types.size_t
64+
libc.src.__support.common
65+
libc.src.__support.error_or
66+
libc.src.__support.macros.config
67+
.mbstate
68+
.mbrtowc
69+
)
Lines changed: 44 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,44 @@
1+
//===-- Implementation for mbsrtowcs function -------------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/__support/wchar/mbsrtowcs.h"
10+
#include "hdr/types/mbstate_t.h"
11+
#include "hdr/types/size_t.h"
12+
#include "hdr/types/wchar_t.h"
13+
#include "src/__support/common.h"
14+
#include "src/__support/error_or.h"
15+
#include "src/__support/macros/config.h"
16+
#include "src/__support/wchar/mbrtowc.h"
17+
#include "src/__support/wchar/mbstate.h"
18+
19+
namespace LIBC_NAMESPACE_DECL {
20+
namespace internal {
21+
22+
// Converts the multibyte string referenced by *src into wide characters that
// are stored in dst, writing at most len wide characters and tracking the
// conversion in ps.
//
// Returns the number of wide characters stored, not counting a terminating
// null character. When the null terminator is converted, *src is set to
// nullptr; otherwise *src is left pointing just past the last multibyte
// character that was converted. If a character cannot be converted, the error
// reported by mbrtowc is propagated to the caller and *src still points at the
// start of the character that failed.
ErrorOr<size_t> mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                          size_t len, mbstate *__restrict ps) {
  size_t converted = 0;
  for (; converted < len; ++converted, ++dst) {
    // NOTE(review): 4 is presumably the longest multibyte sequence mbrtowc
    // accepts (UTF-8) - confirm against the character converter.
    auto result = mbrtowc(dst, *src, 4, ps);
    // Forward the underlying error code rather than replacing it with -1,
    // which is not a meaningful error value.
    if (!result.has_value())
      return Error(result.error());
    // The terminator was converted: the whole string has been consumed.
    if (*dst == L'\0') {
      *src = nullptr;
      return converted;
    }
    // Step over the bytes that made up the character just converted.
    *src += result.value();
  }
  return converted;
}
41+
42+
} // namespace internal
43+
44+
} // namespace LIBC_NAMESPACE_DECL
Lines changed: 29 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,29 @@
1+
//===-- Implementation header for mbsrtowcs function ------------*- C++ -*-===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSRTOWCS
10+
#define LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSRTOWCS
11+
12+
#include "hdr/types/size_t.h"
13+
#include "hdr/types/wchar_t.h"
14+
#include "src/__support/common.h"
15+
#include "src/__support/error_or.h"
16+
#include "src/__support/macros/config.h"
17+
#include "src/__support/wchar/mbstate.h"
18+
19+
namespace LIBC_NAMESPACE_DECL {
20+
namespace internal {
21+
22+
// Internal implementation of mbsrtowcs. Converts up to len wide characters
// from the multibyte string *src into dst using the conversion state ps.
// Returns the number of wide characters produced (excluding a terminating
// null character), or an error when a character cannot be converted.
ErrorOr<size_t> mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
23+
size_t len, mbstate *__restrict ps);
24+
25+
} // namespace internal
26+
27+
} // namespace LIBC_NAMESPACE_DECL
28+
29+
#endif // LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSRTOWCS

libc/src/wchar/CMakeLists.txt

Lines changed: 18 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -65,6 +65,24 @@ add_entrypoint_object(
6565
libc.src.__support.wchar.mbstate
6666
)
6767

68+
add_entrypoint_object(
69+
mbsrtowcs
70+
SRCS
71+
mbsrtowcs.cpp
72+
HDRS
73+
mbsrtowcs.h
74+
DEPENDS
75+
libc.hdr.types.size_t
76+
libc.hdr.types.mbstate_t
77+
libc.hdr.types.wchar_t
78+
libc.src.__support.common
79+
libc.src.__support.macros.config
80+
libc.src.__support.wchar.mbsrtowcs
81+
libc.src.__support.libc_errno
82+
libc.src.__support.wchar.mbstate
83+
libc.src.__support.macros.null_check
84+
)
85+
6886
add_entrypoint_object(
6987
mbtowc
7088
SRCS

libc/src/wchar/mbsrtowcs.cpp

Lines changed: 41 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,41 @@
1+
//===-- Implementation of mbsrtowcs ---------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "src/wchar/mbsrtowcs.h"
10+
11+
#include "hdr/types/mbstate_t.h"
12+
#include "hdr/types/size_t.h"
13+
#include "hdr/types/wchar_t.h"
14+
#include "src/__support/common.h"
15+
#include "src/__support/libc_errno.h"
16+
#include "src/__support/macros/config.h"
17+
#include "src/__support/macros/null_check.h"
18+
#include "src/__support/wchar/mbsrtowcs.h"
19+
#include "src/__support/wchar/mbstate.h"
20+
21+
namespace LIBC_NAMESPACE_DECL {
22+
23+
// Public entrypoint for mbsrtowcs.
//
// Converts the multibyte string *src to wide characters. When ps is null a
// function-local static conversion state is used instead. On a conversion
// failure errno is set to EILSEQ and (size_t)-1 is returned.
//
// When dst is null nothing is stored: len is ignored, *src is left untouched
// and the return value is the number of wide characters the whole string
// would produce (excluding the terminating null character).
LLVM_LIBC_FUNCTION(size_t, mbsrtowcs,
                   (wchar_t *__restrict dst, const char **__restrict src,
                    size_t len, mbstate_t *__restrict ps)) {
  LIBC_CRASH_ON_NULLPTR(src);
  // Conversion state used when the caller does not supply one.
  static internal::mbstate internal_mbstate;
  internal::mbstate *state =
      ps == nullptr ? &internal_mbstate
                    : reinterpret_cast<internal::mbstate *>(ps);

  if (dst != nullptr) {
    auto ret = internal::mbsrtowcs(dst, src, len, state);
    if (!ret.has_value()) {
      // Encoding failure
      libc_errno = EILSEQ;
      return static_cast<size_t>(-1);
    }
    return ret.value();
  }

  // Counting mode. Convert one character at a time into a single scratch
  // slot so no len-sized buffer (previously a variable-length array) is
  // needed, and walk a private copy of the source pointer so the caller's
  // *src is not modified.
  const char *cursor = *src;
  size_t count = 0;
  for (;;) {
    wchar_t scratch = L'\0';
    auto ret = internal::mbsrtowcs(&scratch, &cursor, 1, state);
    if (!ret.has_value()) {
      // Encoding failure
      libc_errno = EILSEQ;
      return static_cast<size_t>(-1);
    }
    // The internal routine nulls the cursor once the terminator is converted.
    if (cursor == nullptr)
      return count;
    count += ret.value();
  }
}
40+
41+
} // namespace LIBC_NAMESPACE_DECL

libc/src/wchar/mbsrtowcs.h

Lines changed: 24 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,24 @@
1+
//===-- Implementation header for mbsrtowcs -------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#ifndef LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H
10+
#define LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H
11+
12+
#include "hdr/types/mbstate_t.h"
13+
#include "hdr/types/size_t.h"
14+
#include "hdr/types/wchar_t.h"
15+
#include "src/__support/macros/config.h"
16+
17+
namespace LIBC_NAMESPACE_DECL {
18+
19+
// Public mbsrtowcs entrypoint: converts the multibyte string *src into wide
// characters stored in dst (at most len of them), using ps as the conversion
// state. Returns the number of wide characters converted, or (size_t)-1 with
// errno set to EILSEQ when a character cannot be converted.
size_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
20+
size_t len, mbstate_t *__restrict ps);
21+
22+
} // namespace LIBC_NAMESPACE_DECL
23+
24+
#endif // LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H

libc/test/src/wchar/CMakeLists.txt

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,6 +37,20 @@ add_libc_test(
3737
libc.hdr.types.wchar_t
3838
)
3939

40+
add_libc_test(
41+
mbsrtowcs_test
42+
SUITE
43+
libc_wchar_unittests
44+
SRCS
45+
mbsrtowcs_test.cpp
46+
DEPENDS
47+
libc.src.__support.libc_errno
48+
libc.src.string.memset
49+
libc.src.wchar.mbsrtowcs
50+
libc.hdr.types.mbstate_t
51+
libc.hdr.types.wchar_t
52+
)
53+
4054
add_libc_test(
4155
mbtowc_test
4256
SUITE
Lines changed: 111 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,111 @@
1+
//===-- Unittests for mbsrtowcs -------------------------------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
9+
#include "hdr/types/wchar_t.h"
10+
#include "src/__support/libc_errno.h"
11+
#include "src/string/memset.h"
12+
#include "src/wchar/mbsrtowcs.h"
13+
#include "test/UnitTest/ErrnoCheckingTest.h"
14+
#include "test/UnitTest/Test.h"
15+
16+
using LlvmLibcMBSRToWCSTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;
17+
18+
// A single ASCII byte converts to one wide character plus the terminator,
// using a caller-provided conversion state.
TEST_F(LlvmLibcMBSRToWCSTest, OneByteOneCharacter) {
  // Use an actual state object and zero it. Zeroing an uninitialized pointer
  // variable would only produce a null state pointer (and could write past
  // the pointer's own storage).
  mbstate_t mb;
  LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
  const char *ch = "A";
  wchar_t dest[2];
  size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &ch, 2, &mb);
  ASSERT_TRUE(dest[0] == L'A');
  ASSERT_TRUE(dest[1] == L'\0');
  // Should not count null terminator in number
  ASSERT_EQ(static_cast<int>(n), 1);
  // Should set ch to nullptr after reading null terminator
  ASSERT_EQ(ch, nullptr);
}
31+
32+
// One four-byte UTF-8 sequence converts to a single wide character.
TEST_F(LlvmLibcMBSRToWCSTest, MultiByteOneCharacter) {
  // Laughing cat emoji, encoded as four UTF-8 bytes.
  const char *input = "\xf0\x9f\x98\xb9";
  wchar_t wide[2];
  size_t converted = LIBC_NAMESPACE::mbsrtowcs(wide, &input, 2, nullptr);
  ASSERT_EQ(static_cast<int>(wide[0]), 128569);
  ASSERT_TRUE(wide[1] == L'\0');
  // The terminator is converted but excluded from the returned count.
  ASSERT_EQ(static_cast<int>(converted), 1);
  // Converting the terminator resets the source pointer to null.
  ASSERT_EQ(input, nullptr);
}
43+
44+
// Two consecutive four-byte sequences convert to two wide characters.
TEST_F(LlvmLibcMBSRToWCSTest, MultiByteTwoCharacters) {
  // Two laughing cat emojis back to back.
  const char *input = "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  wchar_t wide[3];
  size_t converted = LIBC_NAMESPACE::mbsrtowcs(wide, &input, 3, nullptr);
  ASSERT_EQ(static_cast<int>(wide[0]), 128569);
  ASSERT_EQ(static_cast<int>(wide[1]), 128569);
  ASSERT_TRUE(wide[2] == L'\0');
  // The terminator is converted but excluded from the returned count.
  ASSERT_EQ(static_cast<int>(converted), 2);
  // Converting the terminator resets the source pointer to null.
  ASSERT_EQ(input, nullptr);
}
57+
58+
// Conversion stops after len characters even though more input remains.
TEST_F(LlvmLibcMBSRToWCSTest, ReadLessThanStringLength) {
  // Four laughing cat emojis, four bytes each.
  const char *input =
      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  const char *start = input;
  wchar_t wide[3];
  size_t converted = LIBC_NAMESPACE::mbsrtowcs(wide, &input, 3, nullptr);
  // Exactly three of the four emojis fit in the destination.
  ASSERT_EQ(static_cast<int>(converted), 3);
  ASSERT_EQ(static_cast<int>(wide[0]), 128569);
  ASSERT_EQ(static_cast<int>(wide[1]), 128569);
  ASSERT_EQ(static_cast<int>(wide[2]), 128569);
  // The source pointer now rests on the fourth emoji, 12 bytes in.
  ASSERT_EQ((start + 12), input);
}
73+
74+
// A string that opens with an invalid lead byte is rejected.
TEST_F(LlvmLibcMBSRToWCSTest, InvalidFirstByte) {
  // 0x80 cannot start a multibyte character.
  const char *input =
      "\x80\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  wchar_t wide[3];
  size_t result = LIBC_NAMESPACE::mbsrtowcs(wide, &input, 3, nullptr);
  // The call reports failure and records EILSEQ in errno.
  ASSERT_EQ(static_cast<int>(result), -1);
  ASSERT_ERRNO_EQ(EILSEQ);
}
84+
85+
// A malformed byte in the middle of the string is rejected.
TEST_F(LlvmLibcMBSRToWCSTest, InvalidMiddleByte) {
  // The 7th byte is invalid for a 4 byte character
  const char *src =
      "\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  wchar_t dest[3];
  // len matches the capacity of dest so the call can never be allowed to
  // write past the end of the buffer.
  size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 3, nullptr);
  // Should return error and set errno
  ASSERT_EQ(static_cast<int>(n), -1);
  ASSERT_ERRNO_EQ(EILSEQ);
}
95+
96+
// With no destination buffer the call only counts characters.
TEST_F(LlvmLibcMBSRToWCSTest, NullDestination) {
  // Four laughing cat emojis, four bytes each.
  const char *input =
      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  size_t counted = LIBC_NAMESPACE::mbsrtowcs(nullptr, &input, 5, nullptr);
  // All four characters are counted even though nothing is stored.
  ASSERT_EQ(static_cast<int>(counted), 4);
}
104+
105+
// Death test for a null src argument; only compiled when null checks are
// enabled and no sanitizer is in use.
#if defined(LIBC_ADD_NULL_CHECKS) && !defined(LIBC_HAS_SANITIZER)
106+
TEST_F(LlvmLibcMBSRToWCSTest, NullSource) {
107+
// Passing in a nullptr source should crash the program
108+
EXPECT_DEATH([] { LIBC_NAMESPACE::mbsrtowcs(nullptr, nullptr, 1, nullptr); },
109+
WITH_SIGNAL(-1));
110+
}
111+
#endif // LIBC_ADD_NULL_CHECKS && !LIBC_HAS_SANITIZER

0 commit comments

Comments
 (0)