Skip to content

Commit 1e5ae8d

Browse files
author
Sriya Pratipati
committed
simplified code and added extra test
1 parent 7906fc0 commit 1e5ae8d

File tree

6 files changed

+31
-8
lines changed

6 files changed

+31
-8
lines changed

libc/src/__support/wchar/character_converter.cpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -27,7 +27,6 @@ constexpr uint32_t MASK_ENCODED_BITS =
2727
mask_trailing_ones<uint32_t, ENCODED_BITS_PER_UTF8>();
2828
// Maximum value for utf-32 for a utf-8 sequence of a given length
2929
constexpr char32_t MAX_VALUE_PER_UTF8_LEN[] = {0x7f, 0x7ff, 0xffff, 0x10ffff};
30-
constexpr int MAX_UTF8_LENGTH = 4;
3130

3231
CharacterConverter::CharacterConverter(mbstate *mbstate) { state = mbstate; }
3332

libc/src/__support/wchar/character_converter.h

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,8 @@
1717

1818
namespace LIBC_NAMESPACE_DECL {
1919
namespace internal {
20+
21+
constexpr int MAX_UTF8_LENGTH = 4;
2022

2123
class CharacterConverter {
2224
private:

libc/src/__support/wchar/mbrtowc.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -36,7 +36,7 @@ ErrorOr<size_t> mbrtowc(wchar_t *__restrict pwc, const char *__restrict s,
3636
return Error(EILSEQ);
3737
}
3838
auto wc = char_conv.pop_utf32();
39-
if (wc.has_value()) {
39+
if (wc.has_value() && pwc != nullptr) {
4040
*pwc = wc.value();
4141
// null terminator -> return 0
4242
if (wc.value() == L'\0')

libc/src/__support/wchar/mbsrtowcs.cpp

Lines changed: 9 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -15,27 +15,32 @@
1515
#include "src/__support/macros/config.h"
1616
#include "src/__support/wchar/mbrtowc.h"
1717
#include "src/__support/wchar/mbstate.h"
18+
#include "src/__support/wchar/character_converter.h"
1819

1920
namespace LIBC_NAMESPACE_DECL {
2021
namespace internal {
2122

2223
ErrorOr<size_t> mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
2324
size_t len, mbstate *__restrict ps) {
2425
size_t i = 0;
25-
constexpr size_t MAX_UTF8_LENGTH = 4;
2626
// Converting characters until we reach error or null terminator
27-
for (; i < len; ++i, ++dst) {
28-
auto check = mbrtowc(dst, *src, MAX_UTF8_LENGTH, ps);
27+
for (; i < len; ++i) {
28+
wchar_t temp;
29+
auto check = internal::mbrtowc(dst == nullptr ? &temp : dst, *src,
30+
MAX_UTF8_LENGTH, ps);
2931
// Encoding error/invalid mbstate
3032
if (!check.has_value())
3133
return check;
3234
// Successfully converted, check for null terminator
33-
if (*dst == L'\0') {
35+
if (temp == L'\0' || (dst != nullptr && *dst == L'\0')) {
3436
*src = nullptr;
3537
return i;
3638
}
3739
// Set src to point right after the last character converted
3840
*src = *src + check.value();
41+
// Advance the destination only when an output buffer was provided
42+
if (dst != nullptr)
43+
++dst;
3944
}
4045
return i;
4146
}

libc/src/wchar/mbsrtowcs.cpp

Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -25,9 +25,9 @@ LLVM_LIBC_FUNCTION(size_t, mbsrtowcs,
2525
size_t len, mbstate_t *__restrict ps)) {
2626
LIBC_CRASH_ON_NULLPTR(src);
2727
static internal::mbstate internal_mbstate;
28-
wchar_t temp[len];
28+
len = dst == nullptr ? SIZE_MAX : len;
2929
auto ret = internal::mbsrtowcs(
30-
dst == nullptr ? temp : dst, src, dst == nullptr ? SIZE_MAX : len,
30+
dst, src, len,
3131
ps == nullptr ? &internal_mbstate
3232
: reinterpret_cast<internal::mbstate *>(ps));
3333
if (!ret.has_value()) {

libc/test/src/wchar/mbsrtowcs_test.cpp

Lines changed: 17 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -59,6 +59,23 @@ TEST_F(LlvmLibcMBSRToWCSTest, MultiByteTwoCharacters) {
5959
ASSERT_EQ(src, nullptr);
6060
}
6161

62+
TEST_F(LlvmLibcMBSRToWCSTest, MixedNumberOfBytes) {
63+
// 'A', sigma symbol 'Σ', recycling symbol '♻', laughing cat emoji '😹'
64+
const char *src = "A\xce\xa3\xe2\x99\xbb\xf0\x9f\x98\xb9";
65+
wchar_t dest[5];
66+
size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 5, nullptr);
67+
ASSERT_ERRNO_SUCCESS();
68+
ASSERT_EQ(static_cast<char>(dest[0]), 'A');
69+
ASSERT_EQ(static_cast<int>(dest[1]), 931);
70+
ASSERT_EQ(static_cast<int>(dest[2]), 9851);
71+
ASSERT_EQ(static_cast<int>(dest[3]), 128569);
72+
ASSERT_TRUE(dest[4] == L'\0');
73+
// Should not count null terminator in number
74+
ASSERT_EQ(static_cast<int>(n), 4);
75+
// Should set ch to nullptr after reading null terminator
76+
ASSERT_EQ(src, nullptr);
77+
}
78+
6279
TEST_F(LlvmLibcMBSRToWCSTest, ReadLessThanStringLength) {
6380
// Four laughing cat emojis "😹😹😹😹"
6481
const char *src =

0 commit comments

Comments
 (0)