Skip to content
Closed
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions libc/config/linux/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1249,6 +1249,7 @@ if(LLVM_LIBC_FULL_BUILD)

# wchar.h entrypoints
libc.src.wchar.mbrtowc
libc.src.wchar.mbsrtowcs
libc.src.wchar.mbtowc
libc.src.wchar.wcrtomb
libc.src.wchar.wctomb
Expand Down
9 changes: 9 additions & 0 deletions libc/include/wchar.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -38,6 +38,15 @@ functions:
- type: const char *__restrict
- type: size_t
- type: mbstate_t *__restrict
- name: mbsrtowcs
standards:
- stdc
return_type: size_t
arguments:
- type: wchar_t *__restrict
- type: const char **__restrict
- type: size_t
- type: mbstate_t *__restrict
- name: mbtowc
standards:
- stdc
Expand Down
16 changes: 16 additions & 0 deletions libc/src/__support/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -53,3 +53,19 @@ add_object_library(
.character_converter
.mbstate
)

# Internal (ErrorOr-returning) implementation of mbsrtowcs, shared by the
# public wchar entrypoint.
add_object_library(
  mbsrtowcs
  HDRS
    mbsrtowcs.h
  SRCS
    mbsrtowcs.cpp
  DEPENDS
    # mbsrtowcs.cpp includes hdr/types/mbstate_t.h, so list it explicitly.
    libc.hdr.types.mbstate_t
    libc.hdr.types.wchar_t
    libc.hdr.types.size_t
    libc.src.__support.common
    libc.src.__support.error_or
    libc.src.__support.macros.config
    .mbstate
    .mbrtowc
)
44 changes: 44 additions & 0 deletions libc/src/__support/wchar/mbsrtowcs.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,44 @@
//===-- Implementation for mbsrtowcs function -------------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/__support/wchar/mbsrtowcs.h"
#include "hdr/types/mbstate_t.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/error_or.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/mbrtowc.h"
#include "src/__support/wchar/mbstate.h"

namespace LIBC_NAMESPACE_DECL {
namespace internal {

// Converts multibyte characters from the string referenced by `*src` into
// wide characters stored at `dst`, writing at most `len` of them.
//
// `dst` is written and read on every iteration, so it must not be null here.
// `*src` is advanced past each converted character; once the terminating null
// has been converted and stored, `*src` is set to nullptr instead.
//
// Returns the number of wide characters stored, not counting the terminator,
// or the error reported by mbrtowc when a conversion fails.
ErrorOr<size_t> mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                          size_t len, mbstate *__restrict ps) {
  size_t converted = 0;
  while (converted < len) {
    // Offer mbrtowc up to 4 bytes for the next character.
    auto result = mbrtowc(dst, *src, 4, ps);
    // Propagate an encoding error or invalid-state error unchanged.
    if (!result.has_value())
      return Error(result.error());
    // The terminator was just stored: signal completion through *src and
    // leave it out of the count.
    if (*dst == L'\0') {
      *src = nullptr;
      break;
    }
    // Step the source past the bytes consumed and move to the next slot.
    *src += result.value();
    ++converted;
    ++dst;
  }
  return converted;
}

} // namespace internal

} // namespace LIBC_NAMESPACE_DECL
29 changes: 29 additions & 0 deletions libc/src/__support/wchar/mbsrtowcs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
//===-- Implementation header for mbsrtowcs function ------------*- C++ -*-===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSRTOWCS_H
#define LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSRTOWCS_H

#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/error_or.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/mbstate.h"

namespace LIBC_NAMESPACE_DECL {
namespace internal {

// Converts the multibyte string referenced by `*src` into wide characters
// stored at `dst`, writing at most `len` wide characters and using `ps` as the
// conversion state.
//
// `dst` must point to writable storage; the implementation stores every
// converted character through it.
//
// On success returns the number of wide characters stored, excluding the
// terminating null. If the terminator was converted, `*src` is set to nullptr;
// otherwise `*src` points just past the last character converted. On failure
// returns the error produced by the underlying character conversion.
ErrorOr<size_t> mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
                          size_t len, mbstate *__restrict ps);

} // namespace internal

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC___SUPPORT_WCHAR_MBSRTOWCS_H
18 changes: 18 additions & 0 deletions libc/src/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -78,6 +78,24 @@ add_entrypoint_object(
libc.src.__support.wchar.mbstate
)

# Public mbsrtowcs entrypoint: a thin wrapper that forwards to the internal
# implementation in libc.src.__support.wchar.mbsrtowcs and reports failures
# through errno.
add_entrypoint_object(
mbsrtowcs
SRCS
mbsrtowcs.cpp
HDRS
mbsrtowcs.h
DEPENDS
libc.hdr.types.size_t
libc.hdr.types.mbstate_t
libc.hdr.types.wchar_t
libc.src.__support.common
libc.src.__support.macros.config
libc.src.__support.wchar.mbsrtowcs
libc.src.__support.libc_errno
libc.src.__support.wchar.mbstate
libc.src.__support.macros.null_check
)

add_entrypoint_object(
mbtowc
SRCS
Expand Down
41 changes: 41 additions & 0 deletions libc/src/wchar/mbsrtowcs.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,41 @@
//===-- Implementation of mbsrtowcs ---------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/wchar/mbsrtowcs.h"

#include "hdr/types/mbstate_t.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/common.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
#include "src/__support/macros/null_check.h"
#include "src/__support/wchar/mbsrtowcs.h"
#include "src/__support/wchar/mbstate.h"

namespace LIBC_NAMESPACE_DECL {

// Public mbsrtowcs entrypoint.
//
// Converts the multibyte string referenced by `*src` into wide characters.
//  - `src` must not be null (enforced by LIBC_CRASH_ON_NULLPTR).
//  - If `ps` is null, a function-local static conversion state is used.
//  - If `dst` is non-null, at most `len` wide characters are stored and `*src`
//    is updated by the internal implementation (nullptr once the terminator
//    has been converted, otherwise just past the last converted character).
//  - If `dst` is null, the call only counts: `len` is ignored, nothing is
//    stored for the caller and `*src` is left untouched.
//
// Returns the number of wide characters converted, excluding the terminating
// null. On a conversion failure, sets errno to the reported error and returns
// (size_t)-1.
LLVM_LIBC_FUNCTION(size_t, mbsrtowcs,
                   (wchar_t *__restrict dst, const char **__restrict src,
                    size_t len, mbstate_t *__restrict ps)) {
  LIBC_CRASH_ON_NULLPTR(src);
  static internal::mbstate internal_mbstate;
  internal::mbstate *state =
      ps == nullptr ? &internal_mbstate
                    : reinterpret_cast<internal::mbstate *>(ps);

  if (dst == nullptr) {
    // Counting mode. Convert one character at a time into a single scratch
    // slot rather than a `len`-sized stack array: `len` is caller-controlled
    // (and meaningless here), so sizing a stack buffer from it could overflow
    // the stack or produce a zero-length array. A private cursor keeps the
    // caller's `*src` unmodified.
    const char *cursor = *src;
    size_t count = 0;
    while (cursor != nullptr) {
      wchar_t scratch = L'\0';
      auto step = internal::mbsrtowcs(&scratch, &cursor, 1, state);
      if (!step.has_value()) {
        // Conversion failure
        libc_errno = step.error();
        return static_cast<size_t>(-1);
      }
      // `step` is 1 for an ordinary character and 0 for the terminator, in
      // which case the internal routine also resets `cursor` to nullptr.
      count += step.value();
    }
    return count;
  }

  auto ret = internal::mbsrtowcs(dst, src, len, state);
  if (!ret.has_value()) {
    // Conversion failure
    libc_errno = ret.error();
    return static_cast<size_t>(-1);
  }
  return ret.value();
}

} // namespace LIBC_NAMESPACE_DECL
24 changes: 24 additions & 0 deletions libc/src/wchar/mbsrtowcs.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,24 @@
//===-- Implementation header for mbsrtowcs -------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H
#define LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H

#include "hdr/types/mbstate_t.h"
#include "hdr/types/size_t.h"
#include "hdr/types/wchar_t.h"
#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {

// Converts the multibyte string referenced by `*src` into wide characters,
// using `ps` as the conversion state (a null `ps` selects an internal static
// state). `src` itself must not be null.
//
// Returns the number of wide characters converted, not counting the
// terminating null. On a conversion failure, sets errno and returns
// (size_t)-1.
size_t mbsrtowcs(wchar_t *__restrict dst, const char **__restrict src,
size_t len, mbstate_t *__restrict ps);

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_WCHAR_MBSRTOWCS_H
14 changes: 14 additions & 0 deletions libc/test/src/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,20 @@ add_libc_test(
libc.test.UnitTest.ErrnoCheckingTest
)

# Unit tests for the public mbsrtowcs entrypoint.
add_libc_test(
  mbsrtowcs_test
  SUITE
    libc_wchar_unittests
  SRCS
    mbsrtowcs_test.cpp
  DEPENDS
    libc.src.__support.libc_errno
    # The test builds an internal::mbstate directly to exercise the
    # invalid-state path.
    libc.src.__support.wchar.mbstate
    libc.src.string.memset
    libc.src.wchar.mbsrtowcs
    libc.hdr.types.mbstate_t
    libc.hdr.types.wchar_t
    # The fixture is an alias of ErrnoCheckingTest.
    libc.test.UnitTest.ErrnoCheckingTest
)

add_libc_test(
mbtowc_test
SUITE
Expand Down
132 changes: 132 additions & 0 deletions libc/test/src/wchar/mbsrtowcs_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,132 @@
//===-- Unittests for mbsrtowcs -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "hdr/types/wchar_t.h"
#include "src/__support/libc_errno.h"
#include "src/__support/wchar/mbstate.h"
#include "src/string/memset.h"
#include "src/wchar/mbsrtowcs.h"
#include "test/UnitTest/ErrnoCheckingTest.h"
#include "test/UnitTest/Test.h"

using LlvmLibcMBSRToWCSTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;

// A single ASCII byte converts to one wide character plus the terminator,
// using a caller-provided, zero-initialized conversion state.
TEST_F(LlvmLibcMBSRToWCSTest, OneByteOneCharacter) {
  // The state must be an actual object: zero it in place and pass its
  // address, rather than zeroing (and overrunning) a pointer variable.
  mbstate_t mb;
  LIBC_NAMESPACE::memset(&mb, 0, sizeof(mbstate_t));
  const char *ch = "A";
  wchar_t dest[2];
  size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &ch, 2, &mb);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_TRUE(dest[0] == L'A');
  ASSERT_TRUE(dest[1] == L'\0');
  // Should not count null terminator in number
  ASSERT_EQ(static_cast<int>(n), 1);
  // Should set ch to nullptr after reading null terminator
  ASSERT_EQ(ch, nullptr);
}

// One four-byte sequence converts to a single wide character followed by the
// terminator, with no caller-provided conversion state.
TEST_F(LlvmLibcMBSRToWCSTest, MultiByteOneCharacter) {
  const char *input = "\xf0\x9f\x98\xb9"; // laughing cat emoji 😹
  wchar_t wide[2];
  const size_t converted = LIBC_NAMESPACE::mbsrtowcs(wide, &input, 2, nullptr);
  ASSERT_ERRNO_SUCCESS();
  // 128569 is the expected wide-character value for the emoji.
  ASSERT_EQ(static_cast<int>(wide[0]), 128569);
  ASSERT_TRUE(wide[1] == L'\0');
  // The terminator is stored but not included in the returned count.
  ASSERT_EQ(static_cast<int>(converted), 1);
  // Consuming the terminator resets the source pointer to nullptr.
  ASSERT_EQ(input, nullptr);
}

// Two consecutive four-byte sequences convert to two wide characters followed
// by the terminator.
TEST_F(LlvmLibcMBSRToWCSTest, MultiByteTwoCharacters) {
  // Two laughing cat emojis "😹😹"
  const char *input = "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  wchar_t wide[3];
  const size_t converted = LIBC_NAMESPACE::mbsrtowcs(wide, &input, 3, nullptr);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(static_cast<int>(wide[0]), 128569);
  ASSERT_EQ(static_cast<int>(wide[1]), 128569);
  ASSERT_TRUE(wide[2] == L'\0');
  // The terminator is stored but not included in the returned count.
  ASSERT_EQ(static_cast<int>(converted), 2);
  // Consuming the terminator resets the source pointer to nullptr.
  ASSERT_EQ(input, nullptr);
}

// When the length limit is reached before the terminator, conversion stops
// and the source pointer is left at the first unconverted character.
TEST_F(LlvmLibcMBSRToWCSTest, ReadLessThanStringLength) {
  // Four laughing cat emojis "😹😹😹😹"
  const char *cursor =
      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  const char *start = cursor;
  wchar_t wide[3];
  const size_t converted = LIBC_NAMESPACE::mbsrtowcs(wide, &cursor, 3, nullptr);
  ASSERT_ERRNO_SUCCESS();
  // Exactly three of the four emojis fit within the limit.
  ASSERT_EQ(static_cast<int>(converted), 3);
  ASSERT_EQ(static_cast<int>(wide[0]), 128569);
  ASSERT_EQ(static_cast<int>(wide[1]), 128569);
  ASSERT_EQ(static_cast<int>(wide[2]), 128569);
  // Three 4-byte characters were consumed, so the source pointer now sits at
  // byte offset 12: the start of the fourth emoji.
  ASSERT_EQ((start + 12), cursor);
}

// A string whose very first byte cannot start a multibyte character must be
// rejected with EILSEQ.
TEST_F(LlvmLibcMBSRToWCSTest, InvalidFirstByte) {
  // 0x80 is not a valid leading byte.
  const char *input =
      "\x80\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  wchar_t wide[3];
  const size_t result = LIBC_NAMESPACE::mbsrtowcs(wide, &input, 3, nullptr);
  // The call reports failure through both the return value and errno.
  ASSERT_EQ(static_cast<int>(result), -1);
  ASSERT_ERRNO_EQ(EILSEQ);
}

// An invalid byte in the middle of the second character must be rejected with
// EILSEQ, even though the first character is well formed.
TEST_F(LlvmLibcMBSRToWCSTest, InvalidMiddleByte) {
  // The 7th byte is invalid for a 4 byte character
  const char *src =
      "\xf0\x9f\x98\xb9\xf0\x9f\xf0\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  wchar_t dest[3];
  // The length limit matches the capacity of dest so the call can never be
  // allowed to write past the end of the buffer.
  size_t n = LIBC_NAMESPACE::mbsrtowcs(dest, &src, 3, nullptr);
  // Should return error and set errno
  ASSERT_EQ(static_cast<int>(n), -1);
  ASSERT_ERRNO_EQ(EILSEQ);
}

// With a null destination the call still reports how many wide characters
// the input converts to.
TEST_F(LlvmLibcMBSRToWCSTest, NullDestination) {
  // Four laughing cat emojis "😹😹😹😹"
  const char *input =
      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  const size_t counted =
      LIBC_NAMESPACE::mbsrtowcs(nullptr, &input, 5, nullptr);
  ASSERT_ERRNO_SUCCESS();
  // All four characters are counted; the terminator is not.
  ASSERT_EQ(static_cast<int>(counted), 4);
}

// A conversion state that the implementation considers invalid must make the
// call fail with EINVAL.
TEST_F(LlvmLibcMBSRToWCSTest, InvalidMBState) {
  // Build the internal state directly with a total_bytes value the call is
  // expected to reject, then view it through the public mbstate_t type.
  LIBC_NAMESPACE::internal::mbstate inv;
  inv.total_bytes = 6;
  mbstate_t *mb = reinterpret_cast<mbstate_t *>(&inv);
  // Four laughing cat emojis "😹😹😹😹"
  const char *input =
      "\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9\xf0\x9f\x98\xb9";
  wchar_t wide[3];
  const size_t result = LIBC_NAMESPACE::mbsrtowcs(wide, &input, 3, mb);
  // The bad state is reported through both the return value and errno.
  ASSERT_EQ(static_cast<int>(result), -1);
  ASSERT_ERRNO_EQ(EINVAL);
}

// Death test for the null-source check; only compiled when null checks are
// enabled and no sanitizer is in use.
#if defined(LIBC_ADD_NULL_CHECKS) && !defined(LIBC_HAS_SANITIZER)
TEST_F(LlvmLibcMBSRToWCSTest, NullSource) {
// Passing in a nullptr source should crash the program
EXPECT_DEATH([] { LIBC_NAMESPACE::mbsrtowcs(nullptr, nullptr, 1, nullptr); },
WITH_SIGNAL(-1));
}
#endif // defined(LIBC_ADD_NULL_CHECKS) && !defined(LIBC_HAS_SANITIZER)
Loading