Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions libc/config/linux/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1261,6 +1261,8 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.sys.socket.recvmsg

# wchar.h entrypoints
libc.src.wchar.mblen
libc.src.wchar.mbrlen
libc.src.wchar.mbrtowc
libc.src.wchar.mbtowc
libc.src.wchar.wcrtomb
Expand Down
15 changes: 15 additions & 0 deletions libc/include/wchar.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,21 @@ functions:
- type: wchar_t *__restrict
- type: const char *__restrict
- type: size_t
# int mblen(const char *s, size_t n);
- name: mblen
standards:
- stdc
return_type: int
arguments:
- type: const char *
- type: size_t
# size_t mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps);
- name: mbrlen
standards:
- stdc
return_type: size_t
arguments:
- type: const char *__restrict
- type: size_t
- type: mbstate_t *__restrict
- name: wmemset
standards:
- stdc
Expand Down
31 changes: 31 additions & 0 deletions libc/src/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,37 @@ add_entrypoint_object(
libc.src.__support.wchar.mbstate
)

# Entrypoint object for mblen, built from mblen.cpp / mblen.h.
# The DEPENDS list mirrors the headers included by mblen.cpp.
add_entrypoint_object(
mblen
SRCS
mblen.cpp
HDRS
mblen.h
DEPENDS
libc.hdr.types.size_t
libc.src.__support.common
libc.src.__support.macros.config
libc.src.__support.libc_errno
libc.src.__support.wchar.mbrtowc
libc.src.__support.wchar.mbstate
)

# Entrypoint object for mbrlen, built from mbrlen.cpp / mbrlen.h.
# The DEPENDS list mirrors the headers included by mbrlen.cpp; unlike mblen it
# also needs the public mbstate_t type because the caller may pass a state.
add_entrypoint_object(
mbrlen
SRCS
mbrlen.cpp
HDRS
mbrlen.h
DEPENDS
libc.hdr.types.size_t
libc.hdr.types.mbstate_t
libc.src.__support.common
libc.src.__support.macros.config
libc.src.__support.wchar.mbrtowc
libc.src.__support.libc_errno
libc.src.__support.wchar.mbstate
)

add_entrypoint_object(
wmemset
SRCS
Expand Down
35 changes: 35 additions & 0 deletions libc/src/wchar/mblen.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
//===-- Implementation of mblen -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/wchar/mblen.h"

#include "hdr/types/size_t.h"
#include "src/__support/common.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/mbrtowc.h"
#include "src/__support/wchar/mbstate.h"

namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(int, mblen, (const char *s, size_t n)) {
// returns 0 since UTF-8 encoding is not state-dependent
if (s == nullptr)
return 0;
internal::mbstate internal_mbstate;
auto ret = internal::mbrtowc(nullptr, s, n, &internal_mbstate);
if (!ret.has_value() || static_cast<int>(ret.value()) == -2) {
// Encoding failure
if (!ret.has_value())
libc_errno = EILSEQ;
return -1;
}
return static_cast<int>(ret.value());
}

} // namespace LIBC_NAMESPACE_DECL
21 changes: 21 additions & 0 deletions libc/src/wchar/mblen.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//===-- Implementation header for mblen -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_WCHAR_MBLEN_H
#define LLVM_LIBC_SRC_WCHAR_MBLEN_H

#include "hdr/types/size_t.h"
#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {

// Returns the length in bytes of the multibyte character at the start of `s`,
// looking at no more than `n` bytes. Returns 0 when `s` is null or the
// character is the null character, and -1 when no complete, valid character is
// found; errno is set to EILSEQ only when the bytes are an invalid sequence,
// not when the input is merely too short.
int mblen(const char *s, size_t n);

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_WCHAR_MBLEN_H
37 changes: 37 additions & 0 deletions libc/src/wchar/mbrlen.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
//===-- Implementation of mbrlen ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/wchar/mbrlen.h"

#include "hdr/types/mbstate_t.h"
#include "hdr/types/size_t.h"
#include "src/__support/common.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/mbrtowc.h"
#include "src/__support/wchar/mbstate.h"

namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(size_t, mbrlen,
(const char *__restrict s, size_t n,
mbstate_t *__restrict ps)) {
static internal::mbstate internal_mbstate;
auto ret = internal::mbrtowc(nullptr, s, n,
ps == nullptr
? &internal_mbstate
: reinterpret_cast<internal::mbstate *>(ps));
if (!ret.has_value()) {
// Encoding failure
libc_errno = ret.error();
return -1;
}
return ret.value();
}

} // namespace LIBC_NAMESPACE_DECL
22 changes: 22 additions & 0 deletions libc/src/wchar/mbrlen.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//===-- Implementation header for mbrlen ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_WCHAR_MBRLEN_H
#define LLVM_LIBC_SRC_WCHAR_MBRLEN_H

#include "hdr/types/mbstate_t.h"
#include "hdr/types/size_t.h"
#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {

// Examines at most `n` bytes of `s` using the conversion state `ps` (or an
// internal static state when `ps` is null) and returns the result of the
// internal multibyte decoder. On a decoding error it sets errno and returns
// (size_t)-1.
// NOTE(review): the success value is presumably the number of bytes consumed
// (0 for a null character) -- confirm against the internal mbrtowc helper.
size_t mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps);

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_WCHAR_MBRLEN_H
27 changes: 27 additions & 0 deletions libc/test/src/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,33 @@ add_libc_test(
libc.test.UnitTest.ErrnoCheckingTest
)

# Unit tests for mblen, registered in the libc_wchar_unittests suite.
# ErrnoCheckingTest supplies the fixture used by mblen_test.cpp.
add_libc_test(
mblen_test
SUITE
libc_wchar_unittests
SRCS
mblen_test.cpp
DEPENDS
libc.src.__support.libc_errno
libc.src.wchar.mblen
libc.test.UnitTest.ErrnoCheckingTest
)

# Unit tests for mbrlen, registered in the libc_wchar_unittests suite.
add_libc_test(
mbrlen_test
SUITE
libc_wchar_unittests
SRCS
mbrlen_test.cpp
DEPENDS
libc.src.__support.libc_errno
libc.src.__support.wchar.mbstate
libc.src.string.memset
libc.src.wchar.mbrlen
libc.hdr.types.mbstate_t
libc.test.UnitTest.ErrnoCheckingTest
)

add_libc_test(
wctob_test
SUITE
Expand Down
104 changes: 104 additions & 0 deletions libc/test/src/wchar/mblen_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
//===-- Unittests for mblen -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/__support/libc_errno.h"
#include "src/wchar/mblen.h"
#include "test/UnitTest/ErrnoCheckingTest.h"
#include "test/UnitTest/Test.h"

using LlvmLibcMBLenTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;

TEST_F(LlvmLibcMBLenTest, OneByte) {
  // A single ASCII byte is a complete character of length 1.
  const char *ascii = "A";
  int len = LIBC_NAMESPACE::mblen(ascii, 1);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(len, 1);

  // With n == 0 nothing may be examined, so no character can be completed:
  // the call fails with -1 but must leave errno untouched.
  len = LIBC_NAMESPACE::mblen(ascii, 0);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(len, -1);
}

TEST_F(LlvmLibcMBLenTest, TwoByte) {
  // 0xC2 0x8E is the two-byte UTF-8 encoding of U+008E. The buffer is
  // zero-padded to 4 bytes so that it really holds every byte the first call
  // below permits mblen to examine (n == 4).
  const char ch[4] = {static_cast<char>(0xC2), static_cast<char>(0x8E), '\0',
                      '\0'};
  // n larger than the character: only the 2 bytes of the character count.
  int n = LIBC_NAMESPACE::mblen(ch, 4);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(n, 2);

  // Should fail since we have not read enough; a truncated character is not
  // an encoding error, so errno stays clear.
  n = LIBC_NAMESPACE::mblen(ch, 1);
  ASSERT_EQ(n, -1);
  ASSERT_ERRNO_SUCCESS();
  // mblen keeps no state between calls, so the trailing byte on its own is
  // not a continuation of the previous call: it is an invalid starting byte
  // and must set errno.
  n = LIBC_NAMESPACE::mblen(ch + 1, 1);
  ASSERT_EQ(n, -1);
  ASSERT_ERRNO_EQ(EILSEQ);
}

TEST_F(LlvmLibcMBLenTest, ThreeByte) {
  // Three-byte UTF-8 encoding of U+2211 (the summation sign).
  const char summation[3] = {static_cast<char>(0xE2), static_cast<char>(0x88),
                             static_cast<char>(0x91)};
  int len = LIBC_NAMESPACE::mblen(summation, sizeof(summation));
  ASSERT_EQ(len, 3);
  ASSERT_ERRNO_SUCCESS();

  // Only two of the three bytes are made available: the character cannot be
  // completed, which yields -1 without setting errno.
  len = LIBC_NAMESPACE::mblen(summation, 2);
  ASSERT_EQ(len, -1);
  ASSERT_ERRNO_SUCCESS();
}

TEST_F(LlvmLibcMBLenTest, FourByte) {
  // Four-byte UTF-8 encoding of U+1F921 (clown face emoji).
  const char clown[4] = {static_cast<char>(0xF0), static_cast<char>(0x9F),
                         static_cast<char>(0xA4), static_cast<char>(0xA1)};
  int len = LIBC_NAMESPACE::mblen(clown, sizeof(clown));
  ASSERT_EQ(len, 4);
  ASSERT_ERRNO_SUCCESS();

  // Only half of the character is made available: the call fails with -1 and
  // errno must remain clear.
  len = LIBC_NAMESPACE::mblen(clown, 2);
  ASSERT_EQ(len, -1);
  ASSERT_ERRNO_SUCCESS();
}

TEST_F(LlvmLibcMBLenTest, InvalidByte) {
  // 0x80 cannot start a character, so it must be rejected with EILSEQ.
  const char bad_start = static_cast<char>(0x80);
  int len = LIBC_NAMESPACE::mblen(&bad_start, 1);
  ASSERT_EQ(len, -1);
  ASSERT_ERRNO_EQ(EILSEQ);
}

TEST_F(LlvmLibcMBLenTest, InvalidMultiByte) {
  // Alternating invalid-start and null bytes.
  const char bytes[4] = {static_cast<char>(0x80), static_cast<char>(0x00),
                         static_cast<char>(0x80), static_cast<char>(0x00)};

  // Starting at the first byte, the sequence is rejected as a whole.
  int len = LIBC_NAMESPACE::mblen(bytes, sizeof(bytes));
  ASSERT_EQ(len, -1);
  ASSERT_ERRNO_EQ(EILSEQ);

  // Starting at the second byte, the first thing seen is a null byte, which
  // is the null character: the result is 0 and errno stays clear.
  len = LIBC_NAMESPACE::mblen(bytes + 1, 2);
  ASSERT_EQ(len, 0);
  ASSERT_ERRNO_SUCCESS();
}

TEST_F(LlvmLibcMBLenTest, NullString) {
  // A null pointer argument yields 0 and does not set errno.
  int len = LIBC_NAMESPACE::mblen(nullptr, 2);
  ASSERT_EQ(len, 0);
  ASSERT_ERRNO_SUCCESS();

  // A string that starts with the null terminator also yields 0.
  const char *terminator = "\0";
  len = LIBC_NAMESPACE::mblen(terminator, 1);
  ASSERT_EQ(len, 0);
}
Loading
Loading