Skip to content
Merged
Show file tree
Hide file tree
Changes from 4 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions libc/config/linux/x86_64/entrypoints.txt
Original file line number Diff line number Diff line change
Expand Up @@ -1261,6 +1261,8 @@ if(LLVM_LIBC_FULL_BUILD)
libc.src.sys.socket.recvmsg

# wchar.h entrypoints
libc.src.wchar.mblen
libc.src.wchar.mbrlen
libc.src.wchar.mbrtowc
libc.src.wchar.mbtowc
libc.src.wchar.wcrtomb
Expand Down
15 changes: 15 additions & 0 deletions libc/include/wchar.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -53,6 +53,21 @@ functions:
- type: wchar_t *__restrict
- type: const char *__restrict
- type: size_t
# mblen: takes a multibyte string pointer and a byte limit, returns int.
# NOTE(review): ISO C declares mblen in <stdlib.h> (and without `restrict`);
# confirm that exposing it through wchar.yaml with __restrict is intended.
- name: mblen
standards:
- stdc
return_type: int
arguments:
- type: const char *__restrict
- type: size_t
# mbrlen: takes a multibyte string pointer, a byte limit and a conversion
# state pointer, returns size_t.
- name: mbrlen
standards:
- stdc
return_type: size_t
arguments:
- type: const char *__restrict
- type: size_t
- type: mbstate_t *__restrict
- name: wmemset
standards:
- stdc
Expand Down
31 changes: 31 additions & 0 deletions libc/src/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -169,6 +169,37 @@ add_entrypoint_object(
libc.src.__support.wchar.mbstate
)

# Entrypoint object for mblen, built from mblen.cpp / mblen.h.
# The DEPENDS list mirrors the headers included by mblen.cpp.
add_entrypoint_object(
mblen
SRCS
mblen.cpp
HDRS
mblen.h
DEPENDS
libc.hdr.types.size_t
libc.src.__support.common
libc.src.__support.macros.config
libc.src.__support.libc_errno
libc.src.__support.wchar.mbrtowc
libc.src.__support.wchar.mbstate
)

# Entrypoint object for mbrlen, built from mbrlen.cpp / mbrlen.h.
# The DEPENDS list mirrors the headers included by mbrlen.cpp, which
# additionally needs the public mbstate_t type for its third parameter.
add_entrypoint_object(
mbrlen
SRCS
mbrlen.cpp
HDRS
mbrlen.h
DEPENDS
libc.hdr.types.size_t
libc.hdr.types.mbstate_t
libc.src.__support.common
libc.src.__support.macros.config
libc.src.__support.wchar.mbrtowc
libc.src.__support.libc_errno
libc.src.__support.wchar.mbstate
)

add_entrypoint_object(
wmemset
SRCS
Expand Down
35 changes: 35 additions & 0 deletions libc/src/wchar/mblen.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
//===-- Implementation of mblen -------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/wchar/mblen.h"

#include "hdr/types/size_t.h"
#include "src/__support/common.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/mbrtowc.h"
#include "src/__support/wchar/mbstate.h"

namespace LIBC_NAMESPACE_DECL {

LLVM_LIBC_FUNCTION(int, mblen, (const char *__restrict s, size_t n)) {
// returns 0 since UTF-8 encoding is not state-dependent
if (s == nullptr)
return 0;
internal::mbstate internal_mbstate;
auto ret = internal::mbrtowc(nullptr, s, n, &internal_mbstate);
if (!ret.has_value() || static_cast<int>(ret.value()) == -2) {
// Encoding failure
if (!ret.has_value())
libc_errno = EILSEQ;
return -1;
}
return static_cast<int>(ret.value());
}

} // namespace LIBC_NAMESPACE_DECL
21 changes: 21 additions & 0 deletions libc/src/wchar/mblen.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,21 @@
//===-- Implementation header for mblen -----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_WCHAR_MBLEN_H
#define LLVM_LIBC_SRC_WCHAR_MBLEN_H

#include "hdr/types/size_t.h"
#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {

// Declaration of the mblen entrypoint (defined in mblen.cpp).
// `s` is the multibyte string to inspect and `n` the maximum number of bytes
// to examine.
int mblen(const char *__restrict s, size_t n);

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_WCHAR_MBLEN_H
37 changes: 37 additions & 0 deletions libc/src/wchar/mbrlen.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,37 @@
//===-- Implementation of mbrlen ------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/wchar/mbrlen.h"

#include "hdr/types/mbstate_t.h"
#include "hdr/types/size_t.h"
#include "src/__support/common.h"
#include "src/__support/libc_errno.h"
#include "src/__support/macros/config.h"
#include "src/__support/wchar/mbrtowc.h"
#include "src/__support/wchar/mbstate.h"

namespace LIBC_NAMESPACE_DECL {

// Reports the length in bytes of the multibyte character starting at `s`,
// examining at most `n` bytes and using `ps` as the conversion state.
//
// Returns whatever internal::mbrtowc produces on success. On failure the
// error code from internal::mbrtowc is stored in errno and -1 (converted to
// size_t) is returned.
LLVM_LIBC_FUNCTION(size_t, mbrlen,
                   (const char *__restrict s, size_t n,
                    mbstate_t *__restrict ps)) {
  // Fallback conversion state for callers that pass a null `ps`; it is static
  // so it persists from one call to the next.
  static internal::mbstate internal_mbstate;

  internal::mbstate *state = &internal_mbstate;
  if (ps != nullptr)
    state = reinterpret_cast<internal::mbstate *>(ps);

  auto result = internal::mbrtowc(nullptr, s, n, state);
  if (result.has_value())
    return result.value();

  // Encoding failure: forward the error code to errno.
  libc_errno = result.error();
  return -1;
}

} // namespace LIBC_NAMESPACE_DECL
22 changes: 22 additions & 0 deletions libc/src/wchar/mbrlen.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,22 @@
//===-- Implementation header for mbrlen ----------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#ifndef LLVM_LIBC_SRC_WCHAR_MBRLEN_H
#define LLVM_LIBC_SRC_WCHAR_MBRLEN_H

#include "hdr/types/mbstate_t.h"
#include "hdr/types/size_t.h"
#include "src/__support/macros/config.h"

namespace LIBC_NAMESPACE_DECL {

// Declaration of the mbrlen entrypoint (defined in mbrlen.cpp).
// `s` is the multibyte string to inspect, `n` the maximum number of bytes to
// examine, and `ps` the conversion state (may be null, in which case the
// implementation uses its own internal state).
size_t mbrlen(const char *__restrict s, size_t n, mbstate_t *__restrict ps);

} // namespace LIBC_NAMESPACE_DECL

#endif // LLVM_LIBC_SRC_WCHAR_MBRLEN_H
27 changes: 27 additions & 0 deletions libc/test/src/wchar/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,33 @@ add_libc_test(
libc.test.UnitTest.ErrnoCheckingTest
)

# Unit test target for mblen (mblen_test.cpp) in the libc_wchar_unittests
# suite. The test checks errno, hence the libc_errno and ErrnoCheckingTest
# dependencies.
add_libc_test(
mblen_test
SUITE
libc_wchar_unittests
SRCS
mblen_test.cpp
DEPENDS
libc.src.__support.libc_errno
libc.src.wchar.mblen
libc.test.UnitTest.ErrnoCheckingTest
)

# Unit test target for mbrlen (mbrlen_test.cpp) in the libc_wchar_unittests
# suite.
add_libc_test(
mbrlen_test
SUITE
libc_wchar_unittests
SRCS
mbrlen_test.cpp
DEPENDS
libc.src.__support.libc_errno
libc.src.__support.wchar.mbstate
libc.src.string.memset
libc.src.wchar.mbrlen
libc.hdr.types.mbstate_t
libc.test.UnitTest.ErrnoCheckingTest
)

add_libc_test(
wctob_test
SUITE
Expand Down
104 changes: 104 additions & 0 deletions libc/test/src/wchar/mblen_test.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
//===-- Unittests for mblen -----------------------------------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//

#include "src/__support/libc_errno.h"
#include "src/wchar/mblen.h"
#include "test/UnitTest/ErrnoCheckingTest.h"
#include "test/UnitTest/Test.h"

using LlvmLibcMBLenTest = LIBC_NAMESPACE::testing::ErrnoCheckingTest;

// A single ASCII byte is a complete one-byte character; with a zero byte
// budget the call fails without setting errno.
TEST_F(LlvmLibcMBLenTest, OneByte) {
  const char *ascii = "A";

  int length = LIBC_NAMESPACE::mblen(ascii, 1);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(length, 1);

  // n == 0: nothing may be examined, so no character can be completed.
  length = LIBC_NAMESPACE::mblen(ascii, 0);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(length, -1);
}

// A two-byte UTF-8 sequence: complete, truncated, and entered at its
// continuation byte.
TEST_F(LlvmLibcMBLenTest, TwoByte) {
  // 0xC2 0x8E is the two-byte UTF-8 encoding of U+008E (a C1 control code;
  // byte 0x8E is "Ž" only in Windows-1252, not in UTF-8).
  const char ch[2] = {static_cast<char>(0xC2), static_cast<char>(0x8E)};
  // The byte budget matches the array size, as in the other tests, so the
  // call never claims more readable bytes than the buffer holds.
  int n = LIBC_NAMESPACE::mblen(ch, 2);
  ASSERT_ERRNO_SUCCESS();
  ASSERT_EQ(n, 2);

  // Should fail since only the lead byte is available; errno stays clear.
  n = LIBC_NAMESPACE::mblen(ch, 1);
  ASSERT_EQ(n, -1);
  ASSERT_ERRNO_SUCCESS();
  // Starting at the continuation byte should fail as well.
  n = LIBC_NAMESPACE::mblen(ch + 1, 1);
  ASSERT_EQ(n, -1);
  // A continuation byte is an invalid starting byte, so errno must be set.
  ASSERT_ERRNO_EQ(EILSEQ);
}

// A three-byte UTF-8 sequence: complete, then truncated to two bytes.
TEST_F(LlvmLibcMBLenTest, ThreeByte) {
  // 0xE2 0x88 0x91 encodes U+2211 (the summation sign).
  const char summation[3] = {static_cast<char>(0xE2), static_cast<char>(0x88),
                             static_cast<char>(0x91)};

  int length = LIBC_NAMESPACE::mblen(summation, 3);
  ASSERT_EQ(length, 3);
  ASSERT_ERRNO_SUCCESS();

  // Two of three bytes: the character is incomplete, errno stays clear.
  length = LIBC_NAMESPACE::mblen(summation, 2);
  ASSERT_EQ(length, -1);
  ASSERT_ERRNO_SUCCESS();
}

// A four-byte UTF-8 sequence: complete, then truncated to two bytes.
TEST_F(LlvmLibcMBLenTest, FourByte) {
  // 0xF0 0x9F 0xA4 0xA1 encodes U+1F921 (clown face emoji).
  const char clown[4] = {static_cast<char>(0xF0), static_cast<char>(0x9F),
                         static_cast<char>(0xA4), static_cast<char>(0xA1)};

  int length = LIBC_NAMESPACE::mblen(clown, 4);
  ASSERT_EQ(length, 4);
  ASSERT_ERRNO_SUCCESS();

  // Two of four bytes: the character is incomplete, errno stays clear.
  length = LIBC_NAMESPACE::mblen(clown, 2);
  ASSERT_EQ(length, -1);
  ASSERT_ERRNO_SUCCESS();
}

// A lone 0x80 byte cannot start a character: expect -1 and EILSEQ.
TEST_F(LlvmLibcMBLenTest, InvalidByte) {
  const char stray[1] = {static_cast<char>(0x80)};
  int length = LIBC_NAMESPACE::mblen(stray, 1);
  ASSERT_EQ(length, -1);
  ASSERT_ERRNO_EQ(EILSEQ);
}

// An invalid byte sequence fails from its start, but decoding from the
// embedded null byte yields the null character.
TEST_F(LlvmLibcMBLenTest, InvalidMultiByte) {
  // Invalid sequence: 0x80 bytes alternating with null bytes.
  const char bytes[4] = {static_cast<char>(0x80), static_cast<char>(0x00),
                         static_cast<char>(0x80), static_cast<char>(0x00)};

  // Offering all four bytes fails on the leading 0x80.
  int length = LIBC_NAMESPACE::mblen(bytes, 4);
  ASSERT_EQ(length, -1);
  ASSERT_ERRNO_EQ(EILSEQ);

  // Starting at the second byte (a null byte) decodes the null character.
  length = LIBC_NAMESPACE::mblen(bytes + 1, 2);
  ASSERT_EQ(length, 0);
  ASSERT_ERRNO_SUCCESS();
}

// Both a null pointer and a string starting with a null terminator give 0.
TEST_F(LlvmLibcMBLenTest, NullString) {
  // A null pointer yields 0 and leaves errno untouched.
  int length = LIBC_NAMESPACE::mblen(nullptr, 2);
  ASSERT_EQ(length, 0);
  ASSERT_ERRNO_SUCCESS();

  // A string whose first byte is the null terminator also yields 0.
  const char *terminator = "\0";
  length = LIBC_NAMESPACE::mblen(terminator, 1);
  ASSERT_EQ(length, 0);
}
Loading
Loading