-
Notifications
You must be signed in to change notification settings - Fork 15.4k
[libc] WCS to integer internal function #147857
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Conversation
|
@llvm/pr-subscribers-libc Author: Uzair Nawaz (uzairnawaz) Changes: Duplicated str_to_integer.h and modified it to work with wide characters. Full diff: https://github.com/llvm/llvm-project/pull/147857.diff 4 Files Affected:
diff --git a/libc/src/__support/CMakeLists.txt b/libc/src/__support/CMakeLists.txt
index 7e85136c08851..294d68474bd53 100644
--- a/libc/src/__support/CMakeLists.txt
+++ b/libc/src/__support/CMakeLists.txt
@@ -180,6 +180,19 @@ add_header_library(
libc.src.__support.common
)
+# Header-only wide-character-string to integer conversion helpers.
+# wcs_to_integer.h includes big_int.h and uint128.h directly, so both header
+# libraries are listed as dependencies alongside the others.
+add_header_library(
+  wcs_to_integer
+  HDRS
+    wcs_to_integer.h
+  DEPENDS
+    .wctype_utils
+    .str_to_num_result
+    .big_int
+    .uint128
+    libc.hdr.errno_macros
+    libc.src.__support.CPP.limits
+    libc.src.__support.CPP.type_traits
+    libc.src.__support.common
+)
+
add_header_library(
integer_to_string
HDRS
diff --git a/libc/src/__support/wcs_to_integer.h b/libc/src/__support/wcs_to_integer.h
new file mode 100644
index 0000000000000..e16ee5008d0b2
--- /dev/null
+++ b/libc/src/__support/wcs_to_integer.h
@@ -0,0 +1,156 @@
+//===-- Widechar string to integer conversion utils -------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SRC___SUPPORT_WCS_TO_INTEGER_H
+#define LLVM_LIBC_SRC___SUPPORT_WCS_TO_INTEGER_H
+
+#include "hdr/errno_macros.h" // For ERANGE
+#include "src/__support/CPP/limits.h"
+#include "src/__support/CPP/type_traits.h"
+#include "src/__support/CPP/type_traits/make_unsigned.h"
+#include "src/__support/big_int.h"
+#include "src/__support/common.h"
+#include "src/__support/macros/config.h"
+#include "src/__support/str_to_num_result.h"
+#include "src/__support/uint128.h"
+#include "src/__support/wctype_utils.h"
+
+namespace LIBC_NAMESPACE_DECL {
+namespace internal {
+
+// Skips leading whitespace (as classified by internal::iswspace()) in src,
+// looking at no more than src_len wide characters, and returns a pointer to the
+// first character that was not skipped. When every one of the src_len
+// characters is whitespace the result is src + src_len.
+// TODO: Change from returning a pointer to returning a length.
+LIBC_INLINE const wchar_t *
+first_non_whitespace(const wchar_t *__restrict src,
+                     size_t src_len = cpp::numeric_limits<size_t>::max()) {
+  const wchar_t *cursor = src;
+  for (size_t remaining = src_len;
+       remaining != 0 && internal::iswspace(*cursor); --remaining)
+    ++cursor;
+  return cursor;
+}
+
+// Reports whether the first three characters of src form the start of a
+// hexadecimal number: L'0', then a character that towlower() maps to L'x',
+// then an alphanumeric character whose base-36 digit value is below 16.
+// Fewer than three available characters never match. The pointer is only
+// inspected, never advanced.
+LIBC_INLINE bool
+is_hex_start(const wchar_t *__restrict src,
+             size_t src_len = cpp::numeric_limits<size_t>::max()) {
+  if (src_len < 3)
+    return false;
+  if (src[0] != L'0')
+    return false;
+  if (towlower(src[1]) != L'x')
+    return false;
+  const wchar_t first_digit = src[2];
+  return iswalnum(first_digit) && b36_wchar_to_int(first_digit) < 16;
+}
+
+// Determines the numeric base implied by the beginning of src, looking at no
+// more than src_len characters. The pointer is received by value and is not
+// advanced. Returns 16, 8 or 10.
+LIBC_INLINE int infer_base(const wchar_t *__restrict src, size_t src_len) {
+  // Hexadecimal: "the prefix 0x or 0X followed by a sequence of the decimal
+  // digits and the letters a (or A) through f (or F) with values 10 through 15
+  // respectively." (C standard 6.4.4.1)
+  if (is_hex_start(src, src_len))
+    return 16;
+
+  // Octal: "the prefix 0 optionally followed by a sequence of the digits 0
+  // through 7 only" (C standard 6.4.4.1), so anything starting with 0 --
+  // including a lone 0 -- is octal.
+  // Decimal: begins "with a nonzero digit and consist[s] of a sequence of
+  // decimal digits." (C standard 6.4.4.1); it is the fallback.
+  const bool starts_with_zero = src_len > 0 && src[0] == L'0';
+  return starts_with_zero ? 8 : 10;
+}
+
+// Parses an integer of type T from the wide string src, examining at most
+// src_len characters.
+//
+// Accepted input: optional leading whitespace, an optional L'+' or L'-' sign,
+// an optional hexadecimal prefix (when the base is 16), then digits of the
+// selected base. A base of 0 asks for the base to be inferred from the text
+// (see infer_base()).
+//
+// Result ({value, parsed length, error}):
+//   - src_len == 0                    -> {0, 0, 0}
+//   - base < 0, base == 1, base > 36  -> {0, 0, EINVAL}
+//   - no digit consumed               -> parsed length 0
+//   - magnitude does not fit in T     -> error ERANGE; value is T's max, or
+//                                        T's min for a negative signed result
+//   - otherwise the converted value and the index one past the last digit.
+template <class T>
+LIBC_INLINE StrToNumResult<T>
+wcstointeger(const wchar_t *__restrict src, int base,
+             const size_t src_len = cpp::numeric_limits<size_t>::max()) {
+  using ResultType = make_integral_or_big_int_unsigned_t<T>;
+
+  ResultType result = 0;
+
+  bool is_number = false;
+  size_t src_cur = 0;
+  int error_val = 0;
+
+  if (src_len == 0)
+    return {0, 0, 0};
+
+  if (base < 0 || base == 1 || base > 36)
+    return {0, 0, EINVAL};
+
+  src_cur = static_cast<size_t>(first_non_whitespace(src, src_len) - src);
+
+  // Whitespace skipping may have used up all src_len characters; only look at
+  // src[src_cur] for a sign if it is still inside the permitted range.
+  wchar_t result_sign = L'+';
+  if (src_cur < src_len && (src[src_cur] == L'+' || src[src_cur] == L'-')) {
+    result_sign = src[src_cur];
+    ++src_cur;
+  }
+
+  if (base == 0)
+    base = infer_base(src + src_cur, src_len - src_cur);
+
+  // Step over the "0x"/"0X" prefix; is_hex_start() guarantees that a hex digit
+  // follows it within the remaining length.
+  if (base == 16 && is_hex_start(src + src_cur, src_len - src_cur))
+    src_cur = src_cur + 2;
+
+  constexpr bool IS_UNSIGNED = cpp::is_unsigned_v<T>;
+  const bool is_positive = (result_sign == L'+');
+
+  // The magnitude is accumulated in an unsigned type. For signed T the largest
+  // negative magnitude is one more than the largest positive one.
+  ResultType constexpr NEGATIVE_MAX =
+      !IS_UNSIGNED ? static_cast<ResultType>(cpp::numeric_limits<T>::max()) + 1
+                   : cpp::numeric_limits<T>::max();
+  ResultType const abs_max =
+      (is_positive ? cpp::numeric_limits<T>::max() : NEGATIVE_MAX);
+  ResultType const abs_max_div_by_base =
+      abs_max / static_cast<ResultType>(base);
+
+  while (src_cur < src_len && iswalnum(src[src_cur])) {
+    int cur_digit = b36_wchar_to_int(src[src_cur]);
+    if (cur_digit >= base)
+      break;
+
+    is_number = true;
+    ++src_cur;
+
+    // If the number has already hit the maximum value for the current type then
+    // the result cannot change, but we still need to advance src to the end of
+    // the number.
+    if (result == abs_max) {
+      error_val = ERANGE;
+      continue;
+    }
+
+    // Multiply by the base, saturating at abs_max instead of wrapping.
+    if (result > abs_max_div_by_base) {
+      result = abs_max;
+      error_val = ERANGE;
+    } else {
+      result = result * static_cast<ResultType>(base);
+    }
+    // Add the digit, again saturating at abs_max.
+    if (result > abs_max - static_cast<ResultType>(cur_digit)) {
+      result = abs_max;
+      error_val = ERANGE;
+    } else {
+      result = result + static_cast<ResultType>(cur_digit);
+    }
+  }
+
+  // Without any digit nothing counts as parsed, even if whitespace, a sign or
+  // a prefix was stepped over.
+  ptrdiff_t str_len = is_number ? static_cast<ptrdiff_t>(src_cur) : 0;
+
+  if (error_val == ERANGE) {
+    if (is_positive || IS_UNSIGNED)
+      return {cpp::numeric_limits<T>::max(), str_len, error_val};
+    else // T is signed and there is a negative overflow
+      return {cpp::numeric_limits<T>::min(), str_len, error_val};
+  }
+
+  return {static_cast<T>(is_positive ? result : -result), str_len, error_val};
+}
+
+} // namespace internal
+} // namespace LIBC_NAMESPACE_DECL
+
+#endif // LLVM_LIBC_SRC___SUPPORT_WCS_TO_INTEGER_H
diff --git a/libc/test/src/__support/CMakeLists.txt b/libc/test/src/__support/CMakeLists.txt
index 9f626ed31cc07..e54d7a5c9638b 100644
--- a/libc/test/src/__support/CMakeLists.txt
+++ b/libc/test/src/__support/CMakeLists.txt
@@ -141,6 +141,17 @@ add_libc_test(
libc.src.__support.str_to_integer
)
+# Unit test target for the wcs_to_integer header library; built from
+# wcs_to_integer_test.cpp and registered in the libc-support-tests suite.
+add_libc_test(
+ wcs_to_integer_test
+ SUITE
+ libc-support-tests
+ SRCS
+ wcs_to_integer_test.cpp
+ DEPENDS
+ libc.src.__support.integer_literals
+ libc.src.__support.wcs_to_integer
+)
+
add_libc_test(
integer_to_string_test
SUITE
diff --git a/libc/test/src/__support/wcs_to_integer_test.cpp b/libc/test/src/__support/wcs_to_integer_test.cpp
new file mode 100644
index 0000000000000..e4107929c15fc
--- /dev/null
+++ b/libc/test/src/__support/wcs_to_integer_test.cpp
@@ -0,0 +1,239 @@
+//===-- Unittests for wcs_to_integer --------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "src/__support/libc_errno.h"
+#include "src/__support/wcs_to_integer.h"
+#include <stddef.h>
+
+#include "test/UnitTest/Test.h"
+
+// This file is for testing the src_len argument and other internal interface
+// features. Primary testing is done through the public interface.
+
+// Plain decimal digits parsed under different src_len limits.
+TEST(LlvmLibcStrToIntegerTest, SimpleLength) {
+  using LIBC_NAMESPACE::internal::wcstointeger;
+  const wchar_t *const digits = L"12345";
+
+  // src_len = 10: all five digits are expected to be consumed.
+  auto parsed = wcstointeger<int>(digits, 10, 10);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(5));
+  ASSERT_EQ(parsed.value, 12345);
+
+  // src_len = 2: only the first two digits are expected.
+  parsed = wcstointeger<int>(digits, 10, 2);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(2));
+  ASSERT_EQ(parsed.value, 12);
+
+  // src_len = 0: nothing is expected to be parsed.
+  parsed = wcstointeger<int>(digits, 10, 0);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(0));
+  ASSERT_EQ(parsed.value, 0);
+}
+
+// Leading whitespace counts towards both src_len and parsed_len.
+// The input has exactly five leading spaces followed by "12345"; the expected
+// lengths below (10, 7, 0) depend on that.
+TEST(LlvmLibcStrToIntegerTest, LeadingSpaces) {
+  auto result =
+      LIBC_NAMESPACE::internal::wcstointeger<int>(L"     12345", 10, 15);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(10));
+  ASSERT_EQ(result.value, 12345);
+
+  result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"     12345", 10, 10);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(10));
+  ASSERT_EQ(result.value, 12345);
+
+  // Five spaces plus the first two digits.
+  result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"     12345", 10, 7);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(7));
+  ASSERT_EQ(result.value, 12);
+
+  // Only the spaces are visible, so no number is found.
+  result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"     12345", 10, 5);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
+  ASSERT_EQ(result.value, 0);
+
+  result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"     12345", 10, 0);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(0));
+  ASSERT_EQ(result.value, 0);
+}
+
+// Explicit '+' and '-' signs parsed under different src_len limits.
+TEST(LlvmLibcStrToIntegerTest, LeadingSign) {
+  using LIBC_NAMESPACE::internal::wcstointeger;
+  const wchar_t *const plus_text = L"+12345";
+  const wchar_t *const minus_text = L"-12345";
+
+  // src_len = 10.
+  auto parsed = wcstointeger<int>(plus_text, 10, 10);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(6));
+  ASSERT_EQ(parsed.value, 12345);
+
+  parsed = wcstointeger<int>(minus_text, 10, 10);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(6));
+  ASSERT_EQ(parsed.value, -12345);
+
+  // src_len = 6.
+  parsed = wcstointeger<int>(plus_text, 10, 6);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(6));
+  ASSERT_EQ(parsed.value, 12345);
+
+  parsed = wcstointeger<int>(minus_text, 10, 6);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(6));
+  ASSERT_EQ(parsed.value, -12345);
+
+  // src_len = 3: the sign and two digits.
+  parsed = wcstointeger<int>(plus_text, 10, 3);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(3));
+  ASSERT_EQ(parsed.value, 12);
+
+  parsed = wcstointeger<int>(minus_text, 10, 3);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(3));
+  ASSERT_EQ(parsed.value, -12);
+
+  // src_len = 1: a sign on its own is expected to parse as nothing.
+  parsed = wcstointeger<int>(plus_text, 10, 1);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(0));
+  ASSERT_EQ(parsed.value, 0);
+
+  parsed = wcstointeger<int>(minus_text, 10, 1);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(0));
+  ASSERT_EQ(parsed.value, 0);
+
+  // src_len = 0.
+  parsed = wcstointeger<int>(plus_text, 10, 0);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(0));
+  ASSERT_EQ(parsed.value, 0);
+
+  parsed = wcstointeger<int>(minus_text, 10, 0);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(0));
+  ASSERT_EQ(parsed.value, 0);
+}
+
+// "0x"-prefixed input with base 0 (inferred base) under different src_len
+// limits.
+TEST(LlvmLibcStrToIntegerTest, Base16PrefixAutoSelect) {
+  using LIBC_NAMESPACE::internal::wcstointeger;
+  const wchar_t *const hex_text = L"0x12345";
+
+  // src_len = 10.
+  auto parsed = wcstointeger<int>(hex_text, 0, 10);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(7));
+  ASSERT_EQ(parsed.value, 0x12345);
+
+  // src_len = 7.
+  parsed = wcstointeger<int>(hex_text, 0, 7);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(7));
+  ASSERT_EQ(parsed.value, 0x12345);
+
+  // src_len = 5: the prefix and three hex digits.
+  parsed = wcstointeger<int>(hex_text, 0, 5);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(5));
+  ASSERT_EQ(parsed.value, 0x123);
+
+  // src_len = 2: only the leading "0" is expected to be consumed.
+  parsed = wcstointeger<int>(hex_text, 0, 2);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(1));
+  ASSERT_EQ(parsed.value, 0);
+
+  // src_len = 0.
+  parsed = wcstointeger<int>(hex_text, 0, 0);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(0));
+  ASSERT_EQ(parsed.value, 0);
+}
+
+// "0x"-prefixed input with an explicit base of 16 under different src_len
+// limits.
+TEST(LlvmLibcStrToIntegerTest, Base16PrefixManualSelect) {
+  using LIBC_NAMESPACE::internal::wcstointeger;
+  const wchar_t *const hex_text = L"0x12345";
+
+  // src_len = 10.
+  auto parsed = wcstointeger<int>(hex_text, 16, 10);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(7));
+  ASSERT_EQ(parsed.value, 0x12345);
+
+  // src_len = 7.
+  parsed = wcstointeger<int>(hex_text, 16, 7);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(7));
+  ASSERT_EQ(parsed.value, 0x12345);
+
+  // src_len = 5: the prefix and three hex digits.
+  parsed = wcstointeger<int>(hex_text, 16, 5);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(5));
+  ASSERT_EQ(parsed.value, 0x123);
+
+  // src_len = 2: only the leading "0" is expected to be consumed.
+  parsed = wcstointeger<int>(hex_text, 16, 2);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(1));
+  ASSERT_EQ(parsed.value, 0);
+
+  // src_len = 0.
+  parsed = wcstointeger<int>(hex_text, 16, 0);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(0));
+  ASSERT_EQ(parsed.value, 0);
+}
+
+// "0"-prefixed input with base 0 (inferred base) under different src_len
+// limits.
+TEST(LlvmLibcStrToIntegerTest, Base8PrefixAutoSelect) {
+  using LIBC_NAMESPACE::internal::wcstointeger;
+  const wchar_t *const octal_text = L"012345";
+
+  // src_len = 10.
+  auto parsed = wcstointeger<int>(octal_text, 0, 10);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(6));
+  ASSERT_EQ(parsed.value, 012345);
+
+  // src_len = 6.
+  parsed = wcstointeger<int>(octal_text, 0, 6);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(6));
+  ASSERT_EQ(parsed.value, 012345);
+
+  // src_len = 4.
+  parsed = wcstointeger<int>(octal_text, 0, 4);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(4));
+  ASSERT_EQ(parsed.value, 0123);
+
+  // src_len = 1: just the leading "0".
+  parsed = wcstointeger<int>(octal_text, 0, 1);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(1));
+  ASSERT_EQ(parsed.value, 0);
+
+  // src_len = 0.
+  parsed = wcstointeger<int>(octal_text, 0, 0);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(0));
+  ASSERT_EQ(parsed.value, 0);
+}
+
+// "0"-prefixed input with an explicit base of 8 under different src_len
+// limits.
+TEST(LlvmLibcStrToIntegerTest, Base8PrefixManualSelect) {
+  using LIBC_NAMESPACE::internal::wcstointeger;
+  const wchar_t *const octal_text = L"012345";
+
+  // src_len = 10.
+  auto parsed = wcstointeger<int>(octal_text, 8, 10);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(6));
+  ASSERT_EQ(parsed.value, 012345);
+
+  // src_len = 6.
+  parsed = wcstointeger<int>(octal_text, 8, 6);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(6));
+  ASSERT_EQ(parsed.value, 012345);
+
+  // src_len = 4.
+  parsed = wcstointeger<int>(octal_text, 8, 4);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(4));
+  ASSERT_EQ(parsed.value, 0123);
+
+  // src_len = 1: just the leading "0".
+  parsed = wcstointeger<int>(octal_text, 8, 1);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(1));
+  ASSERT_EQ(parsed.value, 0);
+
+  // src_len = 0.
+  parsed = wcstointeger<int>(octal_text, 8, 0);
+  EXPECT_FALSE(parsed.has_error());
+  EXPECT_EQ(parsed.parsed_len, ptrdiff_t(0));
+  ASSERT_EQ(parsed.value, 0);
+}
+
+// Whitespace, sign and hex prefix together, with base 0 (inferred base).
+// The input has exactly four leading spaces followed by "-0x123"; the expected
+// lengths below (10, 8, 6) depend on that.
+TEST(LlvmLibcStrToIntegerTest, CombinedTests) {
+  auto result =
+      LIBC_NAMESPACE::internal::wcstointeger<int>(L"    -0x123", 0, 10);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(10));
+  ASSERT_EQ(result.value, -0x123);
+
+  // Four spaces, the sign, the prefix and one hex digit.
+  result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"    -0x123", 0, 8);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(8));
+  ASSERT_EQ(result.value, -0x1);
+
+  // The limit ends right after "0x", so only the "0" is consumed as a number.
+  result = LIBC_NAMESPACE::internal::wcstointeger<int>(L"    -0x123", 0, 7);
+  EXPECT_FALSE(result.has_error());
+  EXPECT_EQ(result.parsed_len, ptrdiff_t(6));
+  ASSERT_EQ(result.value, 0);
+}
|
michaelrj-google
left a comment
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
LGTM with one change
Duplicated str_to_integer.h and modified it to work with widechars.
A future patch will implement the public functions (wcstol, wcstoll, etc) by calling this internal function.