[libcxx][libc] Hand in Hand PoC with from_chars

michaelrj-google · michaelrj-google · commit 08de46ef7ed4 · 2024-07-30T10:46:40.000-07:00
DO NOT MERGE, PROOF OF CONCEPT ONLY.

This patch aims to demonstrate the utility of sharing code between libc
and libc++ by using the libc float conversion code in the libc++
function from_chars. This patch adds from_chars for float and double
(long double is possible but was causing errors so was skipped here), as
well as a test to demonstrate that it works.

This is very much just a proof of concept, not intended to be committed
as-is. The from_chars code written is copied from the libc parsing code
and is not functionally complete, nor does it follow the correct coding
style.
diff --git a/libc/shared/str_to_float.h b/libc/shared/str_to_float.h
@@ -0,0 +1,29 @@
+//===-- String to float conversion utils ------------------------*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIBC_SHARED_STR_TO_FLOAT_H
+#define LLVM_LIBC_SHARED_STR_TO_FLOAT_H
+
+#include "src/__support/str_to_float.h"
+
+namespace LIBC_NAMESPACE::shared {
+
+// WARNING: This is a proof of concept. In future the interface point for libcxx
+// won't be using libc internal classes.
+
+template <class T>
+inline internal::FloatConvertReturn<T> decimal_exp_to_float(
+    internal::ExpandedFloat<T> init_num, bool truncated,
+    internal::RoundDirection round, const char *__restrict num_start,
+    const size_t num_len = cpp::numeric_limits<size_t>::max()) {
+  return internal::decimal_exp_to_float(init_num, truncated, round, num_start,
+                                        num_len);
+}
+} // namespace LIBC_NAMESPACE::shared
+
+#endif // LLVM_LIBC_SHARED_STR_TO_FLOAT_H
diff --git a/libcxx/include/CMakeLists.txt b/libcxx/include/CMakeLists.txt
@@ -235,6 +235,7 @@ set(files
   __bit/rotate.h
   __bit_reference
   __charconv/chars_format.h
+  __charconv/from_chars_floating_point.h
   __charconv/from_chars_integral.h
   __charconv/from_chars_result.h
   __charconv/tables.h
diff --git a/libcxx/include/__charconv/from_chars_floating_point.h b/libcxx/include/__charconv/from_chars_floating_point.h
@@ -0,0 +1,68 @@
+// -*- C++ -*-
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP___CHARCONV_FROM_CHARS_FLOATING_POINT_H
+#define _LIBCPP___CHARCONV_FROM_CHARS_FLOATING_POINT_H
+
+#include <__assert>
+#include <__charconv/chars_format.h>
+#include <__charconv/from_chars_result.h>
+#include <__charconv/traits.h>
+#include <__config>
+#include <__system_error/errc.h>
+#include <__type_traits/enable_if.h>
+#include <__type_traits/integral_constant.h>
+#include <__type_traits/is_floating_point.h>
+
+#if !defined(_LIBCPP_HAS_NO_PRAGMA_SYSTEM_HEADER)
+#  pragma GCC system_header
+#endif
+
+_LIBCPP_PUSH_MACROS
+#include <__undef_macros>
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+#if _LIBCPP_STD_VER >= 17
+
+from_chars_result from_chars_floating_point(
+    const char* __first, const char* __last, float& value, chars_format fmt = chars_format::general);
+
+from_chars_result from_chars_floating_point(
+    const char* __first, const char* __last, double& value, chars_format fmt = chars_format::general);
+
+// template <typename _Tp, __enable_if_t<is_floating_point<_Tp>::value, int> = 0>
+// inline from_chars_result
+// from_chars(const char* __first, const char* __last, _Tp& __value, chars_format fmt = chars_format::general) {
+//   return std::from_chars_floating_point(__first, __last, __value, fmt);
+// }
+
+// inline from_chars_result
+// from_chars(const char* __first, const char* __last, float& __value, chars_format fmt = chars_format::general) {
+//   return std::from_chars_floating_point(__first, __last, __value, fmt);
+// }
+
+// inline from_chars_result
+// from_chars(const char* __first, const char* __last, double& __value, chars_format fmt = chars_format::general) {
+//   return std::from_chars_floating_point(__first, __last, __value, fmt);
+// }
+
+from_chars_result
+from_chars(const char* __first, const char* __last, float& __value, chars_format fmt = chars_format::general);
+
+from_chars_result
+from_chars(const char* __first, const char* __last, double& __value, chars_format fmt = chars_format::general);
+
+#endif // _LIBCPP_STD_VER >= 17
+
+_LIBCPP_END_NAMESPACE_STD
+
+_LIBCPP_POP_MACROS
+
+#endif // _LIBCPP___CHARCONV_FROM_CHARS_FLOATING_POINT_H
diff --git a/libcxx/include/charconv b/libcxx/include/charconv
@@ -73,6 +73,7 @@ namespace std {
 
 #if _LIBCPP_STD_VER >= 17
 #  include <__charconv/chars_format.h>
+#  include <__charconv/from_chars_floating_point.h>
 #  include <__charconv/from_chars_integral.h>
 #  include <__charconv/from_chars_result.h>
 #  include <__charconv/tables.h>
diff --git a/libcxx/src/CMakeLists.txt b/libcxx/src/CMakeLists.txt
@@ -31,6 +31,7 @@ set(LIBCXX_SOURCES
   include/ryu/f2s.h
   include/ryu/ryu.h
   include/to_chars_floating_point.h
+  include/from_chars_floating_point.h
   legacy_pointer_safety.cpp
   memory.cpp
   memory_resource.cpp
@@ -179,7 +180,7 @@ split_list(LIBCXX_LINK_FLAGS)
 # Build the shared library.
 if (LIBCXX_ENABLE_SHARED)
   add_library(cxx_shared SHARED ${exclude_from_all} ${LIBCXX_SOURCES} ${LIBCXX_HEADERS})
-  target_include_directories(cxx_shared PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
+  target_include_directories(cxx_shared PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/../../libc) #TODO: Do this properly
   target_link_libraries(cxx_shared PUBLIC cxx-headers libcxx-libc-shared
                                    PRIVATE ${LIBCXX_LIBRARIES})
   set_target_properties(cxx_shared
@@ -272,7 +273,7 @@ set(CMAKE_STATIC_LIBRARY_PREFIX "lib")
 # Build the static library.
 if (LIBCXX_ENABLE_STATIC)
   add_library(cxx_static STATIC ${exclude_from_all} ${LIBCXX_SOURCES} ${LIBCXX_HEADERS})
-  target_include_directories(cxx_static PRIVATE ${CMAKE_CURRENT_SOURCE_DIR})
+  target_include_directories(cxx_static PRIVATE ${CMAKE_CURRENT_SOURCE_DIR} ${CMAKE_CURRENT_SOURCE_DIR}/../../libc) #TODO: Do this properly
   target_link_libraries(cxx_static PUBLIC cxx-headers libcxx-libc-static
                                    PRIVATE ${LIBCXX_LIBRARIES}
                                    PRIVATE libcxx-abi-static)
diff --git a/libcxx/src/charconv.cpp b/libcxx/src/charconv.cpp
@@ -9,6 +9,7 @@
 #include <charconv>
 #include <string.h>
 
+#include "include/from_chars_floating_point.h"
 #include "include/to_chars_floating_point.h"
 
 _LIBCPP_BEGIN_NAMESPACE_STD
@@ -74,4 +75,20 @@ to_chars_result to_chars(char* __first, char* __last, long double __value, chars
       __first, __last, static_cast<double>(__value), __fmt, __precision);
 }
 
+from_chars_result from_chars_floating_point(const char* __first, const char* __last, float& value, chars_format fmt) {
+  return from_chars_floating_point<float>(__first, __last, value, fmt);
+}
+
+from_chars_result from_chars_floating_point(const char* __first, const char* __last, double& value, chars_format fmt) {
+  return from_chars_floating_point<double>(__first, __last, value, fmt);
+}
+
+from_chars_result from_chars(const char* __first, const char* __last, float& __value, chars_format fmt) {
+  return std::from_chars_floating_point(__first, __last, __value, fmt);
+}
+
+from_chars_result from_chars(const char* __first, const char* __last, double& __value, chars_format fmt) {
+  return std::from_chars_floating_point(__first, __last, __value, fmt);
+}
+
 _LIBCPP_END_NAMESPACE_STD
diff --git a/libcxx/src/include/from_chars_floating_point.h b/libcxx/src/include/from_chars_floating_point.h
@@ -0,0 +1,135 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
+#define _LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
+
+// NEVER DO THIS FOR REAL, this is just for demonstration purposes.
+#define LIBC_NAMESPACE libc_namespace_in_libcxx
+
+// This header is in the shared LLVM-libc header library.
+#include "shared/str_to_float.h"
+
+#include <__assert>
+#include <__config>
+#include <charconv>
+#include <limits>
+#include <type_traits>
+
+// Included for the _Floating_type_traits class
+#include "to_chars_floating_point.h"
+
+_LIBCPP_BEGIN_NAMESPACE_STD
+
+template <typename _Tp, __enable_if_t<std::is_floating_point<_Tp>::value, int> = 0>
+from_chars_result from_chars_floating_point(const char* __first, const char* __last, _Tp& value, chars_format fmt) {
+  using _Traits    = _Floating_type_traits<_Tp>;
+  using _Uint_type = typename _Traits::_Uint_type;
+  ptrdiff_t length = __last - __first;
+  _LIBCPP_ASSERT_INTERNAL(length > 0, "");
+
+  // hacky parsing code as example. Not intended for actual use. I'm just going to handle the base 10
+  // chars_format::general case. Also, no sign, inf, or nan handling.
+  _LIBCPP_ASSERT_INTERNAL(fmt == std::chars_format::general, "");
+
+  const char* src = __first; // rename to match the libc code copied for this section.
+
+  _Uint_type mantissa            = 0;
+  int exponent                   = 0;
+  bool truncated                 = false;
+  bool seen_digit                = false;
+  bool after_decimal             = false;
+  size_t index                   = 0;
+  const size_t BASE              = 10;
+  constexpr char EXPONENT_MARKER = 'e';
+  constexpr char DECIMAL_POINT   = '.';
+
+  // The loop fills the mantissa with as many digits as it can hold
+  const _Uint_type bitstype_max_div_by_base = numeric_limits<_Uint_type>::max() / BASE;
+  while (index < length) {
+    if (LIBC_NAMESPACE::internal::isdigit(src[index])) {
+      uint32_t digit = src[index] - '0';
+      seen_digit     = true;
+
+      if (mantissa < bitstype_max_div_by_base) {
+        mantissa = (mantissa * BASE) + digit;
+        if (after_decimal) {
+          --exponent;
+        }
+      } else {
+        if (digit > 0)
+          truncated = true;
+        if (!after_decimal)
+          ++exponent;
+      }
+
+      ++index;
+      continue;
+    }
+    if (src[index] == DECIMAL_POINT) {
+      if (after_decimal) {
+        break; // this means that src[index] points to a second decimal point, ending the number.
+      }
+      after_decimal = true;
+      ++index;
+      continue;
+    }
+    // The character is neither a digit nor a decimal point.
+    break;
+  }
+
+  if (!seen_digit)
+    return {src + index, {}};
+
+  if (index < length && LIBC_NAMESPACE::internal::tolower(src[index]) == EXPONENT_MARKER) {
+    bool has_sign = false;
+    if (index + 1 < length && (src[index + 1] == '+' || src[index + 1] == '-')) {
+      has_sign = true;
+    }
+    if (index + 1 + static_cast<size_t>(has_sign) < length &&
+        LIBC_NAMESPACE::internal::isdigit(src[index + 1 + static_cast<size_t>(has_sign)])) {
+      ++index;
+      auto result = LIBC_NAMESPACE::internal::strtointeger<int32_t>(src + index, 10);
+      // if (result.has_error())
+      //   output.error = result.error;
+      int32_t add_to_exponent = result.value;
+      index += result.parsed_len;
+
+      // Here we do this operation as int64 to avoid overflow.
+      int64_t temp_exponent = static_cast<int64_t>(exponent) + static_cast<int64_t>(add_to_exponent);
+
+      // If the result is in the valid range, then we use it. The valid range is
+      // also within the int32 range, so this prevents overflow issues.
+      if (temp_exponent > LIBC_NAMESPACE::fputil::FPBits<_Tp>::MAX_BIASED_EXPONENT) {
+        exponent = LIBC_NAMESPACE::fputil::FPBits<_Tp>::MAX_BIASED_EXPONENT;
+      } else if (temp_exponent < -LIBC_NAMESPACE::fputil::FPBits<_Tp>::MAX_BIASED_EXPONENT) {
+        exponent = -LIBC_NAMESPACE::fputil::FPBits<_Tp>::MAX_BIASED_EXPONENT;
+      } else {
+        exponent = static_cast<int32_t>(temp_exponent);
+      }
+    }
+  }
+
+  LIBC_NAMESPACE::internal::ExpandedFloat<_Tp> expanded_float = {0, 0};
+  if (mantissa != 0) {
+    auto temp = LIBC_NAMESPACE::shared::decimal_exp_to_float<_Tp>(
+        {mantissa, exponent}, truncated, LIBC_NAMESPACE::internal::RoundDirection::Nearest, src, length);
+    expanded_float = temp.num;
+    // Note: there's also an error value in temp.error. I'm not doing that error handling right now though.
+  }
+
+  auto result = LIBC_NAMESPACE::fputil::FPBits<_Tp>();
+  result.set_mantissa(expanded_float.mantissa);
+  result.set_biased_exponent(expanded_float.exponent);
+  value = result.get_val();
+  return {src + index, {}};
+}
+
+_LIBCPP_END_NAMESPACE_STD
+
+#endif //_LIBCPP_SRC_INCLUDE_FROM_CHARS_FLOATING_POINT_H
diff --git a/libcxx/test/std/utilities/charconv/charconv.from.chars/float.pass.cpp b/libcxx/test/std/utilities/charconv/charconv.from.chars/float.pass.cpp
@@ -0,0 +1,70 @@
+//===----------------------------------------------------------------------===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+// UNSUPPORTED: c++03, c++11, c++14
+
+// <charconv>
+
+// constexpr from_chars_result from_chars(const char* first, const char* last,
+//                                        Float& value, chars_format fmt = chars_format::general)
+
+#include <charconv>
+#include "test_macros.h"
+#include "charconv_test_helpers.h"
+
+template <typename T>
+struct test_basics {
+  TEST_CONSTEXPR_CXX23 void operator()() {
+    std::from_chars_result r;
+    T x;
+
+    {
+      char s[] = "001x";
+
+      // the expected form of the subject sequence is a nonempty sequence of
+      // decimal digits optionally containing a decimal-point character, then
+      // an optional exponent part as defined in 6.4.4.3, excluding any digit
+      // separators (6.4.4.2); (C23 7.24.1.5)
+      r = std::from_chars(s, s + sizeof(s), x);
+      assert(r.ec == std::errc{});
+      assert(r.ptr == s + 3);
+      assert(x == T(1.0));
+    }
+
+    {
+      char s[] = "1.5e10";
+
+      r = std::from_chars(s, s + sizeof(s), x);
+      assert(r.ec == std::errc{});
+      assert(r.ptr == s + 6);
+      assert(x == T(1.5e10));
+    }
+
+    {
+      char s[] = "20040229";
+
+      // This number is halfway between two float values.
+      r = std::from_chars(s, s + sizeof(s), x);
+      assert(r.ec == std::errc{});
+      assert(r.ptr == s + 8);
+      assert(x == T(20040229));
+    }
+  }
+};
+
+TEST_CONSTEXPR_CXX23 bool test() {
+  run<test_basics>(all_floats);
+
+  return true;
+}
+
+int main(int, char**) {
+  test();
+
+  return 0;
+}
diff --git a/libcxx/test/support/charconv_test_helpers.h b/libcxx/test/support/charconv_test_helpers.h
@@ -317,6 +317,8 @@ auto all_unsigned = type_list<
     >();
 auto integrals = concat(all_signed, all_unsigned);
 
+auto all_floats = type_list< float, double >(); //TODO: Add long double
+
 template <template <typename> class Fn, typename... Ts>
 TEST_CONSTEXPR_CXX23 void
 run(type_list<Ts...>)