Skip to content

Commit 4700500

Browse files
committed
use Charconv for precise parsing
1 parent 3956bdb commit 4700500

28 files changed

+5548
-10
lines changed

CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,9 @@ endif()
5353

5454

5555
function(boost_json_setup_properties target)
56+
if(MSVC)
57+
target_compile_features(${target} PRIVATE _SCL_SECURE_NO_WARNINGS)
58+
endif()
5659
target_compile_features(${target} PUBLIC cxx_constexpr)
5760
target_compile_definitions(${target} PUBLIC BOOST_JSON_NO_LIB=1)
5861

build/Jamfile

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -69,9 +69,11 @@ project boost/json
6969
<link>shared:<define>BOOST_JSON_DYN_LINK=1
7070
<link>static:<define>BOOST_JSON_STATIC_LINK=1
7171
<define>BOOST_JSON_SOURCE
72+
<toolset>msvc:<define>_SCL_SECURE_NO_WARNINGS
7273
: usage-requirements
7374
<link>shared:<define>BOOST_JSON_DYN_LINK=1
7475
<link>static:<define>BOOST_JSON_STATIC_LINK=1
76+
<toolset>msvc:<define>_SCL_SECURE_NO_WARNINGS
7577
: source-location ../src
7678
;
7779

include/boost/json/basic_parser_impl.hpp

Lines changed: 10 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -15,6 +15,7 @@
1515
#include <boost/json/basic_parser.hpp>
1616
#include <boost/json/error.hpp>
1717
#include <boost/json/detail/buffer.hpp>
18+
#include <boost/json/detail/charconv/from_chars.hpp>
1819
#include <boost/json/detail/sse2.hpp>
1920
#include <cmath>
2021
#include <limits>
@@ -2677,16 +2678,20 @@ parse_number(const char* p,
26772678
if( precise_parsing )
26782679
{
26792680
char const* data = begin;
2681+
std::size_t full_size = size;
26802682
// if we previously suspended or if the current input ends with the
26812683
// number, we need to copy the current part of the number to the
26822684
// temporary buffer
2683-
if(BOOST_JSON_UNLIKELY( num_buf_.size() || !cs ))
2685+
if(BOOST_JSON_UNLIKELY( num_buf_.size() ))
26842686
{
2685-
num_buf_.grow( size + 1 );
2686-
num_buf_.append( begin, size );
2687-
data = num_buf_.append( "\0", 1 );
2687+
data = num_buf_.append( begin, size );
2688+
full_size = num_buf_.size();
26882689
}
2689-
d = std::strtod( data, nullptr );
2690+
auto const err = detail::charconv::from_chars(
2691+
data, data + full_size, d );
2692+
BOOST_ASSERT( err.ec != std::errc::invalid_argument );
2693+
BOOST_ASSERT( err.ptr == data + full_size );
2694+
(void)err;
26902695
}
26912696
else
26922697
d = detail::dec_to_float(
Lines changed: 22 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,22 @@
1+
// Copyright 2023 Matt Borland
2+
// Distributed under the Boost Software License, Version 1.0.
3+
// https://www.boost.org/LICENSE_1_0.txt
4+
5+
#ifndef BOOST_JSON_DETAIL_CHARCONV_CHARS_FORMAT_HPP
6+
#define BOOST_JSON_DETAIL_CHARCONV_CHARS_FORMAT_HPP
7+
8+
namespace boost { namespace json { namespace detail { namespace charconv {
9+
10+
// Floating-point format for primitive numerical conversion
11+
// chars_format is a bitmask type (16.3.3.3.3)
12+
enum class chars_format : unsigned
13+
{
14+
scientific = 1 << 0,
15+
fixed = 1 << 1,
16+
hex = 1 << 2,
17+
general = fixed | scientific
18+
};
19+
20+
}}}} // Namespaces
21+
22+
#endif // BOOST_JSON_DETAIL_CHARCONV_CHARS_FORMAT_HPP
Lines changed: 201 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,201 @@
1+
// Copyright 2020-2023 Daniel Lemire
2+
// Copyright 2023 Matt Borland
3+
// Distributed under the Boost Software License, Version 1.0.
4+
// https://www.boost.org/LICENSE_1_0.txt
5+
6+
#ifndef BOOST_JSON_DETAIL_CHARCONV_DETAIL_COMPUTE_FLOAT64_HPP
7+
#define BOOST_JSON_DETAIL_CHARCONV_DETAIL_COMPUTE_FLOAT64_HPP
8+
9+
#include <boost/json/detail/charconv/detail/config.hpp>
10+
#include <boost/json/detail/charconv/detail/significand_tables.hpp>
11+
#include <boost/json/detail/charconv/detail/emulated128.hpp>
12+
#include <boost/core/bit.hpp>
13+
#include <cstdint>
14+
#include <cfloat>
15+
#include <cstring>
16+
#include <cmath>
17+
18+
namespace boost { namespace json { namespace detail { namespace charconv { namespace detail {
19+
20+
static constexpr double powers_of_ten[] = {
21+
1e0, 1e1, 1e2, 1e3, 1e4, 1e5, 1e6, 1e7, 1e8, 1e9, 1e10, 1e11,
22+
1e12, 1e13, 1e14, 1e15, 1e16, 1e17, 1e18, 1e19, 1e20, 1e21, 1e22
23+
};
24+
25+
// Attempts to compute i * 10^(power) exactly; and if "negative" is true, negate the result.
26+
//
27+
// This function will only work in some cases, when it does not work, success is
28+
// set to false. This should work *most of the time* (like 99% of the time).
29+
// We assume that power is in the [-325, 308] interval.
30+
inline double compute_float64(std::int64_t power, std::uint64_t i, bool negative, bool& success) noexcept
31+
{
32+
static constexpr auto smallest_power = -325;
33+
static constexpr auto largest_power = 308;
34+
35+
// We start with a fast path
36+
// It was described in Clinger WD.
37+
// How to read floating point numbers accurately.
38+
// ACM SIGPLAN Notices. 1990
39+
#if (FLT_EVAL_METHOD != 1) && (FLT_EVAL_METHOD != 0)
40+
if (0 <= power && power <= 22 && i <= UINT64_C(9007199254740991))
41+
#else
42+
if (-22 <= power && power <= 22 && i <= UINT64_C(9007199254740991))
43+
#endif
44+
{
45+
// The general idea is as follows.
46+
// If 0 <= s < 2^53 and if 10^0 <= p <= 10^22 then
47+
// 1) Both s and p can be represented exactly as 64-bit floating-point
48+
// values
49+
// (binary64).
50+
// 2) Because s and p can be represented exactly as floating-point values,
51+
// then s * p
52+
// and s / p will produce correctly rounded values.
53+
54+
auto d = static_cast<double>(i);
55+
56+
if (power < 0)
57+
{
58+
d = d / powers_of_ten[-power];
59+
}
60+
else
61+
{
62+
d = d * powers_of_ten[power];
63+
}
64+
65+
if (negative)
66+
{
67+
d = -d;
68+
}
69+
70+
success = true;
71+
return d;
72+
}
73+
74+
// When 22 < power && power < 22 + 16, we could
75+
// hope for another, secondary fast path. It was
76+
// described by David M. Gay in "Correctly rounded
77+
// binary-decimal and decimal-binary conversions." (1990)
78+
// If you need to compute i * 10^(22 + x) for x < 16,
79+
// first compute i * 10^x, if you know that result is exact
80+
// (e.g., when i * 10^x < 2^53),
81+
// then you can still proceed and do (i * 10^x) * 10^22.
82+
// Is this worth your time?
83+
// You need 22 < power *and* power < 22 + 16 *and* (i * 10^(x-22) < 2^53)
84+
// for this second fast path to work.
85+
// If you have 22 < power *and* power < 22 + 16, and then you
86+
// optimistically compute "i * 10^(x-22)", there is still a chance that you
87+
// have wasted your time if i * 10^(x-22) >= 2^53. It makes the use cases of
88+
// this optimization maybe less common than we would like. Source:
89+
// http://www.exploringbinary.com/fast-path-decimal-to-floating-point-conversion/
90+
// also used in RapidJSON: https://rapidjson.org/strtod_8h_source.html
91+
92+
if (i == 0 || power < smallest_power)
93+
{
94+
return negative ? -0.0 : 0.0;
95+
}
96+
else if (power > largest_power)
97+
{
98+
return negative ? -HUGE_VAL : HUGE_VAL;
99+
}
100+
101+
const std::uint64_t factor_significand = significand_64[power - smallest_power];
102+
const std::int64_t exponent = (((152170 + 65536) * power) >> 16) + 1024 + 63;
103+
int leading_zeros = boost::core::countl_zero(i);
104+
i <<= static_cast<std::uint64_t>(leading_zeros);
105+
106+
uint128 product = umul128(i, factor_significand);
107+
std::uint64_t low = product.low;
108+
std::uint64_t high = product.high;
109+
110+
// We know that upper has at most one leading zero because
111+
// both i and factor_mantissa have a leading one. This means
112+
// that the result is at least as large as ((1<<63)*(1<<63))/(1<<64).
113+
//
114+
// As long as the first 9 bits of "upper" are not "1", then we
115+
// know that we have an exact computed value for the leading
116+
// 55 bits because any imprecision would play out as a +1, in the worst case.
117+
// Having 55 bits is necessary because we need 53 bits for the mantissa,
118+
// but we have to have one rounding bit and, we can waste a bit if the most
119+
// significant bit of the product is zero.
120+
//
121+
// We expect this next branch to be rarely taken (say 1% of the time).
122+
// When (upper & 0x1FF) == 0x1FF, it can be common for
123+
// lower + i < lower to be true (proba. much higher than 1%).
124+
if (BOOST_UNLIKELY((high & 0x1FF) == 0x1FF) && (low + i < low))
125+
{
126+
const std::uint64_t factor_significand_low = significand_128[power - smallest_power];
127+
product = umul128(i, factor_significand_low);
128+
//const std::uint64_t product_low = product.low;
129+
const std::uint64_t product_middle2 = product.high;
130+
const std::uint64_t product_middle1 = low;
131+
std::uint64_t product_high = high;
132+
const std::uint64_t product_middle = product_middle1 + product_middle2;
133+
134+
if (product_middle < product_middle1)
135+
{
136+
product_high++;
137+
}
138+
139+
// Commented out because possibly unneeded
140+
// See: https://arxiv.org/pdf/2212.06644.pdf
141+
/*
142+
// we want to check whether mantissa *i + i would affect our result
143+
// This does happen, e.g. with 7.3177701707893310e+15
144+
if (((product_middle + 1 == 0) && ((product_high & 0x1FF) == 0x1FF) && (product_low + i < product_low)))
145+
{
146+
success = false;
147+
return 0;
148+
}
149+
*/
150+
151+
low = product_middle;
152+
high = product_high;
153+
}
154+
155+
// The final significand should be 53 bits with a leading 1
156+
// We shift it so that it occupies 54 bits with a leading 1
157+
const std::uint64_t upper_bit = high >> 63;
158+
std::uint64_t significand = high >> (upper_bit + 9);
159+
leading_zeros += static_cast<int>(1 ^ upper_bit);
160+
161+
// If we have lots of trailing zeros we may fall between two values
162+
if (BOOST_UNLIKELY((low == 0) && ((high & 0x1FF) == 0) && ((significand & 3) == 1)))
163+
{
164+
// if significand & 1 == 1 we might need to round up
165+
success = false;
166+
return 0;
167+
}
168+
169+
significand += significand & 1;
170+
significand >>= 1;
171+
172+
// Here the significand < (1<<53), unless there is an overflow
173+
if (significand >= (UINT64_C(1) << 53))
174+
{
175+
significand = (UINT64_C(1) << 52);
176+
leading_zeros--;
177+
}
178+
179+
significand &= ~(UINT64_C(1) << 52);
180+
const std::uint64_t real_exponent = exponent - leading_zeros;
181+
182+
// We have to check that real_exponent is in range, otherwise fail
183+
if (BOOST_UNLIKELY((real_exponent < 1) || (real_exponent > 2046)))
184+
{
185+
success = false;
186+
return 0;
187+
}
188+
189+
significand |= real_exponent << 52;
190+
significand |= ((static_cast<std::uint64_t>(negative) << 63));
191+
192+
double d;
193+
std::memcpy(&d, &significand, sizeof(d));
194+
195+
success = true;
196+
return d;
197+
}
198+
199+
}}}}} // Namespaces
200+
201+
#endif // BOOST_JSON_DETAIL_CHARCONV_DETAIL_COMPUTE_FLOAT64_HPP
Lines changed: 59 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,59 @@
1+
// Copyright 2023 Matt Borland
2+
// Distributed under the Boost Software License, Version 1.0.
3+
// https://www.boost.org/LICENSE_1_0.txt
4+
5+
#ifndef BOOST_JSON_DETAIL_CHARCONV_DETAIL_CONFIG_HPP
6+
#define BOOST_JSON_DETAIL_CHARCONV_DETAIL_CONFIG_HPP
7+
8+
#include <boost/config.hpp>
9+
#include <type_traits>
10+
#include <cfloat>
11+
12+
// Use 128 bit integers and supress warnings for using extensions
13+
#if defined(BOOST_HAS_INT128)
14+
# define BOOST_JSON_INT128_MAX (boost::int128_type)(((boost::uint128_type) 1 << 127) - 1)
15+
# define BOOST_JSON_UINT128_MAX ((2 * (boost::uint128_type) BOOST_JSON_INT128_MAX) + 1)
16+
#endif
17+
18+
#ifndef BOOST_NO_CXX14_CONSTEXPR
19+
# define BOOST_JSON_CXX14_CONSTEXPR BOOST_CXX14_CONSTEXPR
20+
# define BOOST_JSON_CXX14_CONSTEXPR_NO_INLINE BOOST_CXX14_CONSTEXPR
21+
#else
22+
# define BOOST_JSON_CXX14_CONSTEXPR inline
23+
# define BOOST_JSON_CXX14_CONSTEXPR_NO_INLINE
24+
#endif
25+
26+
#if defined(__GNUC__) && __GNUC__ == 5
27+
# define BOOST_JSON_GCC5_CONSTEXPR inline
28+
#else
29+
# define BOOST_JSON_GCC5_CONSTEXPR BOOST_JSON_CXX14_CONSTEXPR
30+
#endif
31+
32+
// Inclue intrinsics if available
33+
#if defined(BOOST_MSVC)
34+
# include <intrin.h>
35+
# if defined(_WIN64)
36+
# define BOOST_JSON_HAS_MSVC_64BIT_INTRINSICS
37+
# else
38+
# define BOOST_JSON_HAS_MSVC_32BIT_INTRINSICS
39+
# endif
40+
#endif
41+
42+
// Suppress additional buffer overrun check.
43+
// I have no idea why MSVC thinks some functions here are vulnerable to the buffer overrun
44+
// attacks. No, they aren't.
45+
#if defined(__GNUC__) || defined(__clang__)
46+
#define BOOST_JSON_SAFEBUFFERS
47+
#elif defined(_MSC_VER)
48+
#define BOOST_JSON_SAFEBUFFERS __declspec(safebuffers)
49+
#else
50+
#define BOOST_JSON_SAFEBUFFERS
51+
#endif
52+
53+
#if defined(__has_builtin)
54+
#define BOOST_JSON_HAS_BUILTIN(x) __has_builtin(x)
55+
#else
56+
#define BOOST_JSON_HAS_BUILTIN(x) false
57+
#endif
58+
59+
#endif // BOOST_JSON_DETAIL_CHARCONV_DETAIL_CONFIG_HPP

0 commit comments

Comments
 (0)