Skip to content

Commit 0e7a10a

Browse files
authored
Merge pull request #252 from LeszekSwirski/parse-error
Record parse failure reason and location
2 parents 3838b00 + b6ce2c4 commit 0e7a10a

File tree

2 files changed

+97
-15
lines changed

2 files changed

+97
-15
lines changed

include/fast_float/ascii_number.h

Lines changed: 50 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,25 @@ void loop_parse_if_eight_digits(const char*& p, const char* const pend, uint64_t
234234
}
235235
}
236236

237+
enum class parse_error {
238+
no_error,
239+
// [JSON-only] The minus sign must be followed by an integer.
240+
missing_integer_after_sign,
241+
// A sign must be followed by an integer or dot.
242+
missing_integer_or_dot_after_sign,
243+
// [JSON-only] The integer part must not have leading zeros.
244+
leading_zeros_in_integer_part,
245+
// [JSON-only] The integer part must have at least one digit.
246+
no_digits_in_integer_part,
247+
// [JSON-only] If there is a decimal point, there must be digits in the
248+
// fractional part.
249+
no_digits_in_fractional_part,
250+
// The mantissa must have at least one digit.
251+
no_digits_in_mantissa,
252+
// Scientific notation requires an exponential part.
253+
missing_exponential_part,
254+
};
255+
237256
template <typename UC>
238257
struct parsed_number_string_t {
239258
int64_t exponent{0};
@@ -245,11 +264,22 @@ struct parsed_number_string_t {
245264
// contains the range of the significant digits
246265
span<const UC> integer{}; // non-nullable
247266
span<const UC> fraction{}; // nullable
267+
parse_error error{parse_error::no_error};
248268
};
249269

250270
using byte_span = span<const char>;
251271
using parsed_number_string = parsed_number_string_t<char>;
252272

273+
template <typename UC>
274+
fastfloat_really_inline FASTFLOAT_CONSTEXPR20 parsed_number_string_t<UC>
275+
report_parse_error(UC const* p, parse_error error) {
276+
parsed_number_string_t<UC> answer;
277+
answer.valid = false;
278+
answer.lastmatch = p;
279+
answer.error = error;
280+
return answer;
281+
}
282+
253283
// Assuming that you use no more than 19 digits, this will
254284
// parse an ASCII string.
255285
template <typename UC>
@@ -269,15 +299,16 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
269299
#endif
270300
++p;
271301
if (p == pend) {
272-
return answer;
302+
return report_parse_error<UC>(
303+
p, parse_error::missing_integer_or_dot_after_sign);
273304
}
274305
if (fmt & FASTFLOAT_JSONFMT) {
275306
if (!is_integer(*p)) { // a sign must be followed by an integer
276-
return answer;
307+
return report_parse_error<UC>(p, parse_error::missing_integer_after_sign);
277308
}
278309
} else {
279310
if (!is_integer(*p) && (*p != decimal_point)) { // a sign must be followed by an integer or the dot
280-
return answer;
311+
return report_parse_error<UC>(p, parse_error::missing_integer_or_dot_after_sign);
281312
}
282313
}
283314
}
@@ -297,8 +328,12 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
297328
answer.integer = span<const UC>(start_digits, size_t(digit_count));
298329
if (fmt & FASTFLOAT_JSONFMT) {
299330
// at least 1 digit in integer part, without leading zeros
300-
if (digit_count == 0 || (start_digits[0] == UC('0') && digit_count > 1)) {
301-
return answer;
331+
if (digit_count == 0) {
332+
return report_parse_error<UC>(p, parse_error::no_digits_in_integer_part);
333+
}
334+
if ((start_digits[0] == UC('0') && digit_count > 1)) {
335+
return report_parse_error<UC>(start_digits,
336+
parse_error::leading_zeros_in_integer_part);
302337
}
303338
}
304339

@@ -323,11 +358,10 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
323358
if (fmt & FASTFLOAT_JSONFMT) {
324359
// at least 1 digit in fractional part
325360
if (has_decimal_point && exponent == 0) {
326-
return answer;
361+
return report_parse_error<UC>(p, parse_error::no_digits_in_fractional_part);
327362
}
328-
}
329-
else if (digit_count == 0) { // we must have encountered at least one integer!
330-
return answer;
363+
} else if (digit_count == 0) { // we must have encountered at least one integer!
364+
return report_parse_error<UC>(p, parse_error::no_digits_in_mantissa);
331365
}
332366
int64_t exp_number = 0; // explicit exponential part
333367
if ( ((fmt & chars_format::scientific) &&
@@ -350,8 +384,10 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
350384
}
351385
if ((p == pend) || !is_integer(*p)) {
352386
if(!(fmt & chars_format::fixed)) {
353-
// We are in error.
354-
return answer;
387+
// The exponential part is invalid for scientific notation, so it must
388+
// be a trailing token for fixed notation. However, fixed notation is
389+
// disabled, so report a scientific notation error.
390+
return report_parse_error<UC>(p, parse_error::missing_exponential_part);
355391
}
356392
// Otherwise, we will be ignoring the 'e'.
357393
p = location_of_e;
@@ -368,7 +404,9 @@ parsed_number_string_t<UC> parse_number_string(UC const *p, UC const * pend, par
368404
}
369405
} else {
370406
// If it is scientific and not fixed, we have to bail out.
371-
if((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) { return answer; }
407+
if ((fmt & chars_format::scientific) && !(fmt & chars_format::fixed)) {
408+
return report_parse_error<UC>(p, parse_error::missing_exponential_part);
409+
}
372410
}
373411
answer.lastmatch = p;
374412
answer.valid = true;

tests/json_fmt.cpp

Lines changed: 47 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -45,6 +45,15 @@ struct AcceptedValue {
4545
ExpectedResult expected;
4646
};
4747

48+
struct RejectReason {
49+
fast_float::parse_error error;
50+
intptr_t location_offset;
51+
};
52+
struct RejectedValue {
53+
std::string input;
54+
RejectReason reason;
55+
};
56+
4857
int main() {
4958
const std::vector<AcceptedValue> accept{
5059
{"-0.2", {-0.2, ""}},
@@ -55,8 +64,18 @@ int main() {
5564
{"1e", {1., "e"}},
5665
{"1e+", {1., "e+"}},
5766
{"inf", {std::numeric_limits<double>::infinity(), ""}}};
58-
const std::vector<std::string> reject{"-.2", "00.02", "0.e+1", "00.e+1",
59-
".25", "+0.25", "inf", "nan(snan)"};
67+
const std::vector<RejectedValue> reject{
68+
{"-.2", {fast_float::parse_error::missing_integer_after_sign, 1}},
69+
{"00.02", {fast_float::parse_error::leading_zeros_in_integer_part, 0}},
70+
{"0.e+1", {fast_float::parse_error::no_digits_in_fractional_part, 2}},
71+
{"00.e+1", {fast_float::parse_error::leading_zeros_in_integer_part, 0}},
72+
{".25", {fast_float::parse_error::no_digits_in_integer_part, 0}},
73+
// The following cases already start as invalid JSON, so they are
74+
// handled as trailing junk and the error is for not having digits in the
75+
// empty string before the invalid token.
76+
{"+0.25", {fast_float::parse_error::no_digits_in_integer_part, 0}},
77+
{"inf", {fast_float::parse_error::no_digits_in_integer_part, 0}},
78+
{"nan(snan)", {fast_float::parse_error::no_digits_in_integer_part, 0}}};
6079

6180
for (std::size_t i = 0; i < accept.size(); ++i)
6281
{
@@ -80,7 +99,7 @@ int main() {
8099

81100
for (std::size_t i = 0; i < reject.size(); ++i)
82101
{
83-
const auto& s = reject[i];
102+
const auto& s = reject[i].input;
84103
double result;
85104
auto answer = fast_float::from_chars(s.data(), s.data() + s.size(), result, fast_float::chars_format::json);
86105
if (answer.ec == std::errc()) {
@@ -89,6 +108,31 @@ int main() {
89108
}
90109
}
91110

111+
for (std::size_t i = 0; i < reject.size(); ++i)
112+
{
113+
const auto& f = reject[i].input;
114+
const auto& expected_reason = reject[i].reason;
115+
auto answer = fast_float::parse_number_string(
116+
f.data(), f.data() + f.size(),
117+
fast_float::parse_options(fast_float::chars_format::json));
118+
if (answer.valid) {
119+
std::cerr << "json parse accepted invalid json " << f << std::endl;
120+
return EXIT_FAILURE;
121+
}
122+
if (answer.error != expected_reason.error) {
123+
std::cerr << "json parse failure had invalid error reason " << f
124+
<< std::endl;
125+
return EXIT_FAILURE;
126+
}
127+
intptr_t error_location = answer.lastmatch - f.data();
128+
if (error_location != expected_reason.location_offset) {
129+
std::cerr << "json parse failure had invalid error location " << f
130+
<< " (expected " << expected_reason.location_offset << " got "
131+
<< error_location << ")" << std::endl;
132+
return EXIT_FAILURE;
133+
}
134+
}
135+
92136
if(main_readme() != EXIT_SUCCESS) { return EXIT_FAILURE; }
93137
if(main_readme2() != EXIT_SUCCESS) { return EXIT_FAILURE; }
94138

0 commit comments

Comments
 (0)