Switch float parsing from fast_double_parser to fast_float.

Reviewer notes (git apply skips everything before the first "diff --git"):
- _decode_double() again rejects literals that were only partially parsed.
- has_invalid_exponent() recognises 'E' as well as 'e', is declared inline,
  and includes the standard headers it uses.
- The blob ids on the "index" lines of src/_decoder.pyx and
  src/_fast_float_compat.hpp predate these adjustments.

diff --git a/.gitmodules b/.gitmodules
index eb19f3c..2e5c5fc 100644
--- a/.gitmodules
+++ b/.gitmodules
@@ -4,6 +4,6 @@
 [submodule "third-party/JSONTestSuite"]
 	path = third-party/JSONTestSuite
 	url = https://github.com/nst/JSONTestSuite.git
-[submodule "third-party/fast_double_parser"]
-	path = third-party/fast_double_parser
-	url = https://github.com/lemire/fast_double_parser.git
+[submodule "third-party/fast_float"]
+	path = third-party/fast_float
+	url = https://github.com/fastfloat/fast_float
diff --git a/CHANGELOG.md b/CHANGELOG.md
index 04e182b..2be7aaf 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,7 @@
 # Changelog
 
+* Switch from ``fast_double_parser`` to ``fast_float``
+
 **1.6.9 (2025-05-12)**
 
 * Remove unused import to fix installation on Termux (by veka0, [#105](https://github.com/Kijewski/pyjson5/pull/105))
diff --git a/MANIFEST.in b/MANIFEST.in
index 494767a..0fbfc40 100644
--- a/MANIFEST.in
+++ b/MANIFEST.in
@@ -4,10 +4,10 @@ include Makefile
 include pyjson5.cpp
 include pyjson5.pyx
 include pyproject.toml
-include third-party/fast_double_parser/include/fast_double_parser.h
 include requirements*.txt
 recursive-include docs **
 recursive-include scripts **
 recursive-include src **
 recursive-include third-party/json5-tests **
 recursive-include third-party/JSONTestSuite/test_parsing **
+recursive-include third-party/fast_float/include **
diff --git a/src/_decoder.pyx b/src/_decoder.pyx
index 6038863..af0d114 100644
--- a/src/_decoder.pyx
+++ b/src/_decoder.pyx
@@ -243,14 +243,21 @@ cdef object _decode_string(ReaderRef reader, int32_t *c_in_out):
 
 cdef object _decode_double(StackHeapString[char] &buf, Py_ssize_t start):
     cdef double d0
-    cdef const char *end_of_double
+    cdef from_chars_result result
+
+    # Reject a malformed exponent ("1e", "1e+", "1e5.2") before the buffer is
+    # handed to fast_float.
+    if has_invalid_exponent(buf.data()):
+        _raise_unclosed('NumericLiteral', start)
 
     d0 = 0.0  # silence warning
-    end_of_double = parse_number(buf.data(), &d0)
-    if end_of_double != NULL and end_of_double[0] == b'\0':
-        return PyFloat_FromDouble(d0)
+    result = from_chars(buf.data(), buf.data() + buf.size(), d0, fmt_json_or_infnan)
+    # from_chars() also succeeds when only a prefix is a valid number, so
+    # additionally require that parsing stopped at the terminating NUL.
+    if result.ec or result.ptr[0] != b'\0':
+        _raise_unclosed('NumericLiteral', start)
 
-    _raise_unclosed('NumericLiteral', start)
+    return PyFloat_FromDouble(d0)
 
 
 cdef object _decode_number_leading_zero(ReaderRef reader, StackHeapString[char] &buf,
diff --git a/src/_fast_float_compat.hpp b/src/_fast_float_compat.hpp
new file mode 100644
index 0000000..203a2c6
--- /dev/null
+++ b/src/_fast_float_compat.hpp
@@ -0,0 +1,56 @@
+#pragma once
+#include <cctype>
+#include <string>
+
+#include "../third-party/fast_float/include/fast_float/float_common.h"
+
+/* This header file is a shim to handle 'enum class' in Cython, which doesn't
+ * namespace properly. */
+namespace chars_format {
+    using chars_format = fast_float::chars_format;
+
+    constexpr chars_format fmt_json_or_infnan = fast_float::chars_format::json_or_infnan;
+}
+
+namespace check_floats {
+    /*
+     * Check for invalid exponents on strings which represent floats.
+     * Does not guarantee that the float is valid -- only that *if* it has an
+     * exponent, the exponent is valid.
+     *
+     * The exponent marker may be written as either 'e' or 'E'.
+     *
+     * Checks in this order:
+     *
+     *  - no exponent                    OK   (false)
+     *
+     *  - nothing after exponent         FAIL (true)
+     *
+     *  - a sign (+/-) at the end        FAIL (true)
+     *
+     *  - anything after exponent        FAIL (true)
+     *    and optional sign
+     *    which is not a digit
+     *
+     *  - nothing failed?                OK   (false)
+     */
+    inline bool has_invalid_exponent(const std::string &s) {
+        auto pos = s.find_first_of("eE");
+        if (pos == std::string::npos) return false;
+
+        if (++pos >= s.size()) return true;
+
+        if (s[pos] == '+' || s[pos] == '-') {
+            if (++pos >= s.size()) return true;
+        }
+
+        // Now check the exponent part for a dot ('.') or any other non-digit
+        // character.  The cast keeps std::isdigit() well-defined when plain
+        // char is signed and the byte value is negative.
+        for (; pos < s.size(); ++pos) {
+            if (!std::isdigit(static_cast<unsigned char>(s[pos]))) return true;
+        }
+
+        return false;
+    }
+}
diff --git a/src/_imports.pyx b/src/_imports.pyx
index 834600d..ca94ef1 100644
--- a/src/_imports.pyx
+++ b/src/_imports.pyx
@@ -13,6 +13,7 @@ from cpython.object cimport PyObject, PyObject_GetIter
 from cpython.type cimport PyType_Check
 from cpython.unicode cimport PyUnicode_Check, PyUnicode_FromEncodedObject, PyUnicode_Format
 from libcpp cimport bool as boolean
+from libcpp.string cimport string
 
 
 cdef extern from '' namespace 'std' nogil:
@@ -124,9 +125,20 @@ cdef extern from 'src/_decoder_recursive_select.hpp' namespace 'JSON5EncoderCpp'
 
     DrsKind drs_lookup[128]
 
+cdef extern from 'src/_fast_float_compat.hpp' namespace 'chars_format' nogil:
+    cdef cppclass chars_format:
+        pass
+    cdef const chars_format fmt_json_or_infnan
+
+cdef extern from 'src/_fast_float_compat.hpp' namespace 'check_floats' nogil:
+    cdef boolean has_invalid_exponent(string &s)
+
+cdef extern from 'third-party/fast_float/include/fast_float/fast_float.h' namespace 'fast_float' nogil:
+    ctypedef struct from_chars_result:
+        char *ptr
+        int ec
 
-cdef extern from 'third-party/fast_double_parser/include/fast_double_parser.h' namespace 'fast_double_parser' nogil:
-    const char *parse_number(const char *p, double *outDouble)
+    cdef from_chars_result from_chars(char *first, char *last, double &value, chars_format fmt);
 
 
 cdef extern from 'src/dragonbox.cc' namespace 'dragonbox' nogil:
diff --git a/third-party/fast_double_parser b/third-party/fast_double_parser
deleted file mode 160000
index bc93aee..0000000
--- a/third-party/fast_double_parser
+++ /dev/null
@@ -1 +0,0 @@
-Subproject commit bc93aee338615e46faac4140dd60eef761ba5b12
diff --git a/third-party/fast_float b/third-party/fast_float
new file mode 160000
index 0000000..c5a3ca3
--- /dev/null
+++ b/third-party/fast_float
@@ -0,0 +1 @@
+Subproject commit c5a3ca37c459050f367a4cb0b23c862c29242d30