1- /* auto-generated on 2024-12-12 10:37:26 -0500 . Do not edit! */
1+ /* auto-generated on 2025-03-27 15:01:10 -0400 . Do not edit! */
22/* including simdjson.cpp: */
33/* begin file simdjson.cpp */
44#define SIMDJSON_SRC_SIMDJSON_CPP
8383#endif
8484#endif
8585
86+ #if defined(__apple_build_version__)
87+ #if __apple_build_version__ < 14000000
88+ #define SIMDJSON_CONCEPT_DISABLED 1 // apple-clang/13 doesn't support std::convertible_to
89+ #endif
90+ #endif
91+
92+
8693#if defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED)
94+ #if __cpp_concepts >= 201907L
8795#include <utility>
8896#define SIMDJSON_SUPPORTS_DESERIALIZATION 1
97+ #else
98+ #define SIMDJSON_SUPPORTS_DESERIALIZATION 0
99+ #endif
89100#else // defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED)
90101#define SIMDJSON_SUPPORTS_DESERIALIZATION 0
91102#endif // defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED)
102113#include <cstdlib>
103114#include <cfloat>
104115#include <cassert>
116+ #include <climits>
105117#ifndef _WIN32
106118// strcasecmp, strncasecmp
107119#include <strings.h>
108120#endif
109121
122+ static_assert(CHAR_BIT == 8, "simdjson requires 8-bit bytes");
123+
124+
110125// We are using size_t without namespace std:: throughout the project
111126using std::size_t;
112127
@@ -140,6 +155,7 @@ using std::size_t;
140155#elif defined(__loongarch_lp64)
141156#define SIMDJSON_IS_LOONGARCH64 1
142157#elif defined(__PPC64__) || defined(_M_PPC64)
158+ #define SIMDJSON_IS_PPC64 1
143159#if defined(__ALTIVEC__)
144160#define SIMDJSON_IS_PPC64_VMX 1
145161#endif // defined(__ALTIVEC__)
@@ -760,22 +776,22 @@ inline namespace literals {
760776inline namespace string_view_literals {
761777
762778
763- constexpr std::string_view operator "" _sv( const char* str, size_t len ) noexcept // (1)
779+ constexpr std::string_view operator ""_sv( const char* str, size_t len ) noexcept // (1)
764780{
765781 return std::string_view{ str, len };
766782}
767783
768- constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2)
784+ constexpr std::u16string_view operator ""_sv( const char16_t* str, size_t len ) noexcept // (2)
769785{
770786 return std::u16string_view{ str, len };
771787}
772788
773- constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3)
789+ constexpr std::u32string_view operator ""_sv( const char32_t* str, size_t len ) noexcept // (3)
774790{
775791 return std::u32string_view{ str, len };
776792}
777793
778- constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4)
794+ constexpr std::wstring_view operator ""_sv( const wchar_t* str, size_t len ) noexcept // (4)
779795{
780796 return std::wstring_view{ str, len };
781797}
@@ -2106,22 +2122,22 @@ nssv_inline_ns namespace string_view_literals {
21062122
21072123#if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS
21082124
2109- nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1)
2125+ nssv_constexpr nonstd::sv_lite::string_view operator ""sv( const char* str, size_t len ) nssv_noexcept // (1)
21102126{
21112127 return nonstd::sv_lite::string_view{ str, len };
21122128}
21132129
2114- nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
2130+ nssv_constexpr nonstd::sv_lite::u16string_view operator ""sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
21152131{
21162132 return nonstd::sv_lite::u16string_view{ str, len };
21172133}
21182134
2119- nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
2135+ nssv_constexpr nonstd::sv_lite::u32string_view operator ""sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
21202136{
21212137 return nonstd::sv_lite::u32string_view{ str, len };
21222138}
21232139
2124- nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
2140+ nssv_constexpr nonstd::sv_lite::wstring_view operator ""sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
21252141{
21262142 return nonstd::sv_lite::wstring_view{ str, len };
21272143}
@@ -2130,22 +2146,22 @@ nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str,
21302146
21312147#if nssv_CONFIG_USR_SV_OPERATOR
21322148
2133- nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1)
2149+ nssv_constexpr nonstd::sv_lite::string_view operator ""_sv( const char* str, size_t len ) nssv_noexcept // (1)
21342150{
21352151 return nonstd::sv_lite::string_view{ str, len };
21362152}
21372153
2138- nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
2154+ nssv_constexpr nonstd::sv_lite::u16string_view operator ""_sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
21392155{
21402156 return nonstd::sv_lite::u16string_view{ str, len };
21412157}
21422158
2143- nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
2159+ nssv_constexpr nonstd::sv_lite::u32string_view operator ""_sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
21442160{
21452161 return nonstd::sv_lite::u32string_view{ str, len };
21462162}
21472163
2148- nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
2164+ nssv_constexpr nonstd::sv_lite::wstring_view operator ""_sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
21492165{
21502166 return nonstd::sv_lite::wstring_view{ str, len };
21512167}
@@ -2415,7 +2431,7 @@ enum error_code {
24152431 SUCCESS = 0, ///< No error
24162432 CAPACITY, ///< This parser can't support a document that big
24172433 MEMALLOC, ///< Error allocating memory, most likely out of memory
2418- TAPE_ERROR, ///< Something went wrong, this is a generic error
2434+ TAPE_ERROR, ///< Something went wrong, this is a generic error. Fatal/unrecoverable error.
24192435 DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation
24202436 STRING_ERROR, ///< Problem while parsing a string
24212437 T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't'
@@ -2440,13 +2456,21 @@ enum error_code {
24402456 PARSER_IN_USE, ///< parser is already in use.
24412457 OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order (checked when SIMDJSON_DEVELOPMENT_CHECKS=1)
24422458 INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it.
2443- INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early.
2459+ INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. Fatal/unrecoverable error.
24442460 SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value.
24452461 OUT_OF_BOUNDS, ///< Attempted to access location outside of document.
24462462 TRAILING_CONTENT, ///< Unexpected trailing content in the JSON input
24472463 NUM_ERROR_CODES
24482464};
24492465
2466+ /**
2467+ * Some errors are fatal and invalidate the document. This function returns true if the
2468+ * error is fatal. It returns true for TAPE_ERROR and INCOMPLETE_ARRAY_OR_OBJECT.
2469+ * Once a fatal error is encountered, the on-demand document is no longer valid and
2470+ * processing should stop.
2471+ */
2472+ inline bool is_fatal(error_code error) noexcept;
2473+
24502474/**
24512475 * It is the convention throughout the code that the macro SIMDJSON_DEVELOPMENT_CHECKS determines whether
24522476 * we check for OUT_OF_ORDER_ITERATION. The logic behind it is that these errors only occurs when the code
@@ -2749,14 +2773,30 @@ SIMDJSON_IMPL_CONCEPT(op_append, operator+=)
27492773#undef SIMDJSON_IMPL_CONCEPT
27502774} // namespace details
27512775
2776+
2777+ template <typename T>
2778+ concept string_view_like = std::is_convertible_v<T, std::string_view> && // T converts to std::string_view ...
2779+ !std::is_convertible_v<T, const char*>; // ... but does not convert to const char*
2780+
2781+ template<typename T>
2782+ concept constructible_from_string_view = std::is_constructible_v<T, std::string_view> // T can be constructed from a std::string_view
2783+ && !std::is_same_v<T, std::string_view> // std::string_view itself is excluded
2784+ && std::is_default_constructible_v<T>; // T must also be default-constructible
2785+
2786+ template<typename M>
2787+ concept string_view_keyed_map = string_view_like<typename M::key_type> // the map's key_type must satisfy string_view_like
2788+ && requires(std::remove_cvref_t<M>& m, typename M::key_type sv, typename M::mapped_type v) {
2789+ { m.emplace(sv, v) } -> std::same_as<std::pair<typename M::iterator, bool>>; // emplace(key, value) must return exactly pair<iterator, bool>
2790+ };
2791+
27522792/// Check if T is a container that we can append to, including:
27532793/// std::vector, std::deque, std::list, std::string, ...
27542794template <typename T>
27552795concept appendable_containers =
2756- details::supports_emplace_back<T> || details::supports_emplace<T> ||
2796+ ( details::supports_emplace_back<T> || details::supports_emplace<T> ||
27572797 details::supports_push_back<T> || details::supports_push<T> ||
27582798 details::supports_add<T> || details::supports_append<T> ||
2759- details::supports_insert<T>;
2799+ details::supports_insert<T>) && !string_view_keyed_map<T> ;
27602800
27612801/// Insert into the container however possible
27622802template <appendable_containers T, typename... Args>
@@ -2824,6 +2864,8 @@ concept optional_type = requires(std::remove_cvref_t<T> obj) {
28242864 { static_cast<bool>(obj) } -> std::same_as<bool>; // convertible to bool
28252865};
28262866
2867+
2868+
28272869} // namespace concepts
28282870} // namespace simdjson
28292871#endif // SIMDJSON_SUPPORTS_DESERIALIZATION
@@ -4495,6 +4537,11 @@ extern SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886];
44954537#include <iostream>
44964538
44974539namespace simdjson {
4540+
4541+ inline bool is_fatal(error_code error) noexcept {
4542+ return error == TAPE_ERROR || error == INCOMPLETE_ARRAY_OR_OBJECT; // only these two codes are reported as fatal; every other code yields false
4543+ }
4544+
44984545namespace internal {
44994546 // We store the error code so we can validate the error message is associated with the right code
45004547 struct error_code_info {
@@ -4680,7 +4727,7 @@ namespace internal {
46804727 { SUCCESS, "SUCCESS: No error" },
46814728 { CAPACITY, "CAPACITY: This parser can't support a document that big" },
46824729 { MEMALLOC, "MEMALLOC: Error allocating memory, we're most likely out of memory" },
4683- { TAPE_ERROR, "TAPE_ERROR: The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc." },
4730+ { TAPE_ERROR, "TAPE_ERROR: The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc. This is a fatal and unrecoverable error. " },
46844731 { DEPTH_ERROR, "DEPTH_ERROR: The JSON document was too deep (too many nested objects and arrays)" },
46854732 { STRING_ERROR, "STRING_ERROR: Problem while parsing a string" },
46864733 { T_ATOM_ERROR, "T_ATOM_ERROR: Problem while parsing an atom starting with the letter 't'" },
@@ -4705,7 +4752,7 @@ namespace internal {
47054752 { PARSER_IN_USE, "PARSER_IN_USE: Cannot parse a new document while a document is still in use." },
47064753 { OUT_OF_ORDER_ITERATION, "OUT_OF_ORDER_ITERATION: Objects and arrays can only be iterated when they are first encountered." },
47074754 { INSUFFICIENT_PADDING, "INSUFFICIENT_PADDING: simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length. Consider using the simdjson::padded_string class if needed." },
4708- { INCOMPLETE_ARRAY_OR_OBJECT, "INCOMPLETE_ARRAY_OR_OBJECT: JSON document ended early in the middle of an object or array." },
4755+ { INCOMPLETE_ARRAY_OR_OBJECT, "INCOMPLETE_ARRAY_OR_OBJECT: JSON document ended early in the middle of an object or array. This is a fatal and unrecoverable error. " },
47094756 { SCALAR_DOCUMENT_AS_VALUE, "SCALAR_DOCUMENT_AS_VALUE: A JSON document made of a scalar (number, Boolean, null or string) is treated as a value. Use get_bool(), get_double(), etc. on the document instead. "},
47104757 { OUT_OF_BOUNDS, "OUT_OF_BOUNDS: Attempt to access location outside of document."},
47114758 { TRAILING_CONTENT, "TRAILING_CONTENT: Unexpected trailing content in the JSON input."}
@@ -6771,7 +6818,7 @@ class document {
67716818 * The memory allocation is strict: you
67726819 * can use this function to increase
67736820 * or lower the amount of allocated memory.
6774- * Passsing zero clears the memory.
6821+ * Passing zero clears the memory.
67756822 */
67766823 error_code allocate(size_t len) noexcept;
67776824 /** @private Capacity in bytes, in terms
@@ -9169,7 +9216,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
91699216 // floor(log(5**power)/log(2))
91709217 //
91719218 // Note that this is not magic: 152170/(1<<16) is
9172- // approximatively equal to log(5)/log(2).
9219+ // approximately equal to log(5)/log(2).
91739220 // The 1<<16 value is a power of two; we could use a
91749221 // larger power of 2 if we wanted to.
91759222 //
@@ -15529,7 +15576,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
1552915576 // floor(log(5**power)/log(2))
1553015577 //
1553115578 // Note that this is not magic: 152170/(1<<16) is
15532- // approximatively equal to log(5)/log(2).
15579+ // approximately equal to log(5)/log(2).
1553315580 // The 1<<16 value is a power of two; we could use a
1553415581 // larger power of 2 if we wanted to.
1553515582 //
@@ -20797,14 +20844,18 @@ namespace simd {
2079720844
2079820845 // Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset).
2079920846 // Passing a 0 value for mask would be equivalent to writing out every byte to output.
20800- // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes
20847+ // Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes
2080120848 // get written.
2080220849 // Design consideration: it seems like a function with the
2080320850 // signature simd8<L> compress(uint32_t mask) would be
2080420851 // sensible, but the AVX ISA makes this kind of approach difficult.
2080520852 template<typename L>
2080620853 simdjson_inline void compress(uint64_t mask, L * output) const {
20807- _mm512_mask_compressstoreu_epi8 (output,~mask,*this);
20854+ // We deliberately avoid _mm512_mask_compressstoreu_epi8 for portability
20855+ // (AMD Zen4 has terrible performance with it, it is effectively broken).
20856+ // _mm512_mask_compressstoreu_epi8 (output,~mask,*this);
20857+ __m512i compressed = _mm512_maskz_compress_epi8(~mask, *this);
20858+ _mm512_storeu_si512(output, compressed); // unconditional full 64-byte store; a masked store could limit the write to the significant bytes
2080820859 }
2080920860
2081020861 template<typename L>
@@ -21749,7 +21800,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
2174921800 // floor(log(5**power)/log(2))
2175021801 //
2175121802 // Note that this is not magic: 152170/(1<<16) is
21752- // approximatively equal to log(5)/log(2).
21803+ // approximately equal to log(5)/log(2).
2175321804 // The 1<<16 value is a power of two; we could use a
2175421805 // larger power of 2 if we wanted to.
2175521806 //
@@ -23427,14 +23478,18 @@ namespace simd {
2342723478
2342823479 // Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset).
2342923480 // Passing a 0 value for mask would be equivalent to writing out every byte to output.
23430- // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes
23481+ // Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes
2343123482 // get written.
2343223483 // Design consideration: it seems like a function with the
2343323484 // signature simd8<L> compress(uint32_t mask) would be
2343423485 // sensible, but the AVX ISA makes this kind of approach difficult.
2343523486 template<typename L>
2343623487 simdjson_inline void compress(uint64_t mask, L * output) const {
23437- _mm512_mask_compressstoreu_epi8 (output,~mask,*this);
23488+ // We deliberately avoid _mm512_mask_compressstoreu_epi8 for portability
23489+ // (AMD Zen4 has terrible performance with it, it is effectively broken).
23490+ // _mm512_mask_compressstoreu_epi8 (output,~mask,*this);
23491+ __m512i compressed = _mm512_maskz_compress_epi8(~mask, *this);
23492+ _mm512_storeu_si512(output, compressed); // unconditional full 64-byte store; a masked store could limit the write to the significant bytes
2343823493 }
2343923494
2344023495 template<typename L>
@@ -28125,7 +28180,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
2812528180 // floor(log(5**power)/log(2))
2812628181 //
2812728182 // Note that this is not magic: 152170/(1<<16) is
28128- // approximatively equal to log(5)/log(2).
28183+ // approximately equal to log(5)/log(2).
2812928184 // The 1<<16 value is a power of two; we could use a
2813028185 // larger power of 2 if we wanted to.
2813128186 //
@@ -34867,7 +34922,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
3486734922 // floor(log(5**power)/log(2))
3486834923 //
3486934924 // Note that this is not magic: 152170/(1<<16) is
34870- // approximatively equal to log(5)/log(2).
34925+ // approximately equal to log(5)/log(2).
3487134926 // The 1<<16 value is a power of two; we could use a
3487234927 // larger power of 2 if we wanted to.
3487334928 //
@@ -41433,7 +41488,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
4143341488 // floor(log(5**power)/log(2))
4143441489 //
4143541490 // Note that this is not magic: 152170/(1<<16) is
41436- // approximatively equal to log(5)/log(2).
41491+ // approximately equal to log(5)/log(2).
4143741492 // The 1<<16 value is a power of two; we could use a
4143841493 // larger power of 2 if we wanted to.
4143941494 //
@@ -47444,7 +47499,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
4744447499 // floor(log(5**power)/log(2))
4744547500 //
4744647501 // Note that this is not magic: 152170/(1<<16) is
47447- // approximatively equal to log(5)/log(2).
47502+ // approximately equal to log(5)/log(2).
4744847503 // The 1<<16 value is a power of two; we could use a
4744947504 // larger power of 2 if we wanted to.
4745047505 //
@@ -53054,7 +53109,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
5305453109 // floor(log(5**power)/log(2))
5305553110 //
5305653111 // Note that this is not magic: 152170/(1<<16) is
53057- // approximatively equal to log(5)/log(2).
53112+ // approximately equal to log(5)/log(2).
5305853113 // The 1<<16 value is a power of two; we could use a
5305953114 // larger power of 2 if we wanted to.
5306053115 //
0 commit comments