1
- /* auto-generated on 2025-01-16 13:33:53 -0500. Do not edit! */
1
+ /* auto-generated on 2025-03-27 15:01:10 -0400. Do not edit! */
2
+ /* modified for SIMDJSON_PHP to mimic PHP's JSON encode/decode behavior */
2
3
/* including simdjson.cpp: */
3
4
/* begin file simdjson.cpp */
4
5
#define SIMDJSON_SRC_SIMDJSON_CPP
@@ -776,22 +777,22 @@ inline namespace literals {
776
777
inline namespace string_view_literals {
777
778
778
779
779
- constexpr std::string_view operator "" _sv( const char* str, size_t len ) noexcept // (1)
780
+ constexpr std::string_view operator ""_sv( const char* str, size_t len ) noexcept // (1)
780
781
{
781
782
return std::string_view{ str, len };
782
783
}
783
784
784
- constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2)
785
+ constexpr std::u16string_view operator ""_sv( const char16_t* str, size_t len ) noexcept // (2)
785
786
{
786
787
return std::u16string_view{ str, len };
787
788
}
788
789
789
- constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3)
790
+ constexpr std::u32string_view operator ""_sv( const char32_t* str, size_t len ) noexcept // (3)
790
791
{
791
792
return std::u32string_view{ str, len };
792
793
}
793
794
794
- constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4)
795
+ constexpr std::wstring_view operator ""_sv( const wchar_t* str, size_t len ) noexcept // (4)
795
796
{
796
797
return std::wstring_view{ str, len };
797
798
}
@@ -2122,22 +2123,22 @@ nssv_inline_ns namespace string_view_literals {
2122
2123
2123
2124
#if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS
2124
2125
2125
- nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1)
2126
+ nssv_constexpr nonstd::sv_lite::string_view operator ""sv( const char* str, size_t len ) nssv_noexcept // (1)
2126
2127
{
2127
2128
return nonstd::sv_lite::string_view{ str, len };
2128
2129
}
2129
2130
2130
- nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
2131
+ nssv_constexpr nonstd::sv_lite::u16string_view operator ""sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
2131
2132
{
2132
2133
return nonstd::sv_lite::u16string_view{ str, len };
2133
2134
}
2134
2135
2135
- nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
2136
+ nssv_constexpr nonstd::sv_lite::u32string_view operator ""sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
2136
2137
{
2137
2138
return nonstd::sv_lite::u32string_view{ str, len };
2138
2139
}
2139
2140
2140
- nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
2141
+ nssv_constexpr nonstd::sv_lite::wstring_view operator ""sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
2141
2142
{
2142
2143
return nonstd::sv_lite::wstring_view{ str, len };
2143
2144
}
@@ -2146,22 +2147,22 @@ nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str,
2146
2147
2147
2148
#if nssv_CONFIG_USR_SV_OPERATOR
2148
2149
2149
- nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1)
2150
+ nssv_constexpr nonstd::sv_lite::string_view operator ""_sv( const char* str, size_t len ) nssv_noexcept // (1)
2150
2151
{
2151
2152
return nonstd::sv_lite::string_view{ str, len };
2152
2153
}
2153
2154
2154
- nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
2155
+ nssv_constexpr nonstd::sv_lite::u16string_view operator ""_sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
2155
2156
{
2156
2157
return nonstd::sv_lite::u16string_view{ str, len };
2157
2158
}
2158
2159
2159
- nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
2160
+ nssv_constexpr nonstd::sv_lite::u32string_view operator ""_sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
2160
2161
{
2161
2162
return nonstd::sv_lite::u32string_view{ str, len };
2162
2163
}
2163
2164
2164
- nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
2165
+ nssv_constexpr nonstd::sv_lite::wstring_view operator ""_sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
2165
2166
{
2166
2167
return nonstd::sv_lite::wstring_view{ str, len };
2167
2168
}
@@ -2431,7 +2432,7 @@ enum error_code {
2431
2432
SUCCESS = 0, ///< No error
2432
2433
CAPACITY, ///< This parser can't support a document that big
2433
2434
MEMALLOC, ///< Error allocating memory, most likely out of memory
2434
- TAPE_ERROR, ///< Something went wrong, this is a generic error
2435
+ TAPE_ERROR, ///< Something went wrong, this is a generic error. Fatal/unrecoverable error.
2435
2436
DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation
2436
2437
STRING_ERROR, ///< Problem while parsing a string
2437
2438
T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't'
@@ -2456,13 +2457,21 @@ enum error_code {
2456
2457
PARSER_IN_USE, ///< parser is already in use.
2457
2458
OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order (checked when SIMDJSON_DEVELOPMENT_CHECKS=1)
2458
2459
INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it.
2459
- INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early.
2460
+ INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. Fatal/unrecoverable error.
2460
2461
SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value.
2461
2462
OUT_OF_BOUNDS, ///< Attempted to access location outside of document.
2462
2463
TRAILING_CONTENT, ///< Unexpected trailing content in the JSON input
2463
2464
NUM_ERROR_CODES
2464
2465
};
2465
2466
2467
+ /**
2468
+ * Some errors are fatal and invalidate the document. This function returns true if the
2469
+ * error is fatal. It returns true for TAPE_ERROR and INCOMPLETE_ARRAY_OR_OBJECT.
2470
+ * Once a fatal error is encountered, the on-demand document is no longer valid and
2471
+ * processing should stop.
2472
+ */
2473
+ inline bool is_fatal(error_code error) noexcept;
2474
+
2466
2475
/**
2467
2476
* It is the convention throughout the code that the macro SIMDJSON_DEVELOPMENT_CHECKS determines whether
2468
2477
* we check for OUT_OF_ORDER_ITERATION. The logic behind it is that these errors only occur when the code
@@ -2765,14 +2774,30 @@ SIMDJSON_IMPL_CONCEPT(op_append, operator+=)
2765
2774
#undef SIMDJSON_IMPL_CONCEPT
2766
2775
} // namespace details
2767
2776
2777
+
2778
+ template <typename T>
2779
+ concept string_view_like = std::is_convertible_v<T, std::string_view> &&
2780
+ !std::is_convertible_v<T, const char*>;
2781
+
2782
+ template<typename T>
2783
+ concept constructible_from_string_view = std::is_constructible_v<T, std::string_view>
2784
+ && !std::is_same_v<T, std::string_view>
2785
+ && std::is_default_constructible_v<T>;
2786
+
2787
+ template<typename M>
2788
+ concept string_view_keyed_map = string_view_like<typename M::key_type>
2789
+ && requires(std::remove_cvref_t<M>& m, typename M::key_type sv, typename M::mapped_type v) {
2790
+ { m.emplace(sv, v) } -> std::same_as<std::pair<typename M::iterator, bool>>;
2791
+ };
2792
+
2768
2793
/// Check if T is a container that we can append to, including:
2769
2794
/// std::vector, std::deque, std::list, std::string, ...
2770
2795
template <typename T>
2771
2796
concept appendable_containers =
2772
- details::supports_emplace_back<T> || details::supports_emplace<T> ||
2797
+ ( details::supports_emplace_back<T> || details::supports_emplace<T> ||
2773
2798
details::supports_push_back<T> || details::supports_push<T> ||
2774
2799
details::supports_add<T> || details::supports_append<T> ||
2775
- details::supports_insert<T>;
2800
+ details::supports_insert<T>) && !string_view_keyed_map<T> ;
2776
2801
2777
2802
/// Insert into the container however possible
2778
2803
template <appendable_containers T, typename... Args>
@@ -2840,6 +2865,8 @@ concept optional_type = requires(std::remove_cvref_t<T> obj) {
2840
2865
{ static_cast<bool>(obj) } -> std::same_as<bool>; // convertible to bool
2841
2866
};
2842
2867
2868
+
2869
+
2843
2870
} // namespace concepts
2844
2871
} // namespace simdjson
2845
2872
#endif // SIMDJSON_SUPPORTS_DESERIALIZATION
@@ -4511,6 +4538,11 @@ extern SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886];
4511
4538
#include <iostream>
4512
4539
4513
4540
namespace simdjson {
4541
+
4542
+ inline bool is_fatal(error_code error) noexcept {
4543
+ return error == TAPE_ERROR || error == INCOMPLETE_ARRAY_OR_OBJECT;
4544
+ }
4545
+
4514
4546
namespace internal {
4515
4547
// We store the error code so we can validate the error message is associated with the right code
4516
4548
struct error_code_info {
@@ -4696,7 +4728,7 @@ namespace internal {
4696
4728
{ SUCCESS, "SUCCESS: No error" },
4697
4729
{ CAPACITY, "CAPACITY: This parser can't support a document that big" },
4698
4730
{ MEMALLOC, "MEMALLOC: Error allocating memory, we're most likely out of memory" },
4699
- { TAPE_ERROR, "TAPE_ERROR: The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc." },
4731
+ { TAPE_ERROR, "TAPE_ERROR: The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc. This is a fatal and unrecoverable error. " },
4700
4732
{ DEPTH_ERROR, "DEPTH_ERROR: The JSON document was too deep (too many nested objects and arrays)" },
4701
4733
{ STRING_ERROR, "STRING_ERROR: Problem while parsing a string" },
4702
4734
{ T_ATOM_ERROR, "T_ATOM_ERROR: Problem while parsing an atom starting with the letter 't'" },
@@ -4721,7 +4753,7 @@ namespace internal {
4721
4753
{ PARSER_IN_USE, "PARSER_IN_USE: Cannot parse a new document while a document is still in use." },
4722
4754
{ OUT_OF_ORDER_ITERATION, "OUT_OF_ORDER_ITERATION: Objects and arrays can only be iterated when they are first encountered." },
4723
4755
{ INSUFFICIENT_PADDING, "INSUFFICIENT_PADDING: simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length. Consider using the simdjson::padded_string class if needed." },
4724
- { INCOMPLETE_ARRAY_OR_OBJECT, "INCOMPLETE_ARRAY_OR_OBJECT: JSON document ended early in the middle of an object or array." },
4756
+ { INCOMPLETE_ARRAY_OR_OBJECT, "INCOMPLETE_ARRAY_OR_OBJECT: JSON document ended early in the middle of an object or array. This is a fatal and unrecoverable error. " },
4725
4757
{ SCALAR_DOCUMENT_AS_VALUE, "SCALAR_DOCUMENT_AS_VALUE: A JSON document made of a scalar (number, Boolean, null or string) is treated as a value. Use get_bool(), get_double(), etc. on the document instead. "},
4726
4758
{ OUT_OF_BOUNDS, "OUT_OF_BOUNDS: Attempt to access location outside of document."},
4727
4759
{ TRAILING_CONTENT, "TRAILING_CONTENT: Unexpected trailing content in the JSON input."}
@@ -6787,7 +6819,7 @@ class document {
6787
6819
* The memory allocation is strict: you
6788
6820
* you can use this function to increase
6789
6821
* or lower the amount of allocated memory.
6790
- * Passsing zero clears the memory.
6822
+ * Passing zero clears the memory.
6791
6823
*/
6792
6824
error_code allocate(size_t len) noexcept;
6793
6825
/** @private Capacity in bytes, in terms
@@ -9185,7 +9217,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
9185
9217
// floor(log(5**power)/log(2))
9186
9218
//
9187
9219
// Note that this is not magic: 152170/(1<<16) is
9188
- // approximatively equal to log(5)/log(2).
9220
+ // approximately equal to log(5)/log(2).
9189
9221
// The 1<<16 value is a power of two; we could use a
9190
9222
// larger power of 2 if we wanted to.
9191
9223
//
@@ -15555,7 +15587,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
15555
15587
// floor(log(5**power)/log(2))
15556
15588
//
15557
15589
// Note that this is not magic: 152170/(1<<16) is
15558
- // approximatively equal to log(5)/log(2).
15590
+ // approximately equal to log(5)/log(2).
15559
15591
// The 1<<16 value is a power of two; we could use a
15560
15592
// larger power of 2 if we wanted to.
15561
15593
//
@@ -20833,14 +20865,18 @@ namespace simd {
20833
20865
20834
20866
// Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset).
20835
20867
// Passing a 0 value for mask would be equivalent to writing out every byte to output.
20836
- // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes
20868
+ // Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes
20837
20869
// get written.
20838
20870
// Design consideration: it seems like a function with the
20839
20871
// signature simd8<L> compress(uint64_t mask) would be
20840
20872
// sensible, but the AVX ISA makes this kind of approach difficult.
20841
20873
template<typename L>
20842
20874
simdjson_inline void compress(uint64_t mask, L * output) const {
20843
- _mm512_mask_compressstoreu_epi8 (output,~mask,*this);
20875
+ // we deliberately avoid _mm512_mask_compressstoreu_epi8 for portability
20876
+ // (AMD Zen4 has terrible performance with it, it is effectively broken)
20877
+ // _mm512_mask_compressstoreu_epi8 (output,~mask,*this);
20878
+ __m512i compressed = _mm512_maskz_compress_epi8(~mask, *this);
20879
+ _mm512_storeu_si512(output, compressed); // could use a mask
20844
20880
}
20845
20881
20846
20882
template<typename L>
@@ -21785,7 +21821,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
21785
21821
// floor(log(5**power)/log(2))
21786
21822
//
21787
21823
// Note that this is not magic: 152170/(1<<16) is
21788
- // approximatively equal to log(5)/log(2).
21824
+ // approximately equal to log(5)/log(2).
21789
21825
// The 1<<16 value is a power of two; we could use a
21790
21826
// larger power of 2 if we wanted to.
21791
21827
//
@@ -23473,14 +23509,18 @@ namespace simd {
23473
23509
23474
23510
// Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset).
23475
23511
// Passing a 0 value for mask would be equivalent to writing out every byte to output.
23476
- // Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes
23512
+ // Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes
23477
23513
// get written.
23478
23514
// Design consideration: it seems like a function with the
23479
23515
// signature simd8<L> compress(uint64_t mask) would be
23480
23516
// sensible, but the AVX ISA makes this kind of approach difficult.
23481
23517
template<typename L>
23482
23518
simdjson_inline void compress(uint64_t mask, L * output) const {
23483
- _mm512_mask_compressstoreu_epi8 (output,~mask,*this);
23519
+ // we deliberately avoid _mm512_mask_compressstoreu_epi8 for portability
23520
+ // (AMD Zen4 has terrible performance with it, it is effectively broken)
23521
+ // _mm512_mask_compressstoreu_epi8 (output,~mask,*this);
23522
+ __m512i compressed = _mm512_maskz_compress_epi8(~mask, *this);
23523
+ _mm512_storeu_si512(output, compressed); // could use a mask
23484
23524
}
23485
23525
23486
23526
template<typename L>
@@ -28171,7 +28211,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
28171
28211
// floor(log(5**power)/log(2))
28172
28212
//
28173
28213
// Note that this is not magic: 152170/(1<<16) is
28174
- // approximatively equal to log(5)/log(2).
28214
+ // approximately equal to log(5)/log(2).
28175
28215
// The 1<<16 value is a power of two; we could use a
28176
28216
// larger power of 2 if we wanted to.
28177
28217
//
@@ -34923,7 +34963,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
34923
34963
// floor(log(5**power)/log(2))
34924
34964
//
34925
34965
// Note that this is not magic: 152170/(1<<16) is
34926
- // approximatively equal to log(5)/log(2).
34966
+ // approximately equal to log(5)/log(2).
34927
34967
// The 1<<16 value is a power of two; we could use a
34928
34968
// larger power of 2 if we wanted to.
34929
34969
//
@@ -41499,7 +41539,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
41499
41539
// floor(log(5**power)/log(2))
41500
41540
//
41501
41541
// Note that this is not magic: 152170/(1<<16) is
41502
- // approximatively equal to log(5)/log(2).
41542
+ // approximately equal to log(5)/log(2).
41503
41543
// The 1<<16 value is a power of two; we could use a
41504
41544
// larger power of 2 if we wanted to.
41505
41545
//
@@ -47520,7 +47560,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
47520
47560
// floor(log(5**power)/log(2))
47521
47561
//
47522
47562
// Note that this is not magic: 152170/(1<<16) is
47523
- // approximatively equal to log(5)/log(2).
47563
+ // approximately equal to log(5)/log(2).
47524
47564
// The 1<<16 value is a power of two; we could use a
47525
47565
// larger power of 2 if we wanted to.
47526
47566
//
@@ -53140,7 +53180,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
53140
53180
// floor(log(5**power)/log(2))
53141
53181
//
53142
53182
// Note that this is not magic: 152170/(1<<16) is
53143
- // approximatively equal to log(5)/log(2).
53183
+ // approximately equal to log(5)/log(2).
53144
53184
// The 1<<16 value is a power of two; we could use a
53145
53185
// larger power of 2 if we wanted to.
53146
53186
//
0 commit comments