Skip to content

Commit 936df52

Browse files
committed
Upgrade simdjson to v3.12.3
1 parent 2838544 commit 936df52

File tree

7 files changed

+1132
-484
lines changed

7 files changed

+1132
-484
lines changed

src/simdjson.cpp

Lines changed: 72 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,5 @@
1-
/* auto-generated on 2025-01-16 13:33:53 -0500. Do not edit! */
1+
/* auto-generated on 2025-03-27 15:01:10 -0400. Do not edit! */
2+
/* modified for SIMDJSON_PHP to mimic PHP's JSON encode/decode behavior */
23
/* including simdjson.cpp: */
34
/* begin file simdjson.cpp */
45
#define SIMDJSON_SRC_SIMDJSON_CPP
@@ -776,22 +777,22 @@ inline namespace literals {
776777
inline namespace string_view_literals {
777778

778779

779-
constexpr std::string_view operator "" _sv( const char* str, size_t len ) noexcept // (1)
780+
constexpr std::string_view operator ""_sv( const char* str, size_t len ) noexcept // (1)
780781
{
781782
return std::string_view{ str, len };
782783
}
783784

784-
constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2)
785+
constexpr std::u16string_view operator ""_sv( const char16_t* str, size_t len ) noexcept // (2)
785786
{
786787
return std::u16string_view{ str, len };
787788
}
788789

789-
constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3)
790+
constexpr std::u32string_view operator ""_sv( const char32_t* str, size_t len ) noexcept // (3)
790791
{
791792
return std::u32string_view{ str, len };
792793
}
793794

794-
constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4)
795+
constexpr std::wstring_view operator ""_sv( const wchar_t* str, size_t len ) noexcept // (4)
795796
{
796797
return std::wstring_view{ str, len };
797798
}
@@ -2122,22 +2123,22 @@ nssv_inline_ns namespace string_view_literals {
21222123

21232124
#if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS
21242125

2125-
nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1)
2126+
nssv_constexpr nonstd::sv_lite::string_view operator ""sv( const char* str, size_t len ) nssv_noexcept // (1)
21262127
{
21272128
return nonstd::sv_lite::string_view{ str, len };
21282129
}
21292130

2130-
nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
2131+
nssv_constexpr nonstd::sv_lite::u16string_view operator ""sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
21312132
{
21322133
return nonstd::sv_lite::u16string_view{ str, len };
21332134
}
21342135

2135-
nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
2136+
nssv_constexpr nonstd::sv_lite::u32string_view operator ""sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
21362137
{
21372138
return nonstd::sv_lite::u32string_view{ str, len };
21382139
}
21392140

2140-
nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
2141+
nssv_constexpr nonstd::sv_lite::wstring_view operator ""sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
21412142
{
21422143
return nonstd::sv_lite::wstring_view{ str, len };
21432144
}
@@ -2146,22 +2147,22 @@ nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str,
21462147

21472148
#if nssv_CONFIG_USR_SV_OPERATOR
21482149

2149-
nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1)
2150+
nssv_constexpr nonstd::sv_lite::string_view operator ""_sv( const char* str, size_t len ) nssv_noexcept // (1)
21502151
{
21512152
return nonstd::sv_lite::string_view{ str, len };
21522153
}
21532154

2154-
nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
2155+
nssv_constexpr nonstd::sv_lite::u16string_view operator ""_sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
21552156
{
21562157
return nonstd::sv_lite::u16string_view{ str, len };
21572158
}
21582159

2159-
nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
2160+
nssv_constexpr nonstd::sv_lite::u32string_view operator ""_sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
21602161
{
21612162
return nonstd::sv_lite::u32string_view{ str, len };
21622163
}
21632164

2164-
nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
2165+
nssv_constexpr nonstd::sv_lite::wstring_view operator ""_sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
21652166
{
21662167
return nonstd::sv_lite::wstring_view{ str, len };
21672168
}
@@ -2431,7 +2432,7 @@ enum error_code {
24312432
SUCCESS = 0, ///< No error
24322433
CAPACITY, ///< This parser can't support a document that big
24332434
MEMALLOC, ///< Error allocating memory, most likely out of memory
2434-
TAPE_ERROR, ///< Something went wrong, this is a generic error
2435+
TAPE_ERROR, ///< Something went wrong, this is a generic error. Fatal/unrecoverable error.
24352436
DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation
24362437
STRING_ERROR, ///< Problem while parsing a string
24372438
T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't'
@@ -2456,13 +2457,21 @@ enum error_code {
24562457
PARSER_IN_USE, ///< parser is already in use.
24572458
OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order (checked when SIMDJSON_DEVELOPMENT_CHECKS=1)
24582459
INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it.
2459-
INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early.
2460+
INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. Fatal/unrecoverable error.
24602461
SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value.
24612462
OUT_OF_BOUNDS, ///< Attempted to access location outside of document.
24622463
TRAILING_CONTENT, ///< Unexpected trailing content in the JSON input
24632464
NUM_ERROR_CODES
24642465
};
24652466

2467+
/**
2468+
* Some errors are fatal and invalidate the document. This function returns true if the
2469+
* error is fatal. It returns true for TAPE_ERROR and INCOMPLETE_ARRAY_OR_OBJECT.
2470+
* Once a fatal error is encountered, the on-demand document is no longer valid and
2471+
* processing should stop.
2472+
*/
2473+
inline bool is_fatal(error_code error) noexcept;
2474+
24662475
/**
24672476
* It is the convention throughout the code that the macro SIMDJSON_DEVELOPMENT_CHECKS determines whether
24682477
* we check for OUT_OF_ORDER_ITERATION. The logic behind it is that these errors only occur when the code
@@ -2765,14 +2774,30 @@ SIMDJSON_IMPL_CONCEPT(op_append, operator+=)
27652774
#undef SIMDJSON_IMPL_CONCEPT
27662775
} // namespace details
27672776

2777+
2778+
template <typename T>
2779+
concept string_view_like = std::is_convertible_v<T, std::string_view> &&
2780+
!std::is_convertible_v<T, const char*>;
2781+
2782+
template<typename T>
2783+
concept constructible_from_string_view = std::is_constructible_v<T, std::string_view>
2784+
&& !std::is_same_v<T, std::string_view>
2785+
&& std::is_default_constructible_v<T>;
2786+
2787+
template<typename M>
2788+
concept string_view_keyed_map = string_view_like<typename M::key_type>
2789+
&& requires(std::remove_cvref_t<M>& m, typename M::key_type sv, typename M::mapped_type v) {
2790+
{ m.emplace(sv, v) } -> std::same_as<std::pair<typename M::iterator, bool>>;
2791+
};
2792+
27682793
/// Check if T is a container that we can append to, including:
27692794
/// std::vector, std::deque, std::list, std::string, ...
27702795
template <typename T>
27712796
concept appendable_containers =
2772-
details::supports_emplace_back<T> || details::supports_emplace<T> ||
2797+
(details::supports_emplace_back<T> || details::supports_emplace<T> ||
27732798
details::supports_push_back<T> || details::supports_push<T> ||
27742799
details::supports_add<T> || details::supports_append<T> ||
2775-
details::supports_insert<T>;
2800+
details::supports_insert<T>) && !string_view_keyed_map<T>;
27762801

27772802
/// Insert into the container however possible
27782803
template <appendable_containers T, typename... Args>
@@ -2840,6 +2865,8 @@ concept optional_type = requires(std::remove_cvref_t<T> obj) {
28402865
{ static_cast<bool>(obj) } -> std::same_as<bool>; // convertible to bool
28412866
};
28422867

2868+
2869+
28432870
} // namespace concepts
28442871
} // namespace simdjson
28452872
#endif // SIMDJSON_SUPPORTS_DESERIALIZATION
@@ -4511,6 +4538,11 @@ extern SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886];
45114538
#include <iostream>
45124539

45134540
namespace simdjson {
4541+
4542+
inline bool is_fatal(error_code error) noexcept {
4543+
return error == TAPE_ERROR || error == INCOMPLETE_ARRAY_OR_OBJECT;
4544+
}
4545+
45144546
namespace internal {
45154547
// We store the error code so we can validate the error message is associated with the right code
45164548
struct error_code_info {
@@ -4696,7 +4728,7 @@ namespace internal {
46964728
{ SUCCESS, "SUCCESS: No error" },
46974729
{ CAPACITY, "CAPACITY: This parser can't support a document that big" },
46984730
{ MEMALLOC, "MEMALLOC: Error allocating memory, we're most likely out of memory" },
4699-
{ TAPE_ERROR, "TAPE_ERROR: The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc." },
4731+
{ TAPE_ERROR, "TAPE_ERROR: The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc. This is a fatal and unrecoverable error." },
47004732
{ DEPTH_ERROR, "DEPTH_ERROR: The JSON document was too deep (too many nested objects and arrays)" },
47014733
{ STRING_ERROR, "STRING_ERROR: Problem while parsing a string" },
47024734
{ T_ATOM_ERROR, "T_ATOM_ERROR: Problem while parsing an atom starting with the letter 't'" },
@@ -4721,7 +4753,7 @@ namespace internal {
47214753
{ PARSER_IN_USE, "PARSER_IN_USE: Cannot parse a new document while a document is still in use." },
47224754
{ OUT_OF_ORDER_ITERATION, "OUT_OF_ORDER_ITERATION: Objects and arrays can only be iterated when they are first encountered." },
47234755
{ INSUFFICIENT_PADDING, "INSUFFICIENT_PADDING: simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length. Consider using the simdjson::padded_string class if needed." },
4724-
{ INCOMPLETE_ARRAY_OR_OBJECT, "INCOMPLETE_ARRAY_OR_OBJECT: JSON document ended early in the middle of an object or array." },
4756+
{ INCOMPLETE_ARRAY_OR_OBJECT, "INCOMPLETE_ARRAY_OR_OBJECT: JSON document ended early in the middle of an object or array. This is a fatal and unrecoverable error." },
47254757
{ SCALAR_DOCUMENT_AS_VALUE, "SCALAR_DOCUMENT_AS_VALUE: A JSON document made of a scalar (number, Boolean, null or string) is treated as a value. Use get_bool(), get_double(), etc. on the document instead. "},
47264758
{ OUT_OF_BOUNDS, "OUT_OF_BOUNDS: Attempt to access location outside of document."},
47274759
{ TRAILING_CONTENT, "TRAILING_CONTENT: Unexpected trailing content in the JSON input."}
@@ -6787,7 +6819,7 @@ class document {
67876819
* The memory allocation is strict: you
67886820
* you can use this function to increase
67896821
* or lower the amount of allocated memory.
6790-
* Passsing zero clears the memory.
6822+
* Passing zero clears the memory.
67916823
*/
67926824
error_code allocate(size_t len) noexcept;
67936825
/** @private Capacity in bytes, in terms
@@ -9185,7 +9217,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
91859217
// floor(log(5**power)/log(2))
91869218
//
91879219
// Note that this is not magic: 152170/(1<<16) is
9188-
// approximatively equal to log(5)/log(2).
9220+
// approximately equal to log(5)/log(2).
91899221
// The 1<<16 value is a power of two; we could use a
91909222
// larger power of 2 if we wanted to.
91919223
//
@@ -15555,7 +15587,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
1555515587
// floor(log(5**power)/log(2))
1555615588
//
1555715589
// Note that this is not magic: 152170/(1<<16) is
15558-
// approximatively equal to log(5)/log(2).
15590+
// approximately equal to log(5)/log(2).
1555915591
// The 1<<16 value is a power of two; we could use a
1556015592
// larger power of 2 if we wanted to.
1556115593
//
@@ -20833,14 +20865,18 @@ namespace simd {
2083320865

2083420866
// Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset).
2083520867
// Passing a 0 value for mask would be equivalent to writing out every byte to output.
20836-
// Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes
20868+
// Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes
2083720869
// get written.
2083820870
// Design consideration: it seems like a function with the
2083920871
// signature simd8<L> compress(uint32_t mask) would be
2084020872
// sensible, but the AVX ISA makes this kind of approach difficult.
2084120873
template<typename L>
2084220874
simdjson_inline void compress(uint64_t mask, L * output) const {
20843-
_mm512_mask_compressstoreu_epi8 (output,~mask,*this);
20875+
// we deliberately avoid _mm512_mask_compressstoreu_epi8 for portability
20876+
// (AMD Zen4 has terrible performance with it, it is effectively broken)
20877+
// _mm512_mask_compressstoreu_epi8 (output,~mask,*this);
20878+
__m512i compressed = _mm512_maskz_compress_epi8(~mask, *this);
20879+
_mm512_storeu_si512(output, compressed); // could use a mask
2084420880
}
2084520881

2084620882
template<typename L>
@@ -21785,7 +21821,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
2178521821
// floor(log(5**power)/log(2))
2178621822
//
2178721823
// Note that this is not magic: 152170/(1<<16) is
21788-
// approximatively equal to log(5)/log(2).
21824+
// approximately equal to log(5)/log(2).
2178921825
// The 1<<16 value is a power of two; we could use a
2179021826
// larger power of 2 if we wanted to.
2179121827
//
@@ -23473,14 +23509,18 @@ namespace simd {
2347323509

2347423510
// Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset).
2347523511
// Passing a 0 value for mask would be equivalent to writing out every byte to output.
23476-
// Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes
23512+
// Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes
2347723513
// get written.
2347823514
// Design consideration: it seems like a function with the
2347923515
// signature simd8<L> compress(uint32_t mask) would be
2348023516
// sensible, but the AVX ISA makes this kind of approach difficult.
2348123517
template<typename L>
2348223518
simdjson_inline void compress(uint64_t mask, L * output) const {
23483-
_mm512_mask_compressstoreu_epi8 (output,~mask,*this);
23519+
// we deliberately avoid _mm512_mask_compressstoreu_epi8 for portability
23520+
// (AMD Zen4 has terrible performance with it, it is effectively broken)
23521+
// _mm512_mask_compressstoreu_epi8 (output,~mask,*this);
23522+
__m512i compressed = _mm512_maskz_compress_epi8(~mask, *this);
23523+
_mm512_storeu_si512(output, compressed); // could use a mask
2348423524
}
2348523525

2348623526
template<typename L>
@@ -28171,7 +28211,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
2817128211
// floor(log(5**power)/log(2))
2817228212
//
2817328213
// Note that this is not magic: 152170/(1<<16) is
28174-
// approximatively equal to log(5)/log(2).
28214+
// approximately equal to log(5)/log(2).
2817528215
// The 1<<16 value is a power of two; we could use a
2817628216
// larger power of 2 if we wanted to.
2817728217
//
@@ -34923,7 +34963,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
3492334963
// floor(log(5**power)/log(2))
3492434964
//
3492534965
// Note that this is not magic: 152170/(1<<16) is
34926-
// approximatively equal to log(5)/log(2).
34966+
// approximately equal to log(5)/log(2).
3492734967
// The 1<<16 value is a power of two; we could use a
3492834968
// larger power of 2 if we wanted to.
3492934969
//
@@ -41499,7 +41539,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
4149941539
// floor(log(5**power)/log(2))
4150041540
//
4150141541
// Note that this is not magic: 152170/(1<<16) is
41502-
// approximatively equal to log(5)/log(2).
41542+
// approximately equal to log(5)/log(2).
4150341543
// The 1<<16 value is a power of two; we could use a
4150441544
// larger power of 2 if we wanted to.
4150541545
//
@@ -47520,7 +47560,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
4752047560
// floor(log(5**power)/log(2))
4752147561
//
4752247562
// Note that this is not magic: 152170/(1<<16) is
47523-
// approximatively equal to log(5)/log(2).
47563+
// approximately equal to log(5)/log(2).
4752447564
// The 1<<16 value is a power of two; we could use a
4752547565
// larger power of 2 if we wanted to.
4752647566
//
@@ -53140,7 +53180,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
5314053180
// floor(log(5**power)/log(2))
5314153181
//
5314253182
// Note that this is not magic: 152170/(1<<16) is
53143-
// approximatively equal to log(5)/log(2).
53183+
// approximately equal to log(5)/log(2).
5314453184
// The 1<<16 value is a power of two; we could use a
5314553185
// larger power of 2 if we wanted to.
5314653186
//

0 commit comments

Comments
 (0)