Skip to content

Commit a5de078

Browse files
Update simdjson to 3.12.3 (#101)
1 parent f39712a commit a5de078

File tree

5 files changed

+1340
-638
lines changed

5 files changed

+1340
-638
lines changed

README.md

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3,7 +3,7 @@
33

44
A basic Lua binding to [simdjson](https://simdjson.org). The simdjson library is an incredibly fast JSON parser that uses SIMD instructions and fancy algorithms to parse JSON very quickly. It's been tested with LuaJIT 2.0/2.1 and Lua 5.1, 5.2, 5.3, and 5.4 on linux/osx/windows. It has a general parsing mode and a lazy mode that uses a JSON pointer.
55

6-
Current simdjson version: 3.11.3
6+
Current simdjson version: 3.12.3
77

88
## Installation
99
If all the requirements are met, lua-simdjson can be installed via luarocks with:
Lines changed: 2 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,8 +1,8 @@
11
package="lua-simdjson"
2-
version="0.0.6-1"
2+
version="0.0.7-1"
33
source = {
44
url = "git://github.com/FourierTransformer/lua-simdjson",
5-
tag = "0.0.6"
5+
tag = "0.0.7"
66
}
77
description = {
88
summary = "This is a simple Lua binding for simdjson",

src/luasimdjson.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -15,7 +15,7 @@
1515
#include "luasimdjson.h"
1616

1717
#define LUA_SIMDJSON_NAME "simdjson"
18-
#define LUA_SIMDJSON_VERSION "0.0.6"
18+
#define LUA_SIMDJSON_VERSION "0.0.7"
1919

2020
using namespace simdjson;
2121

src/simdjson.cpp

Lines changed: 87 additions & 32 deletions
Original file line numberDiff line numberDiff line change
@@ -1,4 +1,4 @@
1-
/* auto-generated on 2024-12-12 10:37:26 -0500. Do not edit! */
1+
/* auto-generated on 2025-03-27 15:01:10 -0400. Do not edit! */
22
/* including simdjson.cpp: */
33
/* begin file simdjson.cpp */
44
#define SIMDJSON_SRC_SIMDJSON_CPP
@@ -83,9 +83,20 @@
8383
#endif
8484
#endif
8585

86+
#if defined(__apple_build_version__)
87+
#if __apple_build_version__ < 14000000
88+
#define SIMDJSON_CONCEPT_DISABLED 1 // apple-clang/13 doesn't support std::convertible_to
89+
#endif
90+
#endif
91+
92+
8693
#if defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED)
94+
#if __cpp_concepts >= 201907L
8795
#include <utility>
8896
#define SIMDJSON_SUPPORTS_DESERIALIZATION 1
97+
#else
98+
#define SIMDJSON_SUPPORTS_DESERIALIZATION 0
99+
#endif
89100
#else // defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED)
90101
#define SIMDJSON_SUPPORTS_DESERIALIZATION 0
91102
#endif // defined(__cpp_concepts) && !defined(SIMDJSON_CONCEPT_DISABLED)
@@ -102,11 +113,15 @@
102113
#include <cstdlib>
103114
#include <cfloat>
104115
#include <cassert>
116+
#include <climits>
105117
#ifndef _WIN32
106118
// strcasecmp, strncasecmp
107119
#include <strings.h>
108120
#endif
109121

122+
static_assert(CHAR_BIT == 8, "simdjson requires 8-bit bytes");
123+
124+
110125
// We are using size_t without namespace std:: throughout the project
111126
using std::size_t;
112127

@@ -140,6 +155,7 @@ using std::size_t;
140155
#elif defined(__loongarch_lp64)
141156
#define SIMDJSON_IS_LOONGARCH64 1
142157
#elif defined(__PPC64__) || defined(_M_PPC64)
158+
#define SIMDJSON_IS_PPC64 1
143159
#if defined(__ALTIVEC__)
144160
#define SIMDJSON_IS_PPC64_VMX 1
145161
#endif // defined(__ALTIVEC__)
@@ -760,22 +776,22 @@ inline namespace literals {
760776
inline namespace string_view_literals {
761777

762778

763-
constexpr std::string_view operator "" _sv( const char* str, size_t len ) noexcept // (1)
779+
constexpr std::string_view operator ""_sv( const char* str, size_t len ) noexcept // (1)
764780
{
765781
return std::string_view{ str, len };
766782
}
767783

768-
constexpr std::u16string_view operator "" _sv( const char16_t* str, size_t len ) noexcept // (2)
784+
constexpr std::u16string_view operator ""_sv( const char16_t* str, size_t len ) noexcept // (2)
769785
{
770786
return std::u16string_view{ str, len };
771787
}
772788

773-
constexpr std::u32string_view operator "" _sv( const char32_t* str, size_t len ) noexcept // (3)
789+
constexpr std::u32string_view operator ""_sv( const char32_t* str, size_t len ) noexcept // (3)
774790
{
775791
return std::u32string_view{ str, len };
776792
}
777793

778-
constexpr std::wstring_view operator "" _sv( const wchar_t* str, size_t len ) noexcept // (4)
794+
constexpr std::wstring_view operator ""_sv( const wchar_t* str, size_t len ) noexcept // (4)
779795
{
780796
return std::wstring_view{ str, len };
781797
}
@@ -2106,22 +2122,22 @@ nssv_inline_ns namespace string_view_literals {
21062122

21072123
#if nssv_CONFIG_STD_SV_OPERATOR && nssv_HAVE_STD_DEFINED_LITERALS
21082124

2109-
nssv_constexpr nonstd::sv_lite::string_view operator "" sv( const char* str, size_t len ) nssv_noexcept // (1)
2125+
nssv_constexpr nonstd::sv_lite::string_view operator ""sv( const char* str, size_t len ) nssv_noexcept // (1)
21102126
{
21112127
return nonstd::sv_lite::string_view{ str, len };
21122128
}
21132129

2114-
nssv_constexpr nonstd::sv_lite::u16string_view operator "" sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
2130+
nssv_constexpr nonstd::sv_lite::u16string_view operator ""sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
21152131
{
21162132
return nonstd::sv_lite::u16string_view{ str, len };
21172133
}
21182134

2119-
nssv_constexpr nonstd::sv_lite::u32string_view operator "" sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
2135+
nssv_constexpr nonstd::sv_lite::u32string_view operator ""sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
21202136
{
21212137
return nonstd::sv_lite::u32string_view{ str, len };
21222138
}
21232139

2124-
nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
2140+
nssv_constexpr nonstd::sv_lite::wstring_view operator ""sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
21252141
{
21262142
return nonstd::sv_lite::wstring_view{ str, len };
21272143
}
@@ -2130,22 +2146,22 @@ nssv_constexpr nonstd::sv_lite::wstring_view operator "" sv( const wchar_t* str,
21302146

21312147
#if nssv_CONFIG_USR_SV_OPERATOR
21322148

2133-
nssv_constexpr nonstd::sv_lite::string_view operator "" _sv( const char* str, size_t len ) nssv_noexcept // (1)
2149+
nssv_constexpr nonstd::sv_lite::string_view operator ""_sv( const char* str, size_t len ) nssv_noexcept // (1)
21342150
{
21352151
return nonstd::sv_lite::string_view{ str, len };
21362152
}
21372153

2138-
nssv_constexpr nonstd::sv_lite::u16string_view operator "" _sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
2154+
nssv_constexpr nonstd::sv_lite::u16string_view operator ""_sv( const char16_t* str, size_t len ) nssv_noexcept // (2)
21392155
{
21402156
return nonstd::sv_lite::u16string_view{ str, len };
21412157
}
21422158

2143-
nssv_constexpr nonstd::sv_lite::u32string_view operator "" _sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
2159+
nssv_constexpr nonstd::sv_lite::u32string_view operator ""_sv( const char32_t* str, size_t len ) nssv_noexcept // (3)
21442160
{
21452161
return nonstd::sv_lite::u32string_view{ str, len };
21462162
}
21472163

2148-
nssv_constexpr nonstd::sv_lite::wstring_view operator "" _sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
2164+
nssv_constexpr nonstd::sv_lite::wstring_view operator ""_sv( const wchar_t* str, size_t len ) nssv_noexcept // (4)
21492165
{
21502166
return nonstd::sv_lite::wstring_view{ str, len };
21512167
}
@@ -2415,7 +2431,7 @@ enum error_code {
24152431
SUCCESS = 0, ///< No error
24162432
CAPACITY, ///< This parser can't support a document that big
24172433
MEMALLOC, ///< Error allocating memory, most likely out of memory
2418-
TAPE_ERROR, ///< Something went wrong, this is a generic error
2434+
TAPE_ERROR, ///< Something went wrong, this is a generic error. Fatal/unrecoverable error.
24192435
DEPTH_ERROR, ///< Your document exceeds the user-specified depth limitation
24202436
STRING_ERROR, ///< Problem while parsing a string
24212437
T_ATOM_ERROR, ///< Problem while parsing an atom starting with the letter 't'
@@ -2440,13 +2456,21 @@ enum error_code {
24402456
PARSER_IN_USE, ///< parser is already in use.
24412457
OUT_OF_ORDER_ITERATION, ///< tried to iterate an array or object out of order (checked when SIMDJSON_DEVELOPMENT_CHECKS=1)
24422458
INSUFFICIENT_PADDING, ///< The JSON doesn't have enough padding for simdjson to safely parse it.
2443-
INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early.
2459+
INCOMPLETE_ARRAY_OR_OBJECT, ///< The document ends early. Fatal/unrecoverable error.
24442460
SCALAR_DOCUMENT_AS_VALUE, ///< A scalar document is treated as a value.
24452461
OUT_OF_BOUNDS, ///< Attempted to access location outside of document.
24462462
TRAILING_CONTENT, ///< Unexpected trailing content in the JSON input
24472463
NUM_ERROR_CODES
24482464
};
24492465

2466+
/**
2467+
* Some errors are fatal and invalidate the document. This function returns true if the
2468+
* error is fatal. It returns true for TAPE_ERROR and INCOMPLETE_ARRAY_OR_OBJECT.
2469+
* Once a fatal error is encountered, the on-demand document is no longer valid and
2470+
* processing should stop.
2471+
*/
2472+
inline bool is_fatal(error_code error) noexcept;
2473+
24502474
/**
24512475
* It is the convention throughout the code that the macro SIMDJSON_DEVELOPMENT_CHECKS determines whether
24522476
* we check for OUT_OF_ORDER_ITERATION. The logic behind it is that these errors only occur when the code
@@ -2749,14 +2773,30 @@ SIMDJSON_IMPL_CONCEPT(op_append, operator+=)
27492773
#undef SIMDJSON_IMPL_CONCEPT
27502774
} // namespace details
27512775

2776+
2777+
template <typename T>
2778+
concept string_view_like = std::is_convertible_v<T, std::string_view> &&
2779+
!std::is_convertible_v<T, const char*>;
2780+
2781+
template<typename T>
2782+
concept constructible_from_string_view = std::is_constructible_v<T, std::string_view>
2783+
&& !std::is_same_v<T, std::string_view>
2784+
&& std::is_default_constructible_v<T>;
2785+
2786+
template<typename M>
2787+
concept string_view_keyed_map = string_view_like<typename M::key_type>
2788+
&& requires(std::remove_cvref_t<M>& m, typename M::key_type sv, typename M::mapped_type v) {
2789+
{ m.emplace(sv, v) } -> std::same_as<std::pair<typename M::iterator, bool>>;
2790+
};
2791+
27522792
/// Check if T is a container that we can append to, including:
27532793
/// std::vector, std::deque, std::list, std::string, ...
27542794
template <typename T>
27552795
concept appendable_containers =
2756-
details::supports_emplace_back<T> || details::supports_emplace<T> ||
2796+
(details::supports_emplace_back<T> || details::supports_emplace<T> ||
27572797
details::supports_push_back<T> || details::supports_push<T> ||
27582798
details::supports_add<T> || details::supports_append<T> ||
2759-
details::supports_insert<T>;
2799+
details::supports_insert<T>) && !string_view_keyed_map<T>;
27602800

27612801
/// Insert into the container however possible
27622802
template <appendable_containers T, typename... Args>
@@ -2824,6 +2864,8 @@ concept optional_type = requires(std::remove_cvref_t<T> obj) {
28242864
{ static_cast<bool>(obj) } -> std::same_as<bool>; // convertible to bool
28252865
};
28262866

2867+
2868+
28272869
} // namespace concepts
28282870
} // namespace simdjson
28292871
#endif // SIMDJSON_SUPPORTS_DESERIALIZATION
@@ -4495,6 +4537,11 @@ extern SIMDJSON_DLLIMPORTEXPORT const uint32_t digit_to_val32[886];
44954537
#include <iostream>
44964538

44974539
namespace simdjson {
4540+
4541+
inline bool is_fatal(error_code error) noexcept {
4542+
return error == TAPE_ERROR || error == INCOMPLETE_ARRAY_OR_OBJECT;
4543+
}
4544+
44984545
namespace internal {
44994546
// We store the error code so we can validate the error message is associated with the right code
45004547
struct error_code_info {
@@ -4680,7 +4727,7 @@ namespace internal {
46804727
{ SUCCESS, "SUCCESS: No error" },
46814728
{ CAPACITY, "CAPACITY: This parser can't support a document that big" },
46824729
{ MEMALLOC, "MEMALLOC: Error allocating memory, we're most likely out of memory" },
4683-
{ TAPE_ERROR, "TAPE_ERROR: The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc." },
4730+
{ TAPE_ERROR, "TAPE_ERROR: The JSON document has an improper structure: missing or superfluous commas, braces, missing keys, etc. This is a fatal and unrecoverable error." },
46844731
{ DEPTH_ERROR, "DEPTH_ERROR: The JSON document was too deep (too many nested objects and arrays)" },
46854732
{ STRING_ERROR, "STRING_ERROR: Problem while parsing a string" },
46864733
{ T_ATOM_ERROR, "T_ATOM_ERROR: Problem while parsing an atom starting with the letter 't'" },
@@ -4705,7 +4752,7 @@ namespace internal {
47054752
{ PARSER_IN_USE, "PARSER_IN_USE: Cannot parse a new document while a document is still in use." },
47064753
{ OUT_OF_ORDER_ITERATION, "OUT_OF_ORDER_ITERATION: Objects and arrays can only be iterated when they are first encountered." },
47074754
{ INSUFFICIENT_PADDING, "INSUFFICIENT_PADDING: simdjson requires the input JSON string to have at least SIMDJSON_PADDING extra bytes allocated, beyond the string's length. Consider using the simdjson::padded_string class if needed." },
4708-
{ INCOMPLETE_ARRAY_OR_OBJECT, "INCOMPLETE_ARRAY_OR_OBJECT: JSON document ended early in the middle of an object or array." },
4755+
{ INCOMPLETE_ARRAY_OR_OBJECT, "INCOMPLETE_ARRAY_OR_OBJECT: JSON document ended early in the middle of an object or array. This is a fatal and unrecoverable error." },
47094756
{ SCALAR_DOCUMENT_AS_VALUE, "SCALAR_DOCUMENT_AS_VALUE: A JSON document made of a scalar (number, Boolean, null or string) is treated as a value. Use get_bool(), get_double(), etc. on the document instead. "},
47104757
{ OUT_OF_BOUNDS, "OUT_OF_BOUNDS: Attempt to access location outside of document."},
47114758
{ TRAILING_CONTENT, "TRAILING_CONTENT: Unexpected trailing content in the JSON input."}
@@ -6771,7 +6818,7 @@ class document {
67716818
* The memory allocation is strict: you
67726819
* can use this function to increase
67736820
* or lower the amount of allocated memory.
6774-
* Passsing zero clears the memory.
6821+
* Passing zero clears the memory.
67756822
*/
67766823
error_code allocate(size_t len) noexcept;
67776824
/** @private Capacity in bytes, in terms
@@ -9169,7 +9216,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
91699216
// floor(log(5**power)/log(2))
91709217
//
91719218
// Note that this is not magic: 152170/(1<<16) is
9172-
// approximatively equal to log(5)/log(2).
9219+
// approximately equal to log(5)/log(2).
91739220
// The 1<<16 value is a power of two; we could use a
91749221
// larger power of 2 if we wanted to.
91759222
//
@@ -15529,7 +15576,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
1552915576
// floor(log(5**power)/log(2))
1553015577
//
1553115578
// Note that this is not magic: 152170/(1<<16) is
15532-
// approximatively equal to log(5)/log(2).
15579+
// approximately equal to log(5)/log(2).
1553315580
// The 1<<16 value is a power of two; we could use a
1553415581
// larger power of 2 if we wanted to.
1553515582
//
@@ -20797,14 +20844,18 @@ namespace simd {
2079720844

2079820845
// Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset).
2079920846
// Passing a 0 value for mask would be equivalent to writing out every byte to output.
20800-
// Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes
20847+
// Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes
2080120848
// get written.
2080220849
// Design consideration: it seems like a function with the
2080320850
// signature simd8<L> compress(uint32_t mask) would be
2080420851
// sensible, but the AVX ISA makes this kind of approach difficult.
2080520852
template<typename L>
2080620853
simdjson_inline void compress(uint64_t mask, L * output) const {
20807-
_mm512_mask_compressstoreu_epi8 (output,~mask,*this);
20854+
// we deliberately avoid _mm512_mask_compressstoreu_epi8 for portability
20855+
// (AMD Zen4 has terrible performance with it, it is effectively broken)
20856+
// _mm512_mask_compressstoreu_epi8 (output,~mask,*this);
20857+
__m512i compressed = _mm512_maskz_compress_epi8(~mask, *this);
20858+
_mm512_storeu_si512(output, compressed); // could use a mask
2080820859
}
2080920860

2081020861
template<typename L>
@@ -21749,7 +21800,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
2174921800
// floor(log(5**power)/log(2))
2175021801
//
2175121802
// Note that this is not magic: 152170/(1<<16) is
21752-
// approximatively equal to log(5)/log(2).
21803+
// approximately equal to log(5)/log(2).
2175321804
// The 1<<16 value is a power of two; we could use a
2175421805
// larger power of 2 if we wanted to.
2175521806
//
@@ -23427,14 +23478,18 @@ namespace simd {
2342723478

2342823479
// Copies to 'output' all bytes corresponding to a 0 in the mask (interpreted as a bitset).
2342923480
// Passing a 0 value for mask would be equivalent to writing out every byte to output.
23430-
// Only the first 32 - count_ones(mask) bytes of the result are significant but 32 bytes
23481+
// Only the first 64 - count_ones(mask) bytes of the result are significant but 64 bytes
2343123482
// get written.
2343223483
// Design consideration: it seems like a function with the
2343323484
// signature simd8<L> compress(uint32_t mask) would be
2343423485
// sensible, but the AVX ISA makes this kind of approach difficult.
2343523486
template<typename L>
2343623487
simdjson_inline void compress(uint64_t mask, L * output) const {
23437-
_mm512_mask_compressstoreu_epi8 (output,~mask,*this);
23488+
// we deliberately avoid _mm512_mask_compressstoreu_epi8 for portability
23489+
// (AMD Zen4 has terrible performance with it, it is effectively broken)
23490+
// _mm512_mask_compressstoreu_epi8 (output,~mask,*this);
23491+
__m512i compressed = _mm512_maskz_compress_epi8(~mask, *this);
23492+
_mm512_storeu_si512(output, compressed); // could use a mask
2343823493
}
2343923494

2344023495
template<typename L>
@@ -28125,7 +28180,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
2812528180
// floor(log(5**power)/log(2))
2812628181
//
2812728182
// Note that this is not magic: 152170/(1<<16) is
28128-
// approximatively equal to log(5)/log(2).
28183+
// approximately equal to log(5)/log(2).
2812928184
// The 1<<16 value is a power of two; we could use a
2813028185
// larger power of 2 if we wanted to.
2813128186
//
@@ -34867,7 +34922,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
3486734922
// floor(log(5**power)/log(2))
3486834923
//
3486934924
// Note that this is not magic: 152170/(1<<16) is
34870-
// approximatively equal to log(5)/log(2).
34925+
// approximately equal to log(5)/log(2).
3487134926
// The 1<<16 value is a power of two; we could use a
3487234927
// larger power of 2 if we wanted to.
3487334928
//
@@ -41433,7 +41488,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
4143341488
// floor(log(5**power)/log(2))
4143441489
//
4143541490
// Note that this is not magic: 152170/(1<<16) is
41436-
// approximatively equal to log(5)/log(2).
41491+
// approximately equal to log(5)/log(2).
4143741492
// The 1<<16 value is a power of two; we could use a
4143841493
// larger power of 2 if we wanted to.
4143941494
//
@@ -47444,7 +47499,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
4744447499
// floor(log(5**power)/log(2))
4744547500
//
4744647501
// Note that this is not magic: 152170/(1<<16) is
47447-
// approximatively equal to log(5)/log(2).
47502+
// approximately equal to log(5)/log(2).
4744847503
// The 1<<16 value is a power of two; we could use a
4744947504
// larger power of 2 if we wanted to.
4745047505
//
@@ -53054,7 +53109,7 @@ simdjson_inline bool compute_float_64(int64_t power, uint64_t i, bool negative,
5305453109
// floor(log(5**power)/log(2))
5305553110
//
5305653111
// Note that this is not magic: 152170/(1<<16) is
53057-
// approximatively equal to log(5)/log(2).
53112+
// approximately equal to log(5)/log(2).
5305853113
// The 1<<16 value is a power of two; we could use a
5305953114
// larger power of 2 if we wanted to.
5306053115
//

0 commit comments

Comments
 (0)