Skip to content

Commit 5ac64d1

Browse files
committed
Fix: Avoid tagging if Arm or LA57
1 parent 72dfc31 commit 5ac64d1

File tree

1 file changed

+58
-59
lines changed

1 file changed

+58
-59
lines changed

less_slow.cpp

Lines changed: 58 additions & 59 deletions
Original file line numberDiff line numberDiff line change
@@ -2715,7 +2715,7 @@ static constexpr std::string_view malicious_json = R"({
27152715

27162716
static constexpr std::string_view packets_json[3] = {valid_json, invalid_json, malicious_json};
27172717

2718-
struct limited_arena_t {
2718+
struct arena_t {
27192719
static constexpr std::size_t capacity_k = 4096;
27202720
alignas(64) std::byte buffer[capacity_k];
27212721

@@ -2731,8 +2731,8 @@ struct limited_arena_t {
27312731
* @brief Allocates a new chunk of `size` bytes from the arena.
27322732
* @return The new pointer or `nullptr` if OOM.
27332733
*/
2734-
inline std::byte *allocate_from_arena(limited_arena_t &arena, std::size_t size) noexcept {
2735-
if (arena.total_allocated + size > limited_arena_t::capacity_k) return nullptr; // Not enough space
2734+
inline std::byte *allocate_from_arena(arena_t &arena, std::size_t size) noexcept {
2735+
if (arena.total_allocated + size > arena_t::capacity_k) return nullptr; // Not enough space
27362736
std::byte *ptr = arena.buffer + arena.total_allocated;
27372737
arena.total_allocated += size;
27382738
arena.unique_allocations++;
@@ -2743,10 +2743,10 @@ inline std::byte *allocate_from_arena(limited_arena_t &arena, std::size_t size)
27432743
* @brief Deallocates a chunk of memory previously allocated from the arena.
27442744
* This implementation does not "reuse" partial free space unless everything is freed.
27452745
*/
2746-
inline void deallocate_from_arena(limited_arena_t &arena, std::byte *ptr, std::size_t size) noexcept {
2746+
inline void deallocate_from_arena(arena_t &arena, std::byte *ptr, std::size_t size) noexcept {
27472747
// Check if ptr is within the arena
27482748
std::byte *start = arena.buffer;
2749-
std::byte *end = arena.buffer + limited_arena_t::capacity_k;
2749+
std::byte *end = arena.buffer + arena_t::capacity_k;
27502750
if (ptr < start || ptr >= end) return; // Invalid pointer => no-op
27512751
arena.total_reclaimed += size;
27522752
// Reset completely if fully reclaimed
@@ -2761,7 +2761,7 @@ inline void deallocate_from_arena(limited_arena_t &arena, std::byte *ptr, std::s
27612761
* @return The new pointer or `nullptr` if OOM.
27622762
*/
27632763
inline std::byte *reallocate_from_arena( //
2764-
limited_arena_t &arena, std::byte *ptr, std::size_t old_size, std::size_t new_size) noexcept {
2764+
arena_t &arena, std::byte *ptr, std::size_t old_size, std::size_t new_size) noexcept {
27652765
if (!ptr) return allocate_from_arena(arena, new_size); // A fresh allocation
27662766

27672767
// This is effectively a `free` operation
@@ -2778,7 +2778,7 @@ inline std::byte *reallocate_from_arena( //
27782778
// Expand in-place if there's enough room
27792779
std::size_t offset = static_cast<std::size_t>(ptr - arena.buffer);
27802780
std::size_t required_space = offset + new_size;
2781-
if (required_space <= limited_arena_t::capacity_k) {
2781+
if (required_space <= arena_t::capacity_k) {
27822782
// We can grow (or shrink) in place
27832783
arena.total_allocated = required_space;
27842784
return ptr;
@@ -2840,7 +2840,7 @@ bool contains_xss_in_yyjson(yyjson_val *node) noexcept {
28402840
*
28412841
* @see YYJSON allocators: https://ibireme.github.io/yyjson/doc/doxygen/html/structyyjson__alc.html
28422842
*/
2843-
yyjson_alc yyjson_wrap_arena_prepending(limited_arena_t &arena) noexcept {
2843+
yyjson_alc yyjson_wrap_arena_prepend(arena_t &arena) noexcept {
28442844
yyjson_alc alc;
28452845
alc.ctx = &arena;
28462846

@@ -2850,7 +2850,7 @@ yyjson_alc yyjson_wrap_arena_prepending(limited_arena_t &arena) noexcept {
28502850
using alc_size_t = std::uint16_t;
28512851
alc.malloc = +[](void *ctx, size_t size_native) noexcept -> void * {
28522852
alc_size_t size = static_cast<alc_size_t>(size_native);
2853-
std::byte *result = allocate_from_arena(*static_cast<limited_arena_t *>(ctx), size + sizeof(alc_size_t));
2853+
std::byte *result = allocate_from_arena(*static_cast<arena_t *>(ctx), size + sizeof(alc_size_t));
28542854
if (!result) return nullptr;
28552855
std::memcpy(result, &size, sizeof(alc_size_t));
28562856
return (void *)(result + sizeof(alc_size_t));
@@ -2859,8 +2859,8 @@ yyjson_alc yyjson_wrap_arena_prepending(limited_arena_t &arena) noexcept {
28592859
alc_size_t old_size = static_cast<alc_size_t>(old_size_native);
28602860
alc_size_t size = static_cast<alc_size_t>(size_native);
28612861
std::byte *start = static_cast<std::byte *>(ptr) - sizeof(alc_size_t);
2862-
std::byte *new_start = reallocate_from_arena( //
2863-
*static_cast<limited_arena_t *>(ctx), start, //
2862+
std::byte *new_start = reallocate_from_arena( //
2863+
*static_cast<arena_t *>(ctx), start, //
28642864
old_size + sizeof(alc_size_t), size + sizeof(alc_size_t));
28652865
if (!new_start) return nullptr;
28662866
// Don't forget to increment the size if the pointer was reallocated
@@ -2871,13 +2871,13 @@ yyjson_alc yyjson_wrap_arena_prepending(limited_arena_t &arena) noexcept {
28712871
std::byte *start = static_cast<std::byte *>(ptr) - sizeof(alc_size_t);
28722872
alc_size_t size;
28732873
std::memcpy(&size, start, sizeof(alc_size_t));
2874-
deallocate_from_arena(*static_cast<limited_arena_t *>(ctx), start, size + sizeof(alc_size_t));
2874+
deallocate_from_arena(*static_cast<arena_t *>(ctx), start, size + sizeof(alc_size_t));
28752875
};
28762876
return alc;
28772877
}
28782878

28792879
/**
2880-
* There is also an even cooler way to allocate memory! @b Pointer-tagging! 🏷️
2880+
* There is also an even cooler way to allocate memory! @b Pointer-tagging! 🏷️
28812881
* 64-bit address space is a lie! Most systems only use 48 bits for addresses,
28822882
* some even less. So, we can use the remaining bits to store metadata about
28832883
* the allocated block, like its size, or the arena it came from.
@@ -2908,7 +2908,7 @@ inline std::pair<void *, std::uint16_t> pointer_untag(void *ptr) noexcept {
29082908
return {reinterpret_cast<void *>(addr), size};
29092909
}
29102910

2911-
yyjson_alc yyjson_wrap_arena_tagging(limited_arena_t &arena) noexcept {
2911+
yyjson_alc yyjson_wrap_arena_tag(arena_t &arena) noexcept {
29122912
yyjson_alc alc;
29132913
alc.ctx = &arena;
29142914

@@ -2918,28 +2918,28 @@ yyjson_alc yyjson_wrap_arena_tagging(limited_arena_t &arena) noexcept {
29182918
using alc_size_t = std::uint16_t;
29192919
alc.malloc = +[](void *ctx, size_t size_native) noexcept -> void * {
29202920
alc_size_t size = static_cast<alc_size_t>(size_native);
2921-
std::byte *result = allocate_from_arena(*static_cast<limited_arena_t *>(ctx), size);
2921+
std::byte *result = allocate_from_arena(*static_cast<arena_t *>(ctx), size);
29222922
if (!result) return nullptr;
29232923
return pointer_tag(result, size);
29242924
};
29252925

29262926
alc.realloc = +[](void *ctx, void *ptr, size_t old_size_native, size_t size_native) noexcept -> void * {
29272927
alc_size_t size = static_cast<alc_size_t>(size_native);
29282928
auto [real_ptr, _] = pointer_untag(ptr);
2929-
std::byte *new_ptr = reallocate_from_arena(*static_cast<limited_arena_t *>(ctx),
2930-
static_cast<std::byte *>(real_ptr), old_size_native, size_native);
2929+
std::byte *new_ptr = reallocate_from_arena(*static_cast<arena_t *>(ctx), static_cast<std::byte *>(real_ptr),
2930+
old_size_native, size_native);
29312931
if (!new_ptr) return nullptr;
29322932
return pointer_tag(new_ptr, size);
29332933
};
29342934

29352935
alc.free = +[](void *ctx, void *ptr) noexcept -> void {
29362936
auto [real_ptr, size] = pointer_untag(ptr);
2937-
deallocate_from_arena(*static_cast<limited_arena_t *>(ctx), static_cast<std::byte *>(real_ptr), size);
2937+
deallocate_from_arena(*static_cast<arena_t *>(ctx), static_cast<std::byte *>(real_ptr), size);
29382938
};
29392939
return alc;
29402940
}
29412941

2942-
yyjson_alc yyjson_wrapp_malloc(limited_arena_t &) noexcept {
2942+
yyjson_alc yyjson_wrapp_malloc(arena_t &) noexcept {
29432943
yyjson_alc alc;
29442944
alc.ctx = NULL;
29452945
alc.malloc = +[](void *, size_t size) noexcept -> void * { return malloc(size); };
@@ -2948,7 +2948,7 @@ yyjson_alc yyjson_wrapp_malloc(limited_arena_t &) noexcept {
29482948
return alc;
29492949
}
29502950

2951-
typedef yyjson_alc (*yyjson_alc_wrapper)(limited_arena_t &);
2951+
typedef yyjson_alc (*yyjson_alc_wrapper)(arena_t &);
29522952

29532953
static void json_yyjson(bm::State &state, yyjson_alc_wrapper alc_wrapper = yyjson_wrapp_malloc) {
29542954

@@ -2957,7 +2957,7 @@ static void json_yyjson(bm::State &state, yyjson_alc_wrapper alc_wrapper = yyjso
29572957
// char yyjson_buffer[4096];
29582958
// yyjson_alc_pool_init(&alc, yyjson_buffer, sizeof(yyjson_buffer));
29592959
//
2960-
using arena_t = limited_arena_t;
2960+
// NOTE(review): the rename left `using arena_t = arena_t;` here — a redundant self-alias
// (it was `using arena_t = limited_arena_t;` before); drop the alias entirely, since the
// `arena_t arena;` declaration below can use the type directly.
29612961
arena_t arena;
29622962

29632963
// Repeat the checks many times
@@ -2988,28 +2988,27 @@ static void json_yyjson(bm::State &state, yyjson_alc_wrapper alc_wrapper = yyjso
29882988
bm::Counter(peak_memory_usage * 1.0 / peak_memory_calls, bm::Counter::kAvgThreads);
29892989
}
29902990

2991-
BENCHMARK_CAPTURE(json_yyjson, malloc, yyjson_wrapp_malloc) //
2992-
->MinTime(10)
2993-
->Name("json_yyjson<malloc>");
2994-
BENCHMARK_CAPTURE(json_yyjson, prepending, yyjson_wrap_arena_prepending)
2995-
->MinTime(10)
2996-
->Name("json_yyjson<limited_arena, prepending>");
2997-
BENCHMARK_CAPTURE(json_yyjson, tagging, yyjson_wrap_arena_tagging)
2998-
->MinTime(10)
2999-
->Name("json_yyjson<tagging_arena, tagging>");
3000-
2991+
BENCHMARK_CAPTURE(json_yyjson, malloc, yyjson_wrapp_malloc)->MinTime(10)->Name("json_yyjson<malloc>");
30012992
BENCHMARK_CAPTURE(json_yyjson, malloc, yyjson_wrapp_malloc)
30022993
->MinTime(10)
30032994
->Name("json_yyjson<malloc>")
30042995
->Threads(physical_cores());
3005-
BENCHMARK_CAPTURE(json_yyjson, prepending, yyjson_wrap_arena_prepending)
2996+
2997+
BENCHMARK_CAPTURE(json_yyjson, prepend, yyjson_wrap_arena_prepend)->MinTime(10)->Name("json_yyjson<arena, prepend>");
2998+
BENCHMARK_CAPTURE(json_yyjson, prepend, yyjson_wrap_arena_prepend)
30062999
->MinTime(10)
3007-
->Name("json_yyjson<limited_arena, prepending>")
3000+
->Name("json_yyjson<arena, prepend>")
30083001
->Threads(physical_cores());
3009-
BENCHMARK_CAPTURE(json_yyjson, tagging, yyjson_wrap_arena_tagging)
3002+
3003+
#if defined(__x86_64__) || defined(__i386__) // On Arm checking for support is much more complex
3004+
#if !defined(__LA57__) // On x86-64, 5-level paging (LA57) is enabled by the kernel at runtime; `__LA57__` is NOT a compiler-predefined macro and must come from the build system — TODO confirm it is defined when targeting LA57 hosts
3005+
BENCHMARK_CAPTURE(json_yyjson, tag, yyjson_wrap_arena_tag)->MinTime(10)->Name("json_yyjson<arena, tag>");
3006+
BENCHMARK_CAPTURE(json_yyjson, tag, yyjson_wrap_arena_tag)
30103007
->MinTime(10)
3011-
->Name("json_yyjson<tagging_arena, tagging>")
3008+
->Name("json_yyjson<arena, tag>")
30123009
->Threads(physical_cores());
3010+
#endif // !defined(__LA57__)
3011+
#endif // defined(__x86_64__) || defined(__i386__)
30133012

30143013
/**
30153014
* The `nlohmann::json` library is designed to be simple and easy to use, but it's
@@ -3051,7 +3050,7 @@ using json_with_alloc = nlohmann::basic_json< //
30513050

30523051
/**
30533052
* The `allocate_from_arena` and `deallocate_from_arena` are fairly elegant and simple.
3054-
* But we have no way of supplying our `limited_arena_t` instance to the `nlohmann::json`
3053+
* But we have no way of supplying our `arena_t` instance to the `nlohmann::json`
30553054
* library and it has no mechanism internally to propagate the allocator state to the nested
30563055
* containers:
30573056
*
@@ -3069,35 +3068,35 @@ using json_with_alloc = nlohmann::basic_json< //
30693068
* which is an immediate @b code-smell, while with `yyjson` we can pass a context object down!
30703069
*/
30713070

3072-
thread_local limited_arena_t limited_arena;
3071+
thread_local arena_t thread_local_arena;
30733072

30743073
template <typename value_type_>
3075-
struct limited_allocator {
3074+
struct arena_allocator {
30763075
using value_type = value_type_;
30773076

3078-
limited_allocator() noexcept = default;
3077+
arena_allocator() noexcept = default;
30793078

30803079
template <typename other_type_>
3081-
limited_allocator(limited_allocator<other_type_> const &) noexcept {}
3080+
arena_allocator(arena_allocator<other_type_> const &) noexcept {}
30823081

30833082
value_type *allocate(std::size_t n) noexcept(false) {
3084-
if (auto ptr = allocate_from_arena(limited_arena, n * sizeof(value_type)); ptr)
3083+
if (auto ptr = allocate_from_arena(thread_local_arena, n * sizeof(value_type)); ptr)
30853084
return reinterpret_cast<value_type *>(ptr);
30863085
else
30873086
throw std::bad_alloc();
30883087
}
30893088

30903089
void deallocate(value_type *ptr, std::size_t n) noexcept {
3091-
deallocate_from_arena(limited_arena, reinterpret_cast<std::byte *>(ptr), n * sizeof(value_type));
3090+
deallocate_from_arena(thread_local_arena, reinterpret_cast<std::byte *>(ptr), n * sizeof(value_type));
30923091
}
30933092

30943093
// Rebind mechanism and comparators are for compatibility with STL containers
30953094
template <typename other_type_>
30963095
struct rebind {
3097-
using other = limited_allocator<other_type_>;
3096+
using other = arena_allocator<other_type_>;
30983097
};
3099-
bool operator==(limited_allocator const &) const noexcept { return true; }
3100-
bool operator!=(limited_allocator const &) const noexcept { return false; }
3098+
bool operator==(arena_allocator const &) const noexcept { return true; }
3099+
bool operator!=(arena_allocator const &) const noexcept { return false; }
31013100
};
31023101

31033102
template <typename json_type_>
@@ -3121,7 +3120,7 @@ bool contains_xss_nlohmann(json_type_ const &j) noexcept {
31213120
}
31223121

31233122
using default_json = json_with_alloc<std::allocator>;
3124-
using fixed_buffer_json = json_with_alloc<limited_allocator>;
3123+
using arena_json = json_with_alloc<arena_allocator>;
31253124

31263125
enum class exception_handling_t { throw_k, noexcept_k };
31273126

@@ -3157,8 +3156,8 @@ static void json_nlohmann(bm::State &state) {
31573156
if (!json.is_discarded()) bm::DoNotOptimize(contains_xss_nlohmann(json));
31583157
}
31593158
if constexpr (!std::is_same_v<json_type_, default_json>) {
3160-
peak_memory_usage = std::max(peak_memory_usage, limited_arena.total_allocated);
3161-
peak_memory_calls = std::max(peak_memory_calls, limited_arena.unique_allocations);
3159+
peak_memory_usage = std::max(peak_memory_usage, thread_local_arena.total_allocated);
3160+
peak_memory_calls = std::max(peak_memory_calls, thread_local_arena.unique_allocations);
31623161
}
31633162
}
31643163
state.SetBytesProcessed(bytes_processed);
@@ -3170,30 +3169,30 @@ static void json_nlohmann(bm::State &state) {
31703169
BENCHMARK(json_nlohmann<default_json, exception_handling_t::throw_k>)
31713170
->MinTime(10)
31723171
->Name("json_nlohmann<std::allocator, throw>");
3173-
BENCHMARK(json_nlohmann<fixed_buffer_json, exception_handling_t::throw_k>)
3172+
BENCHMARK(json_nlohmann<arena_json, exception_handling_t::throw_k>)
31743173
->MinTime(10)
3175-
->Name("json_nlohmann<limited_arena, throw>");
3174+
->Name("json_nlohmann<arena_allocator, throw>");
31763175
BENCHMARK(json_nlohmann<default_json, exception_handling_t::noexcept_k>)
31773176
->MinTime(10)
31783177
->Name("json_nlohmann<std::allocator, noexcept>");
3179-
BENCHMARK(json_nlohmann<fixed_buffer_json, exception_handling_t::noexcept_k>)
3178+
BENCHMARK(json_nlohmann<arena_json, exception_handling_t::noexcept_k>)
31803179
->MinTime(10)
3181-
->Name("json_nlohmann<limited_arena, noexcept>");
3180+
->Name("json_nlohmann<arena_allocator, noexcept>");
31823181
BENCHMARK(json_nlohmann<default_json, exception_handling_t::throw_k>)
31833182
->MinTime(10)
31843183
->Name("json_nlohmann<std::allocator, throw>")
31853184
->Threads(physical_cores());
3186-
BENCHMARK(json_nlohmann<fixed_buffer_json, exception_handling_t::throw_k>)
3185+
BENCHMARK(json_nlohmann<arena_json, exception_handling_t::throw_k>)
31873186
->MinTime(10)
3188-
->Name("json_nlohmann<limited_arena, throw>")
3187+
->Name("json_nlohmann<arena_allocator, throw>")
31893188
->Threads(physical_cores());
31903189
BENCHMARK(json_nlohmann<default_json, exception_handling_t::noexcept_k>)
31913190
->MinTime(10)
31923191
->Name("json_nlohmann<std::allocator, noexcept>")
31933192
->Threads(physical_cores());
3194-
BENCHMARK(json_nlohmann<fixed_buffer_json, exception_handling_t::noexcept_k>)
3193+
BENCHMARK(json_nlohmann<arena_json, exception_handling_t::noexcept_k>)
31953194
->MinTime(10)
3196-
->Name("json_nlohmann<limited_arena, noexcept>")
3195+
->Name("json_nlohmann<arena_allocator, noexcept>")
31973196
->Threads(physical_cores());
31983197

31993198
/**
@@ -3202,11 +3201,11 @@ BENCHMARK(json_nlohmann<fixed_buffer_json, exception_handling_t::noexcept_k>)
32023201
* cores, are as follows:
32033202
*
32043203
* - `json_yyjson<malloc>`: @b 359 ns @b 369 ns
3205-
* - `json_yyjson<limited_arena>`: @b 326 ns @b 326 ns
3204+
* - `json_yyjson<arena>`: @b 326 ns @b 326 ns
32063205
* - `json_nlohmann<std::allocator, throw>`: @b 6'440 ns @b 11'821 ns
3207-
* - `json_nlohmann<limited_arena, throw>`: @b 6'041 ns @b 11'601 ns
3206+
* - `json_nlohmann<arena_allocator, throw>`: @b 6'041 ns @b 11'601 ns
32083207
* - `json_nlohmann<std::allocator, noexcept>`: @b 4'741 ns @b 11'512 ns
3209-
* - `json_nlohmann<limited_arena, noexcept>`: @b 4'316 ns @b 12'209 ns
3208+
* - `json_nlohmann<arena_allocator, noexcept>`: @b 4'316 ns @b 12'209 ns
32103209
*
32113210
* The reason, why `yyjson` numbers are less affected by the allocator change,
32123211
* is because it doesn't need many dynamic allocations. It manages a linked list

0 commit comments

Comments
 (0)