@@ -2840,16 +2840,7 @@ bool contains_xss_in_yyjson(yyjson_val *node) noexcept {
  *
  * @see YYJSON allocators: https://ibireme.github.io/yyjson/doc/doxygen/html/structyyjson__alc.html
  */
-template <bool use_arena>
-static void json_yyjson(bm::State &state) {
-
-    // Wrap our custom arena into a `yyjson_alc` structure, alternatively we could use:
-    //
-    //      char yyjson_buffer[4096];
-    //      yyjson_alc_pool_init(&alc, yyjson_buffer, sizeof(yyjson_buffer));
-    //
-    using arena_t = limited_arena_t;
-    arena_t arena;
+yyjson_alc yyjson_wrap_arena_prepending(limited_arena_t &arena) noexcept {
     yyjson_alc alc;
     alc.ctx = &arena;
@@ -2868,8 +2859,8 @@ static void json_yyjson(bm::State &state) {
         alc_size_t old_size = static_cast<alc_size_t>(old_size_native);
         alc_size_t size = static_cast<alc_size_t>(size_native);
         std::byte *start = static_cast<std::byte *>(ptr) - sizeof(alc_size_t);
-        std::byte *new_start = reallocate_from_arena( //
-            *static_cast<arena_t *>(ctx), start,      //
+        std::byte *new_start = reallocate_from_arena( //
+            *static_cast<limited_arena_t *>(ctx), start, //
             old_size + sizeof(alc_size_t), size + sizeof(alc_size_t));
         if (!new_start) return nullptr;
         // Don't forget to increment the size if the pointer was reallocated
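The matching `alc.malloc` for this size-prepending scheme sits in the unchanged lines between the two hunks above, so it does not appear in the diff. A minimal sketch of what such a callback can look like, assuming the same `limited_arena_t`, `allocate_from_arena`, and `alc_size_t` names used above (illustrative only, not the commit's exact code):

```cpp
// Illustrative sketch: allocate a few extra bytes, stash the payload size in
// front of the block, and hand the caller a pointer just past that header.
alc.malloc = +[](void *ctx, size_t size_native) noexcept -> void * {
    alc_size_t size = static_cast<alc_size_t>(size_native);
    std::byte *start = allocate_from_arena( //
        *static_cast<limited_arena_t *>(ctx), size + sizeof(alc_size_t));
    if (!start) return nullptr;
    std::memcpy(start, &size, sizeof(alc_size_t)); // header read back by `realloc`/`free`
    return start + sizeof(alc_size_t);             // the caller only ever sees the payload
};
```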
@@ -2880,9 +2871,95 @@ static void json_yyjson(bm::State &state) {
         std::byte *start = static_cast<std::byte *>(ptr) - sizeof(alc_size_t);
         alc_size_t size;
         std::memcpy(&size, start, sizeof(alc_size_t));
-        deallocate_from_arena(*static_cast<arena_t *>(ctx), start, size + sizeof(alc_size_t));
+        deallocate_from_arena(*static_cast<limited_arena_t *>(ctx), start, size + sizeof(alc_size_t));
+    };
+    return alc;
+}
+
+/**
+ * There is also an even cooler way to allocate memory! @b Pointer-tagging! 🏷️
+ * The 64-bit address space is a lie! Most systems only use 48 bits for addresses,
+ * some even less. So we can use the remaining bits to store metadata about
+ * the allocated block, like its size or the arena it came from.
+ *
+ * On x86, for example, calling @b `lscpu` will show:
+ *
+ *      Architecture:      x86_64
+ *      CPU op-mode(s):    32-bit, 64-bit
+ *      Address sizes:     46 bits physical, 48 bits virtual
+ *      Byte Order:        Little Endian
+ *
+ * 48-bit virtual addressing allows mapping up to @b 256-TiB of virtual space.
+ */
+
+constexpr std::uintptr_t pointer_tag_mask_k = 0xFFFF000000000000ull;
+
+inline void *pointer_tag(void *ptr, std::uint16_t size) noexcept {
+    std::uintptr_t addr = reinterpret_cast<std::uintptr_t>(ptr);
+    std::uintptr_t tagged = (addr & ~pointer_tag_mask_k) | (static_cast<std::uintptr_t>(size) << 48);
+    if (addr & (1ull << 47)) tagged |= pointer_tag_mask_k;
+    return reinterpret_cast<void *>(tagged);
+}
+
+inline std::pair<void *, std::uint16_t> pointer_untag(void *ptr) noexcept {
+    std::uintptr_t tagged = reinterpret_cast<std::uintptr_t>(ptr);
+    std::uint16_t size = static_cast<std::uint16_t>(tagged >> 48);
+    std::uintptr_t addr = tagged & ~pointer_tag_mask_k;
+    return {reinterpret_cast<void *>(addr), size};
+}
+
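A quick round-trip check for the two helpers above, assuming a typical user-space heap pointer, i.e. one with bit 47 clear so the canonical form survives untagging (illustrative sketch, not part of the commit):

```cpp
#include <cassert>
#include <cstdlib>

int main() {
    void *raw = std::malloc(64);                   // ordinary user-space pointer
    void *tagged = pointer_tag(raw, 64);           // stash the size in bits 48..63
    auto [untagged, size] = pointer_untag(tagged); // recover both pieces
    assert(untagged == raw && size == 64);
    std::free(raw);                                // always free the untagged pointer
    return 0;
}
```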
+yyjson_alc yyjson_wrap_arena_tagging(limited_arena_t &arena) noexcept {
+    yyjson_alc alc;
+    alc.ctx = &arena;
+
+    // ? There is a neat trick that allows us to use a lambda as a
+    // ? C-style function pointer by using the unary `+` operator.
+    // ? Assuming our buffer is only 4 KB, a 16-bit unsigned integer is enough...
+    using alc_size_t = std::uint16_t;
+    alc.malloc = +[](void *ctx, size_t size_native) noexcept -> void * {
+        alc_size_t size = static_cast<alc_size_t>(size_native);
+        std::byte *result = allocate_from_arena(*static_cast<limited_arena_t *>(ctx), size);
+        if (!result) return nullptr;
+        return pointer_tag(result, size);
     };
 
+    alc.realloc = +[](void *ctx, void *ptr, size_t old_size_native, size_t size_native) noexcept -> void * {
+        alc_size_t size = static_cast<alc_size_t>(size_native);
+        auto [real_ptr, _] = pointer_untag(ptr);
+        std::byte *new_ptr = reallocate_from_arena(*static_cast<limited_arena_t *>(ctx),
+                                                   static_cast<std::byte *>(real_ptr), old_size_native, size_native);
+        if (!new_ptr) return nullptr;
+        return pointer_tag(new_ptr, size);
+    };
+
+    alc.free = +[](void *ctx, void *ptr) noexcept -> void {
+        auto [real_ptr, size] = pointer_untag(ptr);
+        deallocate_from_arena(*static_cast<limited_arena_t *>(ctx), static_cast<std::byte *>(real_ptr), size);
+    };
+    return alc;
+}
+
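The unary `+` trick mentioned in the comments inside `yyjson_wrap_arena_tagging`, shown in isolation: applied to a capture-less lambda, `+` forces the conversion to a plain C function pointer, which is exactly the shape the `yyjson_alc` callbacks expect. A self-contained sketch (not part of the commit):

```cpp
#include <cstdio>
#include <type_traits>

int main() {
    // With `auto`, the deduced type would normally be the unique lambda type;
    // unary `+` forces the conversion to a plain function pointer instead.
    auto callback = +[](int value) { std::printf("value = %d\n", value); };
    static_assert(std::is_same_v<decltype(callback), void (*)(int)>);
    callback(42);
    return 0;
}
```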
+yyjson_alc yyjson_wrap_malloc(limited_arena_t &) noexcept {
+    yyjson_alc alc;
+    alc.ctx = NULL;
+    alc.malloc = +[](void *, size_t size) noexcept -> void * { return malloc(size); };
+    alc.realloc = +[](void *, void *ptr, size_t, size_t size) noexcept -> void * { return realloc(ptr, size); };
+    alc.free = +[](void *, void *ptr) noexcept -> void { free(ptr); };
+    return alc;
+}
+
+typedef yyjson_alc (*yyjson_alc_wrapper)(limited_arena_t &);
+
+static void json_yyjson(bm::State &state, yyjson_alc_wrapper alc_wrapper = yyjson_wrap_malloc) {
+
+    // Wrap our custom arena into a `yyjson_alc` structure; alternatively, we could use:
+    //
+    //      char yyjson_buffer[4096];
+    //      yyjson_alc_pool_init(&alc, yyjson_buffer, sizeof(yyjson_buffer));
+    //
+    using arena_t = limited_arena_t;
+    arena_t arena;
+
     // Repeat the checks many times
     std::size_t bytes_processed = 0;
     std::size_t peak_memory_usage = 0;
@@ -2896,9 +2973,10 @@ static void json_yyjson(bm::State &state) {
         yyjson_read_err error;
         std::memset(&error, 0, sizeof(error));
 
+        yyjson_alc alc = alc_wrapper(arena);
         yyjson_doc *doc = yyjson_read_opts(                 //
             (char *)packet_json.data(), packet_json.size(), //
-            YYJSON_READ_NOFLAG, use_arena ? &alc : NULL, &error);
+            YYJSON_READ_NOFLAG, &alc, &error);
         if (!error.code) bm::DoNotOptimize(contains_xss_in_yyjson(yyjson_doc_get_root(doc)));
         peak_memory_usage = std::max(peak_memory_usage, arena.total_allocated);
         peak_memory_calls = std::max(peak_memory_calls, arena.unique_allocations);
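For comparison, the stack-buffer route that the in-function comment mentions would skip the custom arena entirely and let yyjson manage a fixed pool of its own. A hedged sketch reusing the `packet_json` input from the benchmark loop (illustrative only, not part of the commit):

```cpp
char yyjson_buffer[4096];
yyjson_alc pool_alc;
if (yyjson_alc_pool_init(&pool_alc, yyjson_buffer, sizeof(yyjson_buffer))) {
    yyjson_read_err error {};
    yyjson_doc *doc = yyjson_read_opts(                 //
        (char *)packet_json.data(), packet_json.size(), //
        YYJSON_READ_NOFLAG, &pool_alc, &error);
    if (doc) yyjson_doc_free(doc); // returns the nodes to the pool allocator
}
```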
@@ -2910,10 +2988,28 @@ static void json_yyjson(bm::State &state) {
             bm::Counter(peak_memory_usage * 1.0 / peak_memory_calls, bm::Counter::kAvgThreads);
 }
 
-BENCHMARK(json_yyjson<false>)->MinTime(10)->Name("json_yyjson<malloc>");
-BENCHMARK(json_yyjson<true>)->MinTime(10)->Name("json_yyjson<limited_arena>");
-BENCHMARK(json_yyjson<false>)->MinTime(10)->Name("json_yyjson<malloc>")->Threads(physical_cores());
-BENCHMARK(json_yyjson<true>)->MinTime(10)->Name("json_yyjson<limited_arena>")->Threads(physical_cores());
+BENCHMARK_CAPTURE(json_yyjson, malloc, yyjson_wrap_malloc) //
+    ->MinTime(10)
+    ->Name("json_yyjson<malloc>");
+BENCHMARK_CAPTURE(json_yyjson, prepending, yyjson_wrap_arena_prepending)
+    ->MinTime(10)
+    ->Name("json_yyjson<limited_arena, prepending>");
+BENCHMARK_CAPTURE(json_yyjson, tagging, yyjson_wrap_arena_tagging)
+    ->MinTime(10)
+    ->Name("json_yyjson<tagging_arena, tagging>");
+
+BENCHMARK_CAPTURE(json_yyjson, malloc, yyjson_wrap_malloc)
+    ->MinTime(10)
+    ->Name("json_yyjson<malloc>")
+    ->Threads(physical_cores());
+BENCHMARK_CAPTURE(json_yyjson, prepending, yyjson_wrap_arena_prepending)
+    ->MinTime(10)
+    ->Name("json_yyjson<limited_arena, prepending>")
+    ->Threads(physical_cores());
+BENCHMARK_CAPTURE(json_yyjson, tagging, yyjson_wrap_arena_tagging)
+    ->MinTime(10)
+    ->Name("json_yyjson<tagging_arena, tagging>")
+    ->Threads(physical_cores());
 
 /**
  * The `nlohmann::json` library is designed to be simple and easy to use, but it's