diff --git a/benchmark/CMakeLists.txt b/benchmark/CMakeLists.txt index 77a1744..25574d7 100644 --- a/benchmark/CMakeLists.txt +++ b/benchmark/CMakeLists.txt @@ -42,6 +42,13 @@ target_sources(libfork_benchmark src/libfork_benchmark/fib/serial_return.cpp ) +# ---- Baremetal ---- + +target_sources(libfork_benchmark + PRIVATE + src/libfork_benchmark/fib/baremetal.cpp +) + # ---- Libfork ---- target_sources(libfork_benchmark diff --git a/benchmark/src/libfork_benchmark/fib/baremetal.cpp b/benchmark/src/libfork_benchmark/fib/baremetal.cpp new file mode 100644 index 0000000..a67b2a4 --- /dev/null +++ b/benchmark/src/libfork_benchmark/fib/baremetal.cpp @@ -0,0 +1,102 @@ +#include +#include +#include +#include + +#include + +#include "libfork_benchmark/fib/fib.hpp" + +// === Coroutine + +namespace { +/* Coroutine return type for the fib benchmark. It wraps the coroutine handle and is itself the awaitable used by co_await, since it defines await_ready, await_suspend and await_resume. */ +struct task { + struct promise_type : fib_bump_allocator { + /* Inheriting from fib_bump_allocator supplies operator new and operator delete, which is how the coroutine frame gets allocated and freed. */ + auto get_return_object() -> task { return {std::coroutine_handle::from_promise(*this)}; } + /* Always suspend at the start: the body runs only once the handle is resumed. */ + auto initial_suspend() -> std::suspend_always { return {}; } + /* On completion: read the continuation, destroy this frame, then transfer control to the continuation, or to a no-op coroutine when none was set. */ + auto final_suspend() noexcept { + struct final_awaitable : std::suspend_always { + auto await_suspend(std::coroutine_handle h) noexcept -> std::coroutine_handle<> { + /* Read the continuation before h.destroy(): it is stored in the promise, which is gone once the frame is destroyed. */ + std::coroutine_handle<> cont = h.promise().continuation; + + h.destroy(); + + if (cont) { + return cont; + } + + return std::noop_coroutine(); + } + }; + + return final_awaitable{}; + } + /* Writes the result through the pointer installed by task::set(); that pointer is still null if set() was never called. */ + void return_value(std::int64_t val) { *value = val; } + void unhandled_exception() { std::terminate(); } + + std::int64_t *value = nullptr; + std::coroutine_handle<> continuation = nullptr; + }; + + std::coroutine_handle coro; + /* Tells the coroutine where to store its result; returns *this so the call can be chained. */ + auto set(std::int64_t &out) -> task & { + coro.promise().value = &out; + return *this; + } + + auto await_ready() noexcept -> bool { return false; } + /* Remember the awaiting coroutine as the continuation, then return the child handle so that the child is resumed next. */ + auto await_suspend(std::coroutine_handle<> h) -> std::coroutine_handle { + coro.promise().continuation = h; + return coro; + } + + void await_resume() noexcept {} +}; +/* Recursive Fibonacci as a coroutine. The two sub-problems are awaited one after the other; each child writes its result into a local through set(). */ +auto fib(std::int64_t n) -> task { + if (n <= 1) {
co_return n; + } + std::int64_t a = 0; + std::int64_t b = 0; + co_await fib(n - 1).set(a); + co_await fib(n - 2).set(b); + co_return a + b; +} +/* Benchmark driver: allocates the buffer that backs fib_bump_ptr, then on every iteration creates fib(n), resumes it once and checks the result against fib_ref(n). */ +void fib_coro_no_queue(benchmark::State &state) { + + std::int64_t n = state.range(0); + std::int64_t expect = fib_ref(n); + + state.counters["n"] = static_cast(n); + + // 8MB stack + std::unique_ptr buffer = std::make_unique(1024 * 1024 * 8); + fib_bump_ptr = buffer.get(); + + for (auto _ : state) { + benchmark::DoNotOptimize(n); + std::int64_t result = 0; + fib(n).set(result).coro.resume(); + CHECK_RESULT(result, expect); + benchmark::DoNotOptimize(result); + } + /* Each operator delete rewinds fib_bump_ptr to the freed block, so once every frame is gone it must point at the start of the buffer again. */ + if (fib_bump_ptr != buffer.get()) { + std::terminate(); // Stack leak + } +} + +} // namespace + +BENCHMARK(fib_coro_no_queue)->Name("test/baremetal/fib")->Arg(fib_test); +BENCHMARK(fib_coro_no_queue)->Name("base/baremetal/fib")->Arg(fib_base); diff --git a/benchmark/src/libfork_benchmark/fib/fib.hpp b/benchmark/src/libfork_benchmark/fib/fib.hpp index a0cca1e..9dde778 100644 --- a/benchmark/src/libfork_benchmark/fib/fib.hpp +++ b/benchmark/src/libfork_benchmark/fib/fib.hpp @@ -1,5 +1,7 @@ #pragma once +#include +#include #include #include "libfork_benchmark/common.hpp" @@ -27,3 +29,27 @@ constexpr auto fib_ref(std::int64_t n) -> std::int64_t { return curr; } + +// === Shared Allocator Logic === +/* Granularity, in bytes, that fib_align_size rounds allocation sizes up to. */ +inline constexpr std::size_t k_fib_align = 2 * sizeof(void *); +/* Rounds n up to the next multiple of k_fib_align. The mask form is only a correct round-up when k_fib_align is a power of two. */ +[[nodiscard]] +inline auto fib_align_size(std::size_t n) -> std::size_t { + return (n + k_fib_align - 1) & ~(k_fib_align - 1); +} +/* Per-thread allocation cursor. It starts out null, so a caller has to point it at a buffer before anything is allocated through fib_bump_allocator. */ +inline thread_local std::byte *fib_bump_ptr = nullptr; +/* Gives a class bump allocation out of fib_bump_ptr. Nothing here checks the cursor against the end of the buffer. */ +struct fib_bump_allocator { + /* Hands out the current cursor and advances it by the aligned size. */ + static auto operator new(std::size_t sz) -> void * { + auto *prev = fib_bump_ptr; + fib_bump_ptr += fib_align_size(sz); + return prev; + } + /* Rewinds the cursor to p, which is only correct when blocks are released in reverse order of allocation. */ + static auto operator delete(void *p, [[maybe_unused]] std::size_t sz) noexcept -> void { + fib_bump_ptr = std::bit_cast(p); + } +}; diff --git a/benchmark/src/libfork_benchmark/fib/lf_parts.cpp
b/benchmark/src/libfork_benchmark/fib/lf_parts.cpp index d2e07a9..5341f34 100644 --- a/benchmark/src/libfork_benchmark/fib/lf_parts.cpp +++ b/benchmark/src/libfork_benchmark/fib/lf_parts.cpp @@ -19,26 +19,6 @@ struct stack_on_heap { } }; -thread_local static std::byte *sp = nullptr; - -[[nodiscard]] -auto align(std::size_t n) -> std::size_t { - return (n + lf::k_new_align - 1) & ~(lf::k_new_align - 1); -} - -struct tls_stack { - - static auto operator new(std::size_t sz) -> void * { - auto *prev = sp; - sp += align(sz); - return prev; - } - - static auto operator delete([[maybe_unused]] void *p, [[maybe_unused]] std::size_t sz) noexcept -> void { - sp = std::bit_cast(p); - } -}; - template constexpr auto no_await = [](this auto fib, std::int64_t *ret, std::int64_t n) -> lf::task { @@ -50,8 +30,8 @@ constexpr auto no_await = std::int64_t lhs = 0; std::int64_t rhs = 0; - fib(&lhs, n - 1).release()->handle().resume(); - fib(&rhs, n - 2).release()->handle().resume(); + /* task is now a plain struct with a public promise pointer (see the src/core/promise.cxx part of this patch), so the handle is reached through .promise instead of release(). */ fib(&lhs, n - 1).promise->handle().resume(); + fib(&rhs, n - 2).promise->handle().resume(); *ret = lhs + rhs; }; @@ -82,19 +62,19 @@ void fib(benchmark::State &state) { std::unique_ptr buffer = std::make_unique(1024 * 1024); - sp = buffer.get(); + /* Point the bump cursor that this patch adds to fib.hpp at the start of this benchmark's buffer; it replaces the file-local sp removed above. */ fib_bump_ptr = buffer.get(); for (auto _ : state) { benchmark::DoNotOptimize(n); std::int64_t result = 0; - Fn(&result, n).release()->handle().resume(); + Fn(&result, n).promise->handle().resume(); CHECK_RESULT(result, expect); benchmark::DoNotOptimize(result); } - if (sp != buffer.get()) { + /* The cursor must be back at the start of the buffer once the loop is done; anything else is reported as a leak. */ if (fib_bump_ptr != buffer.get()) { LF_TERMINATE("Stack leak detected"); } } @@ -107,8 +87,8 @@ BENCHMARK(fib>)->Name("base/libfork/fib/heap/no_await")- BENCHMARK(fib>)->Name("test/libfork/fib/heap/await")->Arg(fib_test); BENCHMARK(fib>)->Name("base/libfork/fib/heap/await")->Arg(fib_base); -BENCHMARK(fib>)->Name("test/libfork/fib/data/no_await")->Arg(fib_test); -BENCHMARK(fib>)->Name("base/libfork/fib/data/no_await")->Arg(fib_base);
+BENCHMARK(fib>)->Name("test/libfork/fib/data/no_await")->Arg(fib_test); +BENCHMARK(fib>)->Name("base/libfork/fib/data/no_await")->Arg(fib_base); -BENCHMARK(fib>)->Name("test/libfork/fib/data/await")->Arg(fib_test); -BENCHMARK(fib>)->Name("base/libfork/fib/data/await")->Arg(fib_base); +BENCHMARK(fib>)->Name("test/libfork/fib/data/await")->Arg(fib_test); +BENCHMARK(fib>)->Name("base/libfork/fib/data/await")->Arg(fib_base); diff --git a/src/core/frame.cxx b/src/core/frame.cxx index f01a9b0..f2f0936 100644 --- a/src/core/frame.cxx +++ b/src/core/frame.cxx @@ -1,3 +1,5 @@ +/* Global module fragment: lets the header below be included ahead of the module declaration. NOTE(review): presumably that header provides the LF_HOF macro used by handle(); confirm. */ module; +#include "libfork/__impl/utils.hpp" export module libfork.core:frame; import std; @@ -5,7 +7,11 @@ import std; namespace lf { struct frame_type { + frame_type *parent = nullptr; + /* Coroutine handle for this frame, built with std::coroutine_handle::from_promise(*this). NOTE(review): LF_HOF is not defined in this patch; presumably it expands the expression into the return type and body, confirm against utils.hpp. */ + [[nodiscard]] + constexpr auto handle() LF_HOF(std::coroutine_handle::from_promise(*this)) }; static_assert(std::is_standard_layout_v); diff --git a/src/core/promise.cxx b/src/core/promise.cxx index 3a46707..9cf2aeb 100644 --- a/src/core/promise.cxx +++ b/src/core/promise.cxx @@ -20,19 +20,6 @@ struct promise_type; // =============== Task =============== // -/** - * @brief `std::unique_ptr` compatible deleter for coroutine promises. - */ -struct promise_deleter { - template - constexpr static void operator()(T *ptr) noexcept { - std::coroutine_handle::from_promise(*ptr).destroy(); - } -}; - -template -using unique_promise = std::unique_ptr; - /** * @brief The return type for libfork's async functions/coroutines.
* @@ -50,7 +37,9 @@ using unique_promise = std::unique_ptr; * \endrst */ export template -struct task final : unique_promise> {}; +struct task { + /* Plain pointer to the coroutine's promise. This struct declares no destructor; the frame is destroyed in final_suspend through frame->handle().destroy(). */ promise_type *promise; +}; // =============== Frame-mixin =============== // @@ -61,13 +50,11 @@ constexpr auto final_suspend(frame_type *frame) -> std::coroutine_handle<> { frame_type *parent_frame = frame->parent; - { - // Destroy the child frame - unique_promise _{frame}; - } + // Destroy the child frame + frame->handle().destroy(); if (parent_frame != nullptr) { - return std::coroutine_handle::from_promise(*parent_frame); + return parent_frame->handle(); } return std::noop_coroutine(); @@ -81,22 +68,19 @@ struct final_awaitable : std::suspend_always { } }; -// TODO: can we type-erase T/Policy here? - -template struct just_awaitable : std::suspend_always { - task child; + /* Frame of the awaited child task; await_suspend links it to the parent frame and returns its handle. */ frame_type *child; template auto await_suspend(std::coroutine_handle> parent) noexcept -> std::coroutine_handle<> { LF_ASSUME(child != nullptr); - LF_ASSUME(child->frame.parent == nullptr); + LF_ASSUME(child->parent == nullptr); - child->frame.parent = &parent.promise().frame; + child->parent = &parent.promise().frame; - return child.release()->handle(); + return child->handle(); } }; @@ -116,8 +100,8 @@ struct mixin_frame { // === Called by the compiler === // template - static constexpr auto await_transform(task child) -> just_awaitable { - return {.child = std::move(child)}; + static constexpr auto await_transform(task child) noexcept -> just_awaitable { + /* Only the address of the child's frame is kept in the awaitable. */ return {.child = &child.promise->frame}; } constexpr static auto initial_suspend() noexcept -> std::suspend_always { return {}; } @@ -136,7 +120,7 @@ struct promise_type : StackPolicy, mixin_frame { frame_type frame; - constexpr auto get_return_object() -> task { return {{this, {}}}; } + constexpr auto get_return_object() -> task { return {.promise = this}; } constexpr static void return_void() {} };