Skip to content

Commit 16431b0

Browse files
ConorWilliams authored and Conor committed
[V4] Queue (#64)
* copy in from legacy
* compiling
* no return if no address
* add nullptr fork/call overloads
* tests
* flto auto
* format
* silence warnings
* better tests
* fix typo in comment
* test improvements
* refactor fib.hpp
* add baremetal queue bench
* add benchmark
* moving about concepts
* pass by value
* return pre-push size
* add when empty
1 parent 78f58c6 commit 16431b0

File tree

11 files changed

+807
-72
lines changed

11 files changed

+807
-72
lines changed

CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -64,6 +64,7 @@ target_sources(libfork_libfork
6464
src/core/tuple.cxx
6565
src/core/ops.cxx
6666
src/core/context.cxx
67+
src/core/deque.cxx
6768
PRIVATE
6869
src/exception.cpp
6970
)

CMakePresets.json

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,7 @@
3232
"displayName": "Debug with warnings and hardening",
3333
"cacheVariables": {
3434
"CMAKE_BUILD_TYPE": "Debug",
35-
"CMAKE_CXX_FLAGS": "-O2 -Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion -Wcast-qual -Wformat -Wformat=2 -Wundef -Werror=float-equal -Wshadow -Wcast-align -Wunused -Wnull-dereference -Wdouble-promotion -Wimplicit-fallthrough -Wextra-semi -Woverloaded-virtual -Wnon-virtual-dtor -Wold-style-cast -Werror=format-security -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 -D_GLIBCXX_ASSERTIONS -fstrict-flex-arrays=3 -fstack-protector-strong -Wno-missing-braces"
35+
"CMAKE_CXX_FLAGS": "-O2 -Wall -Wextra -Wpedantic -Wconversion -Wsign-conversion -Wcast-qual -Wformat -Wformat=2 -Wundef -Werror=float-equal -Wshadow -Wcast-align -Wunused -Wnull-dereference -Wdouble-promotion -Wimplicit-fallthrough -Wextra-semi -Woverloaded-virtual -Wnon-virtual-dtor -Wold-style-cast -Werror=format-security -U_FORTIFY_SOURCE -D_FORTIFY_SOURCE=3 -D_GLIBCXX_ASSERTIONS -fstrict-flex-arrays=3 -fstack-protector-strong -Wno-missing-braces -Wno-missing-field-initializers"
3636
}
3737
},
3838
{
@@ -41,7 +41,7 @@
4141
"displayName": "Release",
4242
"cacheVariables": {
4343
"CMAKE_BUILD_TYPE": "Release",
44-
"CMAKE_CXX_FLAGS": "-O3 -DNDEBUG -flto -march=native"
44+
"CMAKE_CXX_FLAGS": "-O3 -DNDEBUG -flto=auto -march=native"
4545
}
4646
},
4747
{
@@ -50,7 +50,7 @@
5050
"displayName": "Release no RTTI or exceptions",
5151
"cacheVariables": {
5252
"CMAKE_BUILD_TYPE": "Release",
53-
"CMAKE_CXX_FLAGS": "-O3 -DNDEBUG -flto -march=native -fno-exceptions -fno-rtti"
53+
"CMAKE_CXX_FLAGS": "-O3 -DNDEBUG -flto=auto -march=native -fno-exceptions -fno-rtti"
5454
}
5555
},
5656
{

benchmark/src/libfork_benchmark/fib/baremetal.cpp

Lines changed: 63 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,22 @@
1111

1212
namespace {
1313

14+
// ==== Allocators ==== //
15+
16+
constinit inline thread_local std::byte *tls_bump_ptr = nullptr;
17+
1418
struct task {
15-
struct promise_type : tls_bump {
19+
struct promise_type {
20+
21+
// Bump-allocates storage for this promise's coroutine frame.
// Returns the current value of the thread-local `tls_bump_ptr` and then advances
// it by `fib_align_size(sz)`. There is no null or capacity check here, and
// `tls_bump_ptr` is initialised to nullptr, so it must be pointed at a buffer
// before the first allocation.
// NOTE(review): presumably the benchmark driver installs that buffer -- not visible in this hunk.
static auto operator new(std::size_t sz) -> void * {
22+
auto *prev = tls_bump_ptr;
23+
tls_bump_ptr += fib_align_size(sz);
24+
return prev;
25+
}
26+
27+
// Releases a frame by rewinding the thread-local `tls_bump_ptr` to `p`; `sz` is ignored.
// NOTE(review): rewinding only returns memory correctly when frames are freed in
// reverse order of allocation -- confirm this holds for every user of the promise.
static auto operator delete(void *p, [[maybe_unused]] std::size_t sz) noexcept -> void {
28+
tls_bump_ptr = std::bit_cast<std::byte *>(p);
29+
}
1630

1731
auto get_return_object() -> task { return {std::coroutine_handle<promise_type>::from_promise(*this)}; }
1832

@@ -96,7 +110,53 @@ void fib_coro_no_queue(benchmark::State &state) {
96110
}
97111
}
98112

113+
// === Recursive with Deque overhead
114+
115+
// Per-thread pointer to the deque used by the deque benchmark below.
// It is null except while `fib_recursive_deque` is running, which sets and clears it.
constinit inline thread_local lf::deque<std::int64_t> *tls_deque = nullptr;
116+
117+
// Returns the deque `tls_deque` currently points to.
// Dereferences without a null check, so `tls_deque` must already be set.
// NOTE(review): LF_NO_INLINE presumably keeps the accessor call in the measured
// path (mimicking a non-inlined context lookup) -- confirm intent.
LF_NO_INLINE
118+
auto deque() -> lf::deque<std::int64_t> & { return *tls_deque; }
119+
120+
// Plain recursive Fibonacci with one deque push/pop pair wrapped around the
// first recursive call.
//
// Returns `n` unchanged for n <= 1, otherwise fib(n - 1) + fib(n - 2).
// The pushed value and the result of `pop()` are never used; the deque traffic
// exists only to add its cost to the benchmark.
auto fib_recursive_deque_impl(std::int64_t n) -> std::int64_t {
121+
// Base case: fib(0) == 0, fib(1) == 1 (non-positive inputs are returned as-is).
if (n <= 1) {
122+
return n;
123+
}
124+
125+
// Emulate work item creation/scheduling overhead
126+
deque().push(n);
127+
std::int64_t a = fib_recursive_deque_impl(n - 1);
128+
// Balance the push above; the popped value is discarded.
deque().pop();
129+
130+
// The second branch runs without touching the deque.
std::int64_t b = fib_recursive_deque_impl(n - 2);
131+
132+
return a + b;
133+
}
134+
135+
// Benchmark driver: times `fib_recursive_deque_impl(n)` where n is the
// benchmark's first range argument.
//
// A function-local `lf::deque` is published through `tls_deque` for the
// duration of the run and unpublished before returning, so the pointer never
// outlives the deque it refers to.
void fib_recursive_deque(benchmark::State &state) {
136+
137+
std::int64_t n = state.range(0);
138+
// Reference value each iteration's result is checked against.
std::int64_t expect = fib_ref(n);
139+
140+
// Record the problem size as a counter on the benchmark state.
state.counters["n"] = static_cast<double>(n);
141+
142+
lf::deque<std::int64_t> deque;
143+
tls_deque = &deque;
144+
145+
for (auto _ : state) {
146+
// Keep the compiler from treating `n` as a known constant.
benchmark::DoNotOptimize(n);
147+
std::int64_t result = fib_recursive_deque_impl(n);
148+
CHECK_RESULT(result, expect);
149+
// Keep the computed result from being optimised away.
benchmark::DoNotOptimize(result);
150+
}
151+
152+
// Clear the thread-local before the local deque is destroyed.
tls_deque = nullptr;
153+
}
154+
99155
} // namespace
100156

101-
BENCHMARK(fib_coro_no_queue)->Name("test/baremetal/fib")->Arg(fib_test);
102-
BENCHMARK(fib_coro_no_queue)->Name("base/baremetal/fib")->Arg(fib_base);
157+
// Minimal coroutine, bump allocated (thread-local) stack
158+
BENCHMARK(fib_coro_no_queue)->Name("test/baremetal/fib/coro")->Arg(fib_test);
159+
BENCHMARK(fib_coro_no_queue)->Name("base/baremetal/fib/coro")->Arg(fib_base);
160+
161+
BENCHMARK(fib_recursive_deque)->Name("test/baremetal/fib/deque")->Arg(fib_test);
162+
BENCHMARK(fib_recursive_deque)->Name("base/baremetal/fib/deque")->Arg(fib_base);

benchmark/src/libfork_benchmark/fib/fib.hpp

Lines changed: 1 addition & 63 deletions
Original file line numberDiff line numberDiff line change
@@ -35,69 +35,7 @@ constexpr auto fib_ref(std::int64_t n) -> std::int64_t {
3535
return curr;
3636
}
3737

38-
// === Shared Allocator Logic ===
39-
40-
inline constexpr std::size_t k_fib_align = 2 * sizeof(void *);
41-
4238
[[nodiscard]]
4339
inline auto fib_align_size(std::size_t n) -> std::size_t {
44-
return (n + k_fib_align - 1) & ~(k_fib_align - 1);
40+
return (n + lf::k_new_align - 1) & ~(lf::k_new_align - 1);
4541
}
46-
47-
constinit inline thread_local std::byte *tls_bump_ptr = nullptr;
48-
49-
struct tls_bump {
50-
51-
static auto operator new(std::size_t sz) -> void * {
52-
auto *prev = tls_bump_ptr;
53-
tls_bump_ptr += fib_align_size(sz);
54-
return prev;
55-
}
56-
57-
static auto operator delete(void *p, [[maybe_unused]] std::size_t sz) noexcept -> void {
58-
tls_bump_ptr = std::bit_cast<std::byte *>(p);
59-
}
60-
};
61-
62-
// === Shared Context Logic ===
63-
64-
template <lf::stack_allocator Alloc>
65-
struct vector_ctx {
66-
67-
using handle_type = lf::frame_handle<vector_ctx>;
68-
69-
std::vector<handle_type> work;
70-
Alloc allocator;
71-
72-
vector_ctx() { work.reserve(1024); }
73-
74-
auto alloc() noexcept -> Alloc & { return allocator; }
75-
76-
// TODO: try LF_NO_INLINE for final allocator
77-
LF_NO_INLINE
78-
void push(handle_type handle) { work.push_back(handle); }
79-
80-
auto pop() noexcept -> handle_type {
81-
auto handle = work.back();
82-
work.pop_back();
83-
return handle;
84-
}
85-
};
86-
87-
template <lf::stack_allocator Alloc>
88-
struct poly_vector_ctx final : lf::polymorphic_context<Alloc> {
89-
90-
using handle_type = lf::frame_handle<lf::polymorphic_context<Alloc>>;
91-
92-
std::vector<handle_type> work;
93-
94-
poly_vector_ctx() { work.reserve(1024); }
95-
96-
void push(handle_type handle) override { work.push_back(handle); }
97-
98-
auto pop() noexcept -> handle_type override {
99-
auto handle = work.back();
100-
work.pop_back();
101-
return handle;
102-
}
103-
};

benchmark/src/libfork_benchmark/fib/lf_parts.cpp

Lines changed: 65 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -46,6 +46,62 @@ struct linear_allocator {
4646

4747
static_assert(lf::stack_allocator<linear_allocator>);
4848

49+
// Non-polymorphic benchmark context: a `std::vector` used as a LIFO stack of
// frame handles, plus an owned allocator of type `Alloc`.
template <lf::stack_allocator Alloc>
50+
struct vector_ctx {
51+
52+
using handle_type = lf::frame_handle<vector_ctx>;
53+
54+
// Pending handles; the back of the vector is the top of the stack.
std::vector<handle_type> work;
55+
Alloc allocator;
56+
57+
// Reserve capacity for 1024 handles up front; pushes beyond that reallocate.
vector_ctx() { work.reserve(1024); }
58+
59+
// Access the allocator owned by this context.
auto alloc() noexcept -> Alloc & { return allocator; }
60+
61+
// TODO: try LF_NO_INLINE for final allocator
62+
LF_NO_INLINE
63+
// Append `handle` to the top of the stack.
void push(handle_type handle) { work.push_back(handle); }
64+
65+
// Remove and return the most recently pushed handle.
// There is no empty check: calling this with `work` empty is undefined behaviour.
auto pop() noexcept -> handle_type {
66+
auto handle = work.back();
67+
work.pop_back();
68+
return handle;
69+
}
70+
};
71+
72+
// Polymorphic counterpart of a vector-backed context: derives from
// `lf::polymorphic_context<Alloc>` and overrides `push`/`pop` with a
// `std::vector` used as a LIFO stack. Declared `final`.
template <lf::stack_allocator Alloc>
73+
struct poly_vector_ctx final : lf::polymorphic_context<Alloc> {
74+
75+
// Handles are typed on the polymorphic base, not on this derived class.
using handle_type = lf::frame_handle<lf::polymorphic_context<Alloc>>;
76+
77+
// Pending handles; the back of the vector is the top of the stack.
std::vector<handle_type> work;
78+
79+
// Reserve capacity for 1024 handles up front; pushes beyond that reallocate.
poly_vector_ctx() { work.reserve(1024); }
80+
81+
// Append `handle` to the top of the stack.
void push(handle_type handle) override { work.push_back(handle); }
82+
83+
// Remove and return the most recently pushed handle.
// There is no empty check: calling this with `work` empty is undefined behaviour.
auto pop() noexcept -> handle_type override {
84+
auto handle = work.back();
85+
work.pop_back();
86+
return handle;
87+
}
88+
};
89+
90+
// Polymorphic context backed by `lf::deque` instead of `std::vector`, fixed to
// `linear_allocator`. Declared `final`.
struct poly_deque_ctx final : lf::polymorphic_context<linear_allocator> {
91+
92+
// Handles are typed on the polymorphic base, not on this derived class.
using handle_type = lf::frame_handle<lf::polymorphic_context<linear_allocator>>;
93+
94+
lf::deque<handle_type> work;
95+
96+
// Forward `handle` to the deque's `push`.
void push(handle_type handle) override { work.push(handle); }
97+
98+
// Pop a handle from the deque, passing a callable that yields a
// value-initialised `handle_type`.
// NOTE(review): presumably `lf::deque::pop` invokes the callable when the deque
// is empty -- confirm against the deque's interface.
auto pop() noexcept -> handle_type override {
99+
return work.pop([] static -> handle_type {
100+
return {};
101+
});
102+
}
103+
};
104+
49105
using lf::task;
50106

51107
template <lf::worker_context T>
@@ -217,3 +273,12 @@ BENCHMARK(fib<fork_call<B>, A, B>)->Name("base/libfork/fib/poly_vector_ctx")->Ar
217273
// Same as above but with join.
218274
BENCHMARK(fib<fork_call<B, true>, A, B>)->Name("test/libfork/fib/poly_vector_ctx/join")->Arg(fib_test);
219275
BENCHMARK(fib<fork_call<B, true>, A, B>)->Name("base/libfork/fib/poly_vector_ctx/join")->Arg(fib_base);
276+
277+
using C = poly_deque_ctx;
278+
279+
// Return by value,
280+
// Libfork call/join/fork with co-await,
281+
// Polymorphic
282+
// Deque-backed context
283+
BENCHMARK(fib<fork_call<B, true>, C, B>)->Name("test/libfork/fib/poly_deque_ctx/join")->Arg(fib_test);
284+
BENCHMARK(fib<fork_call<B, true>, C, B>)->Name("base/libfork/fib/poly_deque_ctx/join")->Arg(fib_base);

src/core/concepts.cxx

Lines changed: 21 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -5,6 +5,27 @@ import std;
55

66
namespace lf {
77

8+
// =========== Atomic related concepts =========== //
9+
10+
/**
11+
* @brief Verify a type is suitable for use with `std::atomic`
12+
*
13+
* This requires a non-cv-qualified `TriviallyCopyable` type that is copy/move constructible and copy/move assignable.
14+
*/
15+
export template <typename T>
16+
concept atomicable = std::is_trivially_copyable_v<T> && //
17+
std::is_copy_constructible_v<T> && //
18+
std::is_move_constructible_v<T> && //
19+
std::is_copy_assignable_v<T> && //
20+
std::is_move_assignable_v<T> && //
21+
std::same_as<T, std::remove_cv_t<T>>; //
22+
23+
/**
24+
* @brief A concept that verifies a type is lock-free when used with `std::atomic`.
*
* Satisfied when `T` is `atomicable` and `std::atomic<T>::is_always_lock_free` is true.
* `atomicable<T>` is listed first, so `std::atomic<T>` is only named for types that pass it.
25+
*/
26+
export template <typename T>
27+
concept lock_free = atomicable<T> && std::atomic<T>::is_always_lock_free;
28+
829
// ========== Specialization ========== //
930

1031
template <typename T, template <typename...> typename Template>

src/core/core.cxx

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -10,6 +10,7 @@ export import :tuple;
1010
export import :frame; // concepts
1111
export import :context; // concepts
1212
export import :ops; // concepts, tuple, utility
13+
export import :deque; // concepts
1314

1415
// T3 partitions
1516
export import :promise;

0 commit comments

Comments
 (0)