Skip to content

Commit 0058952

Browse files
authored
[libc] Simplify slab waiting in GPU memory allocator (#152872)
Summary: This moves the waiting inside the `try_lock` routine instead. This makes the logic much simpler since it is just a single loop on a load. We should get the same effect here, and since we don't care about this being a generic interface, it shouldn't matter that it waits a bit. It is still wait-free since it's guaranteed to make progress *eventually*.
1 parent dde474c commit 0058952

File tree

1 file changed

+12
-12
lines changed

1 file changed

+12
-12
lines changed

libc/src/__support/GPU/allocator.cpp

Lines changed: 12 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -166,7 +166,11 @@ static inline uint32_t get_leader_id(uint64_t ballot, uint32_t id) {
166166

167167
// We use a sentinel value to indicate a failed or in-progress allocation.
168168
template <typename T> bool is_sentinel(const T &x) {
169-
return x == cpp::numeric_limits<T>::max();
169+
if constexpr (cpp::is_pointer_v<T>)
170+
return reinterpret_cast<uintptr_t>(x) ==
171+
cpp::numeric_limits<uintptr_t>::max();
172+
else
173+
return x == cpp::numeric_limits<T>::max();
170174
}
171175

172176
} // namespace impl
@@ -446,7 +450,13 @@ struct GuardPtr {
446450
return new (raw) Slab(cpp::forward<Args>(args)...);
447451
}
448452

449-
if (!expected || impl::is_sentinel(reinterpret_cast<uintptr_t>(expected)))
453+
// If there is a slab allocation in progress we retry a few times.
454+
for (uint32_t t = 0; impl::is_sentinel(expected) && t < MAX_TRIES; ++t) {
455+
sleep_briefly();
456+
expected = ptr.load(cpp::MemoryOrder::RELAXED);
457+
}
458+
459+
if (!expected || impl::is_sentinel(expected))
450460
return nullptr;
451461

452462
if (!ref.acquire(n, count))
@@ -557,16 +567,6 @@ static Slab *find_slab(uint32_t chunk_size, uint64_t &uniform,
557567
Slab *slab = slots[index].try_lock(lane_mask, uniform & lane_mask,
558568
reserved, chunk_size, index);
559569

560-
// If there is a slab allocation in progress we retry a few times.
561-
for (uint32_t retries = 0;
562-
!slab && !impl::is_sentinel(reserved) && retries < MAX_TRIES;
563-
retries++) {
564-
uint64_t lane_mask = gpu::get_lane_mask();
565-
slab = slots[index].try_lock(lane_mask, uniform & lane_mask, reserved,
566-
chunk_size, index);
567-
sleep_briefly();
568-
}
569-
570570
// If we find a slab with a matching chunk size then we store the result.
571571
// Otherwise, we need to free the claimed lock and continue. In the case
572572
// of out-of-memory we receive a sentinel value and return a failure.

0 commit comments

Comments
 (0)