diff --git a/libc/src/__support/GPU/allocator.cpp b/libc/src/__support/GPU/allocator.cpp
index 534a309fec7b4..3da339c63ccd0 100644
--- a/libc/src/__support/GPU/allocator.cpp
+++ b/libc/src/__support/GPU/allocator.cpp
@@ -166,7 +166,11 @@ static inline uint32_t get_leader_id(uint64_t ballot, uint32_t id) {
 
 // We use a sentinel value to indicate a failed or in-progress allocation.
 template <typename T> bool is_sentinel(const T &x) {
-  return x == cpp::numeric_limits<T>::max();
+  if constexpr (cpp::is_pointer_v<T>)
+    return reinterpret_cast<uintptr_t>(x) ==
+           cpp::numeric_limits<uintptr_t>::max();
+  else
+    return x == cpp::numeric_limits<T>::max();
 }
 
 } // namespace impl
@@ -446,7 +450,13 @@ struct GuardPtr {
       return new (raw) Slab(cpp::forward<Args>(args)...);
     }
 
-    if (!expected || impl::is_sentinel(reinterpret_cast<uintptr_t>(expected)))
+    // If there is a slab allocation in progress we retry a few times.
+    for (uint32_t t = 0; impl::is_sentinel(expected) && t < MAX_TRIES; ++t) {
+      sleep_briefly();
+      expected = ptr.load(cpp::MemoryOrder::RELAXED);
+    }
+
+    if (!expected || impl::is_sentinel(expected))
       return nullptr;
 
     if (!ref.acquire(n, count))
@@ -557,16 +567,6 @@ static Slab *find_slab(uint32_t chunk_size, uint64_t &uniform,
       Slab *slab = slots[index].try_lock(lane_mask, uniform & lane_mask,
                                          reserved, chunk_size, index);
 
-      // If there is a slab allocation in progress we retry a few times.
-      for (uint32_t retries = 0;
-           !slab && !impl::is_sentinel(reserved) && retries < MAX_TRIES;
-           retries++) {
-        uint64_t lane_mask = gpu::get_lane_mask();
-        slab = slots[index].try_lock(lane_mask, uniform & lane_mask, reserved,
-                                     chunk_size, index);
-        sleep_briefly();
-      }
-
      // If we find a slab with a matching chunk size then we store the result.
      // Otherwise, we need to free the claimed lock and continue. In the case
      // of out-of-memory we receive a sentinel value and return a failure.
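
For context, the sketch below restates the two ideas in this patch in standalone, standard C++ rather than the libc-internal helpers: is_sentinel dispatches on whether its argument is a pointer (first hunk), and the bounded retry on an in-progress sentinel sits next to the atomic load it re-reads (second hunk). This is only an illustrative sketch, not the allocator's code: std::atomic, std::this_thread::yield, and <type_traits> stand in for the cpp::, gpu::, and sleep_briefly machinery, and the names wait_for_slab, Slab, and the MAX_TRIES value are hypothetical.

#include <atomic>
#include <cstdint>
#include <limits>
#include <thread>
#include <type_traits>

// Sentinel check dispatched on pointer vs. integer types, mirroring the
// first hunk: callers no longer reinterpret_cast at every call site.
template <typename T> bool is_sentinel(const T &x) {
  if constexpr (std::is_pointer_v<T>)
    // A pointer is a sentinel when its bit pattern is all ones.
    return reinterpret_cast<uintptr_t>(x) ==
           std::numeric_limits<uintptr_t>::max();
  else
    // An integer is a sentinel when it holds its type's maximum value.
    return x == std::numeric_limits<T>::max();
}

struct Slab {};
constexpr uint32_t MAX_TRIES = 16; // arbitrary illustrative bound

// Bounded retry while another thread holds the in-progress sentinel,
// mirroring the loop the second hunk moves into GuardPtr::try_lock_impl.
Slab *wait_for_slab(std::atomic<Slab *> &slot) {
  Slab *expected = slot.load(std::memory_order_relaxed);
  for (uint32_t t = 0; is_sentinel(expected) && t < MAX_TRIES; ++t) {
    std::this_thread::yield(); // stand-in for sleep_briefly()
    expected = slot.load(std::memory_order_relaxed);
  }
  // Give up on a null slot or a sentinel that never cleared.
  if (!expected || is_sentinel(expected))
    return nullptr;
  return expected;
}

int main() {
  Slab slab;
  std::atomic<Slab *> slot{&slab};
  uint64_t count = std::numeric_limits<uint64_t>::max();
  return (wait_for_slab(slot) == &slab && is_sentinel(count)) ? 0 : 1;
}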