Skip to content

Commit 38c0907

Browse files
committed
Add a limit on the number of tries so we never deadlock
1 parent b685f72 commit 38c0907

File tree

1 file changed

+15
-11
lines changed

1 file changed

+15
-11
lines changed

libc/src/__support/GPU/allocator.cpp

Lines changed: 15 additions & 11 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,10 @@ constexpr static uint32_t MIN_ALIGNMENT = MIN_SIZE - 1;
3636
// A sentinel used to indicate an invalid but non-null pointer value.
3737
constexpr static uint64_t SENTINEL = cpp::numeric_limits<uint64_t>::max();
3838

39+
// The number of times we will try starting on a single index before skipping
40+
// past it.
41+
constexpr static uint32_t MAX_TRIES = 512;
42+
3943
static_assert(!(ARRAY_SIZE & (ARRAY_SIZE - 1)), "Must be a power of two");
4044

4145
namespace impl {
@@ -413,10 +417,11 @@ static Slab *find_slab(uint32_t chunk_size) {
413417
uint64_t uniform = gpu::match_any(lane_mask, chunk_size);
414418

415419
Slab *result = nullptr;
420+
uint32_t nudge = 0;
416421
for (uint64_t mask = lane_mask; mask;
417-
mask = gpu::ballot(lane_mask, !result)) {
422+
mask = gpu::ballot(lane_mask, !result), ++nudge) {
418423
uint32_t index = cpp::numeric_limits<uint32_t>::max();
419-
for (uint32_t offset = 0;
424+
for (uint32_t offset = nudge / MAX_TRIES;
420425
gpu::ballot(lane_mask, index == cpp::numeric_limits<uint32_t>::max());
421426
offset += cpp::popcount(uniform & lane_mask)) {
422427
uint32_t candidate =
@@ -428,8 +433,9 @@ static Slab *find_slab(uint32_t chunk_size) {
428433
lane_mask, cpp::countr_zero(available & uniform), candidate);
429434

430435
// Each uniform group will use the first empty slot it finds.
431-
if (index == cpp::numeric_limits<uint32_t>::max() &&
432-
(available & uniform))
436+
if (offset >= ARRAY_SIZE ||
437+
(index == cpp::numeric_limits<uint32_t>::max() &&
438+
(available & uniform)))
433439
index = new_index;
434440

435441
if (offset >= ARRAY_SIZE)
@@ -441,9 +447,6 @@ static Slab *find_slab(uint32_t chunk_size) {
441447
uint64_t reserved = 0;
442448
Slab *slab = slots[index].try_lock(lane_mask & mask, uniform & mask,
443449
reserved, chunk_size, index);
444-
uint64_t claimed = gpu::ballot(
445-
lane_mask & mask, reserved <= Slab::available_chunks(chunk_size));
446-
447450
// If we find a slab with a matching chunk size then we store the result.
448451
// Otherwise, we need to free the claimed lock and continue. In the case
449452
// of out-of-memory we return a sentinel value.
@@ -452,13 +455,14 @@ static Slab *find_slab(uint32_t chunk_size) {
452455
result = slab;
453456
} else if (slab && (reserved > Slab::available_chunks(chunk_size) ||
454457
slab->get_chunk_size() != chunk_size)) {
455-
// Shuffle the start so we don't get stuck behind another slab forever.
456458
if (slab->get_chunk_size() != chunk_size)
457-
start = impl::hash(start);
458-
slots[index].unlock(lane_mask & mask & ~claimed,
459-
mask & ~claimed & uniform);
459+
start = index + 1;
460+
slots[index].unlock(gpu::get_lane_mask(),
461+
gpu::get_lane_mask() & uniform);
460462
} else if (!slab && reserved == cpp::numeric_limits<uint64_t>::max()) {
461463
result = reinterpret_cast<Slab *>(SENTINEL);
464+
} else {
465+
sleep_briefly();
462466
}
463467
}
464468
}

0 commit comments

Comments
 (0)