libc/src/__support/GPU/allocator.cpp: 13 additions & 6 deletions
@@ -142,10 +142,16 @@ static inline constexpr bool is_pow2(uint64_t x) {
   return x && (x & (x - 1)) == 0;
 }
 
-// Where this chunk size should start looking in the global array.
-static inline constexpr uint32_t start_index(uint32_t chunk_index) {
-  return (ARRAY_SIZE * impl::get_chunk_id(chunk_index)) /
-         impl::get_chunk_id(SLAB_SIZE / 2);
+// Where this chunk size should start looking in the global array. Small
+// allocations are much more likely than large ones, so we give them the most
+// space. We use a cubic easing function normalized on the possible chunks.
+static inline constexpr uint32_t start_index(uint32_t chunk_size) {
+  constexpr uint32_t max_chunk = impl::get_chunk_id(SLAB_SIZE / 2);
+  uint64_t norm =
+      (1 << 16) - (impl::get_chunk_id(chunk_size) << 16) / max_chunk;
+  uint64_t bias = (norm * norm * norm) >> 32;
+  uint64_t inv = (1 << 16) - bias;
+  return static_cast<uint32_t>(((ARRAY_SIZE - 1) * inv) >> 16);
 }
 
 } // namespace impl
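
As a rough illustration of the new easing curve, here is a standalone host-side sketch of the same 16.16 fixed-point math. The constants and the log2-style get_chunk_id below are assumed stand-ins for the real definitions in allocator.cpp, which may differ:

#include <cstdint>
#include <cstdio>

// Assumed stand-ins; the real constants live in allocator.cpp.
constexpr uint32_t ARRAY_SIZE = 128;
constexpr uint64_t SLAB_SIZE = 0x200000; // assuming a 2 MiB slab

// Assumed: get_chunk_id maps a power-of-two size to its log2 index.
constexpr uint32_t get_chunk_id(uint64_t x) {
  uint32_t id = 0;
  while (x >>= 1)
    ++id;
  return id;
}

// Mirrors the patched start_index: norm falls from 1.0 toward 0.0 in 16.16
// fixed point as the chunk grows, bias = norm^3 (at most 2^48, so the
// uint64_t product cannot overflow), and inv = 1.0 - bias is an ease-out
// curve that leaves the widest index gaps between the smallest chunk sizes.
constexpr uint32_t start_index(uint64_t chunk_size) {
  constexpr uint32_t max_chunk = get_chunk_id(SLAB_SIZE / 2);
  uint64_t norm =
      (1 << 16) - (uint64_t(get_chunk_id(chunk_size)) << 16) / max_chunk;
  uint64_t bias = (norm * norm * norm) >> 32;
  uint64_t inv = (1 << 16) - bias;
  return static_cast<uint32_t>(((ARRAY_SIZE - 1) * inv) >> 16);
}

int main() {
  for (uint64_t size = 16; size <= SLAB_SIZE / 2; size <<= 4)
    std::printf("chunk %8llu -> start %3u\n",
                static_cast<unsigned long long>(size), start_index(size));
}

With these assumed constants, the gap between successive start indices shrinks as the chunk size grows, so the smallest (most common) chunk sizes get the largest share of the array, as the comment in the patch describes.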
@@ -487,9 +493,10 @@ static Slab *find_slab(uint32_t chunk_size) {
   uint32_t start = indices[chunk_id].load(cpp::MemoryOrder::RELAXED);
   uint64_t uniform = gpu::match_any(gpu::get_lane_mask(), chunk_size);
 
-  for (uint32_t offset = 0; offset < ARRAY_SIZE; ++offset) {
+  for (uint32_t offset = 0; offset <= ARRAY_SIZE; ++offset) {
     uint32_t index =
-        !offset ? start : (impl::start_index(chunk_size) + offset) % ARRAY_SIZE;
+        !offset ? start
+                : (impl::start_index(chunk_size) + offset - 1) % ARRAY_SIZE;
 
     if (slots[index].use_count() < Slab::available_chunks(chunk_size)) {
       uint64_t lane_mask = gpu::get_lane_mask();
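
The loop-bound change above also fixes an off-by-one in the probe sequence: offset 0 is spent checking the cached start hint, so with offset < ARRAY_SIZE the scan visited start_index + 1 through start_index + ARRAY_SIZE - 1 and never probed the slot at impl::start_index(chunk_size) itself. A minimal host-side sketch (hypothetical ARRAY_SIZE, start index, and start hint chosen purely for illustration) contrasting the two probe orders:

#include <cstdint>
#include <cstdio>

// Hypothetical values for illustration only.
constexpr uint32_t ARRAY_SIZE = 8;
constexpr uint32_t kStartIndex = 3; // stand-in for impl::start_index(chunk_size)

int main() {
  uint32_t start = 6; // stand-in for the cached hint from indices[chunk_id]

  // Old loop: offsets 1..ARRAY_SIZE-1 probe kStartIndex + 1 onward, so the
  // slot at kStartIndex itself is never visited (prints 6 4 5 6 7 0 1 2).
  std::printf("old:");
  for (uint32_t offset = 0; offset < ARRAY_SIZE; ++offset)
    std::printf(" %u", !offset ? start : (kStartIndex + offset) % ARRAY_SIZE);

  // New loop: offsets 1..ARRAY_SIZE probe every slot starting at kStartIndex,
  // covering all ARRAY_SIZE entries (prints 6 3 4 5 6 7 0 1 2).
  std::printf("\nnew:");
  for (uint32_t offset = 0; offset <= ARRAY_SIZE; ++offset)
    std::printf(" %u", !offset ? start
                               : (kStartIndex + offset - 1) % ARRAY_SIZE);
  std::printf("\n");
}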