Commit 8122d60: Michael comments

1 parent 381b5bc

libc/src/__support/GPU/allocator.cpp

Lines changed: 24 additions & 23 deletions

@@ -28,6 +28,7 @@ namespace LIBC_NAMESPACE_DECL {
 constexpr static uint64_t MAX_SIZE = /* 64 GiB */ 64ull * 1024 * 1024 * 1024;
 constexpr static uint64_t SLAB_SIZE = /* 2 MiB */ 2ull * 1024 * 1024;
 constexpr static uint64_t ARRAY_SIZE = MAX_SIZE / SLAB_SIZE;
+constexpr static uint64_t SLAB_ALIGNMENT = SLAB_SIZE - 1;
 constexpr static uint32_t BITS_IN_WORD = sizeof(uint32_t) * 8;
 constexpr static uint32_t MIN_SIZE = 16;
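Since SLAB_SIZE is a power of two, the new SLAB_ALIGNMENT constant (SLAB_SIZE - 1) doubles as a bit mask: ANDing an address with it tests 2 MiB alignment, and ANDing with its complement rounds the address down to its slab. A minimal standalone sketch of that mask arithmetic (plain C++, not code from the patch; only the constant names are copied from it):

    #include <cstdint>

    constexpr uint64_t SLAB_SIZE = /* 2 MiB */ 2ull * 1024 * 1024;
    constexpr uint64_t SLAB_ALIGNMENT = SLAB_SIZE - 1; // 0x1fffff, low 21 bits

    // Round an address down to the 2 MiB slab that contains it.
    constexpr uint64_t slab_base(uint64_t addr) { return addr & ~SLAB_ALIGNMENT; }

    static_assert(slab_base(0x200000 + 1234) == 0x200000, "");
    static_assert((0x400000 & SLAB_ALIGNMENT) == 0, "aligned addresses test to zero");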

@@ -62,7 +63,7 @@ static void rpc_free(void *ptr) {
 // Convert a potentially disjoint bitmask into an increasing integer for use
 // with indexing between gpu lanes.
 static inline uint32_t lane_count(uint64_t lane_mask) {
-  return cpp::popcount(lane_mask & ((1ull << gpu::get_lane_id()) - 1));
+  return cpp::popcount(lane_mask & ((uint64_t(1) << gpu::get_lane_id()) - 1));
 }

 // Obtain an initial value to seed a random number generator. We use the rounded
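This change swaps 1ull for the equivalent uint64_t(1); the value is unchanged, the width is just spelled explicitly. The function itself turns a possibly sparse lane mask into a dense rank: each lane counts the active lanes strictly below its own ID. A self-contained sketch, with std::popcount standing in for cpp::popcount and an explicit lane_id parameter standing in for gpu::get_lane_id():

    #include <bit>
    #include <cstdint>

    // Rank of `lane_id` among the lanes set in `lane_mask`: popcount of the
    // mask bits strictly below the lane's own bit.
    constexpr uint32_t lane_rank(uint64_t lane_mask, uint32_t lane_id) {
      return std::popcount(lane_mask & ((uint64_t(1) << lane_id) - 1));
    }

    // Lanes 0, 2, 3, and 5 are active: they receive ranks 0, 1, 2, and 3.
    static_assert(lane_rank(0b101101, 0) == 0, "");
    static_assert(lane_rank(0b101101, 3) == 2, "");
    static_assert(lane_rank(0b101101, 5) == 3, "");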
@@ -225,7 +226,7 @@ struct Slab {

     uint32_t slot = index / BITS_IN_WORD;
     uint32_t bit = index % BITS_IN_WORD;
-    if (mask & (1ull << gpu::get_lane_id())) {
+    if (mask & (uint64_t(1) << gpu::get_lane_id())) {
       uint32_t before = cpp::AtomicRef(get_bitfield()[slot])
                             .fetch_or(1u << bit, cpp::MemoryOrder::RELAXED);
       if (~before & (1 << bit)) {
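For context, the surrounding code maps a chunk index onto a word and bit of the slab's bitfield, then claims it with an atomic OR; the returned prior value tells the lane whether it was the one that actually flipped the bit. A simplified sketch with std::atomic standing in for cpp::AtomicRef:

    #include <atomic>
    #include <cstdint>

    constexpr uint32_t BITS_IN_WORD = sizeof(uint32_t) * 8;

    // Attempt to claim bit `index`; true only for the caller that sets it.
    bool claim_bit(std::atomic<uint32_t> *bitfield, uint32_t index) {
      uint32_t slot = index / BITS_IN_WORD; // which 32-bit word
      uint32_t bit = index % BITS_IN_WORD;  // which bit inside that word
      uint32_t before =
          bitfield[slot].fetch_or(1u << bit, std::memory_order_relaxed);
      return (~before & (1u << bit)) != 0; // bit was clear before our OR
    }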
@@ -262,11 +263,11 @@ template <typename T> struct GuardPtr {
 private:
   struct RefCounter {
     // Indicates that the object is in its deallocation phase and thus invalid.
-    static constexpr uint64_t invalid = 1ull << 63;
+    static constexpr uint64_t INVALID = uint64_t(1) << 63;

     // If a read preempts an unlock call we indicate this so the following
     // unlock call can swap out the helped bit and maintain exclusive ownership.
-    static constexpr uint64_t helped = 1ull << 62;
+    static constexpr uint64_t HELPED = uint64_t(1) << 62;

     // Resets the reference counter, cannot be reset to zero safely.
     void reset(uint32_t n, uint64_t &count) {
@@ -277,7 +278,7 @@ template <typename T> struct GuardPtr {
     // Acquire a slot in the reference counter if it is not invalid.
     bool acquire(uint32_t n, uint64_t &count) {
       count = counter.fetch_add(n, cpp::MemoryOrder::RELAXED) + n;
-      return (count & invalid) == 0;
+      return (count & INVALID) == 0;
     }

     // Release a slot in the reference counter. This function should only be
@@ -289,13 +290,13 @@ template <typename T> struct GuardPtr {
     // helped us invalidating it. For the latter, claim that flag and return.
     if (counter.fetch_sub(n, cpp::MemoryOrder::RELAXED) == n) {
       uint64_t expected = 0;
-      if (counter.compare_exchange_strong(expected, invalid,
+      if (counter.compare_exchange_strong(expected, INVALID,
                                           cpp::MemoryOrder::RELAXED,
                                           cpp::MemoryOrder::RELAXED))
         return true;
-      else if ((expected & helped) &&
-               (counter.exchange(invalid, cpp::MemoryOrder::RELAXED) &
-                helped))
+      else if ((expected & HELPED) &&
+               (counter.exchange(INVALID, cpp::MemoryOrder::RELAXED) &
+                HELPED))
         return true;
     }
     return false;
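The renamed INVALID and HELPED flags occupy bits 63 and 62 of the 64-bit counter, leaving the low bits for the actual reference count. The release path is the subtle part: the last releaser tries to CAS 0 to INVALID, and if a concurrent read() beat it to the invalidation (leaving INVALID | HELPED behind), it claims the HELPED bit instead so that exactly one caller wins ownership of the teardown. A simplified model of just that path, using std::atomic in place of cpp::Atomic; this is an illustrative sketch, not the libc implementation:

    #include <atomic>
    #include <cstdint>

    struct MiniRefCounter {
      static constexpr uint64_t INVALID = uint64_t(1) << 63;
      static constexpr uint64_t HELPED = uint64_t(1) << 62;
      std::atomic<uint64_t> counter{0};

      // Returns true for exactly one caller: the releaser that drops the
      // count to zero and successfully marks the counter INVALID.
      bool release(uint32_t n) {
        if (counter.fetch_sub(n, std::memory_order_relaxed) == n) {
          uint64_t expected = 0;
          if (counter.compare_exchange_strong(expected, INVALID,
                                              std::memory_order_relaxed,
                                              std::memory_order_relaxed))
            return true;
          // A reader raced us and stored INVALID | HELPED; claiming the
          // HELPED flag hands the final release to us exactly once.
          if ((expected & HELPED) &&
              (counter.exchange(INVALID, std::memory_order_relaxed) & HELPED))
            return true;
        }
        return false;
      }
    };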
@@ -306,9 +307,9 @@ template <typename T> struct GuardPtr {
     uint64_t read() {
       auto val = counter.load(cpp::MemoryOrder::RELAXED);
       if (val == 0 && counter.compare_exchange_strong(
-                          val, invalid | helped, cpp::MemoryOrder::RELAXED))
+                          val, INVALID | HELPED, cpp::MemoryOrder::RELAXED))
         return 0;
-      return (val & invalid) ? 0 : val;
+      return (val & INVALID) ? 0 : val;
     }

     cpp::Atomic<uint64_t> counter{0};
@@ -318,7 +319,7 @@ template <typename T> struct GuardPtr {
   RefCounter ref{};

   // A sentinel value used to claim the pointer slot.
-  static constexpr uint64_t sentinel = ~0ULL;
+  static constexpr uint64_t sentinel = cpp::numeric_limits<uint64_t>::max();

   // Should be called by a single lane for each different pointer.
   template <typename... Args>
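The sentinel's value is unchanged by the new spelling: on the usual 64-bit unsigned long long, cpp::numeric_limits<uint64_t>::max() and ~0ULL are the same all-ones pattern, the former just states the intent. A one-line check against the standard-library equivalent:

    #include <cstdint>
    #include <limits>

    static_assert(std::numeric_limits<uint64_t>::max() == ~0ull,
                  "max() and ~0ull name the same all-ones sentinel");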
@@ -328,7 +329,7 @@ template <typename T> struct GuardPtr {
         ptr.compare_exchange_strong(expected, reinterpret_cast<T *>(sentinel),
                                     cpp::MemoryOrder::RELAXED,
                                     cpp::MemoryOrder::RELAXED)) {
-      count = ~0ull;
+      count = cpp::numeric_limits<uint64_t>::max();
       T *mem = reinterpret_cast<T *>(impl::rpc_allocate(sizeof(T)));
       if (!mem)
         return nullptr;
@@ -357,22 +358,22 @@ template <typename T> struct GuardPtr {
   // The uniform mask represents which lanes share the same pointer. For each
   // uniform value we elect a leader to handle it on behalf of the other lanes.
   template <typename... Args>
-  T *try_lock(uint64_t lane_mask, uint64_t unifrom, uint64_t &count,
+  T *try_lock(uint64_t lane_mask, uint64_t uniform, uint64_t &count,
               Args &&...args) {
     count = 0;
     T *result = nullptr;
-    if (gpu::get_lane_id() == uint32_t(cpp::countr_zero(unifrom)))
-      result = try_lock_impl(cpp::popcount(unifrom), count,
+    if (gpu::get_lane_id() == uint32_t(cpp::countr_zero(uniform)))
+      result = try_lock_impl(cpp::popcount(uniform), count,
                              cpp::forward<Args>(args)...);
-    result = gpu::shuffle(lane_mask, cpp::countr_zero(unifrom), result);
+    result = gpu::shuffle(lane_mask, cpp::countr_zero(uniform), result);

     if (!result)
       return nullptr;

     // Obtain the value of the reference counter for each lane given the
     // aggregate value.
-    count = gpu::shuffle(lane_mask, cpp::countr_zero(unifrom), count) -
-            cpp::popcount(unifrom) + impl::lane_count(unifrom) + 1;
+    count = gpu::shuffle(lane_mask, cpp::countr_zero(uniform), count) -
+            cpp::popcount(uniform) + impl::lane_count(uniform) + 1;
     return result;
   }
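Beyond the unifrom to uniform spelling fix, the logic here is worth unpacking: the leader is the lowest set lane of the uniform mask (countr_zero), it performs one aggregate fetch_add on behalf of all popcount(uniform) lanes, and after the shuffle broadcast each lane reconstructs the distinct counter value its own reference corresponds to. A worked example with hypothetical numbers, reusing the lane_rank sketch from earlier:

    #include <bit>
    #include <cstdint>

    constexpr uint32_t lane_rank(uint64_t mask, uint32_t lane) {
      return std::popcount(mask & ((uint64_t(1) << lane) - 1));
    }

    // `aggregate` is the post-add counter value the leader broadcasts.
    constexpr uint64_t per_lane_count(uint64_t aggregate, uint64_t uniform,
                                      uint32_t lane) {
      return aggregate - std::popcount(uniform) + lane_rank(uniform, lane) + 1;
    }

    // uniform = 0b1011: lanes 0, 1, and 3 share the slab. The leader (lane 0)
    // does fetch_add(3); if the counter held 5, the broadcast aggregate is 8,
    // and the three lanes recover the distinct slots 6, 7, and 8.
    static_assert(per_lane_count(8, 0b1011, 0) == 6, "");
    static_assert(per_lane_count(8, 0b1011, 1) == 7, "");
    static_assert(per_lane_count(8, 0b1011, 3) == 8, "");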

@@ -433,7 +434,7 @@ static Slab *find_slab(uint32_t chunk_size) {
   }

   // Malloc returned a null pointer and we are out-of-memory.
-  if (!slab && reserved == ~0ull)
+  if (!slab && reserved == cpp::numeric_limits<uint64_t>::max())
     return nullptr;

   // The slab is in the process of being initialized. Start at the beginning
@@ -480,12 +481,12 @@ void deallocate(void *ptr) {
     return;

   // All non-slab allocations will be aligned on a 2MiB boundary.
-  if ((reinterpret_cast<uintptr_t>(ptr) & 0x1fffff) == 0)
+  if ((reinterpret_cast<uintptr_t>(ptr) & SLAB_ALIGNMENT) == 0)
     return impl::rpc_free(ptr);

   // The original slab pointer is the 2MiB boundary below the given pointer.
-  Slab *slab =
-      reinterpret_cast<Slab *>((reinterpret_cast<uintptr_t>(ptr) & ~0x1fffff));
+  Slab *slab = reinterpret_cast<Slab *>(
+      (reinterpret_cast<uintptr_t>(ptr) & ~SLAB_ALIGNMENT));
   slab->deallocate(ptr);
   release_slab(slab);
 }
