@@ -39,9 +39,6 @@ constexpr static uint32_t MIN_ALIGNMENT = MIN_SIZE - 1;
 // The number of times to attempt claiming an in-progress slab allocation.
 constexpr static uint32_t MAX_TRIES = 1024;
 
-// A sentinel used to indicate an invalid but non-null pointer value.
-constexpr static uint64_t SENTINEL = cpp::numeric_limits<uint64_t>::max();
-
 static_assert(!(ARRAY_SIZE & (ARRAY_SIZE - 1)), "Must be a power of two");
 
 namespace impl {
@@ -163,6 +160,11 @@ static inline uint32_t get_leader_id(uint64_t ballot, uint32_t id) {
   return BITS_IN_DWORD - cpp::countl_zero(ballot & ~mask) - 1;
 }
 
+// We use a sentinel value to indicate a failed or in-progress allocation.
+template <typename T> bool is_sentinel(const T &x) {
+  return x == cpp::numeric_limits<T>::max();
+}
+
 } // namespace impl
 
 /// A slab allocator used to hand out identically sized slabs of memory.
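For context, a minimal standalone sketch of the sentinel convention this hunk introduces. It uses std::numeric_limits instead of the allocator's internal cpp:: wrappers, and the main() driver is purely illustrative.

#include <cstdint>
#include <limits>

// Standalone sketch of the helper: any unsigned value whose bits are all ones
// is treated as "failed or still in progress".
template <typename T> static bool is_sentinel(const T &x) {
  return x == std::numeric_limits<T>::max();
}

int main() {
  uint32_t reserved = std::numeric_limits<uint32_t>::max(); // in-progress marker
  uint64_t count = 42;                                      // a normal value
  return (is_sentinel(reserved) && !is_sentinel(count)) ? 0 : 1;
}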
@@ -343,20 +345,20 @@ struct GuardPtr {
 private:
   struct RefCounter {
     // Indicates that the object is in its deallocation phase and thus invalid.
-    static constexpr uint64_t INVALID = uint64_t(1) << 63;
+    static constexpr uint32_t INVALID = uint32_t(1) << 31;
 
     // If a read preempts an unlock call we indicate this so the following
     // unlock call can swap out the helped bit and maintain exclusive ownership.
-    static constexpr uint64_t HELPED = uint64_t(1) << 62;
+    static constexpr uint32_t HELPED = uint32_t(1) << 30;
 
     // Resets the reference counter, cannot be reset to zero safely.
-    void reset(uint32_t n, uint64_t &count) {
+    void reset(uint32_t n, uint32_t &count) {
       counter.store(n, cpp::MemoryOrder::RELAXED);
       count = n;
     }
 
     // Acquire a slot in the reference counter if it is not invalid.
-    bool acquire(uint32_t n, uint64_t &count) {
+    bool acquire(uint32_t n, uint32_t &count) {
       count = counter.fetch_add(n, cpp::MemoryOrder::RELAXED) + n;
       return (count & INVALID) == 0;
     }
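A simplified, host-only sketch (assumed names, std::atomic in place of cpp::Atomic) of why the counter now fits in 32 bits: the top two bits are reserved for the INVALID and HELPED flags, and acquire() reports failure whenever the INVALID bit is visible after its increment.

#include <atomic>
#include <cstdint>

struct RefCounterSketch {
  static constexpr uint32_t INVALID = uint32_t(1) << 31; // object being torn down
  static constexpr uint32_t HELPED = uint32_t(1) << 30;  // a reader helped the unlock

  std::atomic<uint32_t> counter{0};

  // Mirrors the patched acquire(): bump the count, then check the flag bit.
  bool acquire(uint32_t n, uint32_t &count) {
    count = counter.fetch_add(n, std::memory_order_relaxed) + n;
    return (count & INVALID) == 0;
  }
};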
@@ -369,7 +371,7 @@ struct GuardPtr {
       // another thread resurrected the counter and we quit, or a parallel read
       // helped us invalidating it. For the latter, claim that flag and return.
       if (counter.fetch_sub(n, cpp::MemoryOrder::RELAXED) == n) {
-        uint64_t expected = 0;
+        uint32_t expected = 0;
         if (counter.compare_exchange_strong(expected, INVALID,
                                             cpp::MemoryOrder::RELAXED,
                                             cpp::MemoryOrder::RELAXED))
@@ -392,28 +394,29 @@ struct GuardPtr {
       return (val & INVALID) ? 0 : val;
     }
 
-    cpp::Atomic<uint64_t> counter{0};
+    cpp::Atomic<uint32_t> counter{0};
   };
 
-  cpp::Atomic<Slab *> ptr{nullptr};
-  RefCounter ref{};
+  cpp::Atomic<Slab *> ptr;
+  RefCounter ref;
 
   // Should be called by a single lane for each different pointer.
   template <typename... Args>
-  Slab *try_lock_impl(uint32_t n, uint64_t &count, Args &&...args) {
+  Slab *try_lock_impl(uint32_t n, uint32_t &count, Args &&...args) {
     Slab *expected = ptr.load(cpp::MemoryOrder::RELAXED);
     if (!expected &&
         ptr.compare_exchange_strong(
-            expected, reinterpret_cast<Slab *>(SENTINEL),
+            expected,
+            reinterpret_cast<Slab *>(cpp::numeric_limits<uintptr_t>::max()),
             cpp::MemoryOrder::RELAXED, cpp::MemoryOrder::RELAXED)) {
-      count = cpp::numeric_limits<uint64_t>::max();
+      count = cpp::numeric_limits<uint32_t>::max();
       void *raw = impl::rpc_allocate(sizeof(Slab));
       if (!raw)
         return nullptr;
       return new (raw) Slab(cpp::forward<Args>(args)...);
     }
 
-    if (!expected || expected == reinterpret_cast<Slab *>(SENTINEL))
+    if (!expected || impl::is_sentinel(reinterpret_cast<uintptr_t>(expected)))
       return nullptr;
 
     if (!ref.acquire(n, count))
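A host-only sketch (illustrative names, not the allocator's API) of the claim protocol in try_lock_impl: the first caller to swing the pointer from null to the all-ones sentinel wins the right to perform the slow allocation, while every other caller observes either the sentinel or the eventually published pointer.

#include <atomic>
#include <cstdint>
#include <limits>

struct SlotSketch {
  std::atomic<void *> ptr{nullptr};

  // Returns true if this caller won the right to allocate the backing memory.
  bool try_claim() {
    void *expected = ptr.load(std::memory_order_relaxed);
    return !expected &&
           ptr.compare_exchange_strong(
               expected,
               reinterpret_cast<void *>(std::numeric_limits<uintptr_t>::max()),
               std::memory_order_relaxed, std::memory_order_relaxed);
  }
};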
@@ -425,7 +428,7 @@ struct GuardPtr {
 
   // Finalize the associated memory and signal that it is ready to use by
   // resetting the counter.
-  void finalize(Slab *mem, uint32_t n, uint64_t &count) {
+  void finalize(Slab *mem, uint32_t n, uint32_t &count) {
     cpp::atomic_thread_fence(cpp::MemoryOrder::RELEASE);
     ptr.store(mem, cpp::MemoryOrder::RELAXED);
     cpp::atomic_thread_fence(cpp::MemoryOrder::ACQUIRE);
@@ -438,7 +441,7 @@ struct GuardPtr {
   // The uniform mask represents which lanes share the same pointer. For each
   // uniform value we elect a leader to handle it on behalf of the other lanes.
   template <typename... Args>
-  Slab *try_lock(uint64_t lane_mask, uint64_t uniform, uint64_t &count,
+  Slab *try_lock(uint64_t lane_mask, uint64_t uniform, uint32_t &count,
                  Args &&...args) {
     count = 0;
     Slab *result = nullptr;
@@ -453,13 +456,13 @@ struct GuardPtr {
 
     // We defer storing the newly allocated slab until now so that we can use
     // multiple lanes to initialize it and release it for use.
-    if (count == cpp::numeric_limits<uint64_t>::max()) {
+    if (impl::is_sentinel(count)) {
       result->initialize(uniform);
       if (gpu::get_lane_id() == uint32_t(cpp::countr_zero(uniform)))
         finalize(result, cpp::popcount(uniform), count);
     }
 
-    if (count != cpp::numeric_limits<uint64_t>::max())
+    if (!impl::is_sentinel(count))
       count = count - cpp::popcount(uniform) +
               impl::lane_count(uniform, gpu::get_lane_id()) + 1;
 
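A host-only illustration (no GPU intrinsics) of the count adjustment above, assuming impl::lane_count counts the set bits below the caller's lane: the leader's single fetch_add reserved popcount(uniform) chunks at once, and each participating lane recovers a distinct value from that shared total. The mask and counter values here are made up for the example.

#include <bit>
#include <cstdint>
#include <cstdio>

static uint32_t lane_count(uint64_t uniform, uint32_t lane_id) {
  return std::popcount(uniform & ((uint64_t(1) << lane_id) - 1));
}

int main() {
  uint64_t uniform = 0b1011; // hypothetical: lanes 0, 1, and 3 share a slab
  uint32_t total = 10;       // counter value after the leader's acquire
  for (uint32_t lane : {0u, 1u, 3u}) {
    uint32_t count =
        total - std::popcount(uniform) + lane_count(uniform, lane) + 1;
    std::printf("lane %u -> count %u\n", lane, count);
  }
}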
@@ -515,14 +518,15 @@ static Slab *find_slab(uint32_t chunk_size, uint64_t &uniform) {
     if (!offset ||
         slots[index].use_count() < Slab::available_chunks(chunk_size)) {
       uint64_t lane_mask = gpu::get_lane_mask();
-      uint64_t reserved = 0;
+      uint32_t reserved = 0;
 
       Slab *slab = slots[index].try_lock(lane_mask, uniform & lane_mask,
                                          reserved, chunk_size, index);
 
       // If there is a slab allocation in progress we retry a few times.
       for (uint32_t retries = 0;
-           retries < MAX_TRIES && !slab && reserved != SENTINEL; retries++) {
+           !slab && !impl::is_sentinel(reserved) && retries < MAX_TRIES;
+           retries++) {
         uint64_t lane_mask = gpu::get_lane_mask();
         slab = slots[index].try_lock(lane_mask, uniform & lane_mask, reserved,
                                      chunk_size, index);
@@ -542,7 +546,7 @@ static Slab *find_slab(uint32_t chunk_size, uint64_t &uniform) {
                    slab->get_chunk_size() != chunk_size)) {
         slots[index].unlock(gpu::get_lane_mask(),
                             gpu::get_lane_mask() & uniform);
-      } else if (!slab && reserved == SENTINEL) {
+      } else if (!slab && impl::is_sentinel(reserved)) {
         uniform = uniform & gpu::get_lane_mask();
         return nullptr;
       } else {
@@ -575,7 +579,7 @@ void *allocate(uint64_t size) {
   uint32_t chunk_size = impl::get_chunk_size(static_cast<uint32_t>(size));
   uint64_t uniform = gpu::match_any(gpu::get_lane_mask(), chunk_size);
   Slab *slab = find_slab(chunk_size, uniform);
-  if (!slab || slab == reinterpret_cast<Slab *>(SENTINEL))
+  if (!slab || impl::is_sentinel(reinterpret_cast<uintptr_t>(slab)))
     return nullptr;
 
   uint64_t lane_mask = gpu::get_lane_mask();