@@ -455,21 +455,8 @@ bool VirtualGPU::dispatchGenericAqlPacket(
455455
456456 // TODO: placeholder to setup the kernel to populate start and end timestamp.
457457 if (timestamp_ != nullptr ) {
458- // Pool size must grow to the size of pending AQL packets
459- const uint32_t pool_size = index - read;
460- if (pool_size >= signal_pool_.size ()) {
461- ProfilingSignal profiling_signal = {};
462- if (HSA_STATUS_SUCCESS != hsa_signal_create (0 , 0 , nullptr , &profiling_signal.signal_ )) {
463- LogPrintfError (" Failed signal allocation id = %d" , pool_size);
464- return false ;
465- }
466- signal_pool_.push_back (profiling_signal);
467- assert (queueSize >= signal_pool_.size () && " Pool will be reallocated!" );
468- }
469- // Move index inside the valid pool
470- ++current_signal_ %= signal_pool_.size ();
471458 // Find signal slot
472- ProfilingSignal* profilingSignal = &signal_pool_[current_signal_ ];
459+ ProfilingSignal* profilingSignal = &signal_pool_[index & queueMask ];
473460 // Make sure we save the old results in the TS structure
474461 if (profilingSignal->ts_ != nullptr ) {
475462 profilingSignal->ts_ ->checkGpuTime ();
@@ -549,8 +536,6 @@ bool VirtualGPU::dispatchGenericAqlPacket(
549536 LogPrintfError (" Failed signal [0x%lx] wait" , signal.handle );
550537 return false ;
551538 }
552- // Reset the pool of signals
553- current_signal_ = 0 ;
554539 }
555540
556541 return true ;
@@ -635,14 +620,8 @@ void VirtualGPU::ResetQueueStates() {
635620 // Release all memory dependencies
636621 memoryDependency ().clear ();
637622
638- if (dev ().settings ().barrier_sync_ ) {
639- // Release the pool, since runtime just completed a barrier
640- // @note: Runtime can reset kernel arg pool only if the barrier with L2 invalidation was issued
641- resetKernArgPool ();
642- } else {
643- // Reset the pool of signals
644- current_signal_ = 0 ;
645- }
623+ // Release the pool, since runtime just completed a barrier
624+ resetKernArgPool ();
646625}
647626
648627// ================================================================================================
@@ -854,17 +833,8 @@ bool VirtualGPU::initPool(size_t kernarg_pool_size, uint signal_pool_count) {
854833 }
855834
856835 if (signal_pool_count != 0 ) {
857- // Reserve signal pool for all entries in the queue, since profiling logic will save the
858- // pointer in timestamp info for the future references
859- signal_pool_.reserve (signal_pool_count);
860- // If barrier is disable, then allocate a small portion of all signals and grow the array later.
861- // @note: the optimization requires a wait for signal on reuse, which is only available when
862- // the barrier is disabled
863- constexpr uint32_t kDefaultSignalPoolSize = 32 ;
864- const uint32_t default_signal_pool_size = (dev ().settings ().barrier_sync_ ) ?
865- signal_pool_count : kDefaultSignalPoolSize ;
866- signal_pool_.resize (default_signal_pool_size);
867- for (uint i = 0 ; i < default_signal_pool_size; ++i) {
836+ signal_pool_.resize (signal_pool_count);
837+ for (uint i = 0 ; i < signal_pool_count; ++i) {
868838 ProfilingSignal profilingSignal;
869839 if (HSA_STATUS_SUCCESS != hsa_signal_create (0 , 0 , nullptr , &profilingSignal.signal_ )) {
870840 return false ;
@@ -911,9 +881,6 @@ void* VirtualGPU::allocKernArg(size_t size, size_t alignment) {
911881 }
912882
913883 resetKernArgPool ();
914-
915- // Reset the pool of signals
916- current_signal_ = 0 ;
917884 }
918885 } while (true );
919886
0 commit comments