@@ -453,8 +453,17 @@ bool VirtualGPU::dispatchGenericAqlPacket(
453453
454454 // TODO: placeholder to setup the kernel to populate start and end timestamp.
455455 if (timestamp_ != nullptr ) {
456+ if (current_signal_ >= signal_pool_.size ()) {
457+ ProfilingSignal profilingSignal = {};
458+ if (HSA_STATUS_SUCCESS != hsa_signal_create (0 , 0 , nullptr , &profilingSignal.signal_ )) {
459+ LogPrintfError (" Failed signal allocation id = %d" , current_signal_);
460+ return false ;
461+ }
462+ signal_pool_.push_back (profilingSignal);
463+ assert (current_signal_ < signal_pool_.size () && " Not enough signals" );
464+ }
456465 // Find signal slot
457- ProfilingSignal* profilingSignal = &signal_pool_[index & queueMask ];
466+ ProfilingSignal* profilingSignal = &signal_pool_[current_signal_++ ];
458467 // Make sure we save the old results in the TS structure
459468 if (profilingSignal->ts_ != nullptr ) {
460469 profilingSignal->ts_ ->checkGpuTime ();
@@ -526,6 +535,8 @@ bool VirtualGPU::dispatchGenericAqlPacket(
526535 LogPrintfError (" Failed signal [0x%lx] wait" , signal.handle );
527536 return false ;
528537 }
538+ // Reset the pool of signals
539+ current_signal_ = 0 ;
529540 }
530541
531542 return true ;
@@ -634,6 +645,9 @@ bool VirtualGPU::releaseGpuMemoryFence() {
634645 // Release the pool, since runtime just completed a barrier
635646 resetKernArgPool ();
636647
648+ // Reset the pool of signals
649+ current_signal_ = 0 ;
650+
637651 return true ;
638652}
639653
@@ -757,8 +771,9 @@ bool VirtualGPU::create() {
757771 gpu_queue_ = roc_device_.acquireQueue (queue_size, cooperative_, cuMask_, priority_);
758772 if (!gpu_queue_) return false ;
759773
774+ constexpr uint32_t kDefaultSignalPoolSize = 32 ;
760775 if (!initPool (dev ().settings ().kernargPoolSize_ ,
761- (profiling_ || (amd::IS_HIP)) ? queue_size : 0 )) {
776+ (profiling_ || (amd::IS_HIP)) ? kDefaultSignalPoolSize : 0 )) {
762777 LogError (" Couldn't allocate arguments/signals for the queue" );
763778 return false ;
764779 }
@@ -861,6 +876,9 @@ void* VirtualGPU::allocKernArg(size_t size, size_t alignment) {
861876 }
862877
863878 resetKernArgPool ();
879+
880+ // Reset the pool of signals
881+ current_signal_ = 0 ;
864882 }
865883 } while (true );
866884
0 commit comments