@@ -422,7 +422,20 @@ bool VirtualGPU::HwQueueTracker::Create() {
422422
423423// ================================================================================================
424424hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal (
425- hsa_signal_value_t init_val, Timestamp* ts) {
425+ hsa_signal_value_t init_val, Timestamp* ts, bool attach_signal) {
426+
427+ amd::Command* cmd = gpu_.command ();
428+ // If no signal is needed, decrement the refcount and clear the hw_event of current command
429+ if (!attach_signal) {
430+ if (nullptr != cmd) {
431+ if (cmd->HwEvent () != nullptr ) {
432+ reinterpret_cast <ProfilingSignal*>(cmd->HwEvent ())->release ();
433+ }
434+ cmd->SetHwEvent (nullptr );
435+ }
436+ return hsa_signal_t {0 };
437+ }
438+
426439 bool new_signal = false ;
427440
428441 // Peek at the signal +2 ahead to see if it's done
@@ -503,8 +516,7 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal(
503516 prof_signal->engine_ = engine_;
504517 prof_signal->flags_ .isPacketDispatch_ = false ;
505518
506- // Store the HW event
507- amd::Command* cmd = gpu_.command ();
519+
508520 if (nullptr != cmd) {
509521 // Release any existing HwEvent before setting new one for the same command
510522 if (cmd->HwEvent () != nullptr ) {
@@ -1026,24 +1038,25 @@ bool VirtualGPU::dispatchGenericAqlPacket(
10261038
10271039 fence_state_ = static_cast <Device::CacheState>(expected_fence_state);
10281040
1029- if (timestamp_ != nullptr || attach_signal) {
1030- // Get active signal for current dispatch if profiling is necessary
1031- packet->completion_signal = Barriers ().ActiveSignal (kInitSignalValueOne , timestamp_);
1032-
1033- if (std::is_same<decltype (packet), hsa_kernel_dispatch_packet_t *>::value) {
1034- // If profiling is enabled, store the correlation ID in the dispatch packet. The profiler can
1035- // retrieve this correlation ID to attribute waves to specific dispatch locations.
1036- if (amd::activity_prof::IsEnabled (OP_ID_DISPATCH)) {
1037- auto dispatchPacket = reinterpret_cast <hsa_kernel_dispatch_packet_t *>(packet);
1038- dispatchPacket->reserved2 = timestamp_->command ().profilingInfo ().correlation_id_ ;
1039- }
1040-
1041- ProfilingSignal* current_signal = Barriers ().GetLastSignal ();
1042- current_signal->flags_ .isPacketDispatch_ = true ;
1041+ bool attachSignal = timestamp_ != nullptr || attach_signal;
1042+ // Get the active signal for the current dispatch; ActiveSignal() returns a null signal when attachSignal is false
1043+ packet->completion_signal = Barriers ().ActiveSignal (kInitSignalValueOne ,
1044+ timestamp_, attachSignal);
10431045
1046+ if (std::is_same<decltype (packet), hsa_kernel_dispatch_packet_t *>::value
1047+ && timestamp_ != nullptr ) {
1048+ // If profiling is enabled, store the correlation ID in the dispatch packet. The profiler can
1049+ // retrieve this correlation ID to attribute waves to specific dispatch locations.
1050+ if (amd::activity_prof::IsEnabled (OP_ID_DISPATCH) ) {
1051+ auto dispatchPacket = reinterpret_cast <hsa_kernel_dispatch_packet_t *>(packet);
1052+ dispatchPacket->reserved2 = timestamp_->command ().profilingInfo ().correlation_id_ ;
10441053 }
1054+
1055+ ProfilingSignal* current_signal = Barriers ().GetLastSignal ();
1056+ current_signal->flags_ .isPacketDispatch_ = true ;
10451057 }
10461058
1059+
10471060 // Make sure the slot is free for usage
10481061 while ((index - hsa_queue_load_read_index_scacquire (gpu_queue_)) >= sw_queue_size) {
10491062 amd::Os::yield ();
0 commit comments