@@ -110,6 +110,7 @@ static unsigned extractAqlBits(unsigned v, unsigned pos, unsigned width) {
110110
111111// ================================================================================================
112112void Timestamp::checkGpuTime () {
113+ amd::ScopedLock s (lock_);
113114 if (HwProfiling ()) {
114115 uint64_t start = std::numeric_limits<uint64_t >::max ();
115116 uint64_t end = 0 ;
@@ -140,7 +141,6 @@ void Timestamp::checkGpuTime() {
140141 ClPrint (amd::LOG_INFO, amd::LOG_SIG, " Signal = (0x%lx), start = %ld, "
141142 " end = %ld" , it->signal_ .handle , start, end);
142143 }
143- it->ts_ = nullptr ;
144144 it->done_ = true ;
145145 }
146146 signals_.clear ();
@@ -399,6 +399,7 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal(
399399 if (ts != 0 ) {
400400 // Save HSA signal earlier to make sure the possible callback will have a valid
401401 // value for processing
402+ ts->retain ();
402403 prof_signal->ts_ = ts;
403404 ts->AddProfilingSignal (prof_signal);
404405 // If direct dispatch is enabled and the batch head isn't null, then it's a marker and
@@ -437,7 +438,7 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal(
437438// ================================================================================================
438439std::vector<hsa_signal_t >& VirtualGPU::HwQueueTracker::WaitingSignal (HwQueueEngine engine) {
439440 bool explicit_wait = false ;
440- // Rest all current waiting signals
441+ // Reset all current waiting signals
441442 waiting_signals_.clear ();
442443
443444 // Does runtime switch the active engine?
@@ -499,21 +500,22 @@ std::vector<hsa_signal_t>& VirtualGPU::HwQueueTracker::WaitingSignal(HwQueueEngi
499500
500501// ================================================================================================
501502bool VirtualGPU::HwQueueTracker::CpuWaitForSignal (ProfilingSignal* signal) {
502- amd::ScopedLock lock (signal->LockSignalOps ());
503503 // Wait for the current signal
504- if (! signal->done_ ) {
504+ if (signal->ts_ != nullptr ) {
505505 // Update timestamp values if requested
506- if (signal->ts_ != nullptr ) {
507- signal->ts_ ->checkGpuTime ();
508- } else {
509- ClPrint (amd::LOG_DEBUG, amd::LOG_COPY, " [%zx]!\t Host wait on completion_signal=0x%zx" ,
510- std::this_thread::get_id (), signal->signal_ .handle );
511- if (!WaitForSignal (signal->signal_ , gpu_.ActiveWait ())) {
512- LogPrintfError (" Failed signal [0x%lx] wait" , signal->signal_ );
513- return false ;
514- }
515- signal->done_ = true ;
506+ auto ts = signal->ts_ ;
507+ ts->checkGpuTime ();
508+ ts->release ();
509+ signal->ts_ = nullptr ;
510+ } else if (!signal->done_ ) {
511+ amd::ScopedLock lock (signal->LockSignalOps ());
512+ ClPrint (amd::LOG_DEBUG, amd::LOG_COPY, " [%zx]!\t Host wait on completion_signal=0x%zx" ,
513+ std::this_thread::get_id (), signal->signal_ .handle );
514+ if (!WaitForSignal (signal->signal_ , gpu_.ActiveWait ())) {
515+ LogPrintfError (" Failed signal [0x%lx] wait" , signal->signal_ );
516+ return false ;
516517 }
518+ signal->done_ = true ;
517519 }
518520 return true ;
519521}
@@ -1079,7 +1081,7 @@ VirtualGPU::~VirtualGPU() {
10791081 releasePinnedMem ();
10801082
10811083 if (timestamp_ != nullptr ) {
1082- delete timestamp_;
1084+ timestamp_-> release () ;
10831085 timestamp_ = nullptr ;
10841086 LogError (" There was a timestamp that was not used; deleting." );
10851087 }
@@ -1315,7 +1317,7 @@ void VirtualGPU::updateCommandsState(amd::Command* list) const {
13151317 ts = reinterpret_cast <Timestamp*>(current->data ());
13161318 startTimeStamp = ts->getStart ();
13171319 endTimeStamp = ts->getEnd ();
1318- delete ts ;
1320+ ts-> release () ;
13191321 current->setData (nullptr );
13201322 } else {
13211323 // If we don't have a command that contains a valid timestamp,
0 commit comments