Skip to content
This repository was archived by the owner on Jan 26, 2024. It is now read-only.

Commit d14ee44

Browse files
gandryeyanusha GodavarthySurya
authored andcommitted
SWDEV-296329 - Add lock protection for Timestamp update
There is a possible race condition when signal reuse can have access to a destroyed Timestamp object, because the callback was running asynchronously. Use reference counter and lock to allow asynchronous timestamp update Change-Id: I6224f7c62cb0a03a7466fcc512e5e5afb06736fa
1 parent 100ddb5 commit d14ee44

File tree

2 files changed

+22
-18
lines changed

2 files changed

+22
-18
lines changed

device/rocm/rocvirtual.cpp

Lines changed: 18 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -110,6 +110,7 @@ static unsigned extractAqlBits(unsigned v, unsigned pos, unsigned width) {
110110

111111
// ================================================================================================
112112
void Timestamp::checkGpuTime() {
113+
amd::ScopedLock s(lock_);
113114
if (HwProfiling()) {
114115
uint64_t start = std::numeric_limits<uint64_t>::max();
115116
uint64_t end = 0;
@@ -140,7 +141,6 @@ void Timestamp::checkGpuTime() {
140141
ClPrint(amd::LOG_INFO, amd::LOG_SIG, "Signal = (0x%lx), start = %ld, "
141142
"end = %ld", it->signal_.handle, start, end);
142143
}
143-
it->ts_ = nullptr;
144144
it->done_ = true;
145145
}
146146
signals_.clear();
@@ -399,6 +399,7 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal(
399399
if (ts != 0) {
400400
// Save HSA signal earlier to make sure the possible callback will have a valid
401401
// value for processing
402+
ts->retain();
402403
prof_signal->ts_ = ts;
403404
ts->AddProfilingSignal(prof_signal);
404405
// If direct dispatch is enabled and the batch head isn't null, then it's a marker and
@@ -437,7 +438,7 @@ hsa_signal_t VirtualGPU::HwQueueTracker::ActiveSignal(
437438
// ================================================================================================
438439
std::vector<hsa_signal_t>& VirtualGPU::HwQueueTracker::WaitingSignal(HwQueueEngine engine) {
439440
bool explicit_wait = false;
440-
// Rest all current waiting signals
441+
// Reset all current waiting signals
441442
waiting_signals_.clear();
442443

443444
// Does runtime switch the active engine?
@@ -499,21 +500,22 @@ std::vector<hsa_signal_t>& VirtualGPU::HwQueueTracker::WaitingSignal(HwQueueEngi
499500

500501
// ================================================================================================
501502
bool VirtualGPU::HwQueueTracker::CpuWaitForSignal(ProfilingSignal* signal) {
502-
amd::ScopedLock lock(signal->LockSignalOps());
503503
// Wait for the current signal
504-
if (!signal->done_) {
504+
if (signal->ts_ != nullptr) {
505505
// Update timestamp values if requested
506-
if (signal->ts_ != nullptr) {
507-
signal->ts_->checkGpuTime();
508-
} else {
509-
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "[%zx]!\t Host wait on completion_signal=0x%zx",
510-
std::this_thread::get_id(), signal->signal_.handle);
511-
if (!WaitForSignal(signal->signal_, gpu_.ActiveWait())) {
512-
LogPrintfError("Failed signal [0x%lx] wait", signal->signal_);
513-
return false;
514-
}
515-
signal->done_ = true;
506+
auto ts = signal->ts_;
507+
ts->checkGpuTime();
508+
ts->release();
509+
signal->ts_ = nullptr;
510+
} else if (!signal->done_) {
511+
amd::ScopedLock lock(signal->LockSignalOps());
512+
ClPrint(amd::LOG_DEBUG, amd::LOG_COPY, "[%zx]!\t Host wait on completion_signal=0x%zx",
513+
std::this_thread::get_id(), signal->signal_.handle);
514+
if (!WaitForSignal(signal->signal_, gpu_.ActiveWait())) {
515+
LogPrintfError("Failed signal [0x%lx] wait", signal->signal_);
516+
return false;
516517
}
518+
signal->done_ = true;
517519
}
518520
return true;
519521
}
@@ -1079,7 +1081,7 @@ VirtualGPU::~VirtualGPU() {
10791081
releasePinnedMem();
10801082

10811083
if (timestamp_ != nullptr) {
1082-
delete timestamp_;
1084+
timestamp_->release();
10831085
timestamp_ = nullptr;
10841086
LogError("There was a timestamp that was not used; deleting.");
10851087
}
@@ -1315,7 +1317,7 @@ void VirtualGPU::updateCommandsState(amd::Command* list) const {
13151317
ts = reinterpret_cast<Timestamp*>(current->data());
13161318
startTimeStamp = ts->getStart();
13171319
endTimeStamp = ts->getEnd();
1318-
delete ts;
1320+
ts->release();
13191321
current->setData(nullptr);
13201322
} else {
13211323
// If we don't have a command that contains a valid timestamp,

device/rocm/rocvirtual.hpp

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -82,7 +82,7 @@ inline bool WaitForSignal(hsa_signal_t signal, bool active_wait = false) {
8282

8383
// Timestamp for keeping track of some profiling information for various commands
8484
// including EnqueueNDRangeKernel and clEnqueueCopyBuffer.
85-
class Timestamp : public amd::HeapObject {
85+
class Timestamp : public amd::ReferenceCountedObject {
8686
private:
8787
static double ticksToTime_;
8888

@@ -93,6 +93,7 @@ class Timestamp : public amd::HeapObject {
9393
amd::Command* parsedCommand_; //!< Command down the list, considering command_ as head
9494
std::vector<ProfilingSignal*> signals_; //!< The list of all signals, associated with the TS
9595
hsa_signal_t callback_signal_; //!< Signal associated with a callback for possible later update
96+
amd::Monitor lock_; //!< Serialize timestamp update
9697

9798
Timestamp(const Timestamp&) = delete;
9899
Timestamp& operator=(const Timestamp&) = delete;
@@ -104,7 +105,8 @@ class Timestamp : public amd::HeapObject {
104105
, gpu_(gpu)
105106
, command_(command)
106107
, parsedCommand_(nullptr)
107-
, callback_signal_(hsa_signal_t{}) {}
108+
, callback_signal_(hsa_signal_t{})
109+
, lock_("Timestamp lock", true) {}
108110

109111
~Timestamp() {}
110112

0 commit comments

Comments
 (0)