Skip to content
This repository was archived by the owner on Jan 26, 2024. It is now read-only.

Commit 90af834

Browse files
committed
Revert "Reduce the number of allocated signals"
This reverts commit 9e354e2. Reason for revert: <INSERT REASONING HERE> Change-Id: I627774837d1dc19c50f879719e40932195448e9f
1 parent 9e354e2 commit 90af834

File tree

2 files changed

+5
-39
lines changed

2 files changed

+5
-39
lines changed

device/rocm/rocvirtual.cpp

Lines changed: 5 additions & 38 deletions
Original file line numberDiff line numberDiff line change
@@ -455,21 +455,8 @@ bool VirtualGPU::dispatchGenericAqlPacket(
455455

456456
// TODO: placeholder to set up the kernel to populate start and end timestamps.
457457
if (timestamp_ != nullptr) {
458-
// Pool size must grow to the size of pending AQL packets
459-
const uint32_t pool_size = index - read;
460-
if (pool_size >= signal_pool_.size()) {
461-
ProfilingSignal profiling_signal = {};
462-
if (HSA_STATUS_SUCCESS != hsa_signal_create(0, 0, nullptr, &profiling_signal.signal_)) {
463-
LogPrintfError("Failed signal allocation id = %d", pool_size);
464-
return false;
465-
}
466-
signal_pool_.push_back(profiling_signal);
467-
assert(queueSize >= signal_pool_.size() && "Pool will be reallocated!");
468-
}
469-
// Move index inside the valid pool
470-
++current_signal_ %= signal_pool_.size();
471458
// Find signal slot
472-
ProfilingSignal* profilingSignal = &signal_pool_[current_signal_];
459+
ProfilingSignal* profilingSignal = &signal_pool_[index & queueMask];
473460
// Make sure we save the old results in the TS structure
474461
if (profilingSignal->ts_ != nullptr) {
475462
profilingSignal->ts_->checkGpuTime();
@@ -549,8 +536,6 @@ bool VirtualGPU::dispatchGenericAqlPacket(
549536
LogPrintfError("Failed signal [0x%lx] wait", signal.handle);
550537
return false;
551538
}
552-
// Reset the pool of signals
553-
current_signal_ = 0;
554539
}
555540

556541
return true;
@@ -635,14 +620,8 @@ void VirtualGPU::ResetQueueStates() {
635620
// Release all memory dependencies
636621
memoryDependency().clear();
637622

638-
if (dev().settings().barrier_sync_) {
639-
// Release the pool, since runtime just completed a barrier
640-
// @note: Runtime can reset kernel arg pool only if the barrier with L2 invalidation was issued
641-
resetKernArgPool();
642-
} else {
643-
// Reset the pool of signals
644-
current_signal_ = 0;
645-
}
623+
// Release the pool, since runtime just completed a barrier
624+
resetKernArgPool();
646625
}
647626

648627
// ================================================================================================
@@ -854,17 +833,8 @@ bool VirtualGPU::initPool(size_t kernarg_pool_size, uint signal_pool_count) {
854833
}
855834

856835
if (signal_pool_count != 0) {
857-
// Reserve signal pool for all entries in the queue, since profiling logic will save the
858-
// pointer in timestamp info for the future references
859-
signal_pool_.reserve(signal_pool_count);
860-
// If barrier is disabled, then allocate a small portion of all signals and grow the array later.
861-
// @note: the optimization requires a wait for signal on reuse, which is only available when
862-
// the barrier is disabled
863-
constexpr uint32_t kDefaultSignalPoolSize = 32;
864-
const uint32_t default_signal_pool_size = (dev().settings().barrier_sync_) ?
865-
signal_pool_count : kDefaultSignalPoolSize;
866-
signal_pool_.resize(default_signal_pool_size);
867-
for (uint i = 0; i < default_signal_pool_size; ++i) {
836+
signal_pool_.resize(signal_pool_count);
837+
for (uint i = 0; i < signal_pool_count; ++i) {
868838
ProfilingSignal profilingSignal;
869839
if (HSA_STATUS_SUCCESS != hsa_signal_create(0, 0, nullptr, &profilingSignal.signal_)) {
870840
return false;
@@ -911,9 +881,6 @@ void* VirtualGPU::allocKernArg(size_t size, size_t alignment) {
911881
}
912882

913883
resetKernArgPool();
914-
915-
// Reset the pool of signals
916-
current_signal_ = 0;
917884
}
918885
} while (true);
919886

device/rocm/rocvirtual.hpp

Lines changed: 0 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -388,7 +388,6 @@ class VirtualGPU : public device::VirtualDevice {
388388
uint kernarg_pool_cur_offset_;
389389

390390
std::vector<ProfilingSignal> signal_pool_; //!< Pool of signals for profiling
391-
uint32_t current_signal_ = 0; //!< Current available signal in the pool
392391
friend class Timestamp;
393392

394393
// PM4 packet for gfx8 performance counter

0 commit comments

Comments
 (0)