Skip to content
This repository was archived by the owner on Jan 26, 2024. It is now read-only.

Commit c59ec7f

Browse files
committed
Reduce the default size of the signal pool
Implement dynamic signal pool growth per allocated queue. Change-Id: Ie8b17937d72c29cc49e59639c4b2023ea984b14c
1 parent 2384c91 commit c59ec7f

File tree

2 files changed

+22
-2
lines changed

2 files changed

+22
-2
lines changed

device/rocm/rocvirtual.cpp

Lines changed: 20 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -453,8 +453,17 @@ bool VirtualGPU::dispatchGenericAqlPacket(
453453

454454
// TODO: placeholder to set up the kernel to populate start and end timestamps.
455455
if (timestamp_ != nullptr) {
456+
if (current_signal_ >= signal_pool_.size()) {
457+
ProfilingSignal profilingSignal = {};
458+
if (HSA_STATUS_SUCCESS != hsa_signal_create(0, 0, nullptr, &profilingSignal.signal_)) {
459+
LogPrintfError("Failed signal allocation id = %d", current_signal_);
460+
return false;
461+
}
462+
signal_pool_.push_back(profilingSignal);
463+
assert(current_signal_ < signal_pool_.size() && "Not enough signals");
464+
}
456465
// Find signal slot
457-
ProfilingSignal* profilingSignal = &signal_pool_[index & queueMask];
466+
ProfilingSignal* profilingSignal = &signal_pool_[current_signal_++];
458467
// Make sure we save the old results in the TS structure
459468
if (profilingSignal->ts_ != nullptr) {
460469
profilingSignal->ts_->checkGpuTime();
@@ -526,6 +535,8 @@ bool VirtualGPU::dispatchGenericAqlPacket(
526535
LogPrintfError("Failed signal [0x%lx] wait", signal.handle);
527536
return false;
528537
}
538+
// Reset the pool of signals
539+
current_signal_ = 0;
529540
}
530541

531542
return true;
@@ -634,6 +645,9 @@ bool VirtualGPU::releaseGpuMemoryFence() {
634645
// Release the pool, since runtime just completed a barrier
635646
resetKernArgPool();
636647

648+
// Reset the pool of signals
649+
current_signal_ = 0;
650+
637651
return true;
638652
}
639653

@@ -757,8 +771,9 @@ bool VirtualGPU::create() {
757771
gpu_queue_ = roc_device_.acquireQueue(queue_size, cooperative_, cuMask_, priority_);
758772
if (!gpu_queue_) return false;
759773

774+
constexpr uint32_t kDefaultSignalPoolSize = 32;
760775
if (!initPool(dev().settings().kernargPoolSize_,
761-
(profiling_ || (amd::IS_HIP)) ? queue_size : 0)) {
776+
(profiling_ || (amd::IS_HIP)) ? kDefaultSignalPoolSize : 0)) {
762777
LogError("Couldn't allocate arguments/signals for the queue");
763778
return false;
764779
}
@@ -861,6 +876,9 @@ void* VirtualGPU::allocKernArg(size_t size, size_t alignment) {
861876
}
862877

863878
resetKernArgPool();
879+
880+
// Reset the pool of signals
881+
current_signal_ = 0;
864882
}
865883
} while (true);
866884

device/rocm/rocvirtual.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,8 @@ class VirtualGPU : public device::VirtualDevice {
356356
uint kernarg_pool_cur_offset_;
357357

358358
std::vector<ProfilingSignal> signal_pool_; //!< Pool of signals for profiling
359+
uint32_t current_signal_ = 0; //!< Current available signal in the pool
360+
359361
friend class Timestamp;
360362

361363
// PM4 packet for gfx8 performance counter

0 commit comments

Comments
 (0)