Skip to content

Commit df75490

Browse files
committed
ROCm 6.2.1 updates
1 parent 7f45923 commit df75490

File tree

6 files changed

+198
-86
lines changed

6 files changed

+198
-86
lines changed

src/core/inc/runtime.h

Lines changed: 8 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -667,11 +667,15 @@ class Runtime {
667667
// Deprecated HSA Region API GPU (for legacy APU support only)
668668
Agent* region_gpu_;
669669

670-
AsyncEventsControl async_events_control_;
671-
672-
AsyncEvents async_events_;
670+
struct AsyncEventsInfo {
671+
AsyncEventsControl control;
672+
AsyncEvents events;
673+
AsyncEvents new_events;
674+
bool monitor_exceptions;
675+
};
673676

674-
AsyncEvents new_async_events_;
677+
struct AsyncEventsInfo asyncSignals_;
678+
struct AsyncEventsInfo asyncExceptions_;
675679

676680
// System clock frequency.
677681
uint64_t sys_clock_freq_;

src/core/inc/signal.h

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,20 @@ class Signal {
356356
uint64_t timeout_hint, hsa_wait_state_t wait_hint,
357357
hsa_signal_value_t* satisfying_value);
358358

359+
/// @brief Dedicated function to wait on signals that are not of type HSA_EVENTTYPE_SIGNAL;
360+
/// these events can only be received by calling the underlying driver (i.e. via the hsaKmtWaitOnMultipleEvents_Ext
361+
/// function call). We still need to have 1 signal of type HSA_EVENTTYPE_SIGNAL attached to the list of signals
362+
/// to be able to force hsaKmtWaitOnMultipleEvents_Ext to return.
363+
/// @param signal_count Number of hsa_signals
364+
/// @param hsa_signals Pointer to array of signals. All signals should have a valid EopEvent()
365+
/// @param conds list of conditions
366+
/// @param values list of values
367+
/// @param satisfying_value if not NULL, receives the value of the signal whose condition was satisfied
368+
/// @return index of signal that satisfies condition, or uint32_t(-1) if a signal is invalid or a condition is unrecognized
369+
static uint32_t WaitAnyExceptions(uint32_t signal_count, const hsa_signal_t* hsa_signals,
370+
const hsa_signal_condition_t* conds, const hsa_signal_value_t* values,
371+
hsa_signal_value_t* satisfying_value);
372+
359373
__forceinline bool IsType(rtti_t id) { return _IsA(id); }
360374

361375
/// @brief Prevents the signal from being destroyed until the matching Release().

src/core/runtime/amd_aql_queue.cpp

Lines changed: 15 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1290,21 +1290,6 @@ bool AqlQueue::ExceptionHandler(hsa_signal_value_t error_code, void* arg) {
12901290
return false;
12911291
}
12921292

1293-
// Fallback if KFD does not support GPU core dump. In this case, there core dump is
1294-
// generated by hsa-runtime.
1295-
if (!core::Runtime::runtime_singleton_->KfdVersion().supports_core_dump &&
1296-
queue->agent_->isa()->GetMajorVersion() != 11) {
1297-
1298-
if (pcs::PcsRuntime::instance()->SessionsActive())
1299-
fprintf(stderr, "GPU core dump skipped because PC Sampling active\n");
1300-
else if (amd::coredump::dump_gpu_core())
1301-
fprintf(stderr, "GPU core dump failed\n");
1302-
// supports_core_dump flag is overwritten to avoid generate core dump file again
1303-
// caught by a different exception handler. Such as VMFaultHandler.
1304-
core::Runtime::runtime_singleton_->KfdVersion(
1305-
core::Runtime::runtime_singleton_->KfdVersion().supports_exception_debugging, true);
1306-
}
1307-
13081293
for (auto& error : QueueErrors) {
13091294
if (error_code & (1 << (error.code - 1))) {
13101295
errorCode = error.status;
@@ -1322,6 +1307,21 @@ bool AqlQueue::ExceptionHandler(hsa_signal_value_t error_code, void* arg) {
13221307
return false;
13231308
}
13241309

1310+
// Fallback if KFD does not support GPU core dump. In this case, the core dump is
1311+
// generated by hsa-runtime.
1312+
if (!core::Runtime::runtime_singleton_->KfdVersion().supports_core_dump &&
1313+
queue->agent_->isa()->GetMajorVersion() != 11) {
1314+
1315+
if (pcs::PcsRuntime::instance()->SessionsActive())
1316+
fprintf(stderr, "GPU core dump skipped because PC Sampling active\n");
1317+
else if (amd::coredump::dump_gpu_core())
1318+
fprintf(stderr, "GPU core dump failed\n");
1319+
// supports_core_dump flag is overwritten to avoid generating the core dump file again
1320+
// when caught by a different exception handler, such as VMFaultHandler.
1321+
core::Runtime::runtime_singleton_->KfdVersion(
1322+
core::Runtime::runtime_singleton_->KfdVersion().supports_exception_debugging, true);
1323+
}
1324+
13251325
queue->Suspend();
13261326
if (queue->errors_callback_ != nullptr) {
13271327
queue->errors_callback_(errorCode, queue->public_handle(), queue->errors_data_);

src/core/runtime/runtime.cpp

Lines changed: 54 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -755,35 +755,44 @@ hsa_status_t Runtime::SetAsyncSignalHandler(hsa_signal_t signal,
755755
hsa_signal_value_t value,
756756
hsa_amd_signal_handler handler,
757757
void* arg) {
758-
// Indicate that this signal is in use.
759-
if (signal.handle != 0) hsa_signal_handle(signal)->Retain();
760758

761-
ScopedAcquire<HybridMutex> scope_lock(&async_events_control_.lock);
759+
struct AsyncEventsInfo* asyncInfo = &asyncSignals_;
760+
761+
if (signal.handle != 0) {
762+
// Indicate that this signal is in use.
763+
hsa_signal_handle(signal)->Retain();
764+
765+
core::Signal* coreSignal = core::Signal::Convert(signal);
766+
if (coreSignal->EopEvent() && coreSignal->EopEvent()->EventData.EventType != HSA_EVENTTYPE_SIGNAL)
767+
asyncInfo = &asyncExceptions_;
768+
}
769+
770+
ScopedAcquire<HybridMutex> scope_lock(&asyncInfo->control.lock);
762771

763772
// Lazy initializer
764-
if (async_events_control_.async_events_thread_ == NULL) {
773+
if (asyncInfo->control.async_events_thread_ == NULL) {
765774
// Create monitoring thread control signal
766-
auto err = HSA::hsa_signal_create(0, 0, NULL, &async_events_control_.wake);
775+
auto err = HSA::hsa_signal_create(0, 0, NULL, &asyncInfo->control.wake);
767776
if (err != HSA_STATUS_SUCCESS) {
768777
assert(false && "Asyncronous events control signal creation error.");
769778
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
770779
}
771-
async_events_.PushBack(async_events_control_.wake, HSA_SIGNAL_CONDITION_NE,
772-
0, NULL, NULL);
780+
asyncInfo->events.PushBack(asyncInfo->control.wake, HSA_SIGNAL_CONDITION_NE,
781+
0, NULL, NULL);
773782

774783
// Start event monitoring thread
775-
async_events_control_.exit = false;
776-
async_events_control_.async_events_thread_ =
777-
os::CreateThread(AsyncEventsLoop, NULL);
778-
if (async_events_control_.async_events_thread_ == NULL) {
784+
asyncInfo->control.exit = false;
785+
asyncInfo->control.async_events_thread_ =
786+
os::CreateThread(AsyncEventsLoop, asyncInfo);
787+
if (asyncInfo->control.async_events_thread_ == NULL) {
779788
assert(false && "Asyncronous events thread creation error.");
780789
return HSA_STATUS_ERROR_OUT_OF_RESOURCES;
781790
}
782791
}
783792

784-
new_async_events_.PushBack(signal, cond, value, handler, arg);
793+
asyncInfo->new_events.PushBack(signal, cond, value, handler, arg);
785794

786-
hsa_signal_handle(async_events_control_.wake)->StoreRelease(1);
795+
hsa_signal_handle(asyncInfo->control.wake)->StoreRelease(1);
787796

788797
return HSA_STATUS_SUCCESS;
789798
}
@@ -1499,18 +1508,35 @@ hsa_status_t Runtime::IPCDetach(void* ptr) {
14991508
return HSA_STATUS_SUCCESS;
15001509
}
15011510

1502-
void Runtime::AsyncEventsLoop(void*) {
1503-
auto& async_events_control_ = runtime_singleton_->async_events_control_;
1504-
auto& async_events_ = runtime_singleton_->async_events_;
1505-
auto& new_async_events_ = runtime_singleton_->new_async_events_;
1511+
void Runtime::AsyncEventsLoop(void* _eventsInfo) {
1512+
struct AsyncEventsInfo* eventsInfo = reinterpret_cast<struct AsyncEventsInfo*>(_eventsInfo);
1513+
1514+
auto& async_events_control_ = eventsInfo->control;
1515+
auto& async_events_ = eventsInfo->events;
1516+
auto& new_async_events_ = eventsInfo->new_events;
15061517

15071518
while (!async_events_control_.exit) {
15081519
// Wait for a signal
15091520
hsa_signal_value_t value;
1510-
uint32_t index = AMD::hsa_amd_signal_wait_any(
1511-
uint32_t(async_events_.Size()), &async_events_.signal_[0],
1512-
&async_events_.cond_[0], &async_events_.value_[0], uint64_t(-1),
1513-
HSA_WAIT_STATE_BLOCKED, &value);
1521+
uint32_t index = 0;
1522+
1523+
if (eventsInfo->monitor_exceptions) {
1524+
index = Signal::WaitAnyExceptions(
1525+
uint32_t(async_events_.Size()),
1526+
&async_events_.signal_[0],
1527+
&async_events_.cond_[0],
1528+
&async_events_.value_[0],
1529+
&value);
1530+
} else {
1531+
index = AMD::hsa_amd_signal_wait_any(
1532+
uint32_t(async_events_.Size()),
1533+
&async_events_.signal_[0],
1534+
&async_events_.cond_[0],
1535+
&async_events_.value_[0],
1536+
uint64_t(-1),
1537+
HSA_WAIT_STATE_BLOCKED,
1538+
&value);
1539+
}
15141540

15151541
// Reset the control signal
15161542
if (index == 0) {
@@ -1875,7 +1901,11 @@ Runtime::Runtime()
18751901
hw_exception_event_(nullptr),
18761902
hw_exception_signal_(nullptr),
18771903
ref_count_(0),
1878-
kfd_version{} {}
1904+
kfd_version{} {
1905+
1906+
asyncSignals_.monitor_exceptions = false;
1907+
asyncExceptions_.monitor_exceptions = true;
1908+
}
18791909

18801910
hsa_status_t Runtime::Load() {
18811911
os::cpuid_t cpuinfo;
@@ -1953,7 +1983,8 @@ void Runtime::Unload() {
19531983
std::for_each(disabled_gpu_agents_.begin(), disabled_gpu_agents_.end(), DeleteObject());
19541984
disabled_gpu_agents_.clear();
19551985

1956-
async_events_control_.Shutdown();
1986+
asyncSignals_.control.Shutdown();
1987+
asyncExceptions_.control.Shutdown();
19571988

19581989
if (vm_fault_signal_ != nullptr) {
19591990
vm_fault_signal_->DestroySignal();

src/core/runtime/signal.cpp

Lines changed: 107 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -255,23 +255,8 @@ uint32_t Signal::WaitAny(uint32_t signal_count, const hsa_signal_t* hsa_signals,
255255
while (true) {
256256
// Cannot mwaitx - polling multiple signals
257257
for (uint32_t i = 0; i < signal_count; i++) {
258-
if (!signals[i]->IsValid()) return uint32_t(-1);
259-
260-
// Handling special event.
261-
if (signals[i]->EopEvent() != NULL) {
262-
const HSA_EVENTTYPE event_type =
263-
signals[i]->EopEvent()->EventData.EventType;
264-
if (event_type == HSA_EVENTTYPE_MEMORY) {
265-
const HsaMemoryAccessFault& fault =
266-
signals[i]->EopEvent()->EventData.EventData.MemoryAccessFault;
267-
if (fault.Flags == HSA_EVENTID_MEMORY_FATAL_PROCESS) {
268-
return i;
269-
}
270-
} else if (event_type == HSA_EVENTTYPE_HW_EXCEPTION) {
271-
const HsaHwException& exception = signals[i]->EopEvent()->EventData.EventData.HwException;
272-
if (exception.MemoryLost) return i;
273-
}
274-
}
258+
if (!signals[i]->IsValid())
259+
return uint32_t(-1);
275260

276261
value =
277262
atomic::Load(&signals[i]->signal_.value, std::memory_order_relaxed);
@@ -325,6 +310,111 @@ uint32_t Signal::WaitAny(uint32_t signal_count, const hsa_signal_t* hsa_signals,
325310
}
326311
}
327312

313+
/*
314+
* Special handler to wait for exceptions from the underlying driver.
315+
*/
316+
uint32_t Signal::WaitAnyExceptions(uint32_t signal_count, const hsa_signal_t* hsa_signals,
317+
const hsa_signal_condition_t* conds, const hsa_signal_value_t* values,
318+
hsa_signal_value_t* satisfying_value) {
319+
320+
uint32_t wait_ms = uint32_t(-1);
321+
hsa_signal_handle* signals =
322+
reinterpret_cast<hsa_signal_handle*>(const_cast<hsa_signal_t*>(hsa_signals));
323+
324+
for (uint32_t i = 0; i < signal_count; i++) signals[i]->Retain();
325+
326+
MAKE_SCOPE_GUARD([&]() {
327+
for (uint32_t i = 0; i < signal_count; i++) signals[i]->Release();
328+
});
329+
330+
uint32_t prior = 0;
331+
for (uint32_t i = 0; i < signal_count; i++) prior = Max(prior, signals[i]->waiting_++);
332+
333+
334+
MAKE_SCOPE_GUARD([&]() {
335+
for (uint32_t i = 0; i < signal_count; i++) signals[i]->waiting_--;
336+
});
337+
338+
if (!core::Runtime::runtime_singleton_->KfdVersion().supports_event_age)
339+
// Allow only the first waiter to sleep. Without event age tracking,
340+
// a race condition can cause some threads to sleep without being woken because of a missed interrupt.
341+
if (prior != 0) wait_ms = 0;
342+
343+
HsaEvent** evts = new HsaEvent* [signal_count];
344+
MAKE_SCOPE_GUARD([&]() { delete[] evts; });
345+
346+
uint32_t unique_evts = 0;
347+
348+
for (uint32_t i = 0; i < signal_count; i++) {
349+
assert(signals[i]->EopEvent() != NULL);
350+
evts[i] = signals[i]->EopEvent();
351+
}
352+
353+
std::sort(evts, evts + signal_count);
354+
HsaEvent** end = std::unique(evts, evts + signal_count);
355+
unique_evts = uint32_t(end - evts);
356+
357+
uint64_t event_age[unique_evts];
358+
memset(event_age, 0, unique_evts * sizeof(uint64_t));
359+
if (core::Runtime::runtime_singleton_->KfdVersion().supports_event_age)
360+
for (uint32_t i = 0; i < unique_evts; i++)
361+
event_age[i] = 1;
362+
363+
int64_t value;
364+
365+
bool condition_met = false;
366+
while (true) {
367+
// Cannot mwaitx - polling multiple signals
368+
369+
for (uint32_t i = 0; i < signal_count; i++) {
370+
if (!signals[i]->IsValid())
371+
return uint32_t(-1);
372+
373+
const HSA_EVENTTYPE event_type = signals[i]->EopEvent()->EventData.EventType;
374+
if (event_type == HSA_EVENTTYPE_MEMORY) {
375+
const HsaMemoryAccessFault& fault =
376+
signals[i]->EopEvent()->EventData.EventData.MemoryAccessFault;
377+
if (fault.Flags == HSA_EVENTID_MEMORY_FATAL_PROCESS) return i;
378+
} else if (event_type == HSA_EVENTTYPE_HW_EXCEPTION) {
379+
const HsaHwException& exception =
380+
signals[i]->EopEvent()->EventData.EventData.HwException;
381+
if (exception.MemoryLost) return i;
382+
}
383+
384+
value = atomic::Load(&signals[i]->signal_.value, std::memory_order_relaxed);
385+
386+
switch (conds[i]) {
387+
case HSA_SIGNAL_CONDITION_EQ: {
388+
condition_met = (value == values[i]);
389+
break;
390+
}
391+
case HSA_SIGNAL_CONDITION_NE: {
392+
condition_met = (value != values[i]);
393+
break;
394+
}
395+
case HSA_SIGNAL_CONDITION_GTE: {
396+
condition_met = (value >= values[i]);
397+
break;
398+
}
399+
case HSA_SIGNAL_CONDITION_LT: {
400+
condition_met = (value < values[i]);
401+
break;
402+
}
403+
default: {
404+
return uint32_t(-1);
405+
}
406+
}
407+
if (condition_met) {
408+
if (satisfying_value != NULL) *satisfying_value = value;
409+
// Some other signal in the list satisfied condition
410+
return i;
411+
}
412+
}
413+
414+
hsaKmtWaitOnMultipleEvents_Ext(evts, unique_evts, false, wait_ms, event_age);
415+
} //while
416+
}
417+
328418
SignalGroup::SignalGroup(uint32_t num_signals, const hsa_signal_t* hsa_signals)
329419
: count(num_signals) {
330420
if (count != 0) {

src/inc/hsa_ext_amd.h

Lines changed: 0 additions & 27 deletions
Original file line numberDiff line numberDiff line change
@@ -2822,33 +2822,6 @@ hsa_status_t hsa_amd_portable_close_dmabuf(int dmabuf);
28222822
hsa_status_t hsa_amd_vmem_address_reserve(void** va, size_t size, uint64_t address,
28232823
uint64_t flags);
28242824

2825-
/**
2826-
* @brief Allocate a reserved address range
2827-
*
2828-
* Reserve a virtual address range. The size must be a multiple of the system page size.
2829-
* If it is not possible to allocate the address specified by @p address, then @p va will be
2830-
* a different address range.
2831-
* Address range should be released by calling hsa_amd_vmem_address_free.
2832-
*
2833-
* @param[out] va virtual address allocated
2834-
* @param[in] size of address range requested
2835-
* @param[in] address requested
2836-
* @param[in] flags currently unsupported
2837-
*
2838-
* @retval ::HSA_STATUS_SUCCESS Address range allocated successfully
2839-
*
2840-
* @retval ::HSA_STATUS_ERROR_NOT_INITIALIZED The HSA runtime has not been
2841-
* initialized.
2842-
*
2843-
* @retval ::HSA_STATUS_ERROR_OUT_OF_RESOURCES Insufficient resources to allocate an address
2844-
* range of this size.
2845-
*
2846-
* Note that this API will be deprecated in a future release and replaced by
2847-
* hsa_amd_vmem_address_reserve_align
2848-
*/
2849-
hsa_status_t hsa_amd_vmem_address_reserve(void** va, size_t size, uint64_t address,
2850-
uint64_t flags);
2851-
28522825
/**
28532826
* @brief Allocate a reserved address range
28542827
*

0 commit comments

Comments
 (0)