Skip to content

Commit 890399a

Browse files
saleelkdayatsin-amd
authored andcommitted
rocr: Skip uSleep for non-interrupt signals
- When waiting on non-interrupt signals, do not uSleep. This causes regressions compared to interrupt signal usage. - Cleanup code. Change-Id: I706bda0b13e64ffec0b607c1915d8380a2ce0dea
1 parent 166b083 commit 890399a

File tree

4 files changed

+92
-148
lines changed

4 files changed

+92
-148
lines changed

runtime/hsa-runtime/core/inc/signal.h

Lines changed: 52 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -60,9 +60,17 @@
6060

6161
#include "core/util/utils.h"
6262
#include "core/util/locks.h"
63+
#include "core/util/timer.h"
6364

6465
#include "inc/amd_hsa_signal.h"
6566

67+
#if defined(__i386__) || defined(__x86_64__)
68+
#include <mwaitxintrin.h>
69+
#ifndef MWAITX_ECX_TIMER_ENABLE
70+
#define MWAITX_ECX_TIMER_ENABLE 0x2 // BIT(1)
71+
#endif
72+
#endif
73+
6674
// Allow hsa_signal_t to be keys in STL structures.
6775
namespace std {
6876
template <> struct less<hsa_signal_t> {
@@ -76,6 +84,50 @@ template <> struct less<hsa_signal_t> {
7684
}
7785

7886
namespace rocr {
87+
namespace timer {
88+
inline timer::fast_clock::duration GetFastTimeout(uint64_t timeout) {
89+
uint64_t hsa_freq = 0;
90+
HSA::hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &hsa_freq);
91+
return timer::duration_from_seconds<timer::fast_clock::duration>(
92+
double(timeout) / double(hsa_freq));
93+
}
94+
95+
inline void CheckAbortTimeout(const timer::fast_clock::time_point& start_time,
96+
uint32_t signal_abort_timeout) {
97+
if (signal_abort_timeout) {
98+
const timer::fast_clock::duration abort_timeout =
99+
std::chrono::seconds(signal_abort_timeout);
100+
if (timer::fast_clock::now() - start_time > abort_timeout) {
101+
throw AMD::hsa_exception(HSA_STATUS_ERROR_FATAL,
102+
"Signal wait abort timeout.\n");
103+
}
104+
}
105+
}
106+
107+
inline void DoMwaitx(int64_t* addr, uint32_t timeout, bool timer_enable = false) {
108+
#if defined(__i386__) || defined(__x86_64__)
109+
_mm_monitorx(addr, 0, 0);
110+
_mm_mwaitx(0, timeout, timer_enable ? MWAITX_ECX_TIMER_ENABLE : 0);
111+
#endif
112+
}
113+
} // namespace timer
114+
115+
inline bool CheckSignalCondition(int64_t value, hsa_signal_condition_t condition,
116+
hsa_signal_value_t compare_value) {
117+
switch (condition) {
118+
case HSA_SIGNAL_CONDITION_EQ:
119+
return value == compare_value;
120+
case HSA_SIGNAL_CONDITION_NE:
121+
return value != compare_value;
122+
case HSA_SIGNAL_CONDITION_GTE:
123+
return value >= compare_value;
124+
case HSA_SIGNAL_CONDITION_LT:
125+
return value < compare_value;
126+
default:
127+
return false;
128+
}
129+
}
130+
79131
namespace core {
80132
class Agent;
81133
class Signal;

runtime/hsa-runtime/core/runtime/default_signal.cpp

Lines changed: 12 additions & 60 deletions
Original file line numberDiff line numberDiff line change
@@ -41,7 +41,6 @@
4141
////////////////////////////////////////////////////////////////////////////////
4242

4343
#include "core/inc/default_signal.h"
44-
#include "core/util/timer.h"
4544

4645
#if defined(__i386__) || defined(__x86_64__)
4746
#include <mwaitxintrin.h>
@@ -83,78 +82,31 @@ hsa_signal_value_t BusyWaitSignal::WaitRelaxed(hsa_signal_condition_t condition,
8382

8483
waiting_++;
8584
MAKE_SCOPE_GUARD([&]() { waiting_--; });
86-
bool condition_met = false;
87-
int64_t value;
8885

8986
const uint32_t &signal_abort_timeout =
9087
core::Runtime::runtime_singleton_->flag().signal_abort_timeout();
9188

92-
timer::fast_clock::time_point start_time, time;
93-
start_time = timer::fast_clock::now();
94-
95-
// Set a polling timeout value
96-
// Should be a few times bigger than null kernel latency
97-
const timer::fast_clock::duration kMaxElapsed = std::chrono::microseconds(200);
98-
99-
uint64_t hsa_freq = 0;
100-
HSA::hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &hsa_freq);
101-
const timer::fast_clock::duration fast_timeout =
102-
timer::duration_from_seconds<timer::fast_clock::duration>(
103-
double(timeout) / double(hsa_freq));
104-
105-
#if defined(__i386__) || defined(__x86_64__)
106-
if (g_use_mwaitx) _mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0);
107-
#endif
89+
const timer::fast_clock::time_point start_time = timer::fast_clock::now();
90+
const timer::fast_clock::duration fast_timeout = timer::GetFastTimeout(timeout);
10891

10992
while (true) {
11093
if (!IsValid()) return 0;
11194

112-
value = atomic::Load(&signal_.value, std::memory_order_relaxed);
113-
114-
switch (condition) {
115-
case HSA_SIGNAL_CONDITION_EQ: {
116-
condition_met = (value == compare_value);
117-
break;
118-
}
119-
case HSA_SIGNAL_CONDITION_NE: {
120-
condition_met = (value != compare_value);
121-
break;
122-
}
123-
case HSA_SIGNAL_CONDITION_GTE: {
124-
condition_met = (value >= compare_value);
125-
break;
126-
}
127-
case HSA_SIGNAL_CONDITION_LT: {
128-
condition_met = (value < compare_value);
129-
break;
130-
}
131-
default:
132-
return 0;
133-
}
134-
if (condition_met) return hsa_signal_value_t(value);
95+
int64_t value = atomic::Load(&signal_.value, std::memory_order_relaxed);
13596

136-
time = timer::fast_clock::now();
137-
if (time - start_time > fast_timeout) {
138-
value = atomic::Load(&signal_.value, std::memory_order_relaxed);
139-
return hsa_signal_value_t(value);
97+
if (CheckSignalCondition(value, condition, compare_value)) {
98+
return value;
14099
}
141100

142-
if (signal_abort_timeout) {
143-
const timer::fast_clock::duration abort_timeout =
144-
std::chrono::seconds(signal_abort_timeout);
145-
146-
if(time - start_time > abort_timeout)
147-
throw AMD::hsa_exception(HSA_STATUS_ERROR_FATAL,
148-
"Signal wait abort timeout.\n");
101+
if (timer::fast_clock::now() - start_time > fast_timeout) {
102+
return value;
149103
}
150104

151-
if (time - start_time > kMaxElapsed) {
152-
os::uSleep(20);
153-
#if defined(__i386__) || defined(__x86_64__)
154-
} else if (g_use_mwaitx) {
155-
_mm_mwaitx(0, 60000, MWAITX_ECX_TIMER_ENABLE); // 60000 ~20us on a 1.5Ghz CPU
156-
_mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0);
157-
#endif
105+
timer::CheckAbortTimeout(start_time, signal_abort_timeout);
106+
107+
if (g_use_mwaitx) {
108+
// Use timer-enabled mwaitx for busy waiting
109+
timer::DoMwaitx(const_cast<int64_t*>(&signal_.value), 60000, true);
158110
}
159111
}
160112
}

runtime/hsa-runtime/core/runtime/interrupt_signal.cpp

Lines changed: 27 additions & 87 deletions
Original file line numberDiff line numberDiff line change
@@ -42,14 +42,8 @@
4242

4343
#include "core/inc/interrupt_signal.h"
4444
#include "core/inc/runtime.h"
45-
#include "core/util/timer.h"
4645
#include "core/util/locks.h"
4746

48-
#if defined(__i386__) || defined(__x86_64__)
49-
#include <mwaitxintrin.h>
50-
#define MWAITX_ECX_TIMER_ENABLE 0x2 // BIT(1)
51-
#endif
52-
5347
namespace rocr {
5448
namespace core {
5549

@@ -141,118 +135,64 @@ void InterruptSignal::StoreRelease(hsa_signal_value_t value) {
141135
SetEvent();
142136
}
143137

144-
hsa_signal_value_t InterruptSignal::WaitRelaxed(
145-
hsa_signal_condition_t condition, hsa_signal_value_t compare_value,
146-
uint64_t timeout, hsa_wait_state_t wait_hint) {
138+
hsa_signal_value_t InterruptSignal::WaitRelaxed(hsa_signal_condition_t condition,
139+
hsa_signal_value_t compare_value,
140+
uint64_t timeout,
141+
hsa_wait_state_t wait_hint) {
147142
Retain();
148143
MAKE_SCOPE_GUARD([&]() { Release(); });
149144

150145
uint32_t prior = waiting_++;
151146
MAKE_SCOPE_GUARD([&]() { waiting_--; });
152147

153-
uint64_t event_age = 1;
148+
uint64_t event_age = core::Runtime::runtime_singleton_->KfdVersion().supports_event_age ? 1 : 0;
149+
if (!event_age && prior != 0) wait_hint = HSA_WAIT_STATE_ACTIVE;
154150

151+
const timer::fast_clock::time_point start_time = timer::fast_clock::now();
152+
const timer::fast_clock::duration fast_timeout = timer::GetFastTimeout(timeout);
153+
const timer::fast_clock::duration kMaxElapsed = std::chrono::microseconds(200);
155154
const uint32_t &signal_abort_timeout =
156155
core::Runtime::runtime_singleton_->flag().signal_abort_timeout();
157156

158-
if (!core::Runtime::runtime_singleton_->KfdVersion().supports_event_age) {
159-
event_age = 0;
160-
// Allow only the first waiter to sleep. Without event age tracking,
161-
// race condition can cause some threads to sleep without wakeup since missing interrupt.
162-
if (prior != 0) wait_hint = HSA_WAIT_STATE_ACTIVE;
163-
}
164-
165-
int64_t value;
166-
167-
timer::fast_clock::time_point start_time = timer::fast_clock::now();
168-
169-
// Set a polling timeout value
170-
// Should be a few times bigger than null kernel latency
171-
const timer::fast_clock::duration kMaxElapsed = std::chrono::microseconds(200);
172-
173-
uint64_t hsa_freq = 0;
174-
HSA::hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &hsa_freq);
175-
const timer::fast_clock::duration fast_timeout =
176-
timer::duration_from_seconds<timer::fast_clock::duration>(
177-
double(timeout) / double(hsa_freq));
178-
179-
bool condition_met = false;
180-
181-
#if defined(__i386__) || defined(__x86_64__)
182-
if (g_use_mwaitx) _mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0);
183-
#endif
184-
185157
while (true) {
186158
if (!IsValid()) return 0;
187159

188-
value = atomic::Load(&signal_.value, std::memory_order_relaxed);
160+
int64_t value = atomic::Load(&signal_.value, std::memory_order_relaxed);
189161

190-
switch (condition) {
191-
case HSA_SIGNAL_CONDITION_EQ: {
192-
condition_met = (value == compare_value);
193-
break;
194-
}
195-
case HSA_SIGNAL_CONDITION_NE: {
196-
condition_met = (value != compare_value);
197-
break;
198-
}
199-
case HSA_SIGNAL_CONDITION_GTE: {
200-
condition_met = (value >= compare_value);
201-
break;
202-
}
203-
case HSA_SIGNAL_CONDITION_LT: {
204-
condition_met = (value < compare_value);
205-
break;
206-
}
207-
default:
208-
return 0;
162+
if (CheckSignalCondition(value, condition, compare_value)) {
163+
return value;
209164
}
210-
if (condition_met) return hsa_signal_value_t(value);
211165

212-
timer::fast_clock::time_point time = timer::fast_clock::now();
213-
if (time - start_time > fast_timeout) {
214-
value = atomic::Load(&signal_.value, std::memory_order_relaxed);
215-
return hsa_signal_value_t(value);
166+
auto now = timer::fast_clock::now();
167+
if (now - start_time > fast_timeout) {
168+
return value;
216169
}
217170

218-
if (signal_abort_timeout) {
219-
const timer::fast_clock::duration abort_timeout =
220-
std::chrono::seconds(signal_abort_timeout);
221-
222-
if(time - start_time > abort_timeout)
223-
throw AMD::hsa_exception(HSA_STATUS_ERROR_FATAL,
224-
"Signal wait abort timeout.\n");
225-
}
171+
timer::CheckAbortTimeout(start_time, signal_abort_timeout);
226172

227173
if (wait_hint == HSA_WAIT_STATE_ACTIVE) {
228-
#if defined(__i386__) || defined(__x86_64__)
229174
if (g_use_mwaitx) {
230-
_mm_mwaitx(0, 0, 0);
231-
_mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0);
175+
// Short timeout for active waiting
176+
timer::DoMwaitx(const_cast<int64_t*>(&signal_.value), 1000);
232177
}
233-
#endif
234178
continue;
235179
}
236180

237-
if (time - start_time < kMaxElapsed) {
238-
// os::uSleep(20);
239-
#if defined(__i386__) || defined(__x86_64__)
181+
if (now - start_time < kMaxElapsed) {
240182
if (g_use_mwaitx) {
241-
_mm_mwaitx(0, 60000, MWAITX_ECX_TIMER_ENABLE);
242-
_mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0);
183+
// Longer timeout with timer for passive waiting
184+
timer::DoMwaitx(const_cast<int64_t*>(&signal_.value), 60000, true);
243185
}
244-
#endif
245186
continue;
246187
}
247188

248-
uint32_t wait_ms;
249-
auto time_remaining = fast_timeout - (time - start_time);
250-
uint64_t ct = timer::duration_cast<std::chrono::milliseconds>(
251-
time_remaining).count();
189+
auto remaining_ms = timer::duration_cast<std::chrono::milliseconds>(
190+
fast_timeout - (now - start_time)).count();
252191

253-
wait_ms = static_cast<uint32_t>(std::min(ct, 0xFFFFFFFEUL));
254-
if (signal_abort_timeout)
255-
wait_ms = std::min(wait_ms, signal_abort_timeout * 1000);
192+
uint32_t wait_ms = std::min<uint32_t>(
193+
static_cast<uint32_t>(std::min<uint64_t>(remaining_ms, 0xFFFFFFFEUL)),
194+
static_cast<uint32_t>(signal_abort_timeout ? signal_abort_timeout * 1000 : 0xFFFFFFFFUL)
195+
);
256196

257197
hsaKmtWaitOnEvent_Ext(event_, wait_ms, &event_age);
258198
}

runtime/hsa-runtime/core/runtime/runtime.cpp

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1659,7 +1659,7 @@ void Runtime::AsyncEventsLoop(void* _eventsInfo) {
16591659
for (size_t i = index; i < async_events_.Size(); i++) {
16601660
hsa_signal_handle sig(async_events_.signal_[i]);
16611661
value[0] = atomic::Load(&sig->signal_.value, std::memory_order_relaxed);
1662-
if (checkCondition(async_events_.cond_[i], value[0], async_events_.value_[i])) {
1662+
if (CheckSignalCondition(value[0], async_events_.cond_[i], async_events_.value_[i])) {
16631663
if (i == 0) {
16641664
hsa_signal_handle(async_events_control_.wake)->StoreRelaxed(0);
16651665
} else {

0 commit comments

Comments
 (0)