|
42 | 42 |
|
43 | 43 | #include "core/inc/interrupt_signal.h" |
44 | 44 | #include "core/inc/runtime.h" |
45 | | -#include "core/util/timer.h" |
46 | 45 | #include "core/util/locks.h" |
47 | 46 |
|
48 | | -#if defined(__i386__) || defined(__x86_64__) |
49 | | -#include <mwaitxintrin.h> |
50 | | -#define MWAITX_ECX_TIMER_ENABLE 0x2 // BIT(1) |
51 | | -#endif |
52 | | - |
53 | 47 | namespace rocr { |
54 | 48 | namespace core { |
55 | 49 |
|
@@ -141,118 +135,64 @@ void InterruptSignal::StoreRelease(hsa_signal_value_t value) { |
141 | 135 | SetEvent(); |
142 | 136 | } |
143 | 137 |
|
144 | | -hsa_signal_value_t InterruptSignal::WaitRelaxed( |
145 | | - hsa_signal_condition_t condition, hsa_signal_value_t compare_value, |
146 | | - uint64_t timeout, hsa_wait_state_t wait_hint) { |
| 138 | +hsa_signal_value_t InterruptSignal::WaitRelaxed(hsa_signal_condition_t condition, |
| 139 | + hsa_signal_value_t compare_value, |
| 140 | + uint64_t timeout, |
| 141 | + hsa_wait_state_t wait_hint) { |
147 | 142 | Retain(); |
148 | 143 | MAKE_SCOPE_GUARD([&]() { Release(); }); |
149 | 144 |
|
150 | 145 | uint32_t prior = waiting_++; |
151 | 146 | MAKE_SCOPE_GUARD([&]() { waiting_--; }); |
152 | 147 |
|
153 | | - uint64_t event_age = 1; |
| 148 | + uint64_t event_age = core::Runtime::runtime_singleton_->KfdVersion().supports_event_age ? 1 : 0; |
| 149 | + if (!event_age && prior != 0) wait_hint = HSA_WAIT_STATE_ACTIVE; |
154 | 150 |
|
| 151 | + const timer::fast_clock::time_point start_time = timer::fast_clock::now(); |
| 152 | + const timer::fast_clock::duration fast_timeout = timer::GetFastTimeout(timeout); |
| 153 | + const timer::fast_clock::duration kMaxElapsed = std::chrono::microseconds(200); |
155 | 154 | const uint32_t &signal_abort_timeout = |
156 | 155 | core::Runtime::runtime_singleton_->flag().signal_abort_timeout(); |
157 | 156 |
|
158 | | - if (!core::Runtime::runtime_singleton_->KfdVersion().supports_event_age) { |
159 | | - event_age = 0; |
160 | | - // Allow only the first waiter to sleep. Without event age tracking, |
161 | | - // race condition can cause some threads to sleep without wakeup since missing interrupt. |
162 | | - if (prior != 0) wait_hint = HSA_WAIT_STATE_ACTIVE; |
163 | | - } |
164 | | - |
165 | | - int64_t value; |
166 | | - |
167 | | - timer::fast_clock::time_point start_time = timer::fast_clock::now(); |
168 | | - |
169 | | - // Set a polling timeout value |
170 | | - // Should be a few times bigger than null kernel latency |
171 | | - const timer::fast_clock::duration kMaxElapsed = std::chrono::microseconds(200); |
172 | | - |
173 | | - uint64_t hsa_freq = 0; |
174 | | - HSA::hsa_system_get_info(HSA_SYSTEM_INFO_TIMESTAMP_FREQUENCY, &hsa_freq); |
175 | | - const timer::fast_clock::duration fast_timeout = |
176 | | - timer::duration_from_seconds<timer::fast_clock::duration>( |
177 | | - double(timeout) / double(hsa_freq)); |
178 | | - |
179 | | - bool condition_met = false; |
180 | | - |
181 | | -#if defined(__i386__) || defined(__x86_64__) |
182 | | - if (g_use_mwaitx) _mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0); |
183 | | -#endif |
184 | | - |
185 | 157 | while (true) { |
186 | 158 | if (!IsValid()) return 0; |
187 | 159 |
|
188 | | - value = atomic::Load(&signal_.value, std::memory_order_relaxed); |
| 160 | + int64_t value = atomic::Load(&signal_.value, std::memory_order_relaxed); |
189 | 161 |
|
190 | | - switch (condition) { |
191 | | - case HSA_SIGNAL_CONDITION_EQ: { |
192 | | - condition_met = (value == compare_value); |
193 | | - break; |
194 | | - } |
195 | | - case HSA_SIGNAL_CONDITION_NE: { |
196 | | - condition_met = (value != compare_value); |
197 | | - break; |
198 | | - } |
199 | | - case HSA_SIGNAL_CONDITION_GTE: { |
200 | | - condition_met = (value >= compare_value); |
201 | | - break; |
202 | | - } |
203 | | - case HSA_SIGNAL_CONDITION_LT: { |
204 | | - condition_met = (value < compare_value); |
205 | | - break; |
206 | | - } |
207 | | - default: |
208 | | - return 0; |
| 162 | + if (CheckSignalCondition(value, condition, compare_value)) { |
| 163 | + return value; |
209 | 164 | } |
210 | | - if (condition_met) return hsa_signal_value_t(value); |
211 | 165 |
|
212 | | - timer::fast_clock::time_point time = timer::fast_clock::now(); |
213 | | - if (time - start_time > fast_timeout) { |
214 | | - value = atomic::Load(&signal_.value, std::memory_order_relaxed); |
215 | | - return hsa_signal_value_t(value); |
| 166 | + auto now = timer::fast_clock::now(); |
| 167 | + if (now - start_time > fast_timeout) { |
| 168 | + return value; |
216 | 169 | } |
217 | 170 |
|
218 | | - if (signal_abort_timeout) { |
219 | | - const timer::fast_clock::duration abort_timeout = |
220 | | - std::chrono::seconds(signal_abort_timeout); |
221 | | - |
222 | | - if(time - start_time > abort_timeout) |
223 | | - throw AMD::hsa_exception(HSA_STATUS_ERROR_FATAL, |
224 | | - "Signal wait abort timeout.\n"); |
225 | | - } |
| 171 | + timer::CheckAbortTimeout(start_time, signal_abort_timeout); |
226 | 172 |
|
227 | 173 | if (wait_hint == HSA_WAIT_STATE_ACTIVE) { |
228 | | -#if defined(__i386__) || defined(__x86_64__) |
229 | 174 | if (g_use_mwaitx) { |
230 | | - _mm_mwaitx(0, 0, 0); |
231 | | - _mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0); |
| 175 | + // Short timeout for active waiting |
| 176 | + timer::DoMwaitx(const_cast<int64_t*>(&signal_.value), 1000); |
232 | 177 | } |
233 | | -#endif |
234 | 178 | continue; |
235 | 179 | } |
236 | 180 |
|
237 | | - if (time - start_time < kMaxElapsed) { |
238 | | - // os::uSleep(20); |
239 | | -#if defined(__i386__) || defined(__x86_64__) |
| 181 | + if (now - start_time < kMaxElapsed) { |
240 | 182 | if (g_use_mwaitx) { |
241 | | - _mm_mwaitx(0, 60000, MWAITX_ECX_TIMER_ENABLE); |
242 | | - _mm_monitorx(const_cast<int64_t*>(&signal_.value), 0, 0); |
| 183 | + // Longer timeout with timer for passive waiting |
| 184 | + timer::DoMwaitx(const_cast<int64_t*>(&signal_.value), 60000, true); |
243 | 185 | } |
244 | | -#endif |
245 | 186 | continue; |
246 | 187 | } |
247 | 188 |
|
248 | | - uint32_t wait_ms; |
249 | | - auto time_remaining = fast_timeout - (time - start_time); |
250 | | - uint64_t ct = timer::duration_cast<std::chrono::milliseconds>( |
251 | | - time_remaining).count(); |
| 189 | + auto remaining_ms = timer::duration_cast<std::chrono::milliseconds>( |
| 190 | + fast_timeout - (now - start_time)).count(); |
252 | 191 |
|
253 | | - wait_ms = static_cast<uint32_t>(std::min(ct, 0xFFFFFFFEUL)); |
254 | | - if (signal_abort_timeout) |
255 | | - wait_ms = std::min(wait_ms, signal_abort_timeout * 1000); |
| 192 | + uint32_t wait_ms = std::min<uint32_t>( |
| 193 | + static_cast<uint32_t>(std::min<uint64_t>(remaining_ms, 0xFFFFFFFEUL)), |
| 194 | + static_cast<uint32_t>(signal_abort_timeout ? signal_abort_timeout * 1000 : 0xFFFFFFFFUL) |
| 195 | + ); |
256 | 196 |
|
257 | 197 | hsaKmtWaitOnEvent_Ext(event_, wait_ms, &event_age); |
258 | 198 | } |
|
0 commit comments