Skip to content

Commit 943d454

Browse files
authored
Adding device-side AMDGPU signal/queue utils. (iree-org#21042)
The signal and queue headers are derived from the HSA spec, ROCR implementation, and LLVM device library/codegen.
1 parent 4a936e4 commit 943d454

File tree

4 files changed

+1018
-0
lines changed

4 files changed

+1018
-0
lines changed

runtime/src/iree/hal/drivers/amdgpu/device/CMakeLists.txt

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -17,6 +17,9 @@ set(_BITCODE_HDRS
1717
"kernels.h"
1818
"support/common.h"
1919
"support/kernel_args.h"
20+
"support/mutex.h"
21+
"support/queue.h"
22+
"support/signal.h"
2023
)
2124

2225
#===------------------------------------------------------------------------===#
Lines changed: 68 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,68 @@
1+
// Copyright 2025 The IREE Authors
2+
//
3+
// Licensed under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
7+
#ifndef IREE_HAL_DRIVERS_AMDGPU_DEVICE_SUPPORT_MUTEX_H_
8+
#define IREE_HAL_DRIVERS_AMDGPU_DEVICE_SUPPORT_MUTEX_H_
9+
10+
#include "iree/hal/drivers/amdgpu/device/support/common.h"
11+
12+
//===----------------------------------------------------------------------===//
13+
// iree_hal_amdgpu_device_mutex_t
14+
//===----------------------------------------------------------------------===//
15+
16+
// Device spin-lock mutex.
17+
// This can run on the host as well but is optimized for device usage. Spinning
18+
// on the host is a bad idea. Spinning on the device is _also_ a bad idea, but
19+
// does have its uses.
20+
//
21+
// Note that because atomics are not guaranteed to work off-agent this is only
22+
// to be used for intra-agent exclusion such as when multiple queues on the
23+
// same agent are sharing a data structure.
24+
//
25+
// Reference: https://rigtorp.se/spinlock/
26+
typedef iree_amdgpu_scoped_atomic_uint32_t iree_hal_amdgpu_device_mutex_t;
27+
28+
// State stored in the mutex word when the lock is not held. This is the value
// written by initialize/unlock and the value lock expects to swap out.
#define IREE_HAL_AMDGPU_DEVICE_MUTEX_UNLOCKED 0u
29+
// State stored in the mutex word while the lock is held. This is the value
// lock swaps in when it acquires the mutex.
#define IREE_HAL_AMDGPU_DEVICE_MUTEX_LOCKED 1u
30+
31+
// Initializes a mutex to the unlocked state.
32+
static inline void iree_hal_amdgpu_device_mutex_initialize(
33+
iree_hal_amdgpu_device_mutex_t* IREE_AMDGPU_RESTRICT out_mutex) {
34+
uint32_t initial_value = IREE_HAL_AMDGPU_DEVICE_MUTEX_UNLOCKED;
35+
IREE_AMDGPU_SCOPED_ATOMIC_INIT(out_mutex, initial_value);
36+
}
37+
38+
// Spins until a lock on the mutex is acquired.
39+
static inline void iree_hal_amdgpu_device_mutex_lock(
40+
iree_hal_amdgpu_device_mutex_t* IREE_AMDGPU_RESTRICT mutex) {
41+
for (;;) {
42+
// Optimistically assume the lock is free on the first try.
43+
uint32_t prev = IREE_HAL_AMDGPU_DEVICE_MUTEX_UNLOCKED;
44+
if (iree_amdgpu_scoped_atomic_compare_exchange_strong(
45+
mutex, &prev, IREE_HAL_AMDGPU_DEVICE_MUTEX_LOCKED,
46+
iree_amdgpu_memory_order_acquire, iree_amdgpu_memory_order_acquire,
47+
iree_amdgpu_memory_scope_system)) {
48+
return;
49+
}
50+
// Wait for lock to be released without generating cache misses.
51+
while (iree_amdgpu_scoped_atomic_load(mutex,
52+
iree_amdgpu_memory_order_relaxed,
53+
iree_amdgpu_memory_scope_system)) {
54+
// Yield for a bit to give the other thread a chance to unlock.
55+
iree_amdgpu_yield();
56+
}
57+
}
58+
}
59+
60+
// Unlocks a mutex. Must be called with the lock held by the caller.
61+
static inline void iree_hal_amdgpu_device_mutex_unlock(
62+
iree_hal_amdgpu_device_mutex_t* IREE_AMDGPU_RESTRICT mutex) {
63+
iree_amdgpu_scoped_atomic_store(mutex, IREE_HAL_AMDGPU_DEVICE_MUTEX_UNLOCKED,
64+
iree_amdgpu_memory_order_release,
65+
iree_amdgpu_memory_scope_system);
66+
}
67+
68+
#endif // IREE_HAL_DRIVERS_AMDGPU_DEVICE_SUPPORT_MUTEX_H_

0 commit comments

Comments
 (0)