|
| 1 | +// Copyright 2025 The IREE Authors |
| 2 | +// |
| 3 | +// Licensed under the Apache License v2.0 with LLVM Exceptions. |
| 4 | +// See https://llvm.org/LICENSE.txt for license information. |
| 5 | +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception |
| 6 | + |
| 7 | +#ifndef IREE_HAL_DRIVERS_AMDGPU_DEVICE_SUPPORT_MUTEX_H_ |
| 8 | +#define IREE_HAL_DRIVERS_AMDGPU_DEVICE_SUPPORT_MUTEX_H_ |
| 9 | + |
| 10 | +#include "iree/hal/drivers/amdgpu/device/support/common.h" |
| 11 | + |
//===----------------------------------------------------------------------===//
// iree_hal_amdgpu_device_mutex_t
//===----------------------------------------------------------------------===//

// Device spin-lock mutex.
// This can run on the host as well but is optimized for device usage. Spinning
// on the host is a bad idea. Spinning on the device is _also_ a bad idea, but
// does have its uses.
//
// Note that because atomics are not guaranteed to work off-agent this is only
// to be used for intra-agent exclusion such as when multiple queues on the
// same agent are sharing a data structure.
//
// Reference: https://rigtorp.se/spinlock/
typedef iree_amdgpu_scoped_atomic_uint32_t iree_hal_amdgpu_device_mutex_t;

// Mutex state values stored in the atomic word: the lock is free.
#define IREE_HAL_AMDGPU_DEVICE_MUTEX_UNLOCKED 0u
// Mutex state values stored in the atomic word: the lock is held.
#define IREE_HAL_AMDGPU_DEVICE_MUTEX_LOCKED 1u
| 31 | +// Initializes a mutex to the unlocked state. |
| 32 | +static inline void iree_hal_amdgpu_device_mutex_initialize( |
| 33 | + iree_hal_amdgpu_device_mutex_t* IREE_AMDGPU_RESTRICT out_mutex) { |
| 34 | + uint32_t initial_value = IREE_HAL_AMDGPU_DEVICE_MUTEX_UNLOCKED; |
| 35 | + IREE_AMDGPU_SCOPED_ATOMIC_INIT(out_mutex, initial_value); |
| 36 | +} |
| 37 | + |
| 38 | +// Spins until a lock on the mutex is acquired. |
| 39 | +static inline void iree_hal_amdgpu_device_mutex_lock( |
| 40 | + iree_hal_amdgpu_device_mutex_t* IREE_AMDGPU_RESTRICT mutex) { |
| 41 | + for (;;) { |
| 42 | + // Optimistically assume the lock is free on the first try. |
| 43 | + uint32_t prev = IREE_HAL_AMDGPU_DEVICE_MUTEX_UNLOCKED; |
| 44 | + if (iree_amdgpu_scoped_atomic_compare_exchange_strong( |
| 45 | + mutex, &prev, IREE_HAL_AMDGPU_DEVICE_MUTEX_LOCKED, |
| 46 | + iree_amdgpu_memory_order_acquire, iree_amdgpu_memory_order_acquire, |
| 47 | + iree_amdgpu_memory_scope_system)) { |
| 48 | + return; |
| 49 | + } |
| 50 | + // Wait for lock to be released without generating cache misses. |
| 51 | + while (iree_amdgpu_scoped_atomic_load(mutex, |
| 52 | + iree_amdgpu_memory_order_relaxed, |
| 53 | + iree_amdgpu_memory_scope_system)) { |
| 54 | + // Yield for a bit to give the other thread a chance to unlock. |
| 55 | + iree_amdgpu_yield(); |
| 56 | + } |
| 57 | + } |
| 58 | +} |
| 59 | + |
| 60 | +// Unlocks a mutex. Must be called with the lock held by the caller. |
| 61 | +static inline void iree_hal_amdgpu_device_mutex_unlock( |
| 62 | + iree_hal_amdgpu_device_mutex_t* IREE_AMDGPU_RESTRICT mutex) { |
| 63 | + iree_amdgpu_scoped_atomic_store(mutex, IREE_HAL_AMDGPU_DEVICE_MUTEX_UNLOCKED, |
| 64 | + iree_amdgpu_memory_order_release, |
| 65 | + iree_amdgpu_memory_scope_system); |
| 66 | +} |
| 67 | + |
| 68 | +#endif // IREE_HAL_DRIVERS_AMDGPU_DEVICE_SUPPORT_MUTEX_H_ |
0 commit comments