Skip to content

Commit b11b346

Browse files
author
Georgi Mirazchiyski
committed
[HIP] Enable acq_rel and seq_cst memory order capabilities for atomics on HIP
1 parent 2c4303c commit b11b346

File tree

1 file changed

+12
-1
lines changed

1 file changed

+12
-1
lines changed

source/adapters/hip/device.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -779,7 +779,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
779779
ur_memory_order_capability_flags_t Capabilities =
780780
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELAXED |
781781
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQUIRE |
782-
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE;
782+
UR_MEMORY_ORDER_CAPABILITY_FLAG_RELEASE |
783+
UR_MEMORY_ORDER_CAPABILITY_FLAG_ACQ_REL;
784+
#if __HIP_PLATFORM_NVIDIA__
785+
// NVIDIA GPUs only support fence.sc (needed for seq_cst) from SM 7.0 onwards.
786+
int Major = 0;
787+
UR_CHECK_ERROR(hipDeviceGetAttribute(
788+
&Major, hipDeviceAttributeComputeCapabilityMajor, hDevice->get()));
789+
if (Major >= 7)
790+
Capabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST;
791+
#else
792+
Capabilities |= UR_MEMORY_ORDER_CAPABILITY_FLAG_SEQ_CST;
793+
#endif
783794
return ReturnValue(Capabilities);
784795
}
785796
case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: {

0 commit comments

Comments
 (0)