Skip to content

Commit 6c0131f

Browse files
committed
[CL] Atomic fence scope capability for Intel FPGA driver
This patch implements a workaround for the Intel FPGA driver, which currently reports itself as an OpenCL 1.2 device but also supports `UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE`, beyond the baseline for OpenCL 1.2 devices. The workaround queries `CL_DEVICE_ATOMIC_FENCE_CAPABILITIES` even on devices that report an OpenCL 1.x version. If the query succeeds, the returned value is used; otherwise the error code is ignored and the baseline capabilities are returned.
1 parent 0816206 commit 6c0131f

File tree

1 file changed

+41
-23
lines changed

1 file changed

+41
-23
lines changed

source/adapters/opencl/device.cpp

Lines changed: 41 additions & 23 deletions
Original file line numberDiff line numberDiff line change
@@ -569,6 +569,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
569569
return ReturnValue(
570570
static_cast<ur_memory_order_capability_flags_t>(URCapabilities));
571571
}
572+
572573
case UR_DEVICE_INFO_ATOMIC_MEMORY_SCOPE_CAPABILITIES: {
573574
/* Initialize result to minimum mandated capabilities according to
574575
* SYCL2020 4.6.3.2. Because scopes are hierarchical, wider scopes support
@@ -624,6 +625,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
624625
return ReturnValue(
625626
static_cast<ur_memory_scope_capability_flags_t>(URCapabilities));
626627
}
628+
627629
case UR_DEVICE_INFO_ATOMIC_FENCE_ORDER_CAPABILITIES: {
628630
/* Initialize result to minimum mandated capabilities according to
629631
* SYCL2020 4.6.3.2 */
@@ -671,6 +673,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
671673
return ReturnValue(
672674
static_cast<ur_memory_order_capability_flags_t>(URCapabilities));
673675
}
676+
674677
case UR_DEVICE_INFO_ATOMIC_FENCE_SCOPE_CAPABILITIES: {
675678
/* Initialize result to minimum mandated capabilities according to
676679
* SYCL2020 4.6.3.2. Because scopes are hierarchical, wider scopes support
@@ -686,38 +689,53 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
686689
CL_RETURN_ON_FAILURE(cl_adapter::getDeviceVersion(
687690
cl_adapter::cast<cl_device_id>(hDevice), DevVer));
688691

689-
cl_device_atomic_capabilities CLCapabilities;
692+
auto convertCapabilities =
693+
[](cl_device_atomic_capabilities CLCapabilities) {
694+
ur_memory_scope_capability_flags_t URCapabilities = 0;
695+
/* Because scopes are hierarchical, wider scopes support all narrower
696+
* scopes. At a minimum, each device must support WORK_ITEM,
697+
* SUB_GROUP and WORK_GROUP.
698+
* (https://github.com/KhronosGroup/SYCL-Docs/pull/382). We already
699+
* initialized to these minimum mandated capabilities. Just check
700+
* wider scopes. */
701+
if (CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) {
702+
URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE;
703+
}
704+
705+
if (CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) {
706+
URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM;
707+
}
708+
return URCapabilities;
709+
};
710+
690711
if (DevVer >= oclv::V3_0) {
712+
cl_device_atomic_capabilities CLCapabilities;
691713
CL_RETURN_ON_FAILURE(clGetDeviceInfo(
692714
cl_adapter::cast<cl_device_id>(hDevice),
693715
CL_DEVICE_ATOMIC_FENCE_CAPABILITIES,
694716
sizeof(cl_device_atomic_capabilities), &CLCapabilities, nullptr));
695-
696717
assert((CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_WORK_GROUP) &&
697718
"Violates minimum mandated guarantee");
719+
URCapabilities |= convertCapabilities(CLCapabilities);
720+
} else if (DevVer >= oclv::V2_0) {
721+
/* OpenCL 2.x minimum mandated capabilities are WORK_GROUP | DEVICE |
722+
ALL_DEVICES */
723+
URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE |
724+
UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM;
698725

699-
/* Because scopes are hierarchical, wider scopes support all narrower
700-
* scopes. At a minimum, each device must support WORK_ITEM, SUB_GROUP and
701-
* WORK_GROUP. (https://github.com/KhronosGroup/SYCL-Docs/pull/382). We
702-
* already initialized to these minimum mandated capabilities. Just check
703-
* wider scopes. */
704-
if (CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_DEVICE) {
705-
URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE;
706-
}
707-
708-
if (CLCapabilities & CL_DEVICE_ATOMIC_SCOPE_ALL_DEVICES) {
709-
URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM;
710-
}
711726
} else {
712-
/* This info is only available in OpenCL version >= 3.0. Just return
713-
* minimum mandated capabilities for older versions. OpenCL 1.x minimum
714-
* mandated capabilities are WORK_GROUP, we already initialized using it.
715-
*/
716-
if (DevVer >= oclv::V2_0) {
717-
/* OpenCL 2.x minimum mandated capabilities are WORK_GROUP | DEVICE |
718-
* ALL_DEVICES */
719-
URCapabilities |= UR_MEMORY_SCOPE_CAPABILITY_FLAG_DEVICE |
720-
UR_MEMORY_SCOPE_CAPABILITY_FLAG_SYSTEM;
727+
// FIXME: Special case for Intel FPGA driver which is currently an
728+
// OpenCL 1.2 device but is more capable than the default. This is a
729+
// temporary work around until the Intel FPGA driver is updated to
730+
// OpenCL 3.0. If the query is successful, then use the result but do
731+
// not return an error if the query is unsuccessful as this is expected
732+
// of an OpenCL 1.2 driver.
733+
cl_device_atomic_capabilities CLCapabilities;
734+
if (CL_SUCCESS == clGetDeviceInfo(cl_adapter::cast<cl_device_id>(hDevice),
735+
CL_DEVICE_ATOMIC_FENCE_CAPABILITIES,
736+
sizeof(cl_device_atomic_capabilities),
737+
&CLCapabilities, nullptr)) {
738+
URCapabilities |= convertCapabilities(CLCapabilities);
721739
}
722740
}
723741

0 commit comments

Comments
 (0)