Skip to content

Commit 94ea653

Browse files
committed
Report device fp support via config rather than extension string.
We're trying to move the UR adapters away from returning hard-coded OpenCL extension strings to report device capabilities; this is the first change in that direction.
1 parent dd7d5c6 commit 94ea653

File tree

4 files changed

+56
-54
lines changed

4 files changed

+56
-54
lines changed

source/adapters/cuda/device.cpp

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -437,8 +437,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
437437
return ReturnValue(MemBaseAddrAlign);
438438
}
439439
case UR_DEVICE_INFO_HALF_FP_CONFIG: {
440-
// TODO: is this config consistent across all NVIDIA GPUs?
441-
return ReturnValue(0u);
440+
int Major = 0;
441+
int Minor = 0;
442+
443+
UR_CHECK_ERROR(cuDeviceGetAttribute(
444+
&Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get()));
445+
UR_CHECK_ERROR(cuDeviceGetAttribute(
446+
&Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hDevice->get()));
447+
448+
if ((Major >= 6) || ((Major == 5) && (Minor >= 3))) {
449+
// TODO: is this config consistent across all NVIDIA GPUs?
450+
ur_device_fp_capability_flags_t Config =
451+
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
452+
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
453+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
454+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
455+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
456+
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
457+
return ReturnValue(Config);
458+
} else {
459+
return ReturnValue(0u);
460+
}
442461
}
443462
case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
444463
// TODO: is this config consistent across all NVIDIA GPUs?
@@ -616,26 +635,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
616635
}
617636
case UR_DEVICE_INFO_EXTENSIONS: {
618637

619-
std::string SupportedExtensions = "cl_khr_fp64 cl_khr_subgroups ";
638+
std::string SupportedExtensions = "cl_khr_subgroups ";
620639
SupportedExtensions += "cl_intel_devicelib_assert ";
621640
// Return supported for the UR command-buffer experimental feature
622641
SupportedExtensions += "ur_exp_command_buffer ";
623642
SupportedExtensions += "ur_exp_usm_p2p ";
624643
SupportedExtensions += "ur_exp_launch_properties ";
625644
SupportedExtensions += " ";
626645

627-
int Major = 0;
628-
int Minor = 0;
629-
630-
UR_CHECK_ERROR(cuDeviceGetAttribute(
631-
&Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get()));
632-
UR_CHECK_ERROR(cuDeviceGetAttribute(
633-
&Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hDevice->get()));
634-
635-
if ((Major >= 6) || ((Major == 5) && (Minor >= 3))) {
636-
SupportedExtensions += "cl_khr_fp16 ";
637-
}
638-
639646
return ReturnValue(SupportedExtensions.c_str());
640647
}
641648
case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: {

source/adapters/hip/device.cpp

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -370,29 +370,44 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
370370
return ReturnValue(MemBaseAddrAlign);
371371
}
372372
case UR_DEVICE_INFO_HALF_FP_CONFIG: {
373-
return ReturnValue(0u);
374-
}
375-
case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
376373
ur_device_fp_capability_flags_t Config =
377374
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
378375
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
379376
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
380377
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
381378
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
382-
UR_DEVICE_FP_CAPABILITY_FLAG_FMA |
383-
UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT;
379+
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
384380
return ReturnValue(Config);
385381
}
386-
case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: {
382+
case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
387383
ur_device_fp_capability_flags_t Config =
388384
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
389385
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
390386
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
391387
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
392388
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
393-
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
389+
UR_DEVICE_FP_CAPABILITY_FLAG_FMA |
390+
UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT;
394391
return ReturnValue(Config);
395392
}
393+
case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: {
394+
hipDeviceProp_t Props;
395+
detail::ur::assertion(hipGetDeviceProperties(&Props, hDevice->get()) ==
396+
hipSuccess);
397+
398+
if (Props.arch.hasDoubles) {
399+
ur_device_fp_capability_flags_t Config =
400+
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
401+
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
402+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
403+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
404+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
405+
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
406+
return ReturnValue(Config);
407+
} else {
408+
return ReturnValue(0u);
409+
}
410+
}
396411
case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: {
397412
return ReturnValue(UR_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE);
398413
}
@@ -581,16 +596,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
581596

582597
SupportedExtensions += " ";
583598

584-
hipDeviceProp_t Props;
585-
detail::ur::assertion(hipGetDeviceProperties(&Props, hDevice->get()) ==
586-
hipSuccess);
587-
588-
if (Props.arch.hasDoubles) {
589-
SupportedExtensions += "cl_khr_fp64 ";
590-
}
591-
592-
SupportedExtensions += "cl_khr_fp16 ";
593-
594599
return ReturnValue(SupportedExtensions.c_str());
595600
}
596601
case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: {

source/adapters/level_zero/device.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -260,8 +260,6 @@ ur_result_t urDeviceGetInfo(
260260
// for performance.
261261
// cl_intel_required_subgroup_size - Extension to allow programmers to
262262
// optionally specify the required subgroup size for a kernel function.
263-
// cl_khr_fp16 - Optional half floating-point support.
264-
// cl_khr_fp64 - Support for double floating-point precision.
265263
// cl_khr_int64_base_atomics, cl_khr_int64_extended_atomics - Optional
266264
// extensions that implement atomic operations on 64-bit signed and
267265
// unsigned integers to locations in __global and __local memory.
@@ -271,10 +269,6 @@ ur_result_t urDeviceGetInfo(
271269
// Hardcoding some extensions we know are supported by all Level Zero
272270
// devices.
273271
SupportedExtensions += (ZE_SUPPORTED_EXTENSIONS);
274-
if (Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP16)
275-
SupportedExtensions += ("cl_khr_fp16 ");
276-
if (Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP64)
277-
SupportedExtensions += ("cl_khr_fp64 ");
278272
if (Device->ZeDeviceModuleProperties->flags &
279273
ZE_DEVICE_MODULE_FLAG_INT64_ATOMICS)
280274
// int64AtomicsSupported indicates support for both.

source/adapters/native_cpu/device.cpp

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -154,10 +154,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
154154
case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY:
155155
return ReturnValue(bool{1});
156156
case UR_DEVICE_INFO_EXTENSIONS:
157-
// TODO : Populate return string accordingly - e.g. cl_khr_fp16,
158-
// cl_khr_fp64, cl_khr_int64_base_atomics,
159-
// cl_khr_int64_extended_atomics
160-
return ReturnValue("cl_khr_fp16, cl_khr_fp64 ");
157+
return ReturnValue("");
161158
case UR_DEVICE_INFO_VERSION:
162159
return ReturnValue("0.1");
163160
case UR_DEVICE_INFO_COMPILER_AVAILABLE:
@@ -193,19 +190,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
193190
case UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH:
194191
// Default minimum values required by the SYCL specification.
195192
return ReturnValue(size_t{2048});
196-
case UR_DEVICE_INFO_HALF_FP_CONFIG: {
197-
// todo:
198-
ur_device_fp_capability_flags_t HalfFPValue = 0;
199-
return ReturnValue(HalfFPValue);
200-
}
201-
case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
202-
// todo
203-
ur_device_fp_capability_flags_t SingleFPValue = 0;
204-
return ReturnValue(SingleFPValue);
205-
}
193+
case UR_DEVICE_INFO_HALF_FP_CONFIG:
194+
case UR_DEVICE_INFO_SINGLE_FP_CONFIG:
206195
case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: {
207-
ur_device_fp_capability_flags_t DoubleFPValue = 0;
208-
return ReturnValue(DoubleFPValue);
196+
// All fp types are supported, return minimum flags to indicate support.
197+
// TODO: look at this in more detail.
198+
ur_device_fp_capability_flags_t SupportedFlags =
199+
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
200+
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
201+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
202+
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
203+
;
204+
return ReturnValue(SupportedFlags);
209205
}
210206
case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS:
211207
return ReturnValue(uint32_t{3});

0 commit comments

Comments
 (0)