Skip to content

Commit 88ef7a8

Browse files
committed
Report device fp support via config rather than extension string.
We're trying to move the UR adapters away from returning hard-coded OpenCL extension strings to report device capabilities; this is the first change in that direction.
1 parent 262bea8 commit 88ef7a8

File tree

4 files changed

+46
-39
lines changed

4 files changed

+46
-39
lines changed

source/adapters/cuda/device.cpp

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -436,8 +436,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
436436
return ReturnValue(MemBaseAddrAlign);
437437
}
438438
case UR_DEVICE_INFO_HALF_FP_CONFIG: {
439-
// TODO: is this config consistent across all NVIDIA GPUs?
440-
return ReturnValue(0u);
439+
int Major = 0;
440+
int Minor = 0;
441+
442+
UR_CHECK_ERROR(cuDeviceGetAttribute(
443+
&Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get()));
444+
UR_CHECK_ERROR(cuDeviceGetAttribute(
445+
&Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hDevice->get()));
446+
447+
if ((Major >= 6) || ((Major == 5) && (Minor >= 3))) {
448+
// TODO: is this config consistent across all NVIDIA GPUs?
449+
ur_device_fp_capability_flags_t Config =
450+
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
451+
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
452+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
453+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
454+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
455+
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
456+
return ReturnValue(Config);
457+
} else {
458+
return ReturnValue(0u);
459+
}
441460
}
442461
case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
443462
// TODO: is this config consistent across all NVIDIA GPUs?
@@ -615,26 +634,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
615634
}
616635
case UR_DEVICE_INFO_EXTENSIONS: {
617636

618-
std::string SupportedExtensions = "cl_khr_fp64 cl_khr_subgroups ";
637+
std::string SupportedExtensions = "cl_khr_subgroups ";
619638
SupportedExtensions += "cl_intel_devicelib_assert ";
620639
// Return supported for the UR command-buffer experimental feature
621640
SupportedExtensions += "ur_exp_command_buffer ";
622641
SupportedExtensions += "ur_exp_usm_p2p ";
623642
SupportedExtensions += "ur_exp_launch_properties ";
624643
SupportedExtensions += " ";
625644

626-
int Major = 0;
627-
int Minor = 0;
628-
629-
UR_CHECK_ERROR(cuDeviceGetAttribute(
630-
&Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get()));
631-
UR_CHECK_ERROR(cuDeviceGetAttribute(
632-
&Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hDevice->get()));
633-
634-
if ((Major >= 6) || ((Major == 5) && (Minor >= 3))) {
635-
SupportedExtensions += "cl_khr_fp16 ";
636-
}
637-
638645
return ReturnValue(SupportedExtensions.c_str());
639646
}
640647
case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: {

source/adapters/hip/device.cpp

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -355,29 +355,44 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
355355
return ReturnValue(MemBaseAddrAlign);
356356
}
357357
case UR_DEVICE_INFO_HALF_FP_CONFIG: {
358-
return ReturnValue(0u);
359-
}
360-
case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
361358
ur_device_fp_capability_flags_t Config =
362359
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
363360
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
364361
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
365362
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
366363
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
367-
UR_DEVICE_FP_CAPABILITY_FLAG_FMA |
368-
UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT;
364+
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
369365
return ReturnValue(Config);
370366
}
371-
case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: {
367+
case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
372368
ur_device_fp_capability_flags_t Config =
373369
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
374370
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
375371
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
376372
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
377373
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
378-
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
374+
UR_DEVICE_FP_CAPABILITY_FLAG_FMA |
375+
UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT;
379376
return ReturnValue(Config);
380377
}
378+
case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: {
379+
hipDeviceProp_t Props;
380+
detail::ur::assertion(hipGetDeviceProperties(&Props, hDevice->get()) ==
381+
hipSuccess);
382+
383+
if (Props.arch.hasDoubles) {
384+
ur_device_fp_capability_flags_t Config =
385+
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
386+
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
387+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
388+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
389+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
390+
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
391+
return ReturnValue(Config);
392+
} else {
393+
return ReturnValue(0u);
394+
}
395+
}
381396
case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: {
382397
return ReturnValue(UR_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE);
383398
}
@@ -566,16 +581,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
566581

567582
SupportedExtensions += " ";
568583

569-
hipDeviceProp_t Props;
570-
detail::ur::assertion(hipGetDeviceProperties(&Props, hDevice->get()) ==
571-
hipSuccess);
572-
573-
if (Props.arch.hasDoubles) {
574-
SupportedExtensions += "cl_khr_fp64 ";
575-
}
576-
577-
SupportedExtensions += "cl_khr_fp16 ";
578-
579584
return ReturnValue(SupportedExtensions.c_str());
580585
}
581586
case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: {

source/adapters/level_zero/device.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -258,8 +258,6 @@ ur_result_t urDeviceGetInfo(
258258
// for performance.
259259
// cl_intel_required_subgroup_size - Extension to allow programmers to
260260
// optionally specify the required subgroup size for a kernel function.
261-
// cl_khr_fp16 - Optional half floating-point support.
262-
// cl_khr_fp64 - Support for double floating-point precision.
263261
// cl_khr_int64_base_atomics, cl_khr_int64_extended_atomics - Optional
264262
// extensions that implement atomic operations on 64-bit signed and
265263
// unsigned integers to locations in __global and __local memory.
@@ -269,10 +267,6 @@ ur_result_t urDeviceGetInfo(
269267
// Hardcoding some extensions we know are supported by all Level Zero
270268
// devices.
271269
SupportedExtensions += (ZE_SUPPORTED_EXTENSIONS);
272-
if (Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP16)
273-
SupportedExtensions += ("cl_khr_fp16 ");
274-
if (Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP64)
275-
SupportedExtensions += ("cl_khr_fp64 ");
276270
if (Device->ZeDeviceModuleProperties->flags &
277271
ZE_DEVICE_MODULE_FLAG_INT64_ATOMICS)
278272
// int64AtomicsSupported indicates support for both.

source/adapters/opencl/device.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -527,7 +527,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
527527
cl_adapter::cast<cl_device_id>(hDevice), {"cl_khr_fp16"}, Supported));
528528

529529
if (!Supported) {
530-
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
530+
// Report no support with an empty bitfield
531+
return ReturnValue(ur_device_fp_capability_flags_t(0u));
531532
}
532533
}
533534

0 commit comments

Comments
 (0)