Skip to content

Commit fabf436

Browse files
committed
Report device fp support via config rather than extension string.
We're trying to move the UR adapters away from returning hard-coded OpenCL extension strings to report device capabilities; this is the first change in that direction.
1 parent 7d864b6 commit fabf436

File tree

4 files changed

+56
-54
lines changed

4 files changed

+56
-54
lines changed

source/adapters/cuda/device.cpp

Lines changed: 22 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -437,8 +437,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
437437
return ReturnValue(MemBaseAddrAlign);
438438
}
439439
case UR_DEVICE_INFO_HALF_FP_CONFIG: {
440-
// TODO: is this config consistent across all NVIDIA GPUs?
441-
return ReturnValue(0u);
440+
int Major = 0;
441+
int Minor = 0;
442+
443+
UR_CHECK_ERROR(cuDeviceGetAttribute(
444+
&Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get()));
445+
UR_CHECK_ERROR(cuDeviceGetAttribute(
446+
&Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hDevice->get()));
447+
448+
if ((Major >= 6) || ((Major == 5) && (Minor >= 3))) {
449+
// TODO: is this config consistent across all NVIDIA GPUs?
450+
ur_device_fp_capability_flags_t Config =
451+
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
452+
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
453+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
454+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
455+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
456+
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
457+
return ReturnValue(Config);
458+
} else {
459+
return ReturnValue(0u);
460+
}
442461
}
443462
case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
444463
// TODO: is this config consistent across all NVIDIA GPUs?
@@ -616,26 +635,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
616635
}
617636
case UR_DEVICE_INFO_EXTENSIONS: {
618637

619-
std::string SupportedExtensions = "cl_khr_fp64 cl_khr_subgroups ";
638+
std::string SupportedExtensions = "cl_khr_subgroups ";
620639
SupportedExtensions += "cl_intel_devicelib_assert ";
621640
// Return supported for the UR command-buffer experimental feature
622641
SupportedExtensions += "ur_exp_command_buffer ";
623642
SupportedExtensions += "ur_exp_usm_p2p ";
624643
SupportedExtensions += "ur_exp_launch_properties ";
625644
SupportedExtensions += " ";
626645

627-
int Major = 0;
628-
int Minor = 0;
629-
630-
UR_CHECK_ERROR(cuDeviceGetAttribute(
631-
&Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get()));
632-
UR_CHECK_ERROR(cuDeviceGetAttribute(
633-
&Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hDevice->get()));
634-
635-
if ((Major >= 6) || ((Major == 5) && (Minor >= 3))) {
636-
SupportedExtensions += "cl_khr_fp16 ";
637-
}
638-
639646
return ReturnValue(SupportedExtensions.c_str());
640647
}
641648
case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: {

source/adapters/hip/device.cpp

Lines changed: 22 additions & 17 deletions
Original file line numberDiff line numberDiff line change
@@ -369,29 +369,44 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
369369
return ReturnValue(MemBaseAddrAlign);
370370
}
371371
case UR_DEVICE_INFO_HALF_FP_CONFIG: {
372-
return ReturnValue(0u);
373-
}
374-
case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
375372
ur_device_fp_capability_flags_t Config =
376373
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
377374
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
378375
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
379376
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
380377
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
381-
UR_DEVICE_FP_CAPABILITY_FLAG_FMA |
382-
UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT;
378+
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
383379
return ReturnValue(Config);
384380
}
385-
case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: {
381+
case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
386382
ur_device_fp_capability_flags_t Config =
387383
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
388384
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
389385
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
390386
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
391387
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
392-
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
388+
UR_DEVICE_FP_CAPABILITY_FLAG_FMA |
389+
UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT;
393390
return ReturnValue(Config);
394391
}
392+
case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: {
393+
hipDeviceProp_t Props;
394+
detail::ur::assertion(hipGetDeviceProperties(&Props, hDevice->get()) ==
395+
hipSuccess);
396+
397+
if (Props.arch.hasDoubles) {
398+
ur_device_fp_capability_flags_t Config =
399+
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
400+
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
401+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
402+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
403+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
404+
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
405+
return ReturnValue(Config);
406+
} else {
407+
return ReturnValue(0u);
408+
}
409+
}
395410
case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: {
396411
return ReturnValue(UR_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE);
397412
}
@@ -580,16 +595,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
580595

581596
SupportedExtensions += " ";
582597

583-
hipDeviceProp_t Props;
584-
detail::ur::assertion(hipGetDeviceProperties(&Props, hDevice->get()) ==
585-
hipSuccess);
586-
587-
if (Props.arch.hasDoubles) {
588-
SupportedExtensions += "cl_khr_fp64 ";
589-
}
590-
591-
SupportedExtensions += "cl_khr_fp16 ";
592-
593598
return ReturnValue(SupportedExtensions.c_str());
594599
}
595600
case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: {

source/adapters/level_zero/device.cpp

Lines changed: 0 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -260,8 +260,6 @@ ur_result_t urDeviceGetInfo(
260260
// for performance.
261261
// cl_intel_required_subgroup_size - Extension to allow programmers to
262262
// optionally specify the required subgroup size for a kernel function.
263-
// cl_khr_fp16 - Optional half floating-point support.
264-
// cl_khr_fp64 - Support for double floating-point precision.
265263
// cl_khr_int64_base_atomics, cl_khr_int64_extended_atomics - Optional
266264
// extensions that implement atomic operations on 64-bit signed and
267265
// unsigned integers to locations in __global and __local memory.
@@ -271,10 +269,6 @@ ur_result_t urDeviceGetInfo(
271269
// Hardcoding some extensions we know are supported by all Level Zero
272270
// devices.
273271
SupportedExtensions += (ZE_SUPPORTED_EXTENSIONS);
274-
if (Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP16)
275-
SupportedExtensions += ("cl_khr_fp16 ");
276-
if (Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP64)
277-
SupportedExtensions += ("cl_khr_fp64 ");
278272
if (Device->ZeDeviceModuleProperties->flags &
279273
ZE_DEVICE_MODULE_FLAG_INT64_ATOMICS)
280274
// int64AtomicsSupported indicates support for both.

source/adapters/native_cpu/device.cpp

Lines changed: 12 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -154,10 +154,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
154154
case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY:
155155
return ReturnValue(bool{1});
156156
case UR_DEVICE_INFO_EXTENSIONS:
157-
// TODO : Populate return string accordingly - e.g. cl_khr_fp16,
158-
// cl_khr_fp64, cl_khr_int64_base_atomics,
159-
// cl_khr_int64_extended_atomics
160-
return ReturnValue("cl_khr_fp16, cl_khr_fp64 ");
157+
return ReturnValue("");
161158
case UR_DEVICE_INFO_VERSION:
162159
return ReturnValue("0.1");
163160
case UR_DEVICE_INFO_COMPILER_AVAILABLE:
@@ -193,19 +190,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
193190
case UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH:
194191
// Default minimum values required by the SYCL specification.
195192
return ReturnValue(size_t{2048});
196-
case UR_DEVICE_INFO_HALF_FP_CONFIG: {
197-
// todo:
198-
ur_device_fp_capability_flags_t HalfFPValue = 0;
199-
return ReturnValue(HalfFPValue);
200-
}
201-
case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
202-
// todo
203-
ur_device_fp_capability_flags_t SingleFPValue = 0;
204-
return ReturnValue(SingleFPValue);
205-
}
193+
case UR_DEVICE_INFO_HALF_FP_CONFIG:
194+
case UR_DEVICE_INFO_SINGLE_FP_CONFIG:
206195
case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: {
207-
ur_device_fp_capability_flags_t DoubleFPValue = 0;
208-
return ReturnValue(DoubleFPValue);
196+
// All fp types are supported, return minimum flags to indicate support.
197+
// TODO: look at this in more detail.
198+
ur_device_fp_capability_flags_t SupportedFlags =
199+
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
200+
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
201+
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
202+
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
203+
;
204+
return ReturnValue(SupportedFlags);
209205
}
210206
case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS:
211207
return ReturnValue(uint32_t{3});

0 commit comments

Comments
 (0)