Skip to content
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
37 changes: 22 additions & 15 deletions source/adapters/cuda/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -437,8 +437,27 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
return ReturnValue(MemBaseAddrAlign);
}
case UR_DEVICE_INFO_HALF_FP_CONFIG: {
// TODO: is this config consistent across all NVIDIA GPUs?
return ReturnValue(0u);
int Major = 0;
int Minor = 0;

UR_CHECK_ERROR(cuDeviceGetAttribute(
&Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get()));
UR_CHECK_ERROR(cuDeviceGetAttribute(
&Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hDevice->get()));

if ((Major >= 6) || ((Major == 5) && (Minor >= 3))) {
// TODO: is this config consistent across all NVIDIA GPUs?
ur_device_fp_capability_flags_t Config =
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
return ReturnValue(Config);
} else {
return ReturnValue(0u);
}
}
case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
// TODO: is this config consistent across all NVIDIA GPUs?
Expand Down Expand Up @@ -616,26 +635,14 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
}
case UR_DEVICE_INFO_EXTENSIONS: {

std::string SupportedExtensions = "cl_khr_fp64 cl_khr_subgroups ";
std::string SupportedExtensions = "cl_khr_subgroups ";
SupportedExtensions += "cl_intel_devicelib_assert ";
// Return supported for the UR command-buffer experimental feature
SupportedExtensions += "ur_exp_command_buffer ";
SupportedExtensions += "ur_exp_usm_p2p ";
SupportedExtensions += "ur_exp_launch_properties ";
SupportedExtensions += " ";

int Major = 0;
int Minor = 0;

UR_CHECK_ERROR(cuDeviceGetAttribute(
&Major, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MAJOR, hDevice->get()));
UR_CHECK_ERROR(cuDeviceGetAttribute(
&Minor, CU_DEVICE_ATTRIBUTE_COMPUTE_CAPABILITY_MINOR, hDevice->get()));

if ((Major >= 6) || ((Major == 5) && (Minor >= 3))) {
SupportedExtensions += "cl_khr_fp16 ";
}

return ReturnValue(SupportedExtensions.c_str());
}
case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: {
Expand Down
39 changes: 22 additions & 17 deletions source/adapters/hip/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -370,29 +370,44 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
return ReturnValue(MemBaseAddrAlign);
}
case UR_DEVICE_INFO_HALF_FP_CONFIG: {
return ReturnValue(0u);
}
case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
ur_device_fp_capability_flags_t Config =
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
UR_DEVICE_FP_CAPABILITY_FLAG_FMA |
UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT;
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
return ReturnValue(Config);
}
case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: {
case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
ur_device_fp_capability_flags_t Config =
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
UR_DEVICE_FP_CAPABILITY_FLAG_FMA |
UR_DEVICE_FP_CAPABILITY_FLAG_CORRECTLY_ROUNDED_DIVIDE_SQRT;
return ReturnValue(Config);
}
case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: {
hipDeviceProp_t Props;
detail::ur::assertion(hipGetDeviceProperties(&Props, hDevice->get()) ==
hipSuccess);

if (Props.arch.hasDoubles) {
ur_device_fp_capability_flags_t Config =
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_ZERO |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_INF |
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
return ReturnValue(Config);
} else {
return ReturnValue(0u);
}
}
case UR_DEVICE_INFO_GLOBAL_MEM_CACHE_TYPE: {
return ReturnValue(UR_DEVICE_MEM_CACHE_TYPE_READ_WRITE_CACHE);
}
Expand Down Expand Up @@ -581,16 +596,6 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,

SupportedExtensions += " ";

hipDeviceProp_t Props;
detail::ur::assertion(hipGetDeviceProperties(&Props, hDevice->get()) ==
hipSuccess);

if (Props.arch.hasDoubles) {
SupportedExtensions += "cl_khr_fp64 ";
}

SupportedExtensions += "cl_khr_fp16 ";

return ReturnValue(SupportedExtensions.c_str());
}
case UR_DEVICE_INFO_PRINTF_BUFFER_SIZE: {
Expand Down
6 changes: 0 additions & 6 deletions source/adapters/level_zero/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,8 +260,6 @@ ur_result_t urDeviceGetInfo(
// for performance.
// cl_intel_required_subgroup_size - Extension to allow programmers to
// optionally specify the required subgroup size for a kernel function.
// cl_khr_fp16 - Optional half floating-point support.
// cl_khr_fp64 - Support for double floating-point precision.
// cl_khr_int64_base_atomics, cl_khr_int64_extended_atomics - Optional
// extensions that implement atomic operations on 64-bit signed and
// unsigned integers to locations in __global and __local memory.
Expand All @@ -271,10 +269,6 @@ ur_result_t urDeviceGetInfo(
// Hardcoding some extensions we know are supported by all Level Zero
// devices.
SupportedExtensions += (ZE_SUPPORTED_EXTENSIONS);
if (Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP16)
SupportedExtensions += ("cl_khr_fp16 ");
if (Device->ZeDeviceModuleProperties->flags & ZE_DEVICE_MODULE_FLAG_FP64)
SupportedExtensions += ("cl_khr_fp64 ");
if (Device->ZeDeviceModuleProperties->flags &
ZE_DEVICE_MODULE_FLAG_INT64_ATOMICS)
// int64AtomicsSupported indicates support for both.
Expand Down
28 changes: 12 additions & 16 deletions source/adapters/native_cpu/device.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -154,10 +154,7 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
case UR_DEVICE_INFO_HOST_UNIFIED_MEMORY:
return ReturnValue(bool{1});
case UR_DEVICE_INFO_EXTENSIONS:
// TODO : Populate return string accordingly - e.g. cl_khr_fp16,
// cl_khr_fp64, cl_khr_int64_base_atomics,
// cl_khr_int64_extended_atomics
return ReturnValue("cl_khr_fp16, cl_khr_fp64 ");
return ReturnValue("");
case UR_DEVICE_INFO_VERSION:
return ReturnValue("0.1");
case UR_DEVICE_INFO_COMPILER_AVAILABLE:
Expand Down Expand Up @@ -193,19 +190,18 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
case UR_DEVICE_INFO_IMAGE3D_MAX_DEPTH:
// Default minimum values required by the SYCL specification.
return ReturnValue(size_t{2048});
case UR_DEVICE_INFO_HALF_FP_CONFIG: {
// todo:
ur_device_fp_capability_flags_t HalfFPValue = 0;
return ReturnValue(HalfFPValue);
}
case UR_DEVICE_INFO_SINGLE_FP_CONFIG: {
// todo
ur_device_fp_capability_flags_t SingleFPValue = 0;
return ReturnValue(SingleFPValue);
}
case UR_DEVICE_INFO_HALF_FP_CONFIG:
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I think half support will depend on the architecture. x86 seems to emulate it, but this is probably still better than what we had before, and I don't think we officially support the "bad" architectures yet.

case UR_DEVICE_INFO_SINGLE_FP_CONFIG:
case UR_DEVICE_INFO_DOUBLE_FP_CONFIG: {
ur_device_fp_capability_flags_t DoubleFPValue = 0;
return ReturnValue(DoubleFPValue);
// All fp types are supported, return minimum flags to indicate support.
Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did we previously report, incorrectly, that we didn't support double, etc.?

Copy link
Contributor Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

No, the adapter was reporting "cl_khr_fp16, cl_khr_fp64 " for UR_DEVICE_INFO_EXTENSIONS, which I've removed and replaced with this.

Copy link

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

So this bit didn't get called for the extensions then, but it was still wrong?

// TODO: look at this in more detail.
ur_device_fp_capability_flags_t SupportedFlags =
UR_DEVICE_FP_CAPABILITY_FLAG_DENORM |
UR_DEVICE_FP_CAPABILITY_FLAG_INF_NAN |
UR_DEVICE_FP_CAPABILITY_FLAG_ROUND_TO_NEAREST |
UR_DEVICE_FP_CAPABILITY_FLAG_FMA;
;
return ReturnValue(SupportedFlags);
}
case UR_DEVICE_INFO_MAX_WORK_ITEM_DIMENSIONS:
return ReturnValue(uint32_t{3});
Expand Down