Add UR_DEVICE_INFO_BFLOAT16_CONVERSIONS_NATIVE device info query

Adds enum value 127 to ur_device_info_t with printing support, a spec entry
in device.yml, per-adapter handling in urDeviceGetInfo, a conformance test,
and a urinfo line. CUDA, HIP and native_cpu report false; Level Zero and
OpenCL derive the answer from driver/device extension checks.

diff --git a/unified-runtime/include/ur_api.h b/unified-runtime/include/ur_api.h
index 711f761fa95f..b50a56f903f2 100644
--- a/unified-runtime/include/ur_api.h
+++ b/unified-runtime/include/ur_api.h
@@ -2241,6 +2241,8 @@ typedef enum ur_device_info_t {
   UR_DEVICE_INFO_MIN_POWER_LIMIT = 125,
   /// [int32_t][optional-query] return max power limit in milliwatts.
   UR_DEVICE_INFO_MAX_POWER_LIMIT = 126,
+  /// [::ur_bool_t] support for native bfloat16 conversions
+  UR_DEVICE_INFO_BFLOAT16_CONVERSIONS_NATIVE = 127,
   /// [::ur_bool_t] Returns true if the device supports the use of
   /// command-buffers.
   UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP = 0x1000,
diff --git a/unified-runtime/include/ur_print.hpp b/unified-runtime/include/ur_print.hpp
index 98ed3d9990c1..e97063011a4c 100644
--- a/unified-runtime/include/ur_print.hpp
+++ b/unified-runtime/include/ur_print.hpp
@@ -2954,6 +2954,9 @@ inline std::ostream &operator<<(std::ostream &os, enum ur_device_info_t value) {
   case UR_DEVICE_INFO_MAX_POWER_LIMIT:
     os << "UR_DEVICE_INFO_MAX_POWER_LIMIT";
     break;
+  case UR_DEVICE_INFO_BFLOAT16_CONVERSIONS_NATIVE:
+    os << "UR_DEVICE_INFO_BFLOAT16_CONVERSIONS_NATIVE";
+    break;
   case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP:
     os << "UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP";
     break;
@@ -4686,6 +4689,19 @@ inline ur_result_t printTagged(std::ostream &os, const void *ptr,
 
     os << ")";
   } break;
+  case UR_DEVICE_INFO_BFLOAT16_CONVERSIONS_NATIVE: {
+    const ur_bool_t *tptr = (const ur_bool_t *)ptr;
+    if (sizeof(ur_bool_t) > size) {
+      os << "invalid size (is: " << size
+         << ", expected: >=" << sizeof(ur_bool_t) << ")";
+      return UR_RESULT_ERROR_INVALID_SIZE;
+    }
+    os << (const void *)(tptr) << " (";
+
+    os << *tptr;
+
+    os << ")";
+  } break;
   case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: {
     const ur_bool_t *tptr = (const ur_bool_t *)ptr;
     if (sizeof(ur_bool_t) > size) {
diff --git a/unified-runtime/scripts/core/device.yml b/unified-runtime/scripts/core/device.yml
index a0c93262eb73..a335a333ef12 100644
--- a/unified-runtime/scripts/core/device.yml
+++ b/unified-runtime/scripts/core/device.yml
@@ -460,6 +460,8 @@ etors:
       desc: "[int32_t][optional-query] return min power limit in milliwatts."
     - name: MAX_POWER_LIMIT
       desc: "[int32_t][optional-query] return max power limit in milliwatts."
+    - name: BFLOAT16_CONVERSIONS_NATIVE
+      desc: "[$x_bool_t] support for native bfloat16 conversions"
 --- #--------------------------------------------------------------------------
 type: function
 desc: "Retrieves various information about device"
diff --git a/unified-runtime/source/adapters/cuda/device.cpp b/unified-runtime/source/adapters/cuda/device.cpp
index d984ad03ad7e..1fc901e1f10d 100644
--- a/unified-runtime/source/adapters/cuda/device.cpp
+++ b/unified-runtime/source/adapters/cuda/device.cpp
@@ -254,6 +254,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
         UR_MEMORY_SCOPE_CAPABILITY_FLAG_WORK_GROUP;
     return ReturnValue(Capabilities);
   }
+  case UR_DEVICE_INFO_BFLOAT16_CONVERSIONS_NATIVE:
+    return ReturnValue(false);
   case UR_DEVICE_INFO_SUB_GROUP_SIZES_INTEL: {
     // NVIDIA devices only support one sub-group size (the warp size)
     int WarpSize = 0;
diff --git a/unified-runtime/source/adapters/hip/device.cpp b/unified-runtime/source/adapters/hip/device.cpp
index bef96fac18d6..ffbcfb5bce91 100644
--- a/unified-runtime/source/adapters/hip/device.cpp
+++ b/unified-runtime/source/adapters/hip/device.cpp
@@ -1023,6 +1023,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
 #else
     return ReturnValue(ur_bool_t{false});
 #endif
+  case UR_DEVICE_INFO_BFLOAT16_CONVERSIONS_NATIVE:
+    return ReturnValue(false);
   case UR_DEVICE_INFO_ASYNC_BARRIER:
     return ReturnValue(false);
   case UR_DEVICE_INFO_IL_VERSION:
diff --git a/unified-runtime/source/adapters/level_zero/device.cpp b/unified-runtime/source/adapters/level_zero/device.cpp
index 827a63361c12..2240de6fb983 100644
--- a/unified-runtime/source/adapters/level_zero/device.cpp
+++ b/unified-runtime/source/adapters/level_zero/device.cpp
@@ -1038,6 +1038,18 @@ ur_result_t urDeviceGetInfo(
       return ze2urResult(errc);
     return ReturnValue(UrRootDev);
   }
+  case UR_DEVICE_INFO_BFLOAT16_CONVERSIONS_NATIVE: {
+    // Reported as supported when the driver lists the bfloat16 conversions
+    // extension or the device ID matches one of the masks below.
+    // NOTE(review): the ID masks presumably cover devices that support the
+    // conversions without advertising the extension - confirm.
+    bool Bfloat16ConversionSupport =
+        (Device->Platform->zeDriverExtensionMap.count(
+            ZE_BFLOAT16_CONVERSIONS_EXT_NAME)) ||
+        ((Device->ZeDeviceProperties->deviceId & 0xfff) == 0x201 ||
+         (Device->ZeDeviceProperties->deviceId & 0xff0) == 0xbd0);
+    return ReturnValue(Bfloat16ConversionSupport);
+  }
   case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP:
     return ReturnValue(true);
   case UR_DEVICE_INFO_COMMAND_BUFFER_UPDATE_CAPABILITIES_EXP: {
diff --git a/unified-runtime/source/adapters/native_cpu/device.cpp b/unified-runtime/source/adapters/native_cpu/device.cpp
index 21bab5982441..ef6b4549e49d 100644
--- a/unified-runtime/source/adapters/native_cpu/device.cpp
+++ b/unified-runtime/source/adapters/native_cpu/device.cpp
@@ -344,6 +344,8 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
     return ReturnValue(bool{0});
   case UR_DEVICE_INFO_ATOMIC_64:
     return ReturnValue(bool{1});
+  case UR_DEVICE_INFO_BFLOAT16_CONVERSIONS_NATIVE:
+    return ReturnValue(bool{0});
   case UR_DEVICE_INFO_MEM_CHANNEL_SUPPORT:
     return ReturnValue(bool{0});
   case UR_DEVICE_INFO_IMAGE_SRGB:
diff --git a/unified-runtime/source/adapters/opencl/device.cpp b/unified-runtime/source/adapters/opencl/device.cpp
index 3e466b9f04db..4378f95b5b2c 100644
--- a/unified-runtime/source/adapters/opencl/device.cpp
+++ b/unified-runtime/source/adapters/opencl/device.cpp
@@ -1514,6 +1514,13 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
     return ReturnValue(UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_LOAD |
                        UR_EXP_DEVICE_2D_BLOCK_ARRAY_CAPABILITY_FLAG_STORE);
   }
+  case UR_DEVICE_INFO_BFLOAT16_CONVERSIONS_NATIVE: {
+    bool Supported = false;
+    UR_RETURN_ON_FAILURE(cl_adapter::checkDeviceExtensions(
+        cl_adapter::cast<cl_device_id>(hDevice),
+        {"cl_intel_bfloat16_conversions"}, Supported));
+    return ReturnValue(Supported);
+  }
   case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP: {
     cl_device_id Dev = cl_adapter::cast<cl_device_id>(hDevice);
     size_t ExtSize = 0;
diff --git a/unified-runtime/test/conformance/device/urDeviceGetInfo.cpp b/unified-runtime/test/conformance/device/urDeviceGetInfo.cpp
index 6ddfc973088d..00f303c1b206 100644
--- a/unified-runtime/test/conformance/device/urDeviceGetInfo.cpp
+++ b/unified-runtime/test/conformance/device/urDeviceGetInfo.cpp
@@ -2561,6 +2561,24 @@ TEST_P(urDeviceGetInfoTest, SuccessUseNativeAssert) {
                              property_value);
 }
 
+TEST_P(urDeviceGetInfoTest, SuccessBfloat16ConversionsNative) {
+  size_t property_size = 0;
+  const ur_device_info_t property_name =
+      UR_DEVICE_INFO_BFLOAT16_CONVERSIONS_NATIVE;
+
+  ASSERT_SUCCESS_OR_OPTIONAL_QUERY(
+      urDeviceGetInfo(device, property_name, 0, nullptr, &property_size),
+      property_name);
+  ASSERT_EQ(property_size, sizeof(ur_bool_t));
+
+  // The query reports a ur_bool_t (size asserted above), so read it into one.
+  ur_bool_t property_value = false;
+  ASSERT_QUERY_RETURNS_VALUE(urDeviceGetInfo(device, property_name,
+                                             property_size, &property_value,
+                                             nullptr),
+                             property_value);
+}
+
 TEST_P(urDeviceGetInfoTest, SuccessThrottleReasons) {
   // TODO: enable when driver/library version mismatch is fixed in CI.
   // See https://github.com/intel/llvm/issues/17614
diff --git a/unified-runtime/tools/urinfo/urinfo.hpp b/unified-runtime/tools/urinfo/urinfo.hpp
index c44146df0727..dcf324d32895 100644
--- a/unified-runtime/tools/urinfo/urinfo.hpp
+++ b/unified-runtime/tools/urinfo/urinfo.hpp
@@ -341,6 +341,9 @@ inline void printDeviceInfos(ur_device_handle_t hDevice,
   std::cout << prefix;
   printDeviceInfo<int32_t>(hDevice, UR_DEVICE_INFO_MAX_POWER_LIMIT);
   std::cout << prefix;
+  printDeviceInfo<ur_bool_t>(hDevice,
+                             UR_DEVICE_INFO_BFLOAT16_CONVERSIONS_NATIVE);
+  std::cout << prefix;
   printDeviceInfo<ur_bool_t>(hDevice,
                              UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP);
   std::cout << prefix;