Skip to content

Commit c77cfed

Browse files
author
Georgi Mirazchiyski
committed
[Cuda] Implement device info ext properties for HW_THREADS_PER_EU and EU_SIMD_WIDTH on Cuda
1 parent 50f66ae commit c77cfed

File tree

2 files changed

+18
-2
lines changed

2 files changed

+18
-2
lines changed

source/adapters/cuda/device.cpp

Lines changed: 12 additions & 2 deletions
Original file line number | Diff line number | Diff line change
@@ -1082,13 +1082,23 @@ UR_APIEXPORT ur_result_t UR_APICALL urDeviceGetInfo(ur_device_handle_t hDevice,
10821082
case UR_DEVICE_INFO_COMPOSITE_DEVICE:
10831083
case UR_DEVICE_INFO_MAX_READ_WRITE_IMAGE_ARGS:
10841084
case UR_DEVICE_INFO_GPU_EU_COUNT:
1085-
case UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH:
10861085
case UR_DEVICE_INFO_GPU_EU_SLICES:
10871086
case UR_DEVICE_INFO_GPU_SUBSLICES_PER_SLICE:
10881087
case UR_DEVICE_INFO_GPU_EU_COUNT_PER_SUBSLICE:
1089-
case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU:
10901088
return UR_RESULT_ERROR_UNSUPPORTED_ENUMERATION;
10911089

1090+
// HW threads per EU: answered directly with the value the device handle
// exposes through getMaxHwThreadsPerCU(). In this commit's device.hpp hunk that
// member is filled from CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR.
case UR_DEVICE_INFO_GPU_HW_THREADS_PER_EU: {
1091+
return ReturnValue(hDevice->getMaxHwThreadsPerCU());
1092+
}
1093+
// EU SIMD width: queries the warp size from the CUDA driver, then reports
// getMaxHwThreadsPerCU() divided by that warp size.
case UR_DEVICE_INFO_GPU_EU_SIMD_WIDTH: {
1094+
int WarpSize{0};
1095+
// Ask the driver for this device's warp size; UR_CHECK_ERROR wraps the call.
UR_CHECK_ERROR(cuDeviceGetAttribute(
1096+
&WarpSize, CU_DEVICE_ATTRIBUTE_WARP_SIZE, hDevice->get()));
1097+
// WarpSize is the divisor below, so it is checked to be positive first.
detail::ur::assertion(WarpSize > 0);
1098+
// NOTE(review): the value reported is threads-per-CU / warp size, i.e. a
// count of warps, not the warp size itself -- confirm this is the intended
// meaning of EU_SIMD_WIDTH for this adapter.
uint32_t WarpCount = hDevice->getMaxHwThreadsPerCU() / WarpSize;
1099+
return ReturnValue(WarpCount);
1100+
}
1101+
10921102
case UR_DEVICE_INFO_COMMAND_BUFFER_SUPPORT_EXP:
10931103
case UR_DEVICE_INFO_COMMAND_BUFFER_EVENT_SUPPORT_EXP:
10941104
return ReturnValue(true);

source/adapters/cuda/device.hpp

Lines changed: 6 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -33,6 +33,7 @@ struct ur_device_handle_t_ {
3333
int MaxChosenLocalMem{0};
3434
bool MaxLocalMemSizeChosen{false};
3535
uint32_t NumComputeUnits{0};
36+
uint32_t MaxHwThreadsPerCU{0};
3637

3738
public:
3839
ur_device_handle_t_(native_type cuDevice, CUcontext cuContext, CUevent evBase,
@@ -58,6 +59,9 @@ struct ur_device_handle_t_ {
5859
UR_CHECK_ERROR(cuDeviceGetAttribute(
5960
reinterpret_cast<int *>(&NumComputeUnits),
6061
CU_DEVICE_ATTRIBUTE_MULTIPROCESSOR_COUNT, cuDevice));
62+
UR_CHECK_ERROR(cuDeviceGetAttribute(
63+
reinterpret_cast<int *>(&MaxHwThreadsPerCU),
64+
CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_MULTIPROCESSOR, cuDevice));
6165

6266
// Set local mem max size if env var is present
6367
static const char *LocalMemSizePtrUR =
@@ -114,6 +118,8 @@ struct ur_device_handle_t_ {
114118
// Returns the current value of the MaxLocalMemSizeChosen flag.
bool maxLocalMemSizeChosen() {
  return MaxLocalMemSizeChosen;
}
115119

116120
// Read-only, non-throwing accessor for the NumComputeUnits member.
uint32_t getNumComputeUnits() const noexcept {
  return NumComputeUnits;
}
121+
122+
// Read-only, non-throwing accessor for the MaxHwThreadsPerCU member.
uint32_t getMaxHwThreadsPerCU() const noexcept {
  return MaxHwThreadsPerCU;
}
117123
};
118124

119125
// Forward declaration only; the definition is not part of this view.
// Takes a device handle and a CUdevice_attribute selector and returns an int
// -- presumably the queried attribute's value; confirm against the definition.
int getAttribute(ur_device_handle_t Device, CUdevice_attribute Attribute);

0 commit comments

Comments
 (0)