Skip to content

Commit ffb756d

Browse files
authored
[Offload] Add OL_DEVICE_INFO_MAX_WORK_SIZE[_PER_DIMENSION] (llvm#155823)
These properties report the total number of work items that the device supports, overall and per dimension; the equivalent MAX_WORK_GROUP_SIZE[_PER_DIMENSION] properties apply only to a single work group.
1 parent 9e5d8bd commit ffb756d

File tree

6 files changed

+44
-3
lines changed

6 files changed

+44
-3
lines changed

offload/liboffload/API/Device.td

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -33,6 +33,8 @@ def ol_device_info_t : Enum {
3333
TaggedEtor<"DRIVER_VERSION", "char[]", "Driver version">,
3434
TaggedEtor<"MAX_WORK_GROUP_SIZE", "uint32_t", "Maximum total work group size in work items">,
3535
TaggedEtor<"MAX_WORK_GROUP_SIZE_PER_DIMENSION", "ol_dimensions_t", "Maximum work group size in each dimension">,
36+
TaggedEtor<"MAX_WORK_SIZE", "uint32_t", "Maximum total work items">,
37+
TaggedEtor<"MAX_WORK_SIZE_PER_DIMENSION", "ol_dimensions_t", "Maximum work items in each dimension">,
3638
TaggedEtor<"VENDOR_ID", "uint32_t", "A unique vendor device identifier assigned by PCI-SIG">,
3739
TaggedEtor<"NUM_COMPUTE_UNITS", "uint32_t", "The number of parallel compute units available to the device">,
3840
TaggedEtor<"MAX_CLOCK_FREQUENCY", "uint32_t", "The maximum configured clock frequency of this device in MHz">,

offload/liboffload/src/OffloadImpl.cpp

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -456,6 +456,7 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
456456
}
457457

458458
case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE:
459+
case OL_DEVICE_INFO_MAX_WORK_SIZE:
459460
case OL_DEVICE_INFO_VENDOR_ID:
460461
case OL_DEVICE_INFO_NUM_COMPUTE_UNITS:
461462
case OL_DEVICE_INFO_ADDRESS_BITS:
@@ -472,6 +473,7 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
472473
return Info.write(static_cast<uint32_t>(Value));
473474
}
474475

476+
case OL_DEVICE_INFO_MAX_WORK_SIZE_PER_DIMENSION:
475477
case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION: {
476478
// {x, y, z} triples
477479
ol_dimensions_t Out{0, 0, 0};
@@ -510,6 +512,8 @@ Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device,
510512
assert(Device == OffloadContext::get().HostDevice());
511513
InfoWriter Info(PropSize, PropValue, PropSizeRet);
512514

515+
constexpr auto uint32_max = std::numeric_limits<uint32_t>::max();
516+
513517
switch (PropName) {
514518
case OL_DEVICE_INFO_PLATFORM:
515519
return Info.write<void *>(Device->Platform);
@@ -527,6 +531,11 @@ Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device,
527531
return Info.write<uint32_t>(1);
528532
case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION:
529533
return Info.write<ol_dimensions_t>(ol_dimensions_t{1, 1, 1});
534+
case OL_DEVICE_INFO_MAX_WORK_SIZE:
535+
return Info.write<uint32_t>(uint32_max);
536+
case OL_DEVICE_INFO_MAX_WORK_SIZE_PER_DIMENSION:
537+
return Info.write<ol_dimensions_t>(
538+
ol_dimensions_t{uint32_max, uint32_max, uint32_max});
530539
case OL_DEVICE_INFO_VENDOR_ID:
531540
return Info.write<uint32_t>(0);
532541
case OL_DEVICE_INFO_NUM_COMPUTE_UNITS:

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2924,11 +2924,12 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
29242924

29252925
Status = getDeviceAttrRaw(HSA_AGENT_INFO_GRID_MAX_SIZE, TmpUInt);
29262926
if (Status == HSA_STATUS_SUCCESS)
2927-
Info.add("Grid Max Size", TmpUInt);
2927+
Info.add("Grid Max Size", TmpUInt, "", DeviceInfo::MAX_WORK_SIZE);
29282928

29292929
Status = getDeviceAttrRaw(HSA_AGENT_INFO_GRID_MAX_DIM, GridMaxDim);
29302930
if (Status == HSA_STATUS_SUCCESS) {
2931-
auto &MaxDim = *Info.add("Grid Max Size per Dimension");
2931+
auto &MaxDim = *Info.add("Grid Max Size per Dimension", std::monostate{},
2932+
"", DeviceInfo::MAX_WORK_SIZE_PER_DIMENSION);
29322933
MaxDim.add("x", GridMaxDim.x);
29332934
MaxDim.add("y", GridMaxDim.y);
29342935
MaxDim.add("z", GridMaxDim.z);

offload/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -1120,7 +1120,13 @@ struct CUDADeviceTy : public GenericDeviceTy {
11201120
if (Res == CUDA_SUCCESS)
11211121
MaxBlock.add("z", TmpInt);
11221122

1123-
auto &MaxGrid = *Info.add("Maximum Grid Dimensions", "");
1123+
// TODO: I assume CUDA devices have no limit on the total number of threads;
1124+
// verify this.
1125+
Info.add("Maximum Grid Size", std::numeric_limits<uint32_t>::max(), "",
1126+
DeviceInfo::MAX_WORK_SIZE);
1127+
1128+
auto &MaxGrid = *Info.add("Maximum Grid Dimensions", std::monostate{}, "",
1129+
DeviceInfo::MAX_WORK_SIZE_PER_DIMENSION);
11241130
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_GRID_DIM_X, TmpInt);
11251131
if (Res == CUDA_SUCCESS)
11261132
MaxGrid.add("x", TmpInt);

offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp

Lines changed: 13 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -145,6 +145,19 @@ TEST_P(olGetDeviceInfoTest, SuccessMaxWorkGroupSizePerDimension) {
145145
ASSERT_GT(Value.z, 0u);
146146
}
147147

148+
OL_DEVICE_INFO_TEST_VALUE_GT(MaxWorkSize, uint32_t,
149+
OL_DEVICE_INFO_MAX_WORK_SIZE, 0);
150+
151+
// Queries OL_DEVICE_INFO_MAX_WORK_SIZE_PER_DIMENSION into an ol_dimensions_t
// and checks that the query succeeds and that each of the x, y and z
// components is greater than zero.
TEST_P(olGetDeviceInfoTest, SuccessMaxWorkSizePerDimension) {
152+
ol_dimensions_t Value{0, 0, 0};
153+
ASSERT_SUCCESS(olGetDeviceInfo(Device,
154+
OL_DEVICE_INFO_MAX_WORK_SIZE_PER_DIMENSION,
155+
sizeof(Value), &Value));
156+
ASSERT_GT(Value.x, 0u);
157+
ASSERT_GT(Value.y, 0u);
158+
ASSERT_GT(Value.z, 0u);
159+
}
160+
148161
OL_DEVICE_INFO_TEST_DEVICE_VALUE_GT(VendorId, uint32_t,
149162
OL_DEVICE_INFO_VENDOR_ID, 0);
150163
OL_DEVICE_INFO_TEST_HOST_SUCCESS(VendorId, uint32_t, OL_DEVICE_INFO_VENDOR_ID);

offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,6 +36,8 @@ OL_DEVICE_INFO_SIZE_TEST_NONZERO(Vendor, OL_DEVICE_INFO_VENDOR);
3636
OL_DEVICE_INFO_SIZE_TEST_NONZERO(DriverVersion, OL_DEVICE_INFO_DRIVER_VERSION);
3737
OL_DEVICE_INFO_SIZE_TEST_EQ(MaxWorkGroupSize, uint32_t,
3838
OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE);
39+
OL_DEVICE_INFO_SIZE_TEST_EQ(MaxWorkSize, uint32_t,
40+
OL_DEVICE_INFO_MAX_WORK_SIZE);
3941
OL_DEVICE_INFO_SIZE_TEST_EQ(VendorId, uint32_t, OL_DEVICE_INFO_VENDOR_ID);
4042
OL_DEVICE_INFO_SIZE_TEST_EQ(NumComputeUnits, uint32_t,
4143
OL_DEVICE_INFO_NUM_COMPUTE_UNITS);
@@ -77,6 +79,14 @@ TEST_P(olGetDeviceInfoSizeTest, SuccessMaxWorkGroupSizePerDimension) {
7779
ASSERT_EQ(Size, sizeof(uint32_t) * 3);
7880
}
7981

82+
// Checks that the size reported for OL_DEVICE_INFO_MAX_WORK_SIZE_PER_DIMENSION
// equals sizeof(ol_dimensions_t), which is also asserted to be three uint32_t
// values.
TEST_P(olGetDeviceInfoSizeTest, SuccessMaxWorkSizePerDimension) {
83+
size_t Size = 0;
84+
ASSERT_SUCCESS(olGetDeviceInfoSize(
85+
Device, OL_DEVICE_INFO_MAX_WORK_SIZE_PER_DIMENSION, &Size));
86+
ASSERT_EQ(Size, sizeof(ol_dimensions_t));
87+
ASSERT_EQ(Size, sizeof(uint32_t) * 3);
88+
}
89+
8090
TEST_P(olGetDeviceInfoSizeTest, InvalidNullHandle) {
8191
size_t Size = 0;
8292
ASSERT_ERROR(OL_ERRC_INVALID_NULL_HANDLE,

0 commit comments

Comments
 (0)