Skip to content

Commit d03692a

Browse files
authored
[Offload] Rework MAX_WORK_GROUP_SIZE (#151926)
`MAX_WORK_GROUP_SIZE` now represents the maximum total number of work items in a single work group (across all dimensions), rather than the maximum size per dimension. `MAX_WORK_GROUP_SIZE_PER_DIMENSION` has been added, which has the old behaviour.
1 parent c6fd3d3 commit d03692a

File tree

6 files changed

+38
-6
lines changed

6 files changed

+38
-6
lines changed

offload/liboffload/API/Device.td

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -32,7 +32,8 @@ def DeviceInfo : Enum {
3232
TaggedEtor<"NAME", "char[]", "Device name">,
3333
TaggedEtor<"VENDOR", "char[]", "Device vendor">,
3434
TaggedEtor<"DRIVER_VERSION", "char[]", "Driver version">,
35-
TaggedEtor<"MAX_WORK_GROUP_SIZE", "ol_dimensions_t", "Maximum work group size in each dimension">,
35+
TaggedEtor<"MAX_WORK_GROUP_SIZE", "uint32_t", "Maximum total work group size in work items">,
36+
TaggedEtor<"MAX_WORK_GROUP_SIZE_PER_DIMENSION", "ol_dimensions_t", "Maximum work group size in each dimension">,
3637
];
3738
}
3839

offload/liboffload/src/OffloadImpl.cpp

Lines changed: 14 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -326,6 +326,18 @@ Error olGetDeviceInfoImplDetail(ol_device_handle_t Device,
326326
}
327327

328328
case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE: {
329+
// Uint32 values
330+
if (!std::holds_alternative<uint64_t>(Entry->Value))
331+
return makeError(ErrorCode::BACKEND_FAILURE,
332+
"plugin returned incorrect type");
333+
auto Value = std::get<uint64_t>(Entry->Value);
334+
if (Value > std::numeric_limits<uint32_t>::max())
335+
return makeError(ErrorCode::BACKEND_FAILURE,
336+
"plugin returned out of range device info");
337+
return Info.write(static_cast<uint32_t>(Value));
338+
}
339+
340+
case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION: {
329341
// {x, y, z} triples
330342
ol_dimensions_t Out{0, 0, 0};
331343

@@ -375,6 +387,8 @@ Error olGetDeviceInfoImplDetailHost(ol_device_handle_t Device,
375387
case OL_DEVICE_INFO_DRIVER_VERSION:
376388
return Info.writeString(LLVM_VERSION_STRING);
377389
case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE:
390+
return Info.write<uint64_t>(1);
391+
case OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION:
378392
return Info.write<ol_dimensions_t>(ol_dimensions_t{1, 1, 1});
379393
default:
380394
return createOffloadError(ErrorCode::INVALID_ENUMERATION,

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -2702,13 +2702,14 @@ struct AMDGPUDeviceTy : public GenericDeviceTy, AMDGenericDeviceTy {
27022702

27032703
Status = getDeviceAttrRaw(HSA_AGENT_INFO_WORKGROUP_MAX_SIZE, TmpUInt);
27042704
if (Status == HSA_STATUS_SUCCESS)
2705-
Info.add("Workgroup Max Size", TmpUInt);
2705+
Info.add("Workgroup Max Size", TmpUInt, "",
2706+
DeviceInfo::MAX_WORK_GROUP_SIZE);
27062707

27072708
Status = getDeviceAttrRaw(HSA_AGENT_INFO_WORKGROUP_MAX_DIM, WorkgrpMaxDim);
27082709
if (Status == HSA_STATUS_SUCCESS) {
27092710
auto &MaxSize =
27102711
*Info.add("Workgroup Max Size per Dimension", std::monostate{}, "",
2711-
DeviceInfo::MAX_WORK_GROUP_SIZE);
2712+
DeviceInfo::MAX_WORK_GROUP_SIZE_PER_DIMENSION);
27122713
MaxSize.add("x", WorkgrpMaxDim[0]);
27132714
MaxSize.add("y", WorkgrpMaxDim[1]);
27142715
MaxSize.add("z", WorkgrpMaxDim[2]);

offload/plugins-nextgen/cuda/src/rtl.cpp

Lines changed: 3 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -977,10 +977,11 @@ struct CUDADeviceTy : public GenericDeviceTy {
977977

978978
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_THREADS_PER_BLOCK, TmpInt);
979979
if (Res == CUDA_SUCCESS)
980-
Info.add("Maximum Threads per Block", TmpInt);
980+
Info.add("Maximum Threads per Block", TmpInt, "",
981+
DeviceInfo::MAX_WORK_GROUP_SIZE);
981982

982983
auto &MaxBlock = *Info.add("Maximum Block Dimensions", std::monostate{}, "",
983-
DeviceInfo::MAX_WORK_GROUP_SIZE);
984+
DeviceInfo::MAX_WORK_GROUP_SIZE_PER_DIMENSION);
984985
Res = getDeviceAttrRaw(CU_DEVICE_ATTRIBUTE_MAX_BLOCK_DIM_X, TmpInt);
985986
if (Res == CUDA_SUCCESS)
986987
MaxBlock.add("x", TmpInt);

offload/unittests/OffloadAPI/device/olGetDeviceInfo.cpp

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -78,9 +78,17 @@ TEST_P(olGetDeviceInfoTest, SuccessDriverVersion) {
7878
}
7979

8080
TEST_P(olGetDeviceInfoTest, SuccessMaxWorkGroupSize) {
81-
ol_dimensions_t Value{0, 0, 0};
81+
uint32_t Value;
8282
ASSERT_SUCCESS(olGetDeviceInfo(Device, OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE,
8383
sizeof(Value), &Value));
84+
ASSERT_GT(Value, 0u);
85+
}
86+
87+
TEST_P(olGetDeviceInfoTest, SuccessMaxWorkGroupSizePerDimension) {
88+
ol_dimensions_t Value{0, 0, 0};
89+
ASSERT_SUCCESS(
90+
olGetDeviceInfo(Device, OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION,
91+
sizeof(Value), &Value));
8492
ASSERT_GT(Value.x, 0u);
8593
ASSERT_GT(Value.y, 0u);
8694
ASSERT_GT(Value.z, 0u);

offload/unittests/OffloadAPI/device/olGetDeviceInfoSize.cpp

Lines changed: 7 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -48,6 +48,13 @@ TEST_P(olGetDeviceInfoSizeTest, SuccessMaxWorkGroupSize) {
4848
size_t Size = 0;
4949
ASSERT_SUCCESS(
5050
olGetDeviceInfoSize(Device, OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE, &Size));
51+
ASSERT_EQ(Size, sizeof(uint32_t));
52+
}
53+
54+
TEST_P(olGetDeviceInfoSizeTest, SuccessMaxWorkGroupSizePerDimension) {
55+
size_t Size = 0;
56+
ASSERT_SUCCESS(olGetDeviceInfoSize(
57+
Device, OL_DEVICE_INFO_MAX_WORK_GROUP_SIZE_PER_DIMENSION, &Size));
5158
ASSERT_EQ(Size, sizeof(ol_dimensions_t));
5259
ASSERT_EQ(Size, sizeof(uint32_t) * 3);
5360
}

0 commit comments

Comments
 (0)