Skip to content

Commit 62c09ea

Browse files
dhruvachak authored and memfrob committed
Revert "[libomptarget] [amdgpu] Fix default setting of max flat workgroup size"
This reverts commit 2240b41ee4f30fe938975677a0a5a2c5c26d271b. A value of 0 for KernDescVal.WG_Size means the workgroup size is unknown, so it should be set to the default. The reverted change was made without this assumption. Reviewed By: JonChesterfield. Differential Revision: https://reviews.llvm.org/D105250
1 parent 56e230e commit 62c09ea

File tree

1 file changed

+5
-4
lines changed
  • openmp/libomptarget/plugins/amdgpu/src

1 file changed

+5
-4
lines changed

openmp/libomptarget/plugins/amdgpu/src/rtl.cpp

Lines changed: 5 additions & 4 deletions
Original file line number | Diff line number | Diff line change
@@ -1711,9 +1711,10 @@ __tgt_target_table *__tgt_rtl_load_binary_locked(int32_t device_id,
17111711
// Get ExecMode
17121712
ExecModeVal = KernDescVal.Mode;
17131713
DP("ExecModeVal %d\n", ExecModeVal);
1714-
// If KernDescVal.WG_Size is 0, it is equivalent to not
1715-
// specified. Hence, max_flat_workgroup_size is filtered out in
1716-
// getLaunchVals
1714+
if (KernDescVal.WG_Size == 0) {
1715+
KernDescVal.WG_Size = RTLDeviceInfoTy::Default_WG_Size;
1716+
DP("Setting KernDescVal.WG_Size to default %d\n", KernDescVal.WG_Size);
1717+
}
17171718
WGSizeVal = KernDescVal.WG_Size;
17181719
DP("WGSizeVal %d\n", WGSizeVal);
17191720
check("Loading KernDesc computation property", err);
@@ -1930,7 +1931,7 @@ launchVals getLaunchVals(int ConstWGSize, int ExecutionMode, int EnvTeamLimit,
19301931
}
19311932
}
19321933
// check flat_max_work_group_size attr here
1933-
if (ConstWGSize > 0 && threadsPerGroup > ConstWGSize) {
1934+
if (threadsPerGroup > ConstWGSize) {
19341935
threadsPerGroup = ConstWGSize;
19351936
DP("Reduced threadsPerGroup to flat-attr-group-size limit %d\n",
19361937
threadsPerGroup);

0 commit comments

Comments
 (0)