Skip to content

Commit 7197fa5

Browse files
authored
[Offload] Cleanups for bare kernels after recent upstream change. (llvm#2085)
2 parents 20c12f7 + 726a686 commit 7197fa5

File tree

4 files changed

+16
-29
lines changed

4 files changed

+16
-29
lines changed

clang/lib/CodeGen/CGOpenMPRuntimeGPU.cpp

Lines changed: 1 addition & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1048,10 +1048,9 @@ void CGOpenMPRuntimeGPU::emitTargetOutlinedFunction(
10481048
CGM.emitNxResult("[No-Loop/Big-Jump-Loop/Xteam]", D,
10491049
CodeGenModule::NxNonSPMD));
10501050
}
1051-
// Note that bare kernels always run in SPMD mode.
10521051
setPropertyExecutionMode(
10531052
CGM, OutlinedFn->getName(),
1054-
IsBareKernel ? OMP_TGT_EXEC_MODE_SPMD
1053+
IsBareKernel ? OMP_TGT_EXEC_MODE_BARE
10551054
: computeExecutionMode(Mode, DirectiveStmt, CGM));
10561055

10571056
if (Mode && DirectiveStmt)

offload/plugins-nextgen/amdgpu/src/rtl.cpp

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -961,11 +961,10 @@ struct AMDGPUKernelTy : public GenericKernelTy {
961961
/// user-defined threads and block clauses.
962962
uint32_t getNumThreads(GenericDeviceTy &GenericDevice,
963963
uint32_t ThreadLimitClause[3]) const override {
964-
// On amd-staging, bare kernels go through this codepath. Legacy flang
965-
// kernels show up as bare kernels since kernel-env is not generated.
966-
// In order to accommodate bare kernels, disable this assert.
967-
// assert(ThreadLimitClause[1] == 1 && ThreadLimitClause[2] == 1 &&
968-
// "Multi dimensional launch not supported yet.");
964+
assert(!isBareMode() && "bare kernel should not call this function");
965+
966+
assert(ThreadLimitClause[1] == 1 && ThreadLimitClause[2] == 1 &&
967+
"Multi dimensional launch not supported yet.");
969968

970969
// Honor OMP_TEAMS_THREAD_LIMIT environment variable and
971970
// num_threads/thread_limit clause for BigJumpLoop and NoLoop kernel types.
@@ -1014,11 +1013,10 @@ struct AMDGPUKernelTy : public GenericKernelTy {
10141013
uint32_t NumTeamsClause[3], uint64_t LoopTripCount,
10151014
uint32_t &NumThreads,
10161015
bool IsNumThreadsFromUser) const override {
1017-
// On amd-staging, bare kernels go through this codepath. Legacy flang
1018-
// kernels show up as bare kernels since kernel-env is not generated.
1019-
// In order to accommodate bare kernels, disable this assert.
1020-
// assert(NumTeamsClause[1] == 1 && NumTeamsClause[2] == 1 &&
1021-
// "Multi dimensional launch not supported yet.");
1016+
assert(!isBareMode() && "bare kernel should not call this function");
1017+
1018+
assert(NumTeamsClause[1] == 1 && NumTeamsClause[2] == 1 &&
1019+
"Multi dimensional launch not supported yet.");
10221020

10231021
const auto getNumGroupsFromThreadsAndTripCount =
10241022
[](const uint64_t TripCount, const uint32_t NumThreads) {

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 2 additions & 5 deletions
Original file line numberDiff line numberDiff line change
@@ -356,6 +356,7 @@ struct GenericKernelTy {
356356
return ExecutionMode == OMP_TGT_EXEC_MODE_GENERIC;
357357
}
358358
bool isSPMDMode() const { return ExecutionMode == OMP_TGT_EXEC_MODE_SPMD; }
359+
bool isBareMode() const { return ExecutionMode == OMP_TGT_EXEC_MODE_BARE; }
359360

360361
/// AMD-only execution modes
361362
bool isBigJumpLoopMode() const {
@@ -374,7 +375,7 @@ struct GenericKernelTy {
374375
protected:
375376
/// Get the execution mode name of the kernel.
376377
const char *getExecutionModeName() const {
377-
switch (KernelEnvironment.Configuration.ExecMode) {
378+
switch (ExecutionMode) {
378379
case OMP_TGT_EXEC_MODE_BARE:
379380
return "BARE";
380381
case OMP_TGT_EXEC_MODE_SPMD:
@@ -440,10 +441,6 @@ struct GenericKernelTy {
440441
uint32_t BlockLimitClause[3],
441442
uint64_t LoopTripCount, uint32_t &NumThreads,
442443
bool IsNumThreadsFromUser) const;
443-
/// Indicate if the kernel works in Generic SPMD, Generic or SPMD mode.
444-
bool isBareMode() const {
445-
return KernelEnvironment.Configuration.ExecMode == OMP_TGT_EXEC_MODE_BARE;
446-
}
447444

448445
/// The kernel name.
449446
const char *Name;

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 5 additions & 12 deletions
Original file line numberDiff line numberDiff line change
@@ -514,10 +514,10 @@ Error GenericKernelTy::init(GenericDeviceTy &GenericDevice,
514514
// Consume the error since it is acceptable to fail.
515515
[[maybe_unused]] std::string ErrStr = toString(std::move(Err));
516516
DP("Failed to read execution mode for '%s': %s\n"
517-
"Using default SPMD (2) execution mode\n",
517+
"Using default Bare (0) execution mode\n",
518518
Name, ErrStr.data());
519519

520-
ExecutionMode = OMP_TGT_EXEC_MODE_SPMD;
520+
ExecutionMode = OMP_TGT_EXEC_MODE_BARE;
521521
} else {
522522
// Check that the retrieved execution mode is valid.
523523
if (!GenericKernelTy::isValidExecutionMode(ExecModeGlobal.getValue()))
@@ -744,12 +744,7 @@ Error GenericKernelTy::launch(GenericDeviceTy &GenericDevice, void **ArgPtrs,
744744
KernelRecord->getLaunchParamsForKernel(*this, GenericDevice);
745745
NumBlocks[0] = Teams;
746746
NumThreads[0] = Threads;
747-
} else {
748-
749-
// TODO fix workaround since IsBareKernel is not properly set for legacy
750-
// flang and specialized kernels since they don't use kernel-env. While
751-
// we can check for specialized kernels, we can't for legacy flang. So,
752-
// on amd-staging, all kernels including bare ones use this codepath.
747+
} else if (!isBareMode()) {
753748
NumThreads[0] = getNumThreads(GenericDevice, NumThreads);
754749

755750
std::pair<bool, uint32_t> AdjustInfo = adjustNumThreadsForLowTripCount(
@@ -823,8 +818,7 @@ KernelLaunchParamsTy GenericKernelTy::prepareArgs(
823818

824819
uint32_t GenericKernelTy::getNumThreads(GenericDeviceTy &GenericDevice,
825820
uint32_t ThreadLimitClause[3]) const {
826-
// TODO fix workaround since IsBareKernel is not properly set for all kernels.
827-
// assert(!IsBareKernel && "bare kernel should not call this function");
821+
assert(!isBareMode() && "bare kernel should not call this function");
828822

829823
assert(ThreadLimitClause[1] == 1 && ThreadLimitClause[2] == 1 &&
830824
"Multi dimensional launch not supported yet.");
@@ -846,8 +840,7 @@ uint32_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
846840
uint64_t LoopTripCount,
847841
uint32_t &NumThreads,
848842
bool IsNumThreadsFromUser) const {
849-
// TODO fix workaround since IsBareKernel is not properly set for all kernels.
850-
// assert(!IsBareKernel && "bare kernel should not call this function");
843+
assert(!isBareMode() && "bare kernel should not call this function");
851844

852845
assert(NumTeamsClause[1] == 1 && NumTeamsClause[2] == 1 &&
853846
"Multi dimensional launch not supported yet.");

0 commit comments

Comments
 (0)