Skip to content

Commit 87db8e9

Browse files
[OpenMP][Offload] Add SPMD-No-Loop mode to OpenMP offload runtime (llvm#154105)
Kernels that are marked as SPMD-No-Loop should be launched with a sufficient number of teams and threads to cover the loop iteration space. No-Loop mode is described in the RFC: https://discourse.llvm.org/t/rfc-no-loop-mode-for-openmp-gpu-kernels/87517/
1 parent bd2539f commit 87db8e9

File tree

4 files changed

+16
-3
lines changed

4 files changed

+16
-3
lines changed

llvm/include/llvm/Frontend/OpenMP/OMPDeviceConstants.h

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -23,7 +23,8 @@ enum OMPTgtExecModeFlags : unsigned char {
2323
OMP_TGT_EXEC_MODE_GENERIC = 1 << 0,
2424
OMP_TGT_EXEC_MODE_SPMD = 1 << 1,
2525
OMP_TGT_EXEC_MODE_GENERIC_SPMD =
26-
OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD
26+
OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD,
27+
OMP_TGT_EXEC_MODE_SPMD_NO_LOOP = 1 << 2 | OMP_TGT_EXEC_MODE_SPMD
2728
};
2829

2930
} // end namespace omp

offload/DeviceRTL/src/Kernel.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ enum OMPTgtExecModeFlags : unsigned char {
3030
OMP_TGT_EXEC_MODE_GENERIC = 1 << 0,
3131
OMP_TGT_EXEC_MODE_SPMD = 1 << 1,
3232
OMP_TGT_EXEC_MODE_GENERIC_SPMD =
33-
OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD
33+
OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD,
34+
OMP_TGT_EXEC_MODE_SPMD_NO_LOOP = 1 << 2 | OMP_TGT_EXEC_MODE_SPMD
3435
};
3536

3637
static void

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 8 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -434,6 +434,8 @@ struct GenericKernelTy {
434434
return "Generic";
435435
case OMP_TGT_EXEC_MODE_GENERIC_SPMD:
436436
return "Generic-SPMD";
437+
case OMP_TGT_EXEC_MODE_SPMD_NO_LOOP:
438+
return "SPMD-No-Loop";
437439
}
438440
llvm_unreachable("Unknown execution mode!");
439441
}
@@ -471,7 +473,8 @@ struct GenericKernelTy {
471473
uint32_t BlockLimitClause[3], uint64_t LoopTripCount,
472474
uint32_t &NumThreads, bool IsNumThreadsFromUser) const;
473475

474-
/// Indicate if the kernel works in Generic SPMD, Generic or SPMD mode.
476+
/// Indicate if the kernel works in Generic SPMD, Generic, No-Loop
477+
/// or SPMD mode.
475478
bool isGenericSPMDMode() const {
476479
return KernelEnvironment.Configuration.ExecMode ==
477480
OMP_TGT_EXEC_MODE_GENERIC_SPMD;
@@ -486,6 +489,10 @@ struct GenericKernelTy {
486489
bool isBareMode() const {
487490
return KernelEnvironment.Configuration.ExecMode == OMP_TGT_EXEC_MODE_BARE;
488491
}
492+
bool isNoLoopMode() const {
493+
return KernelEnvironment.Configuration.ExecMode ==
494+
OMP_TGT_EXEC_MODE_SPMD_NO_LOOP;
495+
}
489496

490497
/// The kernel name.
491498
std::string Name;

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -662,6 +662,10 @@ uint32_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
662662
return std::min(NumTeamsClause[0], GenericDevice.getBlockLimit());
663663
}
664664

665+
// Return the number of teams required to cover the loop iterations.
666+
if (isNoLoopMode())
667+
return LoopTripCount > 0 ? (((LoopTripCount - 1) / NumThreads) + 1) : 1;
668+
665669
uint64_t DefaultNumBlocks = GenericDevice.getDefaultNumBlocks();
666670
uint64_t TripCountNumBlocks = std::numeric_limits<uint64_t>::max();
667671
if (LoopTripCount > 0) {

0 commit comments

Comments
 (0)