Skip to content

Commit 100879e

Browse files
committed
merge main into amd-staging
2 parents 7afab7e + 87db8e9 commit 100879e

File tree

3 files changed

+10
-5
lines changed

3 files changed

+10
-5
lines changed

offload/DeviceRTL/src/Kernel.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -30,7 +30,8 @@ enum OMPTgtExecModeFlags : unsigned char {
3030
OMP_TGT_EXEC_MODE_GENERIC = 1 << 0,
3131
OMP_TGT_EXEC_MODE_SPMD = 1 << 1,
3232
OMP_TGT_EXEC_MODE_GENERIC_SPMD =
33-
OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD
33+
OMP_TGT_EXEC_MODE_GENERIC | OMP_TGT_EXEC_MODE_SPMD,
34+
OMP_TGT_EXEC_MODE_SPMD_NO_LOOP = 1 << 2 | OMP_TGT_EXEC_MODE_SPMD
3435
};
3536

3637
static void

offload/plugins-nextgen/common/include/PluginInterface.h

Lines changed: 4 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -551,10 +551,10 @@ struct GenericKernelTy {
551551
/// The number of threads \p NumThreads can be adjusted by this method.
552552
/// \p IsNumThreadsFromUser is true if \p NumThreads is defined by user via
553553
/// thread_limit clause.
554-
virtual uint32_t getNumBlocks(GenericDeviceTy &GenericDevice,
555-
uint32_t BlockLimitClause[3],
556-
uint64_t LoopTripCount, uint32_t &NumThreads,
557-
bool IsNumThreadsFromUser) const;
554+
virtual
555+
uint32_t getNumBlocks(GenericDeviceTy &GenericDevice,
556+
uint32_t BlockLimitClause[3], uint64_t LoopTripCount,
557+
uint32_t &NumThreads, bool IsNumThreadsFromUser) const;
558558

559559
/// The kernel name.
560560
std::string Name;

offload/plugins-nextgen/common/src/PluginInterface.cpp

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -858,6 +858,10 @@ uint32_t GenericKernelTy::getNumBlocks(GenericDeviceTy &GenericDevice,
858858
return std::min(NumTeamsClause[0], GenericDevice.getBlockLimit());
859859
}
860860

861+
// Return the number of teams required to cover the loop iterations.
862+
if (isNoLoopMode())
863+
return LoopTripCount > 0 ? (((LoopTripCount - 1) / NumThreads) + 1) : 1;
864+
861865
uint64_t DefaultNumBlocks = GenericDevice.getDefaultNumBlocks();
862866
uint64_t TripCountNumBlocks = std::numeric_limits<uint64_t>::max();
863867
if (LoopTripCount > 0) {

0 commit comments

Comments
 (0)