Skip to content

Commit c32147a

Browse files
committed
[OpenMP] Fix num_iters in __kmpc_*_loop DeviceRTL functions
This PR should only be merged if llvm#133435 is approved upstream. It includes the changes from that PR, together with codegen changes that undo a workaround for the same issue which currently exists only downstream; that workaround would break if the upstream PR were merged on its own.
1 parent ddc4e69 commit c32147a

File tree

2 files changed

+5
-18
lines changed

2 files changed

+5
-18
lines changed

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 2 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -4523,23 +4523,10 @@ getKmpcForStaticLoopForType(Type *Ty, OpenMPIRBuilder *OMPBuilder,
45234523
static void createTargetLoopWorkshareCall(
45244524
OpenMPIRBuilder *OMPBuilder, WorksharingLoopType LoopType,
45254525
BasicBlock *InsertBlock, Value *Ident, Value *LoopBodyArg,
4526-
Type *ParallelTaskPtr, Value *TripCountOrig, Function &LoopBodyFn) {
4526+
Type *ParallelTaskPtr, Value *TripCount, Function &LoopBodyFn) {
4527+
Type *TripCountTy = TripCount->getType();
45274528
Module &M = OMPBuilder->M;
45284529
IRBuilder<> &Builder = OMPBuilder->Builder;
4529-
Value *TripCount = TripCountOrig;
4530-
// FIXME(JAN): The trip count is 1 larger than it should be for GPU, this may
4531-
// not be the right way to fix it, but this works for now.
4532-
if (OMPBuilder->Config.isGPU()) {
4533-
Builder.restoreIP({InsertBlock, std::prev(InsertBlock->end())});
4534-
LLVMContext &Ctx = M.getContext();
4535-
Type *IVTy = TripCountOrig->getType();
4536-
Type *InternalIVTy = IVTy->getIntegerBitWidth() <= 32
4537-
? Type::getInt32Ty(Ctx)
4538-
: Type::getInt64Ty(Ctx);
4539-
Constant *One = ConstantInt::get(InternalIVTy, 1);
4540-
TripCount = Builder.CreateSub(TripCountOrig, One, "modified_trip_count");
4541-
}
4542-
Type *TripCountTy = TripCount->getType();
45434530
FunctionCallee RTLFn =
45444531
getKmpcForStaticLoopForType(TripCountTy, OMPBuilder, LoopType);
45454532
SmallVector<Value *, 8> RealArgs;

offload/DeviceRTL/src/Workshare.cpp

Lines changed: 3 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -969,19 +969,19 @@ template <typename Ty> class StaticLoopChunker {
969969
IdentTy *loc, void (*fn)(TY, void *), void *arg, TY num_iters, \
970970
TY num_threads, TY block_chunk, TY thread_chunk) { \
971971
ompx::StaticLoopChunker<TY>::DistributeFor( \
972-
loc, fn, arg, num_iters + 1, num_threads, block_chunk, thread_chunk); \
972+
loc, fn, arg, num_iters, num_threads, block_chunk, thread_chunk); \
973973
} \
974974
[[gnu::flatten, clang::always_inline]] void \
975975
__kmpc_distribute_static_loop##BW(IdentTy *loc, void (*fn)(TY, void *), \
976976
void *arg, TY num_iters, \
977977
TY block_chunk) { \
978-
ompx::StaticLoopChunker<TY>::Distribute(loc, fn, arg, num_iters + 1, \
978+
ompx::StaticLoopChunker<TY>::Distribute(loc, fn, arg, num_iters, \
979979
block_chunk); \
980980
} \
981981
[[gnu::flatten, clang::always_inline]] void __kmpc_for_static_loop##BW( \
982982
IdentTy *loc, void (*fn)(TY, void *), void *arg, TY num_iters, \
983983
TY num_threads, TY thread_chunk) { \
984-
ompx::StaticLoopChunker<TY>::For(loc, fn, arg, num_iters + 1, num_threads, \
984+
ompx::StaticLoopChunker<TY>::For(loc, fn, arg, num_iters, num_threads, \
985985
thread_chunk); \
986986
}
987987

0 commit comments

Comments
 (0)