@@ -4130,6 +4130,23 @@ Expected<CanonicalLoopInfo *> OpenMPIRBuilder::createCanonicalLoop(
41304130 return createCanonicalLoop (LoopLoc, BodyGen, TripCount, Name);
41314131}
41324132
4133+ // Returns an LLVM function to call for initializing loop bounds using OpenMP
4134+ // static scheduling for composite `distribute parallel for` depending on
4135+ // `type`. Only i32 and i64 are supported by the runtime. Always interpret
4136+ // integers as unsigned similarly to CanonicalLoopInfo.
4137+ static FunctionCallee
4138+ getKmpcDistForStaticInitForType (Type *Ty, Module &M,
4139+ OpenMPIRBuilder &OMPBuilder) {
4140+ unsigned Bitwidth = Ty->getIntegerBitWidth ();
4141+ if (Bitwidth == 32 )
4142+ return OMPBuilder.getOrCreateRuntimeFunction (
4143+ M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_4u);
4144+ if (Bitwidth == 64 )
4145+ return OMPBuilder.getOrCreateRuntimeFunction (
4146+ M, omp::RuntimeFunction::OMPRTL___kmpc_dist_for_static_init_8u);
4147+ llvm_unreachable (" unknown OpenMP loop iterator bitwidth" );
4148+ }
4149+
41334150// Returns an LLVM function to call for initializing loop bounds using OpenMP
41344151// static scheduling depending on `type`. Only i32 and i64 are supported by the
41354152// runtime. Always interpret integers as unsigned similarly to
@@ -4164,7 +4181,10 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
41644181 // Declare useful OpenMP runtime functions.
41654182 Value *IV = CLI->getIndVar ();
41664183 Type *IVTy = IV->getType ();
4167- FunctionCallee StaticInit = getKmpcForStaticInitForType (IVTy, M, *this );
4184+ FunctionCallee StaticInit =
4185+ LoopType == WorksharingLoopType::DistributeForStaticLoop
4186+ ? getKmpcDistForStaticInitForType (IVTy, M, *this )
4187+ : getKmpcForStaticInitForType (IVTy, M, *this );
41684188 FunctionCallee StaticFini =
41694189 getOrCreateRuntimeFunction (M, omp::OMPRTL___kmpc_for_static_fini);
41704190
@@ -4200,9 +4220,15 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
42004220
42014221 // Call the "init" function and update the trip count of the loop with the
42024222 // value it produced.
4203- Builder.CreateCall (StaticInit,
4204- {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound,
4205- PUpperBound, PStride, One, Zero});
4223+ SmallVector<Value *, 10 > Args (
4224+ {SrcLoc, ThreadNum, SchedulingType, PLastIter, PLowerBound, PUpperBound});
4225+ if (LoopType == WorksharingLoopType::DistributeForStaticLoop) {
4226+ Value *PDistUpperBound =
4227+ Builder.CreateAlloca (IVTy, nullptr , " p.distupperbound" );
4228+ Args.push_back (PDistUpperBound);
4229+ }
4230+ Args.append ({PStride, One, Zero});
4231+ Builder.CreateCall (StaticInit, Args);
42064232 Value *LowerBound = Builder.CreateLoad (IVTy, PLowerBound);
42074233 Value *InclusiveUpperBound = Builder.CreateLoad (IVTy, PUpperBound);
42084234 Value *TripCountMinusOne = Builder.CreateSub (InclusiveUpperBound, LowerBound);
0 commit comments