Skip to content

Commit bbcb902

Browse files
committed
Handle dist_schedule in unique workshare loop.
Note that at this stage the wsloop is still inserted at the wrong location; the insertion point needs to be fixed so that the `.ll` output matches what Clang emits.
1 parent 08ed236 commit bbcb902

File tree

3 files changed

+32
-81
lines changed

3 files changed

+32
-81
lines changed

llvm/include/llvm/Frontend/OpenMP/OMPIRBuilder.h

Lines changed: 3 additions & 15 deletions
Original file line numberDiff line numberDiff line change
@@ -1098,15 +1098,12 @@ class OpenMPIRBuilder {
10981098
/// \param LoopType Type of workshare loop.
10991099
/// \param HasDistSchedule Defines if the clause being lowered is
11001100
/// dist_schedule as this is handled slightly differently
1101-
/// \param DistScheduleSchedType Defines the Schedule Type for the Distribute
1102-
/// loop. Defaults to None if no Distribute loop is present.
11031101
///
11041102
/// \returns Point where to insert code after the workshare construct.
11051103
InsertPointOrErrorTy applyStaticWorkshareLoop(
11061104
DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
11071105
omp::WorksharingLoopType LoopType, bool NeedsBarrier,
1108-
bool HasDistSchedule = false,
1109-
omp::OMPScheduleType DistScheduleSchedType = omp::OMPScheduleType::None);
1106+
bool HasDistSchedule = false);
11101107

11111108
/// Modifies the canonical loop to be a statically-scheduled workshare loop with a
11121109
/// user-specified chunk size.
@@ -1121,20 +1118,13 @@ class OpenMPIRBuilder {
11211118
/// \param ChunkSize The user-specified chunk size.
11221119
/// \param SchedType Optional type of scheduling to be passed to the init
11231120
/// function.
1124-
/// \param DistScheduleChunkSize The size of dist_schedule chunk considered
1125-
/// as a unit when
1126-
/// scheduling. If \p nullptr, defaults to 1.
1127-
/// \param DistScheduleSchedType Defines the Schedule Type for the Distribute
1128-
/// loop. Defaults to None if no Distribute loop is present.
11291121
///
11301122
/// \returns Point where to insert code after the workshare construct.
11311123
InsertPointOrErrorTy applyStaticChunkedWorkshareLoop(
11321124
DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
11331125
bool NeedsBarrier, Value *ChunkSize,
11341126
omp::OMPScheduleType SchedType =
1135-
omp::OMPScheduleType::UnorderedStaticChunked,
1136-
Value *DistScheduleChunkSize = nullptr,
1137-
omp::OMPScheduleType DistScheduleSchedType = omp::OMPScheduleType::None);
1127+
omp::OMPScheduleType::UnorderedStaticChunked);
11381128

11391129
/// Modifies the canonical loop to be a dynamically-scheduled workshare loop.
11401130
///
@@ -1216,8 +1206,6 @@ class OpenMPIRBuilder {
12161206
/// \param HasDistSchedule Defines if the clause being lowered is
12171207
/// dist_schedule as this is handled slightly differently
12181208
///
1219-
/// \param ChunkSize The chunk size for dist_schedule loop
1220-
///
12211209
/// \returns Point where to insert code after the workshare construct.
12221210
LLVM_ABI InsertPointOrErrorTy applyWorkshareLoop(
12231211
DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
@@ -1228,7 +1216,7 @@ class OpenMPIRBuilder {
12281216
bool HasOrderedClause = false,
12291217
omp::WorksharingLoopType LoopType =
12301218
omp::WorksharingLoopType::ForStaticLoop,
1231-
bool HasDistSchedule = false, Value *DistScheduleChunkSize = nullptr);
1219+
bool HasDistSchedule = false);
12321220

12331221
/// Tile a loop nest.
12341222
///

llvm/lib/Frontend/OpenMP/OMPIRBuilder.cpp

Lines changed: 15 additions & 46 deletions
Original file line numberDiff line numberDiff line change
@@ -172,7 +172,7 @@ static const omp::GV &getGridValue(const Triple &T, Function *Kernel) {
172172
/// arguments.
173173
static OMPScheduleType
174174
getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks,
175-
bool HasSimdModifier, bool HasDistScheduleChunks) {
175+
bool HasSimdModifier) {
176176
// Currently, the default schedule is static.
177177
switch (ClauseKind) {
178178
case OMP_SCHEDULE_Default:
@@ -190,7 +190,7 @@ getOpenMPBaseScheduleType(llvm::omp::ScheduleKind ClauseKind, bool HasChunks,
190190
return HasSimdModifier ? OMPScheduleType::BaseRuntimeSimd
191191
: OMPScheduleType::BaseRuntime;
192192
case OMP_SCHEDULE_Distribute:
193-
return HasDistScheduleChunks ? OMPScheduleType::BaseDistributeChunked
193+
return HasChunks ? OMPScheduleType::BaseDistributeChunked
194194
: OMPScheduleType::BaseDistribute;
195195
}
196196
llvm_unreachable("unhandled schedule clause argument");
@@ -260,10 +260,9 @@ getOpenMPMonotonicityScheduleType(OMPScheduleType ScheduleType,
260260
static OMPScheduleType
261261
computeOpenMPScheduleType(ScheduleKind ClauseKind, bool HasChunks,
262262
bool HasSimdModifier, bool HasMonotonicModifier,
263-
bool HasNonmonotonicModifier, bool HasOrderedClause,
264-
bool HasDistScheduleChunks) {
263+
bool HasNonmonotonicModifier, bool HasOrderedClause) {
265264
OMPScheduleType BaseSchedule = getOpenMPBaseScheduleType(
266-
ClauseKind, HasChunks, HasSimdModifier, HasDistScheduleChunks);
265+
ClauseKind, HasChunks, HasSimdModifier);
267266
OMPScheduleType OrderedSchedule =
268267
getOpenMPOrderingScheduleType(BaseSchedule, HasOrderedClause);
269268
OMPScheduleType Result = getOpenMPMonotonicityScheduleType(
@@ -4643,8 +4642,7 @@ static FunctionCallee getKmpcForStaticInitForType(Type *Ty, Module &M,
46434642

46444643
OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
46454644
DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
4646-
WorksharingLoopType LoopType, bool NeedsBarrier, bool HasDistSchedule,
4647-
OMPScheduleType DistScheduleSchedType) {
4645+
WorksharingLoopType LoopType, bool NeedsBarrier, bool HasDistSchedule) {
46484646
assert(CLI->isValid() && "Requires a valid canonical loop");
46494647
assert(!isConflictIP(AllocaIP, CLI->getPreheaderIP()) &&
46504648
"Require dedicated allocate IP");
@@ -4714,12 +4712,6 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyStaticWorkshareLoop(
47144712
Builder.CreateCall(StaticInit, Args);
47154713
};
47164714
BuildInitCall(SchedulingType, Builder);
4717-
if (HasDistSchedule &&
4718-
LoopType != WorksharingLoopType::DistributeStaticLoop) {
4719-
Constant *DistScheduleSchedType = ConstantInt::get(
4720-
I32Type, static_cast<int>(omp::OMPScheduleType::OrderedDistribute));
4721-
BuildInitCall(DistScheduleSchedType, Builder);
4722-
}
47234715
Value *LowerBound = Builder.CreateLoad(IVTy, PLowerBound);
47244716
Value *InclusiveUpperBound = Builder.CreateLoad(IVTy, PUpperBound);
47254717
Value *TripCountMinusOne = Builder.CreateSub(InclusiveUpperBound, LowerBound);
@@ -4792,10 +4784,9 @@ static void applyParallelAccessesMetadata(CanonicalLoopInfo *CLI,
47924784
OpenMPIRBuilder::InsertPointOrErrorTy
47934785
OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
47944786
DebugLoc DL, CanonicalLoopInfo *CLI, InsertPointTy AllocaIP,
4795-
bool NeedsBarrier, Value *ChunkSize, OMPScheduleType SchedType,
4796-
Value *DistScheduleChunkSize, OMPScheduleType DistScheduleSchedType) {
4787+
bool NeedsBarrier, Value *ChunkSize, OMPScheduleType SchedType) {
47974788
assert(CLI->isValid() && "Requires a valid canonical loop");
4798-
assert(ChunkSize || DistScheduleChunkSize && "Chunk size is required");
4789+
assert(ChunkSize && "Chunk size is required");
47994790

48004791
LLVMContext &Ctx = CLI->getFunction()->getContext();
48014792
Value *IV = CLI->getIndVar();
@@ -4817,7 +4808,7 @@ OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
48174808
LoopInfo &&LI = LIA.run(*F, FAM);
48184809
Loop *L = LI.getLoopFor(CLI->getHeader());
48194810
SmallVector<Metadata *> LoopMDList;
4820-
if (ChunkSize || DistScheduleChunkSize)
4811+
if (ChunkSize)
48214812
applyParallelAccessesMetadata(CLI, Ctx, L, LI, LoopMDList);
48224813
addLoopMetadata(CLI, LoopMDList);
48234814

@@ -4839,22 +4830,17 @@ OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
48394830
CLI->setLastIter(PLastIter);
48404831

48414832
// Set up the source location value for the OpenMP runtime.
4842-
Builder.restoreIP(CLI->getPreheaderIP());
4833+
Builder.restoreIP(CLI->getPreheaderIP()); // -> sets insert point to omploop! Why?
48434834
Builder.SetCurrentDebugLocation(DL);
48444835

48454836
// TODO: Detect overflow in ubsan or max-out with current tripcount.
48464837
Value *CastedChunkSize = Builder.CreateZExtOrTrunc(
48474838
ChunkSize ? ChunkSize : Zero, InternalIVTy, "chunksize");
4848-
Value *CastestDistScheduleChunkSize = Builder.CreateZExtOrTrunc(
4849-
DistScheduleChunkSize ? DistScheduleChunkSize : Zero, InternalIVTy,
4850-
"distschedulechunksize");
48514839
Value *CastedTripCount =
48524840
Builder.CreateZExt(OrigTripCount, InternalIVTy, "tripcount");
48534841

48544842
Constant *SchedulingType =
48554843
ConstantInt::get(I32Type, static_cast<int>(SchedType));
4856-
Constant *DistSchedulingType =
4857-
ConstantInt::get(I32Type, static_cast<int>(DistScheduleSchedType));
48584844
Builder.CreateStore(Zero, PLowerBound);
48594845
Value *OrigUpperBound = Builder.CreateSub(CastedTripCount, One);
48604846
Builder.CreateStore(OrigUpperBound, PUpperBound);
@@ -4877,14 +4863,6 @@ OpenMPIRBuilder::applyStaticChunkedWorkshareLoop(
48774863
/*chunk=*/ChunkSize});
48784864
};
48794865
BuildInitCall(SchedulingType, CastedChunkSize, Builder);
4880-
if (DistScheduleSchedType != OMPScheduleType::None &&
4881-
SchedType != OMPScheduleType::OrderedDistributeChunked &&
4882-
SchedType != OMPScheduleType::OrderedDistribute) {
4883-
// We want to emit a second init function call for the dist_schedule clause
4884-
// to the Distribute construct. This should only be done however if a
4885-
// Workshare Loop is nested within a Distribute Construct
4886-
BuildInitCall(DistSchedulingType, CastestDistScheduleChunkSize, Builder);
4887-
}
48884866

48894867
// Load values written by the "init" function.
48904868
Value *FirstChunkStart =
@@ -5208,35 +5186,27 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyWorkshareLoop(
52085186
bool NeedsBarrier, omp::ScheduleKind SchedKind, Value *ChunkSize,
52095187
bool HasSimdModifier, bool HasMonotonicModifier,
52105188
bool HasNonmonotonicModifier, bool HasOrderedClause,
5211-
WorksharingLoopType LoopType, bool HasDistSchedule,
5212-
Value *DistScheduleChunkSize) {
5189+
WorksharingLoopType LoopType, bool HasDistSchedule) {
52135190
if (Config.isTargetDevice())
52145191
return applyWorkshareLoopTarget(DL, CLI, AllocaIP, LoopType);
52155192
OMPScheduleType EffectiveScheduleType = computeOpenMPScheduleType(
52165193
SchedKind, ChunkSize, HasSimdModifier, HasMonotonicModifier,
5217-
HasNonmonotonicModifier, HasOrderedClause, DistScheduleChunkSize);
5194+
HasNonmonotonicModifier, HasOrderedClause);
52185195

52195196
bool IsOrdered = (EffectiveScheduleType & OMPScheduleType::ModifierOrdered) ==
52205197
OMPScheduleType::ModifierOrdered;
5221-
OMPScheduleType DistScheduleSchedType = OMPScheduleType::None;
5222-
if (HasDistSchedule) {
5223-
DistScheduleSchedType = DistScheduleChunkSize
5224-
? OMPScheduleType::OrderedDistributeChunked
5225-
: OMPScheduleType::OrderedDistribute;
5226-
}
52275198
switch (EffectiveScheduleType & ~OMPScheduleType::ModifierMask) {
52285199
case OMPScheduleType::BaseStatic:
52295200
case OMPScheduleType::BaseDistribute:
5230-
assert(!ChunkSize || !DistScheduleChunkSize &&
5201+
assert(!ChunkSize &&
52315202
"No chunk size with static-chunked schedule");
52325203
if (IsOrdered && !HasDistSchedule)
52335204
return applyDynamicWorkshareLoop(DL, CLI, AllocaIP, EffectiveScheduleType,
52345205
NeedsBarrier, ChunkSize);
52355206
// FIXME: Monotonicity ignored?
5236-
if (DistScheduleChunkSize)
5207+
if (ChunkSize)
52375208
return applyStaticChunkedWorkshareLoop(
5238-
DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
5239-
DistScheduleChunkSize, DistScheduleSchedType);
5209+
DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType);
52405210
return applyStaticWorkshareLoop(DL, CLI, AllocaIP, LoopType, NeedsBarrier,
52415211
HasDistSchedule);
52425212

@@ -5247,8 +5217,7 @@ OpenMPIRBuilder::InsertPointOrErrorTy OpenMPIRBuilder::applyWorkshareLoop(
52475217
NeedsBarrier, ChunkSize);
52485218
// FIXME: Monotonicity ignored?
52495219
return applyStaticChunkedWorkshareLoop(
5250-
DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType,
5251-
DistScheduleChunkSize, DistScheduleSchedType);
5220+
DL, CLI, AllocaIP, NeedsBarrier, ChunkSize, EffectiveScheduleType);
52525221

52535222
case OMPScheduleType::BaseRuntime:
52545223
case OMPScheduleType::BaseAuto:

mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp

Lines changed: 14 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -2464,6 +2464,7 @@ convertOmpTaskwaitOp(omp::TaskwaitOp twOp, llvm::IRBuilderBase &builder,
24642464
static LogicalResult
24652465
convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
24662466
LLVM::ModuleTranslation &moduleTranslation) {
2467+
printf("CONVERTING WSLOOP\n");
24672468
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
24682469
auto wsloopOp = cast<omp::WsloopOp>(opInst);
24692470
if (failed(checkImplementationStatus(opInst)))
@@ -2487,19 +2488,6 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
24872488
chunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
24882489
}
24892490

2490-
omp::DistributeOp distributeOp = nullptr;
2491-
llvm::Value *distScheduleChunk = nullptr;
2492-
bool hasDistSchedule = false;
2493-
if (llvm::isa_and_present<omp::DistributeOp>(opInst.getParentOp())) {
2494-
distributeOp = cast<omp::DistributeOp>(opInst.getParentOp());
2495-
hasDistSchedule = distributeOp.getDistScheduleStatic();
2496-
if (distributeOp.getDistScheduleChunkSize()) {
2497-
llvm::Value *chunkVar = moduleTranslation.lookupValue(
2498-
distributeOp.getDistScheduleChunkSize());
2499-
distScheduleChunk = builder.CreateSExtOrTrunc(chunkVar, ivType);
2500-
}
2501-
}
2502-
25032491
PrivateVarsInfo privateVarsInfo(wsloopOp);
25042492

25052493
SmallVector<omp::DeclareReductionOp> reductionDecls;
@@ -2600,13 +2588,15 @@ convertOmpWsloop(Operation &opInst, llvm::IRBuilderBase &builder,
26002588
}
26012589

26022590
builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
2591+
printf("loopInfo Address: %p\n", loopInfo);
2592+
printf("Applying omp.wloop Workshare Loop\n");
26032593
llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
26042594
ompBuilder->applyWorkshareLoop(
26052595
ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
26062596
convertToScheduleKind(schedule), chunk, isSimd,
26072597
scheduleMod == omp::ScheduleModifier::monotonic,
26082598
scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
2609-
workshareLoopType, hasDistSchedule, distScheduleChunk);
2599+
workshareLoopType);
26102600

26112601
if (failed(handleError(wsloopIP, opInst)))
26122602
return failure();
@@ -3052,6 +3042,7 @@ convertOmpLoopNest(Operation &opInst, llvm::IRBuilderBase &builder,
30523042

30533043
// Update the stack frame created for this loop to point to the resulting loop
30543044
// after applying transformations.
3045+
printf("Applying loopInfo\n");
30553046
moduleTranslation.stackWalk<OpenMPLoopInfoStackFrame>(
30563047
[&](OpenMPLoopInfoStackFrame &frame) {
30573048
frame.loopInfo = ompBuilder->collapseLoops(ompLoc.DL, loopInfos, {});
@@ -4767,6 +4758,7 @@ convertOmpTargetData(Operation *op, llvm::IRBuilderBase &builder,
47674758
static LogicalResult
47684759
convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
47694760
LLVM::ModuleTranslation &moduleTranslation) {
4761+
printf("CONVERTING DISTRIBUTE\n");
47704762
llvm::OpenMPIRBuilder *ompBuilder = moduleTranslation.getOpenMPBuilder();
47714763
auto distributeOp = cast<omp::DistributeOp>(opInst);
47724764
if (failed(checkImplementationStatus(opInst)))
@@ -4835,21 +4827,20 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
48354827
llvm::OpenMPIRBuilder::LocationDescription ompLoc(builder);
48364828
llvm::Expected<llvm::BasicBlock *> regionBlock =
48374829
convertOmpOpRegions(distributeOp.getRegion(), "omp.distribute.region",
4838-
builder, moduleTranslation);
4830+
builder, moduleTranslation); // -> this is causing Schedule to be emitted first.
48394831
if (!regionBlock)
48404832
return regionBlock.takeError();
48414833
builder.SetInsertPoint(*regionBlock, (*regionBlock)->begin());
48424834

48434835
// Skip applying a workshare loop below when translating 'distribute
48444836
// parallel do' (it's been already handled by this point while translating
48454837
// the nested omp.wsloop).
4846-
if (!isa_and_present<omp::WsloopOp>(distributeOp.getNestedWrapper())) {
4838+
if (!isa_and_present<omp::WsloopOp>(distributeOp.getNestedWrapper()) || distributeOp.getDistScheduleStatic()) {
48474839
// TODO: Add support for clauses which are valid for DISTRIBUTE
48484840
// constructs. Static schedule is the default.
48494841
bool hasDistSchedule = distributeOp.getDistScheduleStatic();
48504842
auto schedule = hasDistSchedule ? omp::ClauseScheduleKind::Distribute
48514843
: omp::ClauseScheduleKind::Static;
4852-
// dist_schedule clauses are ordered - otherwise this should be false
48534844
bool isOrdered = hasDistSchedule;
48544845
std::optional<omp::ScheduleModifier> scheduleMod;
48554846
bool isSimd = false;
@@ -4859,14 +4850,17 @@ convertOmpDistribute(Operation &opInst, llvm::IRBuilderBase &builder,
48594850
llvm::Value *chunk = moduleTranslation.lookupValue(
48604851
distributeOp.getDistScheduleChunkSize());
48614852
llvm::CanonicalLoopInfo *loopInfo =
4862-
findCurrentLoopInfo(moduleTranslation);
4853+
findCurrentLoopInfo(moduleTranslation); // Do we need a new loop info here?
4854+
printf("loopInfo Address: %p\n", loopInfo);
4855+
printf("InsertPoint Name : %s\n", builder.GetInsertBlock()->getName().str().c_str());
4856+
printf("Applying omp.ditribute Workshare Loop\n");
48634857
llvm::OpenMPIRBuilder::InsertPointOrErrorTy wsloopIP =
48644858
ompBuilder->applyWorkshareLoop(
48654859
ompLoc.DL, loopInfo, allocaIP, loopNeedsBarrier,
48664860
convertToScheduleKind(schedule), chunk, isSimd,
48674861
scheduleMod == omp::ScheduleModifier::monotonic,
48684862
scheduleMod == omp::ScheduleModifier::nonmonotonic, isOrdered,
4869-
workshareLoopType, hasDistSchedule, chunk);
4863+
workshareLoopType, hasDistSchedule);
48704864

48714865
if (!wsloopIP)
48724866
return wsloopIP.takeError();
@@ -5907,7 +5901,7 @@ convertHostOrTargetOperation(Operation *op, llvm::IRBuilderBase &builder,
59075901
!dyn_cast_if_present<omp::LoopWrapperInterface>(op->getParentOp());
59085902

59095903
if (isOutermostLoopWrapper)
5910-
moduleTranslation.stackPush<OpenMPLoopInfoStackFrame>();
5904+
moduleTranslation.stackPush<OpenMPLoopInfoStackFrame>(); // -> Need another one of these when Distribute AND WSLoop is present?
59115905

59125906
auto result =
59135907
llvm::TypeSwitch<Operation *, LogicalResult>(op)

0 commit comments

Comments
 (0)