@@ -30,19 +30,39 @@ class GenericLoopConversionPattern
     : public mlir::OpConversionPattern<mlir::omp::LoopOp> {
 public:
   enum class GenericLoopCombinedInfo {
-    None,
+    Standalone,
     TargetTeamsLoop,
     TargetParallelLoop
   };
 
   using mlir::OpConversionPattern<mlir::omp::LoopOp>::OpConversionPattern;
 
+  explicit GenericLoopConversionPattern(mlir::MLIRContext *ctx)
+      : mlir::OpConversionPattern<mlir::omp::LoopOp>{ctx} {
+    // Enable rewrite recursion to make sure nested `loop` directives are
+    // handled.
+    this->setHasBoundedRewriteRecursion(true);
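+    // (MLIR requires patterns that may recursively match the ops they
+    // produce to declare bounded recursion; the driver otherwise rejects
+    // the recursive application.)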
+  }
+
   mlir::LogicalResult
   matchAndRewrite(mlir::omp::LoopOp loopOp, OpAdaptor adaptor,
                   mlir::ConversionPatternRewriter &rewriter) const override {
     assert(mlir::succeeded(checkLoopConversionSupportStatus(loopOp)));
 
-    rewriteToDistributeParallelDo(loopOp, rewriter);
+    GenericLoopCombinedInfo combinedInfo = findGenericLoopCombineInfo(loopOp);
+
+    switch (combinedInfo) {
+    case GenericLoopCombinedInfo::Standalone:
+      rewriteToSimdLoop(loopOp, rewriter);
+      break;
+    case GenericLoopCombinedInfo::TargetParallelLoop:
+      llvm_unreachable("not yet implemented: `parallel loop` directive");
+      break;
+    case GenericLoopCombinedInfo::TargetTeamsLoop:
+      rewriteToDistributeParallelDo(loopOp, rewriter);
+      break;
+    }
+
     rewriter.eraseOp(loopOp);
     return mlir::success();
   }
@@ -52,9 +72,8 @@ class GenericLoopConversionPattern
     GenericLoopCombinedInfo combinedInfo = findGenericLoopCombineInfo(loopOp);
 
     switch (combinedInfo) {
-    case GenericLoopCombinedInfo::None:
-      return loopOp.emitError(
-          "not yet implemented: Standalone `omp loop` directive");
+    case GenericLoopCombinedInfo::Standalone:
+      break;
     case GenericLoopCombinedInfo::TargetParallelLoop:
       return loopOp.emitError(
           "not yet implemented: Combined `omp target parallel loop` directive");
@@ -86,7 +105,7 @@ class GenericLoopConversionPattern
   static GenericLoopCombinedInfo
   findGenericLoopCombineInfo(mlir::omp::LoopOp loopOp) {
     mlir::Operation *parentOp = loopOp->getParentOp();
-    GenericLoopCombinedInfo result = GenericLoopCombinedInfo::None;
+    GenericLoopCombinedInfo result = GenericLoopCombinedInfo::Standalone;
 
     if (auto teamsOp = mlir::dyn_cast_if_present<mlir::omp::TeamsOp>(parentOp))
       if (mlir::isa_and_present<mlir::omp::TargetOp>(teamsOp->getParentOp()))
@@ -100,6 +119,62 @@ class GenericLoopConversionPattern
     return result;
   }
 
+  /// Rewrites standalone `loop` directives to equivalent `simd` constructs.
+  /// The reasoning behind this decision is that, according to the spec
+  /// (version 5.2, section 11.7.1):
+  ///
+  /// "If the bind clause is not specified on a construct for which it may be
+  /// specified and the construct is closely nested inside a teams or parallel
+  /// construct, the effect is as if binding is teams or parallel. If none of
+  /// those conditions hold, the binding region is not defined."
+  ///
+  /// which means that standalone `loop` directives have an undefined binding
+  /// region. Moreover, the spec says (in the next paragraph):
+  ///
+  /// "The specified binding region determines the binding thread set.
+  /// Specifically, if the binding region is a teams region, then the binding
+  /// thread set is the set of initial threads that are executing that region
+  /// while if the binding region is a parallel region, then the binding thread
+  /// set is the team of threads that are executing that region. If the binding
+  /// region is not defined, then the binding thread set is the encountering
+  /// thread."
+  ///
+  /// which means that the binding thread set for a standalone `loop` directive
+  /// is only the encountering thread.
+  ///
+  /// Since the encountering thread is the binding thread (set) for a
+  /// standalone `loop` directive, the best we can do in such a case is to
+  /// "simd" the directive.
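+  ///
+  /// As a rough sketch (hypothetical, simplified IR, not the exact printed
+  /// form), a standalone
+  ///
+  ///   omp.loop {
+  ///     omp.loop_nest (%i) : i32 = (%lb) to (%ub) step (%step) {
+  ///       ...
+  ///     }
+  ///   }
+  ///
+  /// is rewritten to
+  ///
+  ///   omp.simd {
+  ///     omp.loop_nest (%i) : i32 = (%lb) to (%ub) step (%step) {
+  ///       ...
+  ///     }
+  ///   }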
+  void rewriteToSimdLoop(mlir::omp::LoopOp loopOp,
+                         mlir::ConversionPatternRewriter &rewriter) const {
+    loopOp.emitWarning("Detected standalone OpenMP `loop` directive, the "
+                       "associated loop will be rewritten to `simd`.");
+    mlir::omp::SimdOperands simdClauseOps;
+    simdClauseOps.privateVars = loopOp.getPrivateVars();
+
+    auto privateSyms = loopOp.getPrivateSyms();
+    if (privateSyms)
+      simdClauseOps.privateSyms.assign(privateSyms->begin(),
+                                       privateSyms->end());
+
+    Fortran::common::openmp::EntryBlockArgs simdArgs;
+    simdArgs.priv.vars = simdClauseOps.privateVars;
+
+    auto simdOp =
+        rewriter.create<mlir::omp::SimdOp>(loopOp.getLoc(), simdClauseOps);
+    mlir::Block *simdBlock =
+        genEntryBlock(rewriter, simdArgs, simdOp.getRegion());
+
+    mlir::IRMapping mapper;
+    mlir::Block &loopBlock = *loopOp.getRegion().begin();
+
+    // Map the `loop` op's entry-block arguments to the matching `simd`
+    // entry-block arguments before cloning the body.
+    for (auto [loopOpArg, simdOpArg] :
+         llvm::zip_equal(loopBlock.getArguments(), simdBlock->getArguments()))
+      mapper.map(loopOpArg, simdOpArg);
+
+    rewriter.clone(*loopOp.begin(), mapper);
+  }
+
   void rewriteToDistributeParallelDo(
       mlir::omp::LoopOp loopOp,
       mlir::ConversionPatternRewriter &rewriter) const {