@@ -28,64 +28,80 @@ namespace looputils {
2828// / Stores info needed about the induction/iteration variable for each `do
2929// / concurrent` in a loop nest.
3030struct InductionVariableInfo {
31+ InductionVariableInfo (fir::DoLoopOp doLoop) { populateInfo (doLoop); }
32+
3133 // / The operation allocating memory for iteration variable.
3234 mlir::Operation *iterVarMemDef;
33- };
35+ // / The operation(s) updating the iteration variable with the current
36+ // / iteration number.
37+ llvm::SmallVector<mlir::Operation *, 2 > indVarUpdateOps;
3438
35- using LoopNestToIndVarMap =
36- llvm::MapVector<fir::DoLoopOp, InductionVariableInfo>;
37-
38- // / For the \p doLoop parameter, find the operation that declares its iteration
39- // / variable or allocates memory for it.
40- // /
41- // / For example, give the following loop:
42- // / ```
43- // / ...
44- // / %i:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : ...
45- // / ...
46- // / fir.do_loop %ind_var = %lb to %ub step %s unordered {
47- // / %ind_var_conv = fir.convert %ind_var : (index) -> i32
48- // / fir.store %ind_var_conv to %i#1 : !fir.ref<i32>
49- // / ...
50- // / }
51- // / ```
52- // /
53- // / This function returns the `hlfir.declare` op for `%i`.
54- // /
55- // / Note: The current implementation is dependent on how flang emits loop
56- // / bodies; which is sufficient for the current simple test/use cases. If this
57- // / proves to be insufficient, this should be made more generic.
58- mlir::Operation *findLoopIterationVarMemDecl (fir::DoLoopOp doLoop) {
59- mlir::Value result = nullptr ;
60-
61- // Checks if a StoreOp is updating the memref of the loop's iteration
62- // variable.
63- auto isStoringIV = [&](fir::StoreOp storeOp) {
64- // Direct store into the IV memref.
65- if (storeOp.getValue () == doLoop.getInductionVar ())
66- return true ;
67-
68- // Indirect store into the IV memref.
69- if (auto convertOp = mlir::dyn_cast<fir::ConvertOp>(
70- storeOp.getValue ().getDefiningOp ())) {
71- if (convertOp.getOperand () == doLoop.getInductionVar ())
39+ private:
40+ // / For the \p doLoop parameter, find the following:
41+ // /
42+ // / 1. The operation that declares its iteration variable or allocates memory
43+ // / for it. For example, given the following loop:
44+ // / ```
45+ // / ...
46+ // / %i:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : ...
47+ // / ...
48+ // / fir.do_loop %ind_var = %lb to %ub step %s unordered {
49+ // / %ind_var_conv = fir.convert %ind_var : (index) -> i32
50+ // / fir.store %ind_var_conv to %i#1 : !fir.ref<i32>
51+ // / ...
52+ // / }
53+ // / ```
54+ // /
55+ // / This function sets the `iterVarMemDef` member to the `hlfir.declare` op
56+ // / for `%i`.
57+ // /
58+ // / 2. The operation(s) that update the loop's iteration variable from its
59+ // / induction variable. For the above example, the `indVarUpdateOps` is
60+ // / populated with the first 2 ops in the loop's body.
61+ // /
62+ // / Note: The current implementation depends on how flang emits loop
63+ // / bodies, which is sufficient for the current simple test/use cases. If this
64+ // / proves to be insufficient, this should be made more generic.
65+ void populateInfo (fir::DoLoopOp doLoop) {
66+ mlir::Value result = nullptr ;
67+
68+ // Checks if a StoreOp is updating the memref of the loop's iteration
69+ // variable.
70+ auto isStoringIV = [&](fir::StoreOp storeOp) {
71+ // Direct store into the IV memref.
72+ if (storeOp.getValue () == doLoop.getInductionVar ()) {
73+ indVarUpdateOps.push_back (storeOp);
7274 return true ;
73- }
74-
75- return false ;
76- };
75+ }
7776
78- for (mlir::Operation &op : doLoop) {
79- if (auto storeOp = mlir::dyn_cast<fir::StoreOp>(op))
80- if (isStoringIV (storeOp)) {
81- result = storeOp.getMemref ();
82- break ;
77+ // Indirect store into the IV memref.
78+ if (auto convertOp = mlir::dyn_cast<fir::ConvertOp>(
79+ storeOp.getValue ().getDefiningOp ())) {
80+ if (convertOp.getOperand () == doLoop.getInductionVar ()) {
81+ indVarUpdateOps.push_back (convertOp);
82+ indVarUpdateOps.push_back (storeOp);
83+ return true ;
84+ }
8385 }
86+
87+ return false ;
88+ };
89+
90+ for (mlir::Operation &op : doLoop) {
91+ if (auto storeOp = mlir::dyn_cast<fir::StoreOp>(op))
92+ if (isStoringIV (storeOp)) {
93+ result = storeOp.getMemref ();
94+ break ;
95+ }
96+ }
97+
98+ assert (result != nullptr && result.getDefiningOp () != nullptr );
99+ iterVarMemDef = result.getDefiningOp ();
84100 }
101+ };
85102
86- assert (result != nullptr && result.getDefiningOp () != nullptr );
87- return result.getDefiningOp ();
88- }
103+ using LoopNestToIndVarMap =
104+ llvm::MapVector<fir::DoLoopOp, InductionVariableInfo>;
89105
90106// / Loop \p innerLoop is considered perfectly-nested inside \p outerLoop iff
91107// / there are no operations in \p outerloop's body other than:
@@ -181,10 +197,7 @@ mlir::LogicalResult collectLoopNest(fir::DoLoopOp currentLoop,
181197 assert (currentLoop.getUnordered ());
182198
183199 while (true ) {
184- loopNest.insert (
185- {currentLoop,
186- InductionVariableInfo{findLoopIterationVarMemDecl (currentLoop)}});
187-
200+ loopNest.insert ({currentLoop, InductionVariableInfo (currentLoop)});
188201 llvm::SmallVector<fir::DoLoopOp> unorderedLoops;
189202
190203 for (auto nestedLoop : currentLoop.getRegion ().getOps <fir::DoLoopOp>())
@@ -210,6 +223,96 @@ mlir::LogicalResult collectLoopNest(fir::DoLoopOp currentLoop,
210223
211224 return mlir::success ();
212225}
226+
227+ // / Prepares the `fir.do_loop` nest to be easily mapped to OpenMP. In
228+ // / particular, this function would take this input IR:
229+ // / ```
230+ // / fir.do_loop %i_iv = %i_lb to %i_ub step %i_step unordered {
231+ // / fir.store %i_iv to %i#1 : !fir.ref<i32>
232+ // / %j_lb = arith.constant 1 : i32
233+ // / %j_ub = arith.constant 10 : i32
234+ // / %j_step = arith.constant 1 : index
235+ // /
236+ // / fir.do_loop %j_iv = %j_lb to %j_ub step %j_step unordered {
237+ // / fir.store %j_iv to %j#1 : !fir.ref<i32>
238+ // / ...
239+ // / }
240+ // / }
241+ // / ```
242+ // /
243+ // / into the following form (using generic op form since the result is
244+ // / technically an invalid `fir.do_loop` op):
245+ // /
246+ // / ```
247+ // / "fir.do_loop"(%i_lb, %i_ub, %i_step) <{unordered}> ({
248+ // / ^bb0(%i_iv: index):
249+ // / %j_lb = "arith.constant"() <{value = 1 : i32}> : () -> i32
250+ // / %j_ub = "arith.constant"() <{value = 10 : i32}> : () -> i32
251+ // / %j_step = "arith.constant"() <{value = 1 : index}> : () -> index
252+ // /
253+ // / "fir.do_loop"(%j_lb, %j_ub, %j_step) <{unordered}> ({
254+ // / ^bb0(%new_i_iv: index, %new_j_iv: index):
255+ // / "fir.store"(%new_i_iv, %i#1) : (i32, !fir.ref<i32>) -> ()
256+ // / "fir.store"(%new_j_iv, %j#1) : (i32, !fir.ref<i32>) -> ()
257+ // / ...
258+ // / })
259+ // / ```
260+ // /
261+ // / What happened to the loop nest is the following:
262+ // /
263+ // / * the innermost loop's entry block was updated from having one argument to
264+ // / having `n` arguments where `n` is the number of loops in the nest,
265+ // /
266+ // / * the outer loop(s)' ops that update the IVs were sunk into the innermost
267+ // / loop (see the `"fir.store"(%new_i_iv, %i#1)` op above),
268+ // /
269+ // / * the innermost loop's entry block's arguments were mapped in order from the
270+ // / outermost to the innermost IV.
271+ // /
272+ // / With this IR change, we can directly inline the innermost loop's region into
273+ // / the newly generated `omp.loop_nest` op.
274+ // /
275+ // / Note that this function has a pre-condition that \p loopNest consists of
276+ // / perfectly nested loops; i.e. there are no ops in between 2 nested
277+ // / loops except for the ops to set up the inner loop's LB, UB, and step. These
278+ // / ops are handled/cloned by `genLoopNestClauseOps(..)`.
279+ void sinkLoopIVArgs (mlir::ConversionPatternRewriter &rewriter,
280+ looputils::LoopNestToIndVarMap &loopNest) {
281+ if (loopNest.size () <= 1 )
282+ return ;
283+
284+ fir::DoLoopOp innermostLoop = loopNest.back ().first ;
285+ mlir::Operation &innermostFirstOp = innermostLoop.getRegion ().front ().front ();
286+
287+ llvm::SmallVector<mlir::Type> argTypes;
288+ llvm::SmallVector<mlir::Location> argLocs;
289+
290+ for (auto &[doLoop, indVarInfo] : llvm::drop_end (loopNest)) {
291+ // Sink the IV update ops to the innermost loop. We need to do this for all loops
292+ // except for the innermost one, hence the `drop_end` usage above.
293+ for (mlir::Operation *op : indVarInfo.indVarUpdateOps )
294+ op->moveBefore (&innermostFirstOp);
295+
296+ argTypes.push_back (doLoop.getInductionVar ().getType ());
297+ argLocs.push_back (doLoop.getInductionVar ().getLoc ());
298+ }
299+
300+ mlir::Region &innermmostRegion = innermostLoop.getRegion ();
301+ // Extend the innermost entry block with arguments to represent the outer IVs.
302+ innermmostRegion.addArguments (argTypes, argLocs);
303+
304+ unsigned idx = 1 ;
305+ // In reverse, remap the IVs of the loop nest from the old values to the new
306+ // ones. We do that in reverse since the first argument before this loop is
307+ // the old IV for the innermost loop. Therefore, we want to replace it first
308+ // before the old value (1st argument in the block) is remapped to be the IV
309+ // of the outermost loop in the nest.
310+ for (auto &[doLoop, _] : llvm::reverse (loopNest)) {
311+ doLoop.getInductionVar ().replaceAllUsesWith (
312+ innermmostRegion.getArgument (innermmostRegion.getNumArguments () - idx));
313+ ++idx;
314+ }
315+ }
213316} // namespace looputils
214317
215318class DoConcurrentConversion : public mlir ::OpConversionPattern<fir::DoLoopOp> {
@@ -236,6 +339,7 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
236339 " Some `do concurent` loops are not perfectly-nested. "
237340 " These will be serialized." );
238341
342+ looputils::sinkLoopIVArgs (rewriter, loopNest);
239343 mlir::IRMapping mapper;
240344 genParallelOp (doLoop.getLoc (), rewriter, loopNest, mapper);
241345 mlir::omp::LoopNestOperands loopNestClauseOps;
0 commit comments