@@ -161,18 +161,71 @@ void cloneOrMapRegionOutsiders(fir::FirOpBuilder &builder,
161161namespace {
162162namespace looputils {
163163// / Stores info needed about the induction/iteration variable for each `do
164- // / concurrent` in a loop nest. This includes:
165- // / * the operation allocating memory for iteration variable,
166- // / * the operation(s) updating the iteration variable with the current
167- // / iteration number.
164+ // / concurrent` in a loop nest.
168165struct InductionVariableInfo {
166+ // / The operation allocating memory for iteration variable.
169167 mlir::Operation *iterVarMemDef;
168+
169+ // / the operation(s) updating the iteration variable with the current
170+ // / iteration number.
170171 llvm::SetVector<mlir::Operation *> indVarUpdateOps;
171172};
172173
173174using LoopNestToIndVarMap =
174175 llvm::MapVector<fir::DoLoopOp, InductionVariableInfo>;
175176
177+ // / For the \p doLoop parameter, find the operation that declares its iteration
178+ // / variable or allocates memory for it.
179+ // /
180+ // / For example, give the following loop:
181+ // / ```
182+ // / ...
183+ // / %i:2 = hlfir.declare %0 {uniq_name = "_QFEi"} : ...
184+ // / ...
185+ // / fir.do_loop %ind_var = %lb to %ub step %s unordered {
186+ // / %ind_var_conv = fir.convert %ind_var : (index) -> i32
187+ // / fir.store %ind_var_conv to %i#1 : !fir.ref<i32>
188+ // / ...
189+ // / }
190+ // / ```
191+ // /
192+ // / This function returns the `hlfir.declare` op for `%i`.
193+ // /
194+ // / Note: The current implementation is dependent on how flang emits loop
195+ // / bodies; which is sufficient for the current simple test/use cases. If this
196+ // / proves to be insufficient, this should be made more generic.
197+ mlir::Operation *findLoopIterationVarMemDecl (fir::DoLoopOp doLoop) {
198+ mlir::Value result = nullptr ;
199+
200+ // Checks if a StoreOp is updating the memref of the loop's iteration
201+ // variable.
202+ auto isStoringIV = [&](fir::StoreOp storeOp) {
203+ // Direct store into the IV memref.
204+ if (storeOp.getValue () == doLoop.getInductionVar ())
205+ return true ;
206+
207+ // Indirect store into the IV memref.
208+ if (auto convertOp = mlir::dyn_cast<fir::ConvertOp>(
209+ storeOp.getValue ().getDefiningOp ())) {
210+ if (convertOp.getOperand () == doLoop.getInductionVar ())
211+ return true ;
212+ }
213+
214+ return false ;
215+ };
216+
217+ for (mlir::Operation &op : doLoop) {
218+ if (auto storeOp = mlir::dyn_cast<fir::StoreOp>(op))
219+ if (isStoringIV (storeOp)) {
220+ result = storeOp.getMemref ();
221+ break ;
222+ }
223+ }
224+
225+ assert (result != nullptr && result.getDefiningOp () != nullptr );
226+ return result.getDefiningOp ();
227+ }
228+
176229// / Given an operation `op`, this returns true if `op`'s operand is ultimately
177230// / the loop's induction variable. Detecting this helps finding the live-in
178231// / value corresponding to the induction variable in case the induction variable
@@ -412,7 +465,7 @@ mlir::LogicalResult collectLoopNest(fir::DoLoopOp currentLoop,
412465 loopNest.insert (
413466 {currentLoop,
414467 InductionVariableInfo{
415- findLoopIndVarMemDecl (currentLoop),
468+ findLoopIterationVarMemDecl (currentLoop),
416469 std::move (looputils::extractIndVarUpdateOps (currentLoop))}});
417470 llvm::SmallVector<fir::DoLoopOp> unorderedLoops;
418471
@@ -715,6 +768,104 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
715768 using LiveInShapeInfoMap =
716769 llvm::DenseMap<mlir::Value, TargetDeclareShapeCreationInfo>;
717770
771+ mlir::omp::ParallelOp genParallelOp (mlir::Location loc,
772+ mlir::ConversionPatternRewriter &rewriter,
773+ looputils::LoopNestToIndVarMap &loopNest,
774+ mlir::IRMapping &mapper) const {
775+ auto parallelOp = rewriter.create <mlir::omp::ParallelOp>(loc);
776+ rewriter.createBlock (¶llelOp.getRegion ());
777+ rewriter.setInsertionPoint (rewriter.create <mlir::omp::TerminatorOp>(loc));
778+
779+ genLoopNestIndVarAllocs (rewriter, loopNest, mapper);
780+ return parallelOp;
781+ }
782+
783+ void genLoopNestIndVarAllocs (mlir::ConversionPatternRewriter &rewriter,
784+ looputils::LoopNestToIndVarMap &loopNest,
785+ mlir::IRMapping &mapper) const {
786+
787+ for (auto &[_, indVarInfo] : loopNest)
788+ genInductionVariableAlloc (rewriter, indVarInfo.iterVarMemDef , mapper);
789+ }
790+
791+ mlir::Operation *
792+ genInductionVariableAlloc (mlir::ConversionPatternRewriter &rewriter,
793+ mlir::Operation *indVarMemDef,
794+ mlir::IRMapping &mapper) const {
795+ assert (
796+ indVarMemDef != nullptr &&
797+ " Induction variable memdef is expected to have a defining operation." );
798+
799+ llvm::SmallSetVector<mlir::Operation *, 2 > indVarDeclareAndAlloc;
800+ for (auto operand : indVarMemDef->getOperands ())
801+ indVarDeclareAndAlloc.insert (operand.getDefiningOp ());
802+ indVarDeclareAndAlloc.insert (indVarMemDef);
803+
804+ mlir::Operation *result;
805+ for (mlir::Operation *opToClone : indVarDeclareAndAlloc)
806+ result = rewriter.clone (*opToClone, mapper);
807+
808+ return result;
809+ }
810+
811+ void genLoopNestClauseOps (
812+ mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
813+ looputils::LoopNestToIndVarMap &loopNest, mlir::IRMapping &mapper,
814+ mlir::omp::LoopNestOperands &loopNestClauseOps,
815+ mlir::omp::TargetOperands *targetClauseOps = nullptr ) const {
816+ assert (loopNestClauseOps.loopLowerBounds .empty () &&
817+ " Loop nest bounds were already emitted!" );
818+
819+ auto populateBounds = [](mlir::Value var,
820+ llvm::SmallVectorImpl<mlir::Value> &bounds) {
821+ bounds.push_back (var.getDefiningOp ()->getResult (0 ));
822+ };
823+
824+ auto hostEvalCapture = [&](mlir::Value var,
825+ llvm::SmallVectorImpl<mlir::Value> &bounds) {
826+ populateBounds (var, bounds);
827+
828+ if (targetClauseOps)
829+ targetClauseOps->hostEvalVars .push_back (var);
830+ };
831+
832+ for (auto &[doLoop, _] : loopNest) {
833+ hostEvalCapture (doLoop.getLowerBound (),
834+ loopNestClauseOps.loopLowerBounds );
835+ hostEvalCapture (doLoop.getUpperBound (),
836+ loopNestClauseOps.loopUpperBounds );
837+ hostEvalCapture (doLoop.getStep (), loopNestClauseOps.loopSteps );
838+ }
839+
840+ loopNestClauseOps.loopInclusive = rewriter.getUnitAttr ();
841+ }
842+
843+ mlir::omp::LoopNestOp
844+ genWsLoopOp (mlir::ConversionPatternRewriter &rewriter, fir::DoLoopOp doLoop,
845+ mlir::IRMapping &mapper,
846+ const mlir::omp::LoopNestOperands &clauseOps,
847+ bool isComposite) const {
848+
849+ auto wsloopOp = rewriter.create <mlir::omp::WsloopOp>(doLoop.getLoc ());
850+ wsloopOp.setComposite (isComposite);
851+ rewriter.createBlock (&wsloopOp.getRegion ());
852+
853+ auto loopNestOp =
854+ rewriter.create <mlir::omp::LoopNestOp>(doLoop.getLoc (), clauseOps);
855+
856+ // Clone the loop's body inside the loop nest construct using the
857+ // mapped values.
858+ rewriter.cloneRegionBefore (doLoop.getRegion (), loopNestOp.getRegion (),
859+ loopNestOp.getRegion ().begin (), mapper);
860+
861+ mlir::Operation *terminator = loopNestOp.getRegion ().back ().getTerminator ();
862+ rewriter.setInsertionPointToEnd (&loopNestOp.getRegion ().back ());
863+ rewriter.create <mlir::omp::YieldOp>(terminator->getLoc ());
864+ rewriter.eraseOp (terminator);
865+
866+ return loopNestOp;
867+ }
868+
718869 void
719870 genBoundsOps (mlir::ConversionPatternRewriter &rewriter, mlir::Location loc,
720871 mlir::Value shape, llvm::SmallVectorImpl<mlir::Value> &boundsOps,
@@ -983,51 +1134,6 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
9831134 return teamsOp;
9841135 }
9851136
986- void genLoopNestClauseOps (
987- mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
988- looputils::LoopNestToIndVarMap &loopNest, mlir::IRMapping &mapper,
989- mlir::omp::LoopNestOperands &loopNestClauseOps,
990- mlir::omp::TargetOperands *targetClauseOps = nullptr ) const {
991- assert (loopNestClauseOps.loopLowerBounds .empty () &&
992- " Loop nest bounds were already emitted!" );
993-
994- // Clones the chain of ops defining a certain loop bound or its step into
995- // the parallel region. For example, if the value of a bound is defined by a
996- // `fir.convert`op, this lambda clones the `fir.convert` as well as the
997- // value it converts from. We do this since `omp.target` regions are
998- // isolated from above.
999- auto cloneBoundOrStepOpChain =
1000- [&](mlir::Operation *operation) -> mlir::Operation * {
1001- llvm::SetVector<mlir::Operation *> opChain;
1002- looputils::collectIndirectConstOpChain (operation, opChain);
1003-
1004- mlir::Operation *result;
1005- for (mlir::Operation *link : opChain)
1006- result = rewriter.clone (*link, mapper);
1007-
1008- return result;
1009- };
1010-
1011- auto hostEvalCapture = [&](mlir::Value var,
1012- llvm::SmallVectorImpl<mlir::Value> &bounds) {
1013- var = cloneBoundOrStepOpChain (var.getDefiningOp ())->getResult (0 );
1014- bounds.push_back (var);
1015-
1016- if (targetClauseOps)
1017- targetClauseOps->hostEvalVars .push_back (var);
1018- };
1019-
1020- for (auto &[doLoop, _] : loopNest) {
1021- hostEvalCapture (doLoop.getLowerBound (),
1022- loopNestClauseOps.loopLowerBounds );
1023- hostEvalCapture (doLoop.getUpperBound (),
1024- loopNestClauseOps.loopUpperBounds );
1025- hostEvalCapture (doLoop.getStep (), loopNestClauseOps.loopSteps );
1026- }
1027-
1028- loopNestClauseOps.loopInclusive = rewriter.getUnitAttr ();
1029- }
1030-
10311137 mlir::omp::DistributeOp
10321138 genDistributeOp (mlir::Location loc,
10331139 mlir::ConversionPatternRewriter &rewriter) const {
@@ -1038,72 +1144,6 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
10381144 return distOp;
10391145 }
10401146
1041- void genLoopNestIndVarAllocs (mlir::ConversionPatternRewriter &rewriter,
1042- looputils::LoopNestToIndVarMap &loopNest,
1043- mlir::IRMapping &mapper) const {
1044-
1045- for (auto &[_, indVarInfo] : loopNest)
1046- genInductionVariableAlloc (rewriter, indVarInfo.iterVarMemDef , mapper);
1047- }
1048-
1049- mlir::Operation *
1050- genInductionVariableAlloc (mlir::ConversionPatternRewriter &rewriter,
1051- mlir::Operation *indVarMemDef,
1052- mlir::IRMapping &mapper) const {
1053- assert (
1054- indVarMemDef != nullptr &&
1055- " Induction variable memdef is expected to have a defining operation." );
1056-
1057- llvm::SmallSetVector<mlir::Operation *, 2 > indVarDeclareAndAlloc;
1058- for (auto operand : indVarMemDef->getOperands ())
1059- indVarDeclareAndAlloc.insert (operand.getDefiningOp ());
1060- indVarDeclareAndAlloc.insert (indVarMemDef);
1061-
1062- mlir::Operation *result;
1063- for (mlir::Operation *opToClone : indVarDeclareAndAlloc)
1064- result = rewriter.clone (*opToClone, mapper);
1065-
1066- return result;
1067- }
1068-
1069- mlir::omp::ParallelOp genParallelOp (mlir::Location loc,
1070- mlir::ConversionPatternRewriter &rewriter,
1071- looputils::LoopNestToIndVarMap &loopNest,
1072- mlir::IRMapping &mapper) const {
1073- auto parallelOp = rewriter.create <mlir::omp::ParallelOp>(loc);
1074- rewriter.createBlock (¶llelOp.getRegion ());
1075- rewriter.setInsertionPoint (rewriter.create <mlir::omp::TerminatorOp>(loc));
1076-
1077- genLoopNestIndVarAllocs (rewriter, loopNest, mapper);
1078- return parallelOp;
1079- }
1080-
1081- mlir::omp::LoopNestOp
1082- genWsLoopOp (mlir::ConversionPatternRewriter &rewriter, fir::DoLoopOp doLoop,
1083- mlir::IRMapping &mapper,
1084- const mlir::omp::LoopNestOperands &clauseOps,
1085- bool isComposite) const {
1086-
1087- auto wsloopOp = rewriter.create <mlir::omp::WsloopOp>(doLoop.getLoc ());
1088- wsloopOp.setComposite (isComposite);
1089- rewriter.createBlock (&wsloopOp.getRegion ());
1090-
1091- auto loopNestOp =
1092- rewriter.create <mlir::omp::LoopNestOp>(doLoop.getLoc (), clauseOps);
1093-
1094- // Clone the loop's body inside the loop nest construct using the
1095- // mapped values.
1096- rewriter.cloneRegionBefore (doLoop.getRegion (), loopNestOp.getRegion (),
1097- loopNestOp.getRegion ().begin (), mapper);
1098-
1099- mlir::Operation *terminator = loopNestOp.getRegion ().back ().getTerminator ();
1100- rewriter.setInsertionPointToEnd (&loopNestOp.getRegion ().back ());
1101- rewriter.create <mlir::omp::YieldOp>(terminator->getLoc ());
1102- rewriter.eraseOp (terminator);
1103-
1104- return loopNestOp;
1105- }
1106-
11071147 bool mapToDevice;
11081148 llvm::DenseSet<fir::DoLoopOp> &concurrentLoopsToSkip;
11091149};
@@ -1152,8 +1192,6 @@ class DoConcurrentConversionPass
11521192
11531193 if (mlir::failed (mlir::applyFullConversion (getOperation (), target,
11541194 std::move (patterns)))) {
1155- mlir::emitError (mlir::UnknownLoc::get (context),
1156- " error in converting do-concurrent op" );
11571195 signalPassFailure ();
11581196 }
11591197 }
0 commit comments