@@ -42,13 +42,32 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
4242 mlir::omp::ParallelOp parallelOp =
4343 rewriter.create <mlir::omp::ParallelOp>(doLoop.getLoc ());
4444
45- rewriter.createBlock (¶llelOp.getRegion ());
46- mlir::Block &block = parallelOp.getRegion ().back ();
45+ mlir::Block *block = rewriter.createBlock (¶llelOp.getRegion ());
4746
48- rewriter.setInsertionPointToEnd (& block);
47+ rewriter.setInsertionPointToEnd (block);
4948 rewriter.create <mlir::omp::TerminatorOp>(doLoop.getLoc ());
5049
51- rewriter.setInsertionPointToStart (&block);
50+ rewriter.setInsertionPointToStart (block);
51+
52+ // ==== TODO (1) Start ====
53+ //
54+ // The goal of the few lines below is to collect and clone
55+ // the list of operations that define the loop's lower and upper bounds as
56+ // well as the step. Should we, instead of doing this here, split it into 2
57+ // stages?
58+ //
59+ // 1. **Stage 1**: add an analysis that extracts all the relevant
60+ // operations defining the lower-bound, upper-bound, and
61+ // step.
62+ // 2. **Stage 2**: clone the collected operations in the parallel region.
63+ //
64+ // So far, the pass has been tested with very simple loops (where the bounds
65+ // and step are constants) so the goal of **Stage 1** is to have a
66+ // well-defined component that has the sole responsibility of collecting all
67+ // the relevant ops relevant to the loop header. This was we can test this
68+ // in isolation for more complex loops and better organize the code. **Stage
69+ // 2** would then be responsible for the actual cloning of the collected
70+ // loop header preparation/allocation operations.
5271
5372 // Clone the LB, UB, step defining ops inside the parallel region.
5473 llvm::SmallVector<mlir::Value> lowerBound, upperBound, step;
@@ -58,16 +77,34 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
5877 rewriter.clone (*doLoop.getUpperBound ().getDefiningOp ())->getResult (0 ));
5978 step.push_back (
6079 rewriter.clone (*doLoop.getStep ().getDefiningOp ())->getResult (0 ));
80+ // ==== TODO (1) End ====
6181
6282 auto wsLoopOp = rewriter.create <mlir::omp::WsLoopOp>(
6383 doLoop.getLoc (), lowerBound, upperBound, step);
6484 wsLoopOp.setInclusive (true );
6585
6686 auto outlineableOp =
6787 mlir::dyn_cast<mlir::omp::OutlineableOpenMPOpInterface>(*parallelOp);
68- assert (outlineableOp);
6988 rewriter.setInsertionPointToStart (outlineableOp.getAllocaBlock ());
7089
90+ // ==== TODO (2) Start ====
91+ //
92+ // The goal of the following simple work-list algorithm and
93+ // the following `for` loop is to collect all the operations related to the
94+ // allocation of the induction variable for the `do concurrent` loop. The
95+ // operations collected by this algorithm are very similar to what is
96+ // usually emitted for privatized variables, e.g. for omp.parallel loops.
97+ // Therefore, I think we can:
98+ //
99+ // 1. **Stage 1**: Add an analysis that colects all these operations. The
100+ // goal is similar to **Stage 1** of TODO (1): isolate the
101+ // algorithm is an individually-testable component so that
102+ // we properly implement and test it for more complicated
103+ // `do concurrent` loops.
104+ // 1. **Stage 2**: Using the collected operations, create and populate an
105+ // `omp.private {type=private}` op to server as the
106+ // delayed privatizer for the new work-sharing loop.
107+
71108 // For the induction variable, we need to privative its allocation and
72109 // binding inside the parallel region.
73110 llvm::SmallSetVector<mlir::Operation *, 2 > workList;
@@ -101,6 +138,12 @@ class DoConcurrentConversion : public mlir::OpConversionPattern<fir::DoLoopOp> {
101138 }
102139 declareAndAllocasToClone.insert (storeTarget);
103140 }
141+ // ==== TODO (2) End ====
142+ //
143+ // TODO (1 & 2): Isolating analyses proposed in both TODOs, I think we can
144+ // more easily generalize the pass to work for targets other than OpenMP,
145+ // e.g. OpenACC, I think can, can reuse the results of the analyses and only
146+ // change the code-gen/rewriting.
104147
105148 mlir::IRMapping mapper;
106149
0 commit comments