|
18 | 18 | #include "flang/Optimizer/Builder/Todo.h" |
19 | 19 | #include "flang/Optimizer/Dialect/FIROps.h" |
20 | 20 | #include "flang/Optimizer/Dialect/FIRType.h" |
21 | | -#include "flang/Optimizer/HLFIR/HLFIRDialect.h" |
22 | 21 | #include "flang/Optimizer/Support/FatalError.h" |
23 | 22 | #include "mlir/Dialect/Arith/IR/Arith.h" |
24 | 23 | #include "mlir/Dialect/Func/IR/FuncOps.h" |
@@ -135,144 +134,6 @@ inline void intrinsicTypeTODO(fir::FirOpBuilder &builder, mlir::Type type, |
135 | 134 | " in " + intrinsicName); |
136 | 135 | } |
137 | 136 |
|
138 | | -using MinlocBodyOpGeneratorTy = llvm::function_ref<mlir::Value( |
139 | | - fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value, |
140 | | - mlir::Value, mlir::Value, const llvm::SmallVectorImpl<mlir::Value> &)>; |
141 | | -using InitValGeneratorTy = llvm::function_ref<mlir::Value( |
142 | | - fir::FirOpBuilder &, mlir::Location, const mlir::Type &)>; |
143 | | -using AddrGeneratorTy = llvm::function_ref<mlir::Value( |
144 | | - fir::FirOpBuilder &, mlir::Location, const mlir::Type &, mlir::Value, |
145 | | - mlir::Value)>; |
146 | | - |
147 | | -// Produces a loop nest for a Minloc intrinsic. |
148 | | -inline void genMinMaxlocReductionLoop( |
149 | | - fir::FirOpBuilder &builder, mlir::Value array, |
150 | | - fir::InitValGeneratorTy initVal, fir::MinlocBodyOpGeneratorTy genBody, |
151 | | - fir::AddrGeneratorTy getAddrFn, unsigned rank, mlir::Type elementType, |
152 | | - mlir::Location loc, mlir::Type maskElemType, mlir::Value resultArr, |
153 | | - bool maskMayBeLogicalScalar) { |
154 | | - mlir::IndexType idxTy = builder.getIndexType(); |
155 | | - |
156 | | - mlir::Value zeroIdx = builder.createIntegerConstant(loc, idxTy, 0); |
157 | | - |
158 | | - fir::SequenceType::Shape flatShape(rank, |
159 | | - fir::SequenceType::getUnknownExtent()); |
160 | | - mlir::Type arrTy = fir::SequenceType::get(flatShape, elementType); |
161 | | - mlir::Type boxArrTy = fir::BoxType::get(arrTy); |
162 | | - array = builder.create<fir::ConvertOp>(loc, boxArrTy, array); |
163 | | - |
164 | | - mlir::Type resultElemType = hlfir::getFortranElementType(resultArr.getType()); |
165 | | - mlir::Value flagSet = builder.createIntegerConstant(loc, resultElemType, 1); |
166 | | - mlir::Value zero = builder.createIntegerConstant(loc, resultElemType, 0); |
167 | | - mlir::Value flagRef = builder.createTemporary(loc, resultElemType); |
168 | | - builder.create<fir::StoreOp>(loc, zero, flagRef); |
169 | | - |
170 | | - mlir::Value init = initVal(builder, loc, elementType); |
171 | | - llvm::SmallVector<mlir::Value, Fortran::common::maxRank> bounds; |
172 | | - |
173 | | - assert(rank > 0 && "rank cannot be zero"); |
174 | | - mlir::Value one = builder.createIntegerConstant(loc, idxTy, 1); |
175 | | - |
176 | | - // Compute all the upper bounds before the loop nest. |
177 | | - // It is not strictly necessary for performance, since the loop nest |
178 | | - // does not have any store operations and any LICM optimization |
179 | | - // should be able to optimize the redundancy. |
180 | | - for (unsigned i = 0; i < rank; ++i) { |
181 | | - mlir::Value dimIdx = builder.createIntegerConstant(loc, idxTy, i); |
182 | | - auto dims = |
183 | | - builder.create<fir::BoxDimsOp>(loc, idxTy, idxTy, idxTy, array, dimIdx); |
184 | | - mlir::Value len = dims.getResult(1); |
185 | | - // We use C indexing here, so len-1 as loopcount |
186 | | - mlir::Value loopCount = builder.create<mlir::arith::SubIOp>(loc, len, one); |
187 | | - bounds.push_back(loopCount); |
188 | | - } |
189 | | - // Create a loop nest consisting of OP operations. |
190 | | - // Collect the loops' induction variables into indices array, |
191 | | - // which will be used in the innermost loop to load the input |
192 | | - // array's element. |
193 | | - // The loops are generated such that the innermost loop processes |
194 | | - // the 0 dimension. |
195 | | - llvm::SmallVector<mlir::Value, Fortran::common::maxRank> indices; |
196 | | - for (unsigned i = rank; 0 < i; --i) { |
197 | | - mlir::Value step = one; |
198 | | - mlir::Value loopCount = bounds[i - 1]; |
199 | | - auto loop = |
200 | | - builder.create<fir::DoLoopOp>(loc, zeroIdx, loopCount, step, false, |
201 | | - /*finalCountValue=*/false, init); |
202 | | - init = loop.getRegionIterArgs()[0]; |
203 | | - indices.push_back(loop.getInductionVar()); |
204 | | - // Set insertion point to the loop body so that the next loop |
205 | | - // is inserted inside the current one. |
206 | | - builder.setInsertionPointToStart(loop.getBody()); |
207 | | - } |
208 | | - |
209 | | - // Reverse the indices such that they are ordered as: |
210 | | - // <dim-0-idx, dim-1-idx, ...> |
211 | | - std::reverse(indices.begin(), indices.end()); |
212 | | - mlir::Value reductionVal = |
213 | | - genBody(builder, loc, elementType, array, flagRef, init, indices); |
214 | | - |
215 | | - // Unwind the loop nest and insert ResultOp on each level |
216 | | - // to return the updated value of the reduction to the enclosing |
217 | | - // loops. |
218 | | - for (unsigned i = 0; i < rank; ++i) { |
219 | | - auto result = builder.create<fir::ResultOp>(loc, reductionVal); |
220 | | - // Proceed to the outer loop. |
221 | | - auto loop = mlir::cast<fir::DoLoopOp>(result->getParentOp()); |
222 | | - reductionVal = loop.getResult(0); |
223 | | - // Set insertion point after the loop operation that we have |
224 | | - // just processed. |
225 | | - builder.setInsertionPointAfter(loop.getOperation()); |
226 | | - } |
227 | | - // End of loop nest. The insertion point is after the outermost loop. |
228 | | - if (maskMayBeLogicalScalar) { |
229 | | - if (fir::IfOp ifOp = |
230 | | - mlir::dyn_cast<fir::IfOp>(builder.getBlock()->getParentOp())) { |
231 | | - builder.create<fir::ResultOp>(loc, reductionVal); |
232 | | - builder.setInsertionPointAfter(ifOp); |
233 | | - // Redefine flagSet to escape scope of ifOp |
234 | | - flagSet = builder.createIntegerConstant(loc, resultElemType, 1); |
235 | | - reductionVal = ifOp.getResult(0); |
236 | | - } |
237 | | - } |
238 | | - |
239 | | - // Check for case where array was full of max values. |
240 | | - // flag will be 0 if mask was never true, 1 if mask was true as some point, |
241 | | - // this is needed to avoid catching cases where we didn't access any elements |
242 | | - // e.g. mask=.FALSE. |
243 | | - mlir::Value flagValue = |
244 | | - builder.create<fir::LoadOp>(loc, resultElemType, flagRef); |
245 | | - mlir::Value flagCmp = builder.create<mlir::arith::CmpIOp>( |
246 | | - loc, mlir::arith::CmpIPredicate::eq, flagValue, flagSet); |
247 | | - fir::IfOp ifMaskTrueOp = |
248 | | - builder.create<fir::IfOp>(loc, flagCmp, /*withElseRegion=*/false); |
249 | | - builder.setInsertionPointToStart(&ifMaskTrueOp.getThenRegion().front()); |
250 | | - |
251 | | - mlir::Value testInit = initVal(builder, loc, elementType); |
252 | | - fir::IfOp ifMinSetOp; |
253 | | - if (elementType.isa<mlir::FloatType>()) { |
254 | | - mlir::Value cmp = builder.create<mlir::arith::CmpFOp>( |
255 | | - loc, mlir::arith::CmpFPredicate::OEQ, testInit, reductionVal); |
256 | | - ifMinSetOp = builder.create<fir::IfOp>(loc, cmp, |
257 | | - /*withElseRegion*/ false); |
258 | | - } else { |
259 | | - mlir::Value cmp = builder.create<mlir::arith::CmpIOp>( |
260 | | - loc, mlir::arith::CmpIPredicate::eq, testInit, reductionVal); |
261 | | - ifMinSetOp = builder.create<fir::IfOp>(loc, cmp, |
262 | | - /*withElseRegion*/ false); |
263 | | - } |
264 | | - builder.setInsertionPointToStart(&ifMinSetOp.getThenRegion().front()); |
265 | | - |
266 | | - // Load output array with 1s instead of 0s |
267 | | - for (unsigned int i = 0; i < rank; ++i) { |
268 | | - mlir::Value index = builder.createIntegerConstant(loc, idxTy, i); |
269 | | - mlir::Value resultElemAddr = |
270 | | - getAddrFn(builder, loc, resultElemType, resultArr, index); |
271 | | - builder.create<fir::StoreOp>(loc, flagSet, resultElemAddr); |
272 | | - } |
273 | | - builder.setInsertionPointAfter(ifMaskTrueOp); |
274 | | -} |
275 | | - |
276 | 137 | inline fir::CUDADataAttributeAttr |
277 | 138 | getCUDADataAttribute(mlir::MLIRContext *mlirContext, |
278 | 139 | std::optional<Fortran::common::CUDADataAttr> cudaAttr) { |
|
0 commit comments