diff --git a/flang/lib/Lower/OpenMP.cpp b/flang/lib/Lower/OpenMP.cpp index eeba87fcd1511..c5c9859dfd67e 100644 --- a/flang/lib/Lower/OpenMP.cpp +++ b/flang/lib/Lower/OpenMP.cpp @@ -2075,6 +2075,7 @@ createAndSetPrivatizedLoopVar(Fortran::lower::AbstractConverter &converter, firOpBuilder.setInsertionPointToStart(firOpBuilder.getAllocaBlock()); mlir::Type tempTy = converter.genType(*sym); + llvm::outs() << "Temp type = " << tempTy << "\n"; mlir::Value temp = firOpBuilder.create( loc, tempTy, /*pinned=*/true, /*lengthParams=*/mlir::ValueRange{}, /*shapeParams*/ mlir::ValueRange{}, @@ -2088,6 +2089,103 @@ createAndSetPrivatizedLoopVar(Fortran::lower::AbstractConverter &converter, return storeOp; } +/// Create the body (block) for an OpenMP Loop Operation. +/// +/// \param [in] op - the operation the body belongs to. +/// \param [inout] converter - converter to use for the clauses. +/// \param [in] loc - location in source code. +/// \param [in] eval - current PFT node/evaluation. +/// \oaran [in] clauses - list of clauses to process. +/// \param [in] args - block arguments (induction variable[s]) for the +//// region. +/// \param [in] outerCombined - is this an outer operation - prevents +/// privatization. +template +static void createBodyOfLoopOp( + Op &op, Fortran::lower::AbstractConverter &converter, mlir::Location &loc, + Fortran::lower::pft::Evaluation &eval, + const Fortran::parser::OmpClauseList *clauses = nullptr, + const llvm::SmallVector &args = {}, + bool outerCombined = false, DataSharingProcessor *dsp = nullptr) { + fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); + // If an argument for the region is provided then create the block with that + // argument. Also update the symbol's address with the mlir argument value. + // e.g. For loops the argument is the induction variable. And all further + // uses of the induction variable should use this mlir value. + mlir::Operation *storeOp = nullptr; + assert(args.size() > 0); + std::size_t loopVarTypeSize = 0; + for (const Fortran::semantics::Symbol *arg : args) + loopVarTypeSize = std::max(loopVarTypeSize, arg->GetUltimate().size()); + mlir::Type loopVarType = getLoopVarType(converter, loopVarTypeSize); + llvm::SmallVector tiv; + llvm::SmallVector locs; + for (int i = 0; i < (int)args.size(); i++) { + tiv.push_back(loopVarType); + locs.push_back(loc); + } + int offset = 0; + // The argument is not currently in memory, so make a temporary for the + // argument, and store it there, then bind that location to the argument. + for (const Fortran::semantics::Symbol *arg : args) { + mlir::Type symType = converter.genType(*arg); + mlir::Type symRefType = firOpBuilder.getRefType(symType); + tiv.push_back(symRefType); + locs.push_back(loc); + offset++; + } + firOpBuilder.createBlock(&op.getRegion(), {}, tiv, locs); + + int argIndex = 0; + for (const Fortran::semantics::Symbol *arg : args) { + mlir::Value addrVal = + fir::getBase(op.getRegion().front().getArgument(argIndex+offset)); + converter.bindSymbol(*arg, addrVal); + mlir::Type symType = converter.genType(*arg); + mlir::Value indexVal = + fir::getBase(op.getRegion().front().getArgument(argIndex)); + mlir::Value cvtVal = firOpBuilder.createConvert(loc, symType, indexVal); + addrVal = converter.getSymbolAddress(*arg); + storeOp = firOpBuilder.create(loc, cvtVal, addrVal); + argIndex++; + } + // Set the insert for the terminator operation to go at the end of the + // block - this is either empty or the block with the stores above, + // the end of the block works for both. + mlir::Block &block = op.getRegion().back(); + firOpBuilder.setInsertionPointToEnd(&block); + + // If it is an unstructured region and is not the outer region of a combined + // construct, create empty blocks for all evaluations. + if (eval.lowerAsUnstructured() && !outerCombined) + Fortran::lower::createEmptyRegionBlocks( + firOpBuilder, eval.getNestedEvaluations()); + + // Insert the terminator. + Fortran::lower::genOpenMPTerminator(firOpBuilder, op.getOperation(), loc); + // Reset the insert point to before the terminator. + resetBeforeTerminator(firOpBuilder, storeOp, block); + + // Handle privatization. Do not privatize if this is the outer operation. + if (clauses && !outerCombined) { + constexpr bool isLoop = std::is_same_v || + std::is_same_v; + if (!dsp) { + DataSharingProcessor proc(converter, *clauses, eval); + proc.processStep1(); + proc.processStep2(op, isLoop); + } else { + if (isLoop && args.size() > 0) + dsp->setLoopIV(converter.getSymbolAddress(*args[0])); + dsp->processStep2(op, isLoop); + } + + if (storeOp) + firOpBuilder.setInsertionPointAfter(storeOp); + } +} + /// Create the body (block) for an OpenMP Operation. /// /// \param [in] op - the operation the body belongs to. @@ -2914,7 +3012,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, const Fortran::parser::OpenMPLoopConstruct &loopConstruct) { fir::FirOpBuilder &firOpBuilder = converter.getFirOpBuilder(); llvm::SmallVector lowerBound, upperBound, step, linearVars, - linearStepVars, reductionVars; + linearStepVars, privateVars, reductionVars; mlir::Value scheduleChunkClauseOperand; mlir::IntegerAttr orderedClauseOperand; mlir::omp::ClauseOrderKindAttr orderClauseOperand; @@ -3023,9 +3121,23 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, return; } + // Collect the loops to collapse. + Fortran::lower::pft::Evaluation *doConstructEval = + &eval.getFirstNestedEvaluation(); + Fortran::lower::pft::Evaluation *doLoop = + &doConstructEval->getFirstNestedEvaluation(); + auto *doStmt = doLoop->getIf(); + assert(doStmt && "Expected do loop to be in the nested evaluation"); + const auto &loopControl = + std::get>(doStmt->t); + const Fortran::parser::LoopControl::Bounds *bounds = + std::get_if(&loopControl->u); + assert(bounds && "Expected bounds for worksharing do loop"); + privateVars.push_back(converter.getSymbolAddress(*bounds->name.thing.symbol)); + auto wsLoopOp = firOpBuilder.create( currentLocation, lowerBound, upperBound, step, linearVars, linearStepVars, - reductionVars, + privateVars, reductionVars, reductionDeclSymbols.empty() ? nullptr : mlir::ArrayAttr::get(firOpBuilder.getContext(), @@ -3061,7 +3173,7 @@ static void genOMP(Fortran::lower::AbstractConverter &converter, wsLoopOp.setNowaitAttr(nowaitClauseOperand); } - createBodyOfOp(wsLoopOp, converter, currentLocation, + createBodyOfLoopOp(wsLoopOp, converter, currentLocation, eval, &loopOpClauseList, iv, /*outer=*/false, &dsp); } diff --git a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td index 8ff5380f71ad4..bd348e0b3769a 100644 --- a/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td +++ b/mlir/include/mlir/Dialect/OpenMP/OpenMPOps.td @@ -479,6 +479,7 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments, Variadic:$step, Variadic:$linear_vars, Variadic:$linear_step_vars, + Variadic:$privates, Variadic:$reduction_vars, OptionalAttr:$reductions, OptionalAttr:$schedule_val, @@ -517,6 +518,7 @@ def WsLoopOp : OpenMP_Op<"wsloop", [AttrSizedOperandSegments, |`nowait` $nowait |`ordered` `(` $ordered_val `)` |`order` `(` custom($order_val) `)` + |`private` `(` custom($privates, type($privates)) `)` |`reduction` `(` custom( $reduction_vars, type($reduction_vars), $reductions diff --git a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp index 20df0099cbd24..3da37231e3c52 100644 --- a/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp +++ b/mlir/lib/Dialect/OpenMP/IR/OpenMPDialect.cpp @@ -178,6 +178,67 @@ void printClauseAttr(OpAsmPrinter &p, Operation *op, ClauseAttr attr) { p << stringifyEnum(attr.getValue()); } +static ParseResult +parsePrivateEntries(OpAsmParser &parser, + SmallVectorImpl &privateOperands, + SmallVectorImpl &privateOperandTypes) { + OpAsmParser::UnresolvedOperand arg; + OpAsmParser::UnresolvedOperand blockArg; + Type argType; + auto parseEntries = [&]() -> ParseResult { + if (parser.parseOperand(arg) || parser.parseArrow() || + parser.parseOperand(blockArg)) + return failure(); + privateOperands.push_back(arg); + return success(); + }; + + auto parseTypes = [&]() -> ParseResult { + if (parser.parseType(argType)) + return failure(); + privateOperandTypes.push_back(argType); + return success(); + }; + + if (parser.parseCommaSeparatedList(parseEntries)) + return failure(); + + if (parser.parseColon()) + return failure(); + + if (parser.parseCommaSeparatedList(parseTypes)) + return failure(); + + return success(); +} + +static void printPrivateEntries(OpAsmPrinter &p, Operation *op, + OperandRange privateOperands, + TypeRange privateOperandTypes) { + auto ®ion = op->getRegion(0); + + unsigned argIndex = 0; + unsigned offset = 0; + if (auto wsLoop = dyn_cast(op)) + offset = wsLoop.getNumLoops(); + for (const auto &privOperand : privateOperands) { + const auto &blockArg = region.front().getArgument(argIndex+offset); + p << privOperand << " -> " << blockArg; + argIndex++; + if (argIndex < privateOperands.size()) + p << ", "; + } + p << " : "; + + argIndex = 0; + for (const auto &privOperandType : privateOperandTypes) { + p << privOperandType; + argIndex++; + if (argIndex < privateOperands.size()) + p << ", "; + } +} + //===----------------------------------------------------------------------===// // Parser and printer for Linear Clause //===----------------------------------------------------------------------===// @@ -1086,7 +1147,14 @@ void printLoopControl(OpAsmPrinter &p, Operation *op, Region ®ion, ValueRange steps, TypeRange loopVarTypes, UnitAttr inclusive) { auto args = region.front().getArguments(); - p << " (" << args << ") : " << args[0].getType() << " = (" << lowerBound + p << " ("; + unsigned numLoops = steps.size(); + for (unsigned i=0; i attributes) { build(builder, state, lowerBound, upperBound, step, /*linear_vars=*/ValueRange(), - /*linear_step_vars=*/ValueRange(), /*reduction_vars=*/ValueRange(), + /*linear_step_vars=*/ValueRange(), /*private_vars=*/ValueRange(), + /*reduction_vars=*/ValueRange(), /*reductions=*/nullptr, /*schedule_val=*/nullptr, /*schedule_chunk_var=*/nullptr, /*schedule_modifier=*/nullptr, /*simd_modifier=*/false, /*nowait=*/false, /*ordered_val=*/nullptr, diff --git a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp index 4f6200d29a70a..26ec12e427ff3 100644 --- a/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp +++ b/mlir/lib/Target/LLVMIR/Dialect/OpenMP/OpenMPToLLVMIRTranslation.cpp @@ -833,6 +833,24 @@ static void collectReductionInfo( } } +/// Allocate space for privatized reduction variables. +void +allocPrivatizationVars(omp::WsLoopOp loop, llvm::IRBuilderBase &builder, + LLVM::ModuleTranslation &moduleTranslation, + llvm::OpenMPIRBuilder::InsertPointTy &allocaIP) { + unsigned offset = loop.getNumLoops(); + unsigned numArgs = loop.getRegion().front().getNumArguments(); + llvm::IRBuilderBase::InsertPointGuard guard(builder); + builder.restoreIP(allocaIP); + for (unsigned i = offset; i < numArgs; ++i) { + if (auto op = loop.getPrivates()[i-offset].getDefiningOp()) { + llvm::Value *var = builder.CreateAlloca(moduleTranslation.convertType(op.getResultPtrElementType())); + // moduleTranslation.convertType(loop.getPrivates()[i-offset].getType())); + moduleTranslation.mapValue(loop.getRegion().front().getArgument(i), var); + } + } +} + /// Converts an OpenMP workshare loop into LLVM IR using OpenMPIRBuilder. static LogicalResult convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, @@ -861,6 +879,8 @@ convertOmpWsLoop(Operation &opInst, llvm::IRBuilderBase &builder, llvm::OpenMPIRBuilder::InsertPointTy allocaIP = findAllocaInsertPoint(builder, moduleTranslation); + allocPrivatizationVars(loop, builder, moduleTranslation, allocaIP); + SmallVector privateReductionVariables; DenseMap reductionVariableMap; allocReductionVars(loop, builder, moduleTranslation, allocaIP, reductionDecls, diff --git a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir index 3fbeaebb592a4..86576831f67df 100644 --- a/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir +++ b/mlir/test/Conversion/OpenMPToLLVM/convert-to-llvmir.mlir @@ -79,7 +79,7 @@ func.func @wsloop(%arg0: index, %arg1: index, %arg2: index, %arg3: index, %arg4: // CHECK: "test.payload"(%[[CAST_ARG6]], %[[CAST_ARG7]]) : (index, index) -> () "test.payload"(%arg6, %arg7) : (index, index) -> () omp.yield - }) {operandSegmentSizes = array} : (index, index, index, index, index, index) -> () + }) {operandSegmentSizes = array} : (index, index, index, index, index, index) -> () omp.terminator } return diff --git a/mlir/test/Dialect/OpenMP/ops.mlir b/mlir/test/Dialect/OpenMP/ops.mlir index 4d88d9ac86fe1..233e3503881a2 100644 --- a/mlir/test/Dialect/OpenMP/ops.mlir +++ b/mlir/test/Dialect/OpenMP/ops.mlir @@ -141,7 +141,7 @@ func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memre "omp.wsloop" (%lb, %ub, %step) ({ ^bb0(%iv: index): omp.yield - }) {operandSegmentSizes = array, ordered_val = 1} : + }) {operandSegmentSizes = array, ordered_val = 1} : (index, index, index) -> () // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref) schedule(static) @@ -149,7 +149,7 @@ func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memre "omp.wsloop" (%lb, %ub, %step, %data_var, %linear_var) ({ ^bb0(%iv: index): omp.yield - }) {operandSegmentSizes = array, schedule_val = #omp} : + }) {operandSegmentSizes = array, schedule_val = #omp} : (index, index, index, memref, i32) -> () // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref, %{{.*}} = %{{.*}} : memref) schedule(static) @@ -157,7 +157,7 @@ func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memre "omp.wsloop" (%lb, %ub, %step, %data_var, %data_var, %linear_var, %linear_var) ({ ^bb0(%iv: index): omp.yield - }) {operandSegmentSizes = array, schedule_val = #omp} : + }) {operandSegmentSizes = array, schedule_val = #omp} : (index, index, index, memref, memref, i32, i32) -> () // CHECK: omp.wsloop linear(%{{.*}} = %{{.*}} : memref) schedule(dynamic = %{{.*}}) ordered(2) @@ -165,7 +165,7 @@ func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memre "omp.wsloop" (%lb, %ub, %step, %data_var, %linear_var, %chunk_var) ({ ^bb0(%iv: index): omp.yield - }) {operandSegmentSizes = array, schedule_val = #omp, ordered_val = 2} : + }) {operandSegmentSizes = array, schedule_val = #omp, ordered_val = 2} : (index, index, index, memref, i32, i32) -> () // CHECK: omp.wsloop schedule(auto) nowait @@ -173,7 +173,7 @@ func.func @omp_wsloop(%lb : index, %ub : index, %step : index, %data_var : memre "omp.wsloop" (%lb, %ub, %step) ({ ^bb0(%iv: index): omp.yield - }) {operandSegmentSizes = array, nowait, schedule_val = #omp} : + }) {operandSegmentSizes = array, nowait, schedule_val = #omp} : (index, index, index) -> () return diff --git a/mlir/test/Target/LLVMIR/openmp-llvm.mlir b/mlir/test/Target/LLVMIR/openmp-llvm.mlir index 1c02c0265462c..e0caaf06c03a1 100644 --- a/mlir/test/Target/LLVMIR/openmp-llvm.mlir +++ b/mlir/test/Target/LLVMIR/openmp-llvm.mlir @@ -310,7 +310,7 @@ llvm.func @wsloop_simple(%arg0: !llvm.ptr) { llvm.store %3, %4 : f32, !llvm.ptr omp.yield // CHECK: call void @__kmpc_for_static_fini(ptr @[[$loc_struct]], - }) {operandSegmentSizes = array} : (i64, i64, i64) -> () + }) {operandSegmentSizes = array} : (i64, i64, i64) -> () omp.terminator } llvm.return @@ -330,7 +330,7 @@ llvm.func @wsloop_inclusive_1(%arg0: !llvm.ptr) { %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr, f32 llvm.store %3, %4 : f32, !llvm.ptr omp.yield - }) {operandSegmentSizes = array} : (i64, i64, i64) -> () + }) {operandSegmentSizes = array} : (i64, i64, i64) -> () llvm.return } @@ -348,7 +348,7 @@ llvm.func @wsloop_inclusive_2(%arg0: !llvm.ptr) { %4 = llvm.getelementptr %arg0[%arg1] : (!llvm.ptr, i64) -> !llvm.ptr, f32 llvm.store %3, %4 : f32, !llvm.ptr omp.yield - }) {inclusive, operandSegmentSizes = array} : (i64, i64, i64) -> () + }) {inclusive, operandSegmentSizes = array} : (i64, i64, i64) -> () llvm.return }