diff --git a/flang/include/flang/Lower/AbstractConverter.h b/flang/include/flang/Lower/AbstractConverter.h index 1d1323642bf9c..81c220e29e164 100644 --- a/flang/include/flang/Lower/AbstractConverter.h +++ b/flang/include/flang/Lower/AbstractConverter.h @@ -348,6 +348,9 @@ class AbstractConverter { virtual Fortran::lower::SymbolBox lookupOneLevelUpSymbol(const Fortran::semantics::Symbol &sym) = 0; + virtual Fortran::lower::SymbolBox + shallowLookupSymbol(const Fortran::semantics::Symbol &sym) = 0; + /// Return the mlir::SymbolTable associated to the ModuleOp. /// Look-ups are faster using it than using module.lookup<>, /// but the module op should be queried in case of failure diff --git a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt index adefcfea0b5dc..f0927d555190f 100644 --- a/flang/include/flang/Optimizer/Dialect/CMakeLists.txt +++ b/flang/include/flang/Optimizer/Dialect/CMakeLists.txt @@ -17,8 +17,8 @@ mlir_tablegen(FIRAttr.cpp.inc -gen-attrdef-defs) set(LLVM_TARGET_DEFINITIONS FIROps.td) mlir_tablegen(FIROps.h.inc -gen-op-decls) mlir_tablegen(FIROps.cpp.inc -gen-op-defs) -mlir_tablegen(FIROpsTypes.h.inc --gen-typedef-decls) -mlir_tablegen(FIROpsTypes.cpp.inc --gen-typedef-defs) +mlir_tablegen(FIROpsTypes.h.inc --gen-typedef-decls -typedefs-dialect=fir) +mlir_tablegen(FIROpsTypes.cpp.inc --gen-typedef-defs -typedefs-dialect=fir) add_public_tablegen_target(FIROpsIncGen) set(LLVM_TARGET_DEFINITIONS FortranVariableInterface.td) diff --git a/flang/include/flang/Optimizer/Dialect/FIROps.td b/flang/include/flang/Optimizer/Dialect/FIROps.td index f9dc2e51a396c..4dce413b775fe 100644 --- a/flang/include/flang/Optimizer/Dialect/FIROps.td +++ b/flang/include/flang/Optimizer/Dialect/FIROps.td @@ -17,6 +17,7 @@ include "mlir/Dialect/Arith/IR/ArithBase.td" include "mlir/Dialect/Arith/IR/ArithOpsInterfaces.td" include "mlir/Dialect/LLVMIR/LLVMAttrDefs.td" +include "mlir/Dialect/OpenMP/OpenMPClauses.td" include 
"flang/Optimizer/Dialect/CUF/Attributes/CUFAttr.td" include "flang/Optimizer/Dialect/FIRDialect.td" include "flang/Optimizer/Dialect/FIRTypes.td" @@ -3570,7 +3571,7 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop", LLVM. }]; - let arguments = (ins + defvar opArgs = (ins Variadic:$lowerBound, Variadic:$upperBound, Variadic:$step, @@ -3579,17 +3580,45 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop", OptionalAttr:$loopAnnotation ); + let arguments = !con(opArgs, OpenMP_PrivateClause.arguments); + let regions = (region SizedRegion<1>:$region); let hasCustomAssemblyFormat = 1; let hasVerifier = 1; - let extraClassDeclaration = [{ + defvar opExtraClassDeclaration = [{ + unsigned getNumInductionVars() { return getLowerBound().size(); } + + unsigned getNumPrivateOperands() { return getPrivateVars().size(); } + + mlir::Block::BlockArgListType getInductionVars() { + return getBody()->getArguments().slice(0, getNumInductionVars()); + } + + mlir::Block::BlockArgListType getRegionPrivateArgs() { + return getBody()->getArguments().slice(getNumInductionVars(), + getNumPrivateOperands()); + } + + /// Number of operands controlling the loop + unsigned getNumControlOperands() { return getLowerBound().size() * 3; } + // Get Number of reduction operands unsigned getNumReduceOperands() { return getReduceOperands().size(); } + + mlir::Operation::operand_range getPrivateOperands() { + return getOperands() + .slice(getNumControlOperands() + getNumReduceOperands(), + getNumPrivateOperands()); + } }]; + + let extraClassDeclaration = + !strconcat(opExtraClassDeclaration, "\n", + OpenMP_PrivateClause.extraClassDeclaration); } #endif diff --git a/flang/lib/Lower/Bridge.cpp b/flang/lib/Lower/Bridge.cpp index 7b76845b5af05..cc292d610dcb9 100644 --- a/flang/lib/Lower/Bridge.cpp +++ b/flang/lib/Lower/Bridge.cpp @@ -12,6 +12,8 @@ #include "flang/Lower/Bridge.h" +#include "OpenMP/DataSharingProcessor.h" +#include "OpenMP/Utils.h" #include "flang/Lower/Allocatable.h" #include 
"flang/Lower/CallInterface.h" #include "flang/Lower/Coarray.h" @@ -94,10 +96,11 @@ struct IncrementLoopInfo { template explicit IncrementLoopInfo(Fortran::semantics::Symbol &sym, const T &lower, const T &upper, const std::optional &step, - bool isUnordered = false) + bool isConcurrent = false) : loopVariableSym{&sym}, lowerExpr{Fortran::semantics::GetExpr(lower)}, upperExpr{Fortran::semantics::GetExpr(upper)}, - stepExpr{Fortran::semantics::GetExpr(step)}, isUnordered{isUnordered} {} + stepExpr{Fortran::semantics::GetExpr(step)}, + isConcurrent{isConcurrent} {} IncrementLoopInfo(IncrementLoopInfo &&) = default; IncrementLoopInfo &operator=(IncrementLoopInfo &&x) = default; @@ -120,7 +123,7 @@ struct IncrementLoopInfo { const Fortran::lower::SomeExpr *upperExpr; const Fortran::lower::SomeExpr *stepExpr; const Fortran::lower::SomeExpr *maskExpr = nullptr; - bool isUnordered; // do concurrent, forall + bool isConcurrent; llvm::SmallVector localSymList; llvm::SmallVector localInitSymList; llvm::SmallVector< @@ -130,7 +133,7 @@ struct IncrementLoopInfo { mlir::Value loopVariable = nullptr; // Data members for structured loops. - fir::DoLoopOp doLoop = nullptr; + mlir::Operation *loopOp = nullptr; // Data members for unstructured loops. bool hasRealControl = false; @@ -1143,6 +1146,14 @@ class FirConverter : public Fortran::lower::AbstractConverter { return name; } + /// Find the symbol in the inner-most level of the local map or return null. + Fortran::lower::SymbolBox + shallowLookupSymbol(const Fortran::semantics::Symbol &sym) override { + if (Fortran::lower::SymbolBox v = localSymbols.shallowLookupSymbol(sym)) + return v; + return {}; + } + private: FirConverter() = delete; FirConverter(const FirConverter &) = delete; @@ -1217,14 +1228,6 @@ class FirConverter : public Fortran::lower::AbstractConverter { return {}; } - /// Find the symbol in the inner-most level of the local map or return null. 
- Fortran::lower::SymbolBox - shallowLookupSymbol(const Fortran::semantics::Symbol &sym) { - if (Fortran::lower::SymbolBox v = localSymbols.shallowLookupSymbol(sym)) - return v; - return {}; - } - /// Find the symbol in one level up of symbol map such as for host-association /// in OpenMP code or return null. Fortran::lower::SymbolBox @@ -1980,7 +1983,7 @@ class FirConverter : public Fortran::lower::AbstractConverter { llvm_unreachable("illegal reduction operator"); } - /// Collect DO CONCURRENT or FORALL loop control information. + /// Collect DO CONCURRENT loop control information. IncrementLoopNestInfo getConcurrentControl( const Fortran::parser::ConcurrentHeader &header, const std::list &localityList = {}) { @@ -2027,9 +2030,31 @@ class FirConverter : public Fortran::lower::AbstractConverter { void handleLocalitySpecs(const IncrementLoopInfo &info) { Fortran::semantics::SemanticsContext &semanticsContext = bridge.getSemanticsContext(); - for (const Fortran::semantics::Symbol *sym : info.localSymList) + + Fortran::lower::omp::DataSharingProcessor dsp( + *this, semanticsContext, getEval(), + /*useDelayedPrivatization=*/true, localSymbols); + mlir::omp::PrivateClauseOps privateClauseOps; + auto doConcurrentLoopOp = + mlir::dyn_cast_if_present(info.loopOp); + bool useDelayedPriv = + enableDelayedPrivatizationStaging && doConcurrentLoopOp; + + for (const Fortran::semantics::Symbol *sym : info.localSymList) { + if (useDelayedPriv) { + dsp.doPrivatize(sym, &privateClauseOps); + continue; + } + createHostAssociateVarClone(*sym, /*skipDefaultInit=*/false); + } + for (const Fortran::semantics::Symbol *sym : info.localInitSymList) { + if (useDelayedPriv) { + dsp.doPrivatize(sym, &privateClauseOps); + continue; + } + createHostAssociateVarClone(*sym, /*skipDefaultInit=*/true); const auto *hostDetails = sym->detailsIf(); @@ -2048,6 +2073,24 @@ class FirConverter : public Fortran::lower::AbstractConverter { sym->detailsIf(); copySymbolBinding(hostDetails->symbol(), *sym); } 
+ + if (useDelayedPriv) { + doConcurrentLoopOp.getPrivateVarsMutable().assign( + privateClauseOps.privateVars); + doConcurrentLoopOp.setPrivateSymsAttr( + builder->getArrayAttr(privateClauseOps.privateSyms)); + + for (auto [sym, privateVar] : llvm::zip_equal( + dsp.getAllSymbolsToPrivatize(), privateClauseOps.privateVars)) { + auto arg = doConcurrentLoopOp.getRegion().begin()->addArgument( + privateVar.getType(), doConcurrentLoopOp.getLoc()); + bindSymbol(*sym, hlfir::translateToExtendedValue( + privateVar.getLoc(), *builder, hlfir::Entity{arg}, + /*contiguousHint=*/true) + .first); + } + } + // Note that allocatable, types with ultimate components, and type // requiring finalization are forbidden in LOCAL/LOCAL_INIT (F2023 C1130), // so no clean-up needs to be generated for these entities. @@ -2291,8 +2334,14 @@ class FirConverter : public Fortran::lower::AbstractConverter { mlir::LLVM::LoopAnnotationAttr la = mlir::LLVM::LoopAnnotationAttr::get( builder->getContext(), {}, /*vectorize=*/va, {}, /*unroll*/ ua, /*unroll_and_jam*/ uja, {}, {}, {}, {}, {}, {}, {}, {}, {}, {}); - if (has_attrs) - info.doLoop.setLoopAnnotationAttr(la); + if (has_attrs) { + if (auto loopOp = mlir::dyn_cast(info.loopOp)) + loopOp.setLoopAnnotationAttr(la); + + if (auto doConcurrentOp = + mlir::dyn_cast(info.loopOp)) + doConcurrentOp.setLoopAnnotationAttr(la); + } } /// Generate FIR to begin a structured or unstructured increment loop nest. 
@@ -2301,96 +2350,77 @@ class FirConverter : public Fortran::lower::AbstractConverter { llvm::SmallVectorImpl &dirs) { assert(!incrementLoopNestInfo.empty() && "empty loop nest"); mlir::Location loc = toLocation(); - mlir::Operation *boundsAndStepIP = nullptr; mlir::arith::IntegerOverflowFlags iofBackup{}; + llvm::SmallVector nestLBs; + llvm::SmallVector nestUBs; + llvm::SmallVector nestSts; + llvm::SmallVector nestReduceOperands; + llvm::SmallVector nestReduceAttrs; + bool genDoConcurrent = false; + for (IncrementLoopInfo &info : incrementLoopNestInfo) { - mlir::Value lowerValue; - mlir::Value upperValue; - mlir::Value stepValue; + genDoConcurrent = info.isStructured() && info.isConcurrent; + + if (!genDoConcurrent) + info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym, + info.isConcurrent); - { - mlir::OpBuilder::InsertionGuard guard(*builder); + if (!getLoweringOptions().getIntegerWrapAround()) { + iofBackup = builder->getIntegerOverflowFlags(); + builder->setIntegerOverflowFlags( + mlir::arith::IntegerOverflowFlags::nsw); + } - // Set the IP before the first loop in the nest so that all nest bounds - // and step values are created outside the nest. - if (boundsAndStepIP) - builder->setInsertionPointAfter(boundsAndStepIP); + nestLBs.push_back(genControlValue(info.lowerExpr, info)); + nestUBs.push_back(genControlValue(info.upperExpr, info)); + bool isConst = true; + nestSts.push_back(genControlValue( + info.stepExpr, info, info.isStructured() ? nullptr : &isConst)); - info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym, - info.isUnordered); - if (!getLoweringOptions().getIntegerWrapAround()) { - iofBackup = builder->getIntegerOverflowFlags(); - builder->setIntegerOverflowFlags( - mlir::arith::IntegerOverflowFlags::nsw); - } - lowerValue = genControlValue(info.lowerExpr, info); - upperValue = genControlValue(info.upperExpr, info); - bool isConst = true; - stepValue = genControlValue(info.stepExpr, info, - info.isStructured() ? 
nullptr : &isConst); - if (!getLoweringOptions().getIntegerWrapAround()) - builder->setIntegerOverflowFlags(iofBackup); - boundsAndStepIP = stepValue.getDefiningOp(); - - // Use a temp variable for unstructured loops with non-const step. - if (!isConst) { - info.stepVariable = - builder->createTemporary(loc, stepValue.getType()); - boundsAndStepIP = - builder->create(loc, stepValue, info.stepVariable); + if (!getLoweringOptions().getIntegerWrapAround()) + builder->setIntegerOverflowFlags(iofBackup); + + // Use a temp variable for unstructured loops with non-const step. + if (!isConst) { + mlir::Value stepValue = nestSts.back(); + info.stepVariable = builder->createTemporary(loc, stepValue.getType()); + builder->create(loc, stepValue, info.stepVariable); + } + + if (genDoConcurrent && nestReduceOperands.empty()) { + // Create DO CONCURRENT reduce operands and attributes + for (const auto &reduceSym : info.reduceSymList) { + const fir::ReduceOperationEnum reduceOperation = reduceSym.first; + const Fortran::semantics::Symbol *sym = reduceSym.second; + fir::ExtendedValue exv = getSymbolExtendedValue(*sym, nullptr); + nestReduceOperands.push_back(fir::getBase(exv)); + auto reduceAttr = + fir::ReduceAttr::get(builder->getContext(), reduceOperation); + nestReduceAttrs.push_back(reduceAttr); } } + } + for (auto [info, lowerValue, upperValue, stepValue] : + llvm::zip_equal(incrementLoopNestInfo, nestLBs, nestUBs, nestSts)) { // Structured loop - generate fir.do_loop. if (info.isStructured()) { + if (genDoConcurrent) + continue; + + // The loop variable is a doLoop op argument. 
mlir::Type loopVarType = info.getLoopVariableType(); - mlir::Value loopValue; - if (info.isUnordered) { - llvm::SmallVector reduceOperands; - llvm::SmallVector reduceAttrs; - // Create DO CONCURRENT reduce operands and attributes - for (const auto &reduceSym : info.reduceSymList) { - const fir::ReduceOperationEnum reduce_operation = reduceSym.first; - const Fortran::semantics::Symbol *sym = reduceSym.second; - fir::ExtendedValue exv = getSymbolExtendedValue(*sym, nullptr); - reduceOperands.push_back(fir::getBase(exv)); - auto reduce_attr = - fir::ReduceAttr::get(builder->getContext(), reduce_operation); - reduceAttrs.push_back(reduce_attr); - } - // The loop variable value is explicitly updated. - info.doLoop = builder->create( - loc, lowerValue, upperValue, stepValue, /*unordered=*/true, - /*finalCountValue=*/false, /*iterArgs=*/std::nullopt, - llvm::ArrayRef(reduceOperands), reduceAttrs); - builder->setInsertionPointToStart(info.doLoop.getBody()); - loopValue = builder->createConvert(loc, loopVarType, - info.doLoop.getInductionVar()); - } else { - // The loop variable is a doLoop op argument. - info.doLoop = builder->create( - loc, lowerValue, upperValue, stepValue, /*unordered=*/false, - /*finalCountValue=*/true, - builder->createConvert(loc, loopVarType, lowerValue)); - builder->setInsertionPointToStart(info.doLoop.getBody()); - loopValue = info.doLoop.getRegionIterArgs()[0]; - } + auto loopOp = builder->create( + loc, lowerValue, upperValue, stepValue, /*unordered=*/false, + /*finalCountValue=*/true, + builder->createConvert(loc, loopVarType, lowerValue)); + info.loopOp = loopOp; + builder->setInsertionPointToStart(loopOp.getBody()); + mlir::Value loopValue = loopOp.getRegionIterArgs()[0]; + // Update the loop variable value in case it has non-index references. 
builder->create(loc, loopValue, info.loopVariable); - if (info.maskExpr) { - Fortran::lower::StatementContext stmtCtx; - mlir::Value maskCond = createFIRExpr(loc, info.maskExpr, stmtCtx); - stmtCtx.finalizeAndReset(); - mlir::Value maskCondCast = - builder->createConvert(loc, builder->getI1Type(), maskCond); - auto ifOp = builder->create(loc, maskCondCast, - /*withElseRegion=*/false); - builder->setInsertionPointToStart(&ifOp.getThenRegion().front()); - } - if (info.hasLocalitySpecs()) - handleLocalitySpecs(info); - addLoopAnnotationAttr(info, dirs); continue; } @@ -2454,6 +2484,60 @@ class FirConverter : public Fortran::lower::AbstractConverter { builder->restoreInsertionPoint(insertPt); } } + + if (genDoConcurrent) { + auto loopWrapperOp = builder->create(loc); + builder->setInsertionPointToStart( + builder->createBlock(&loopWrapperOp.getRegion())); + + for (IncrementLoopInfo &info : llvm::reverse(incrementLoopNestInfo)) { + info.loopVariable = genLoopVariableAddress(loc, *info.loopVariableSym, + info.isConcurrent); + } + + builder->setInsertionPointToEnd(loopWrapperOp.getBody()); + auto loopOp = builder->create( + loc, nestLBs, nestUBs, nestSts, nestReduceOperands, + nestReduceAttrs.empty() + ? 
nullptr + : mlir::ArrayAttr::get(builder->getContext(), nestReduceAttrs), + nullptr, /*private_vars=*/std::nullopt, /*private_syms=*/nullptr); + + llvm::SmallVector loopBlockArgTypes( + incrementLoopNestInfo.size(), builder->getIndexType()); + llvm::SmallVector loopBlockArgLocs( + incrementLoopNestInfo.size(), loc); + mlir::Region &loopRegion = loopOp.getRegion(); + mlir::Block *loopBlock = builder->createBlock( + &loopRegion, loopRegion.begin(), loopBlockArgTypes, loopBlockArgLocs); + builder->setInsertionPointToStart(loopBlock); + + for (auto [info, blockArg] : + llvm::zip_equal(incrementLoopNestInfo, loopBlock->getArguments())) { + info.loopOp = loopOp; + mlir::Value loopValue = + builder->createConvert(loc, info.getLoopVariableType(), blockArg); + builder->create(loc, loopValue, info.loopVariable); + + if (info.maskExpr) { + Fortran::lower::StatementContext stmtCtx; + mlir::Value maskCond = createFIRExpr(loc, info.maskExpr, stmtCtx); + stmtCtx.finalizeAndReset(); + mlir::Value maskCondCast = + builder->createConvert(loc, builder->getI1Type(), maskCond); + auto ifOp = builder->create(loc, maskCondCast, + /*withElseRegion=*/false); + builder->setInsertionPointToStart(&ifOp.getThenRegion().front()); + } + } + + IncrementLoopInfo &innermostInfo = incrementLoopNestInfo.back(); + + if (innermostInfo.hasLocalitySpecs()) + handleLocalitySpecs(innermostInfo); + + addLoopAnnotationAttr(innermostInfo, dirs); + } } /// Generate FIR to end a structured or unstructured increment loop nest. @@ -2470,29 +2554,31 @@ class FirConverter : public Fortran::lower::AbstractConverter { it != rend; ++it) { IncrementLoopInfo &info = *it; if (info.isStructured()) { - // End fir.do_loop. - if (info.isUnordered) { - builder->setInsertionPointAfter(info.doLoop); + // End fir.do_concurrent.loop. + if (info.isConcurrent) { + builder->setInsertionPointAfter(info.loopOp->getParentOp()); continue; } + + // End fir.do_loop. // Decrement tripVariable. 
- builder->setInsertionPointToEnd(info.doLoop.getBody()); + auto doLoopOp = mlir::cast(info.loopOp); + builder->setInsertionPointToEnd(doLoopOp.getBody()); llvm::SmallVector results; results.push_back(builder->create( - loc, info.doLoop.getInductionVar(), info.doLoop.getStep(), - iofAttr)); + loc, doLoopOp.getInductionVar(), doLoopOp.getStep(), iofAttr)); // Step loopVariable to help optimizations such as vectorization. // Induction variable elimination will clean up as necessary. mlir::Value step = builder->createConvert( - loc, info.getLoopVariableType(), info.doLoop.getStep()); + loc, info.getLoopVariableType(), doLoopOp.getStep()); mlir::Value loopVar = builder->create(loc, info.loopVariable); results.push_back( builder->create(loc, loopVar, step, iofAttr)); builder->create(loc, results); - builder->setInsertionPointAfter(info.doLoop); + builder->setInsertionPointAfter(doLoopOp); // The loop control variable may be used after the loop. - builder->create(loc, info.doLoop.getResult(1), + builder->create(loc, doLoopOp.getResult(1), info.loopVariable); continue; } diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp index b88454c45da85..bf130d592bf29 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.cpp @@ -53,6 +53,15 @@ DataSharingProcessor::DataSharingProcessor( }); } +DataSharingProcessor::DataSharingProcessor(lower::AbstractConverter &converter, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + bool useDelayedPrivatization, + lower::SymMap &symTable) + : DataSharingProcessor(converter, semaCtx, {}, eval, + /*shouldCollectPreDeterminedSymbols=*/false, + useDelayedPrivatization, symTable) {} + void DataSharingProcessor::processStep1( mlir::omp::PrivateClauseOps *clauseOps) { collectSymbolsForPrivatization(); @@ -172,7 +181,8 @@ void DataSharingProcessor::cloneSymbol(const semantics::Symbol *sym) { void 
DataSharingProcessor::copyFirstPrivateSymbol( const semantics::Symbol *sym, mlir::OpBuilder::InsertPoint *copyAssignIP) { - if (sym->test(semantics::Symbol::Flag::OmpFirstPrivate)) + if (sym->test(semantics::Symbol::Flag::OmpFirstPrivate) || + sym->test(semantics::Symbol::Flag::LocalityLocalInit)) converter.copyHostAssociateVar(*sym, copyAssignIP); } @@ -504,22 +514,29 @@ void DataSharingProcessor::copyLastPrivatize(mlir::Operation *op) { } } -void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, +void DataSharingProcessor::doPrivatize(const semantics::Symbol *symToPrivatize, mlir::omp::PrivateClauseOps *clauseOps) { if (!useDelayedPrivatization) { - cloneSymbol(sym); - copyFirstPrivateSymbol(sym); + cloneSymbol(symToPrivatize); + copyFirstPrivateSymbol(symToPrivatize); return; } - lower::SymbolBox hsb = converter.lookupOneLevelUpSymbol(*sym); + const semantics::Symbol *sym = symToPrivatize->HasLocalLocality() + ? &symToPrivatize->GetUltimate() + : symToPrivatize; + lower::SymbolBox hsb = symToPrivatize->HasLocalLocality() + ? 
converter.shallowLookupSymbol(*sym) + : converter.lookupOneLevelUpSymbol(*sym); assert(hsb && "Host symbol box not found"); hlfir::Entity entity{hsb.getAddr()}; bool cannotHaveNonDefaultLowerBounds = !entity.mayHaveNonDefaultLowerBounds(); mlir::Location symLoc = hsb.getAddr().getLoc(); std::string privatizerName = sym->name().ToString() + ".privatizer"; - bool isFirstPrivate = sym->test(semantics::Symbol::Flag::OmpFirstPrivate); + bool isFirstPrivate = + symToPrivatize->test(semantics::Symbol::Flag::OmpFirstPrivate) || + symToPrivatize->test(semantics::Symbol::Flag::LocalityLocalInit); mlir::Value privVal = hsb.getAddr(); mlir::Type allocType = privVal.getType(); @@ -613,27 +630,30 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, ©Region, /*insertPt=*/{}, {argType, argType}, {symLoc, symLoc}); firOpBuilder.setInsertionPointToEnd(copyEntryBlock); - auto addSymbol = [&](unsigned argIdx, bool force = false) { + auto addSymbol = [&](unsigned argIdx, const semantics::Symbol *symToMap, + bool force = false) { symExV.match( [&](const fir::MutableBoxValue &box) { symTable.addSymbol( - *sym, fir::substBase(box, copyRegion.getArgument(argIdx)), - force); + *symToMap, + fir::substBase(box, copyRegion.getArgument(argIdx)), force); }, [&](const auto &box) { - symTable.addSymbol(*sym, copyRegion.getArgument(argIdx), force); + symTable.addSymbol(*symToMap, copyRegion.getArgument(argIdx), + force); }); }; - addSymbol(0, true); + addSymbol(0, sym, true); lower::SymMapScope innerScope(symTable); - addSymbol(1); + addSymbol(1, symToPrivatize); auto ip = firOpBuilder.saveInsertionPoint(); - copyFirstPrivateSymbol(sym, &ip); + copyFirstPrivateSymbol(symToPrivatize, &ip); firOpBuilder.create( - hsb.getAddr().getLoc(), symTable.shallowLookupSymbol(*sym).getAddr()); + hsb.getAddr().getLoc(), + symTable.shallowLookupSymbol(*symToPrivatize).getAddr()); } return result; @@ -645,6 +665,9 @@ void DataSharingProcessor::doPrivatize(const semantics::Symbol *sym, } 
symToPrivatizer[sym] = privatizerOp; + + if (symToPrivatize->HasLocalLocality()) + allPrivatizedSymbols.insert(symToPrivatize); } } // namespace omp diff --git a/flang/lib/Lower/OpenMP/DataSharingProcessor.h b/flang/lib/Lower/OpenMP/DataSharingProcessor.h index 54a42fd199831..f5fef9f6dfe85 100644 --- a/flang/lib/Lower/OpenMP/DataSharingProcessor.h +++ b/flang/lib/Lower/OpenMP/DataSharingProcessor.h @@ -105,8 +105,6 @@ class DataSharingProcessor { void collectImplicitSymbols(); void collectPreDeterminedSymbols(); void privatize(mlir::omp::PrivateClauseOps *clauseOps); - void doPrivatize(const semantics::Symbol *sym, - mlir::omp::PrivateClauseOps *clauseOps); void copyLastPrivatize(mlir::Operation *op); void insertLastPrivateCompare(mlir::Operation *op); void cloneSymbol(const semantics::Symbol *sym); @@ -125,6 +123,11 @@ class DataSharingProcessor { bool shouldCollectPreDeterminedSymbols, bool useDelayedPrivatization, lower::SymMap &symTable); + DataSharingProcessor(lower::AbstractConverter &converter, + semantics::SemanticsContext &semaCtx, + lower::pft::Evaluation &eval, + bool useDelayedPrivatization, lower::SymMap &symTable); + // Privatisation is split into two steps. // Step1 performs cloning of all privatisation clauses and copying for // firstprivates. Step1 is performed at the place where process/processStep1 @@ -151,6 +154,9 @@ class DataSharingProcessor { ? 
allPrivatizedSymbols.getArrayRef() : llvm::ArrayRef(); } + + void doPrivatize(const semantics::Symbol *sym, + mlir::omp::PrivateClauseOps *clauseOps); }; } // namespace omp diff --git a/flang/lib/Optimizer/Builder/FIRBuilder.cpp b/flang/lib/Optimizer/Builder/FIRBuilder.cpp index 3cf9b5ae72d9e..d35367d7657cf 100644 --- a/flang/lib/Optimizer/Builder/FIRBuilder.cpp +++ b/flang/lib/Optimizer/Builder/FIRBuilder.cpp @@ -280,6 +280,9 @@ mlir::Block *fir::FirOpBuilder::getAllocaBlock() { if (auto cufKernelOp = getRegion().getParentOfType()) return &cufKernelOp.getRegion().front(); + if (auto doConcurentOp = getRegion().getParentOfType()) + return doConcurentOp.getBody(); + return getEntryBlock(); } diff --git a/flang/lib/Optimizer/Dialect/FIROps.cpp b/flang/lib/Optimizer/Dialect/FIROps.cpp index 21cedb1030896..603e3ff5cdbfd 100644 --- a/flang/lib/Optimizer/Dialect/FIROps.cpp +++ b/flang/lib/Optimizer/Dialect/FIROps.cpp @@ -4886,21 +4886,25 @@ mlir::ParseResult fir::DoConcurrentLoopOp::parse(mlir::OpAsmParser &parser, mlir::OperationState &result) { auto &builder = parser.getBuilder(); // Parse an opening `(` followed by induction variables followed by `)` - llvm::SmallVector ivs; - if (parser.parseArgumentList(ivs, mlir::OpAsmParser::Delimiter::Paren)) + llvm::SmallVector regionArgs; + + if (parser.parseArgumentList(regionArgs, mlir::OpAsmParser::Delimiter::Paren)) return mlir::failure(); + llvm::SmallVector argTypes(regionArgs.size(), + builder.getIndexType()); + // Parse loop bounds. 
llvm::SmallVector lower; if (parser.parseEqual() || - parser.parseOperandList(lower, ivs.size(), + parser.parseOperandList(lower, regionArgs.size(), mlir::OpAsmParser::Delimiter::Paren) || parser.resolveOperands(lower, builder.getIndexType(), result.operands)) return mlir::failure(); llvm::SmallVector upper; if (parser.parseKeyword("to") || - parser.parseOperandList(upper, ivs.size(), + parser.parseOperandList(upper, regionArgs.size(), mlir::OpAsmParser::Delimiter::Paren) || parser.resolveOperands(upper, builder.getIndexType(), result.operands)) return mlir::failure(); @@ -4908,7 +4912,7 @@ mlir::ParseResult fir::DoConcurrentLoopOp::parse(mlir::OpAsmParser &parser, // Parse step values. llvm::SmallVector steps; if (parser.parseKeyword("step") || - parser.parseOperandList(steps, ivs.size(), + parser.parseOperandList(steps, regionArgs.size(), mlir::OpAsmParser::Delimiter::Paren) || parser.resolveOperands(steps, builder.getIndexType(), result.operands)) return mlir::failure(); @@ -4939,12 +4943,55 @@ mlir::ParseResult fir::DoConcurrentLoopOp::parse(mlir::OpAsmParser &parser, builder.getArrayAttr(arrayAttr)); } - // Now parse the body. 
- mlir::Region *body = result.addRegion(); - for (auto &iv : ivs) - iv.type = builder.getIndexType(); - if (parser.parseRegion(*body, ivs)) - return mlir::failure(); + llvm::SmallVector privateOperands; + if (succeeded(parser.parseOptionalKeyword("private"))) { + std::size_t oldArgTypesSize = argTypes.size(); + if (failed(parser.parseLParen())) + return mlir::failure(); + + llvm::SmallVector privateSymbolVec; + if (failed(parser.parseCommaSeparatedList([&]() { + if (failed(parser.parseAttribute(privateSymbolVec.emplace_back()))) + return mlir::failure(); + + if (parser.parseOperand(privateOperands.emplace_back()) || + parser.parseArrow() || + parser.parseArgument(regionArgs.emplace_back())) + return mlir::failure(); + + return mlir::success(); + }))) + return mlir::failure(); + + if (failed(parser.parseColon())) + return mlir::failure(); + + if (failed(parser.parseCommaSeparatedList([&]() { + if (failed(parser.parseType(argTypes.emplace_back()))) + return mlir::failure(); + + return mlir::success(); + }))) + return mlir::failure(); + + if (regionArgs.size() != argTypes.size()) + return parser.emitError(parser.getNameLoc(), + "mismatch in number of private arg and types"); + + if (failed(parser.parseRParen())) + return mlir::failure(); + + for (auto operandType : llvm::zip_equal( + privateOperands, llvm::drop_begin(argTypes, oldArgTypesSize))) + if (parser.resolveOperand(std::get<0>(operandType), + std::get<1>(operandType), result.operands)) + return mlir::failure(); + + llvm::SmallVector symbolAttrs(privateSymbolVec.begin(), + privateSymbolVec.end()); + result.addAttribute(getPrivateSymsAttrName(result.name), + builder.getArrayAttr(symbolAttrs)); + } // Set `operandSegmentSizes` attribute. 
result.addAttribute(DoConcurrentLoopOp::getOperandSegmentSizeAttr(), @@ -4952,7 +4999,16 @@ mlir::ParseResult fir::DoConcurrentLoopOp::parse(mlir::OpAsmParser &parser, {static_cast(lower.size()), static_cast(upper.size()), static_cast(steps.size()), - static_cast(reduceOperands.size())})); + static_cast(reduceOperands.size()), + static_cast(privateOperands.size())})); + + // Now parse the body. + for (auto [arg, type] : llvm::zip_equal(regionArgs, argTypes)) + arg.type = type; + + mlir::Region *body = result.addRegion(); + if (parser.parseRegion(*body, regionArgs)) + return mlir::failure(); // Parse attributes. if (parser.parseOptionalAttrDict(result.attributes)) @@ -4962,8 +5018,9 @@ mlir::ParseResult fir::DoConcurrentLoopOp::parse(mlir::OpAsmParser &parser, } void fir::DoConcurrentLoopOp::print(mlir::OpAsmPrinter &p) { - p << " (" << getBody()->getArguments() << ") = (" << getLowerBound() - << ") to (" << getUpperBound() << ") step (" << getStep() << ")"; + p << " (" << getBody()->getArguments().slice(0, getNumInductionVars()) + << ") = (" << getLowerBound() << ") to (" << getUpperBound() << ") step (" + << getStep() << ")"; if (!getReduceOperands().empty()) { p << " reduce("; @@ -4976,12 +5033,28 @@ void fir::DoConcurrentLoopOp::print(mlir::OpAsmPrinter &p) { p << ')'; } + if (!getPrivateVars().empty()) { + p << " private("; + llvm::interleaveComma(llvm::zip_equal(getPrivateSymsAttr(), + getPrivateVars(), + getRegionPrivateArgs()), + p, [&](auto it) { + p << std::get<0>(it) << " " << std::get<1>(it) + << " -> " << std::get<2>(it); + }); + p << " : "; + llvm::interleaveComma(getPrivateVars(), p, + [&](auto it) { p << it.getType(); }); + p << ")"; + } + p << ' '; p.printRegion(getRegion(), /*printEntryBlockArgs=*/false); p.printOptionalAttrDict( (*this)->getAttrs(), /*elidedAttrs=*/{DoConcurrentLoopOp::getOperandSegmentSizeAttr(), - DoConcurrentLoopOp::getReduceAttrsAttrName()}); + DoConcurrentLoopOp::getReduceAttrsAttrName(), + 
DoConcurrentLoopOp::getPrivateSymsAttrName()}); } llvm::SmallVector fir::DoConcurrentLoopOp::getLoopRegions() { @@ -4992,6 +5065,7 @@ llvm::LogicalResult fir::DoConcurrentLoopOp::verify() { mlir::Operation::operand_range lbValues = getLowerBound(); mlir::Operation::operand_range ubValues = getUpperBound(); mlir::Operation::operand_range stepValues = getStep(); + mlir::Operation::operand_range privateVars = getPrivateVars(); if (lbValues.empty()) return emitOpError( @@ -5005,11 +5079,13 @@ llvm::LogicalResult fir::DoConcurrentLoopOp::verify() { // Check that the body defines the same number of block arguments as the // number of tuple elements in step. mlir::Block *body = getBody(); - if (body->getNumArguments() != stepValues.size()) + unsigned numIndVarArgs = body->getNumArguments() - privateVars.size(); + + if (numIndVarArgs != stepValues.size()) return emitOpError() << "expects the same number of induction variables: " << body->getNumArguments() << " as bound and step values: " << stepValues.size(); - for (auto arg : body->getArguments()) + for (auto arg : body->getArguments().slice(0, numIndVarArgs)) if (!arg.getType().isIndex()) return emitOpError( "expects arguments for the induction variable to be of index type"); @@ -5024,7 +5100,8 @@ llvm::LogicalResult fir::DoConcurrentLoopOp::verify() { std::optional> fir::DoConcurrentLoopOp::getLoopInductionVars() { - return llvm::SmallVector{getBody()->getArguments()}; + return llvm::SmallVector{ + getBody()->getArguments().slice(0, getLowerBound().size())}; } //===----------------------------------------------------------------------===// diff --git a/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp b/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp index 6d106046b70f2..2af2cb1e533a9 100644 --- a/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp +++ b/flang/lib/Optimizer/Transforms/SimplifyFIROperations.cpp @@ -149,6 +149,20 @@ mlir::LogicalResult BoxTotalElementsConversion::matchAndRewrite( 
class DoConcurrentConversion : public mlir::OpRewritePattern { + /// Looks up from the operation from and returns the PrivateClauseOp with + /// name symbolName + /// + /// TODO Copied from OpenMPToLLVMIRTranslation.cpp, move to a shared location. + /// Maybe a static function on the `PrivateClauseOp`. + static mlir::omp::PrivateClauseOp + findPrivatizer(mlir::Operation *from, mlir::SymbolRefAttr symbolName) { + mlir::omp::PrivateClauseOp privatizer = + mlir::SymbolTable::lookupNearestSymbolFrom( + from, symbolName); + assert(privatizer && "privatizer not found in the symbol table"); + return privatizer; + } + public: using mlir::OpRewritePattern::OpRewritePattern; @@ -162,7 +176,55 @@ class DoConcurrentConversion assert(loop.getRegion().hasOneBlock()); mlir::Block &loopBlock = loop.getRegion().getBlocks().front(); - // Collect iteration variable(s) allocations do that we can move them + // Handle privatization + if (!loop.getPrivateVars().empty()) { + mlir::OpBuilder::InsertionGuard guard(rewriter); + rewriter.setInsertionPointToStart(&loop.getRegion().front()); + + std::optional privateSyms = loop.getPrivateSyms(); + + for (auto [privateVar, privateArg, privatizerSym] : + llvm::zip_equal(loop.getPrivateVars(), loop.getRegionPrivateArgs(), + *privateSyms)) { + mlir::SymbolRefAttr privatizerName = + llvm::cast(privatizerSym); + mlir::omp::PrivateClauseOp privatizer = + findPrivatizer(loop, privatizerName); + + mlir::Value localAlloc = + rewriter.create(loop.getLoc(), privatizer.getType()); + + if (privatizer.getDataSharingType() == + mlir::omp::DataSharingClauseType::FirstPrivate) { + // It is reasonable to make this assumption since, at this stage, + // control-flow ops are not converted yet. Therefore, things like `if` + // conditions will still be represented by their encapsulating `fir` + // dialect ops. 
+ assert(privatizer.getCopyRegion().hasOneBlock() && + "Expected privatizer to have a single block."); + mlir::Block *beforeLocalInit = rewriter.getInsertionBlock(); + mlir::Block *afterLocalInit = rewriter.splitBlock( + rewriter.getInsertionBlock(), rewriter.getInsertionPoint()); + rewriter.cloneRegionBefore(privatizer.getCopyRegion(), + afterLocalInit); + mlir::Block *copyRegionBody = beforeLocalInit->getNextNode(); + + rewriter.eraseOp(copyRegionBody->getTerminator()); + rewriter.mergeBlocks(afterLocalInit, copyRegionBody); + rewriter.mergeBlocks(copyRegionBody, beforeLocalInit, + {privateVar, privateArg}); + } + + rewriter.replaceAllUsesWith(privateArg, localAlloc); + } + + loop.getRegion().front().eraseArguments(loop.getNumInductionVars(), + loop.getNumPrivateOperands()); + loop.getPrivateVarsMutable().clear(); + loop.setPrivateSymsAttr(nullptr); + } + + // Collect iteration variable(s) allocations so that we can move them // outside the `fir.do_concurrent` wrapper. llvm::SmallVector opsToMove; for (mlir::Operation &op : llvm::drop_end(wrapperBlock)) diff --git a/flang/test/Fir/do_concurrent.fir b/flang/test/Fir/do_concurrent.fir index 8e80ffb9c7b0b..26af718ed59e4 100644 --- a/flang/test/Fir/do_concurrent.fir +++ b/flang/test/Fir/do_concurrent.fir @@ -90,3 +90,80 @@ func.func @dc_2d_reduction(%i_lb: index, %i_ub: index, %i_st: index, // CHECK: fir.store %[[J_IV_CVT]] to %[[J]] : !fir.ref // CHECK: } // CHECK: } + + +omp.private {type = private} @local_privatizer : i32 + +omp.private {type = firstprivate} @local_init_privatizer : i32 copy { +^bb0(%arg0: !fir.ref, %arg1: !fir.ref): + %0 = fir.load %arg0 : !fir.ref + fir.store %0 to %arg1 : !fir.ref + omp.yield(%arg1 : !fir.ref) +} + +func.func @_QPdo_concurrent() { + %3 = fir.alloca i32 {bindc_name = "local_init_var", uniq_name = "_QFdo_concurrentElocal_init_var"} + %4:2 = hlfir.declare %3 {uniq_name = "_QFdo_concurrentElocal_init_var"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %5 = fir.alloca i32 {bindc_name = 
"local_var", uniq_name = "_QFdo_concurrentElocal_var"} + %6:2 = hlfir.declare %5 {uniq_name = "_QFdo_concurrentElocal_var"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %c1 = arith.constant 1 : index + %c10 = arith.constant 1 : index + fir.do_concurrent { + %9 = fir.alloca i32 {bindc_name = "i"} + %10:2 = hlfir.declare %9 {uniq_name = "_QFdo_concurrentEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) + fir.do_concurrent.loop (%arg0) = (%c1) to (%c10) step (%c1) private(@local_privatizer %6#0 -> %arg1, @local_init_privatizer %4#0 -> %arg2 : !fir.ref, !fir.ref) { + %11 = fir.convert %arg0 : (index) -> i32 + fir.store %11 to %10#0 : !fir.ref + %13:2 = hlfir.declare %arg1 {uniq_name = "_QFdo_concurrentElocal_var"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %15:2 = hlfir.declare %arg2 {uniq_name = "_QFdo_concurrentElocal_init_var"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %17 = fir.load %10#0 : !fir.ref + %c5_i32 = arith.constant 5 : i32 + %18 = arith.cmpi slt, %17, %c5_i32 : i32 + fir.if %18 { + %c42_i32 = arith.constant 42 : i32 + hlfir.assign %c42_i32 to %13#0 : i32, !fir.ref + } else { + %c84_i32 = arith.constant 84 : i32 + hlfir.assign %c84_i32 to %15#0 : i32, !fir.ref + } + } + } + return +} + +// CHECK: omp.private {type = private} @[[LOCAL_PRIV_SYM:local_privatizer]] : i32 + +// CHECK: omp.private {type = firstprivate} @[[LOCAL_INIT_PRIV_SYM:local_init_privatizer]] : i32 + +// CHECK-LABEL: func.func @_QPdo_concurrent() { +// CHECK: %[[LOC_INIT_ALLOC:.*]] = fir.alloca i32 {bindc_name = "local_init_var", {{.*}}} +// CHECK: %[[LOC_INIT_DECL:.*]]:2 = hlfir.declare %[[LOC_INIT_ALLOC]] + +// CHECK: %[[LOC_ALLOC:.*]] = fir.alloca i32 {bindc_name = "local_var", {{.*}}} +// CHECK: %[[LOC_DECL:.*]]:2 = hlfir.declare %[[LOC_ALLOC]] + +// CHECK: %[[C1:.*]] = arith.constant 1 : index +// CHECK: %[[C10:.*]] = arith.constant 1 : index + +// CHECK: fir.do_concurrent { +// CHECK: %[[DC_I_ALLOC:.*]] = fir.alloca i32 {bindc_name = "i"} +// CHECK: %[[DC_I_DECL:.*]]:2 = hlfir.declare %[[DC_I_ALLOC]] + 
+// CHECK: fir.do_concurrent.loop (%[[IV:.*]]) = (%[[C1]]) to (%[[C10]]) step (%[[C1]]) private(@[[LOCAL_PRIV_SYM]] %[[LOC_DECL]]#0 -> %[[LOC_ARG:.*]], @[[LOCAL_INIT_PRIV_SYM]] %[[LOC_INIT_DECL]]#0 -> %[[LOC_INIT_ARG:.*]] : !fir.ref, !fir.ref) { +// CHECK: %[[IV_CVT:.*]] = fir.convert %[[IV]] : (index) -> i32 +// CHECK: fir.store %[[IV_CVT]] to %[[DC_I_DECL]]#0 : !fir.ref + +// CHECK: %[[LOC_PRIV_DECL:.*]]:2 = hlfir.declare %[[LOC_ARG]] +// CHECK: %[[LOC_INIT_PRIV_DECL:.*]]:2 = hlfir.declare %[[LOC_INIT_ARG]] + +// CHECK: fir.if %{{.*}} { +// CHECK: %[[C42:.*]] = arith.constant 42 : i32 +// CHECK: hlfir.assign %[[C42]] to %[[LOC_PRIV_DECL]]#0 : i32, !fir.ref +// CHECK: } else { +// CHECK: %[[C84:.*]] = arith.constant 84 : i32 +// CHECK: hlfir.assign %[[C84]] to %[[LOC_INIT_PRIV_DECL]]#0 : i32, !fir.ref +// CHECK: } +// CHECK: } +// CHECK: } +// CHECK: return +// CHECK: } diff --git a/flang/test/Fir/invalid.fir b/flang/test/Fir/invalid.fir index 447a6c68b4b0a..63999e5879a53 100644 --- a/flang/test/Fir/invalid.fir +++ b/flang/test/Fir/invalid.fir @@ -1198,7 +1198,7 @@ func.func @dc_0d() { func.func @dc_invalid_parent(%arg0: index, %arg1: index) { // expected-error@+1 {{'fir.do_concurrent.loop' op expects parent op 'fir.do_concurrent'}} - "fir.do_concurrent.loop"(%arg0, %arg1) <{operandSegmentSizes = array}> ({ + "fir.do_concurrent.loop"(%arg0, %arg1) <{operandSegmentSizes = array}> ({ ^bb0(%arg2: index): %tmp = "fir.alloca"() <{in_type = i32, operandSegmentSizes = array}> : () -> !fir.ref }) : (index, index) -> () @@ -1210,7 +1210,7 @@ func.func @dc_invalid_parent(%arg0: index, %arg1: index) { func.func @dc_invalid_control(%arg0: index, %arg1: index) { // expected-error@+2 {{'fir.do_concurrent.loop' op different number of tuple elements for lowerBound, upperBound or step}} fir.do_concurrent { - "fir.do_concurrent.loop"(%arg0, %arg1) <{operandSegmentSizes = array}> ({ + "fir.do_concurrent.loop"(%arg0, %arg1) <{operandSegmentSizes = array}> ({ ^bb0(%arg2: index): %tmp 
= "fir.alloca"() <{in_type = i32, operandSegmentSizes = array}> : () -> !fir.ref }) : (index, index) -> () @@ -1223,7 +1223,7 @@ func.func @dc_invalid_control(%arg0: index, %arg1: index) { func.func @dc_invalid_ind_var(%arg0: index, %arg1: index) { // expected-error@+2 {{'fir.do_concurrent.loop' op expects the same number of induction variables: 2 as bound and step values: 1}} fir.do_concurrent { - "fir.do_concurrent.loop"(%arg0, %arg1, %arg0) <{operandSegmentSizes = array}> ({ + "fir.do_concurrent.loop"(%arg0, %arg1, %arg0) <{operandSegmentSizes = array}> ({ ^bb0(%arg3: index, %arg4: index): %tmp = "fir.alloca"() <{in_type = i32, operandSegmentSizes = array}> : () -> !fir.ref }) : (index, index, index) -> () @@ -1236,7 +1236,7 @@ func.func @dc_invalid_ind_var(%arg0: index, %arg1: index) { func.func @dc_invalid_ind_var_type(%arg0: index, %arg1: index) { // expected-error@+2 {{'fir.do_concurrent.loop' op expects arguments for the induction variable to be of index type}} fir.do_concurrent { - "fir.do_concurrent.loop"(%arg0, %arg1, %arg0) <{operandSegmentSizes = array}> ({ + "fir.do_concurrent.loop"(%arg0, %arg1, %arg0) <{operandSegmentSizes = array}> ({ ^bb0(%arg3: i32): %tmp = "fir.alloca"() <{in_type = i32, operandSegmentSizes = array}> : () -> !fir.ref }) : (index, index, index) -> () @@ -1250,7 +1250,7 @@ func.func @dc_invalid_reduction(%arg0: index, %arg1: index) { %sum = fir.alloca i32 // expected-error@+2 {{'fir.do_concurrent.loop' op mismatch in number of reduction variables and reduction attributes}} fir.do_concurrent { - "fir.do_concurrent.loop"(%arg0, %arg1, %arg0, %sum) <{operandSegmentSizes = array}> ({ + "fir.do_concurrent.loop"(%arg0, %arg1, %arg0, %sum) <{operandSegmentSizes = array}> ({ ^bb0(%arg3: index): %tmp = "fir.alloca"() <{in_type = i32, operandSegmentSizes = array}> : () -> !fir.ref }) : (index, index, index, !fir.ref) -> () diff --git a/flang/test/Lower/do_concurrent.f90 b/flang/test/Lower/do_concurrent.f90 index ef93d2d6b035b..cc113f59c35e3 
100644 --- a/flang/test/Lower/do_concurrent.f90 +++ b/flang/test/Lower/do_concurrent.f90 @@ -14,6 +14,9 @@ subroutine sub1(n) implicit none integer :: n, m, i, j, k integer, dimension(n) :: a +!CHECK: %[[N_DECL:.*]]:2 = hlfir.declare %{{.*}} dummy_scope %{{.*}} {uniq_name = "_QFsub1En"} +!CHECK: %[[A_DECL:.*]]:2 = hlfir.declare %{{.*}}(%{{.*}}) {uniq_name = "_QFsub1Ea"} + !CHECK: %[[LB1:.*]] = arith.constant 1 : i32 !CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> index !CHECK: %[[UB1:.*]] = fir.load %{{.*}}#0 : !fir.ref @@ -29,10 +32,30 @@ subroutine sub1(n) !CHECK: %[[UB3:.*]] = arith.constant 10 : i32 !CHECK: %[[UB3_CVT:.*]] = fir.convert %[[UB3]] : (i32) -> index -!CHECK: fir.do_loop %{{.*}} = %[[LB1_CVT]] to %[[UB1_CVT]] step %{{.*}} unordered -!CHECK: fir.do_loop %{{.*}} = %[[LB2_CVT]] to %[[UB2_CVT]] step %{{.*}} unordered -!CHECK: fir.do_loop %{{.*}} = %[[LB3_CVT]] to %[[UB3_CVT]] step %{{.*}} unordered +!CHECK: fir.do_concurrent +!CHECK: %[[I:.*]] = fir.alloca i32 {bindc_name = "i"} +!CHECK: %[[I_DECL:.*]]:2 = hlfir.declare %[[I]] +!CHECK: %[[J:.*]] = fir.alloca i32 {bindc_name = "j"} +!CHECK: %[[J_DECL:.*]]:2 = hlfir.declare %[[J]] +!CHECK: %[[K:.*]] = fir.alloca i32 {bindc_name = "k"} +!CHECK: %[[K_DECL:.*]]:2 = hlfir.declare %[[K]] + +!CHECK: fir.do_concurrent.loop (%[[I_IV:.*]], %[[J_IV:.*]], %[[K_IV:.*]]) = +!CHECK-SAME: (%[[LB1_CVT]], %[[LB2_CVT]], %[[LB3_CVT]]) to +!CHECK-SAME: (%[[UB1_CVT]], %[[UB2_CVT]], %[[UB3_CVT]]) step +!CHECK-SAME: (%{{.*}}, %{{.*}}, %{{.*}}) { +!CHECK: %[[I_IV_CVT:.*]] = fir.convert %[[I_IV]] : (index) -> i32 +!CHECK: fir.store %[[I_IV_CVT]] to %[[I_DECL]]#0 : !fir.ref +!CHECK: %[[J_IV_CVT:.*]] = fir.convert %[[J_IV]] : (index) -> i32 +!CHECK: fir.store %[[J_IV_CVT]] to %[[J_DECL]]#0 : !fir.ref +!CHECK: %[[K_IV_CVT:.*]] = fir.convert %[[K_IV]] : (index) -> i32 +!CHECK: fir.store %[[K_IV_CVT]] to %[[K_DECL]]#0 : !fir.ref +!CHECK: %[[N_VAL:.*]] = fir.load %[[N_DECL]]#0 : !fir.ref +!CHECK: %[[I_VAL:.*]] = fir.load 
%[[I_DECL]]#0 : !fir.ref +!CHECK: %[[I_VAL_CVT:.*]] = fir.convert %[[I_VAL]] : (i32) -> i64 +!CHECK: %[[A_ELEM:.*]] = hlfir.designate %[[A_DECL]]#0 (%[[I_VAL_CVT]]) +!CHECK: hlfir.assign %[[N_VAL]] to %[[A_ELEM]] : i32, !fir.ref do concurrent(i=1:n, j=1:bar(n*m, n/m), k=5:10) a(i) = n end do @@ -45,14 +68,17 @@ subroutine sub2(n) integer, dimension(n) :: a !CHECK: %[[LB1:.*]] = arith.constant 1 : i32 !CHECK: %[[LB1_CVT:.*]] = fir.convert %[[LB1]] : (i32) -> index -!CHECK: %[[UB1:.*]] = fir.load %5#0 : !fir.ref +!CHECK: %[[UB1:.*]] = fir.load %{{.*}}#0 : !fir.ref !CHECK: %[[UB1_CVT:.*]] = fir.convert %[[UB1]] : (i32) -> index -!CHECK: fir.do_loop %{{.*}} = %[[LB1_CVT]] to %[[UB1_CVT]] step %{{.*}} unordered +!CHECK: fir.do_concurrent +!CHECK: fir.do_concurrent.loop (%{{.*}}) = (%[[LB1_CVT]]) to (%[[UB1_CVT]]) step (%{{.*}}) + !CHECK: %[[LB2:.*]] = arith.constant 1 : i32 !CHECK: %[[LB2_CVT:.*]] = fir.convert %[[LB2]] : (i32) -> index !CHECK: %[[UB2:.*]] = fir.call @_QPbar(%{{.*}}, %{{.*}}) proc_attrs fastmath : (!fir.ref, !fir.ref) -> i32 !CHECK: %[[UB2_CVT:.*]] = fir.convert %[[UB2]] : (i32) -> index -!CHECK: fir.do_loop %{{.*}} = %[[LB2_CVT]] to %[[UB2_CVT]] step %{{.*}} unordered +!CHECK: fir.do_concurrent +!CHECK: fir.do_concurrent.loop (%{{.*}}) = (%[[LB2_CVT]]) to (%[[UB2_CVT]]) step (%{{.*}}) do concurrent(i=1:n) do concurrent(j=1:bar(n*m, n/m)) a(i) = n @@ -60,7 +86,6 @@ subroutine sub2(n) end do end subroutine - !CHECK-LABEL: unstructured subroutine unstructured(inner_step) integer(4) :: i, j, inner_step diff --git a/flang/test/Lower/do_concurrent_delayed_locality.f90 b/flang/test/Lower/do_concurrent_delayed_locality.f90 new file mode 100644 index 0000000000000..494fbd7b02412 --- /dev/null +++ b/flang/test/Lower/do_concurrent_delayed_locality.f90 @@ -0,0 +1,58 @@ +! 
RUN: %flang_fc1 -emit-hlfir -mmlir --openmp-enable-delayed-privatization-staging=true -o - %s | FileCheck %s + +subroutine do_concurrent_with_locality_specs + implicit none + integer :: i, local_var, local_init_var + + do concurrent (i=1:10) local(local_var) local_init(local_init_var) + if (i < 5) then + local_var = 42 + else + local_init_var = 84 + end if + end do +end subroutine + +! CHECK-LABEL: omp.private {type = firstprivate} @_QFdo_concurrent_with_locality_specsElocal_init_var_firstprivate_i32 : i32 copy { +! CHECK: ^bb0(%[[VAL_0:.*]]: !fir.ref, %[[VAL_1:.*]]: !fir.ref): +! CHECK: %[[VAL_2:.*]] = fir.load %[[VAL_0]] : !fir.ref +! CHECK: hlfir.assign %[[VAL_2]] to %[[VAL_1]] : i32, !fir.ref +! CHECK: omp.yield(%[[VAL_1]] : !fir.ref) +! CHECK: } +! CHECK: omp.private {type = private} @_QFdo_concurrent_with_locality_specsElocal_var_private_i32 : i32 + +! CHECK-LABEL: func.func @_QPdo_concurrent_with_locality_specs() { +! CHECK: %[[VAL_0:.*]] = fir.dummy_scope : !fir.dscope +! CHECK: %[[VAL_1:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFdo_concurrent_with_locality_specsEi"} +! CHECK: %[[VAL_2:.*]]:2 = hlfir.declare %[[VAL_1]] {uniq_name = "_QFdo_concurrent_with_locality_specsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_3:.*]] = fir.alloca i32 {bindc_name = "local_init_var", uniq_name = "_QFdo_concurrent_with_locality_specsElocal_init_var"} +! CHECK: %[[VAL_4:.*]]:2 = hlfir.declare %[[VAL_3]] {uniq_name = "_QFdo_concurrent_with_locality_specsElocal_init_var"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_5:.*]] = fir.alloca i32 {bindc_name = "local_var", uniq_name = "_QFdo_concurrent_with_locality_specsElocal_var"} +! CHECK: %[[VAL_6:.*]]:2 = hlfir.declare %[[VAL_5]] {uniq_name = "_QFdo_concurrent_with_locality_specsElocal_var"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_7:.*]] = arith.constant 1 : i32 +! CHECK: %[[VAL_8:.*]] = fir.convert %[[VAL_7]] : (i32) -> index +! CHECK: %[[VAL_9:.*]] = arith.constant 10 : i32 +! 
CHECK: %[[VAL_10:.*]] = fir.convert %[[VAL_9]] : (i32) -> index +! CHECK: %[[VAL_11:.*]] = arith.constant 1 : index +! CHECK: fir.do_concurrent { +! CHECK: %[[VAL_12:.*]] = fir.alloca i32 {bindc_name = "i"} +! CHECK: %[[VAL_13:.*]]:2 = hlfir.declare %[[VAL_12]] {uniq_name = "_QFdo_concurrent_with_locality_specsEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: fir.do_concurrent.loop (%[[VAL_14:.*]]) = (%[[VAL_8]]) to (%[[VAL_10]]) step (%[[VAL_11]]) private(@_QFdo_concurrent_with_locality_specsElocal_var_private_i32 %[[VAL_6]]#0 -> %[[VAL_15:.*]], @_QFdo_concurrent_with_locality_specsElocal_init_var_firstprivate_i32 %[[VAL_4]]#0 -> %[[VAL_16:.*]] : !fir.ref, !fir.ref) { +! CHECK: %[[VAL_17:.*]] = fir.convert %[[VAL_14]] : (index) -> i32 +! CHECK: fir.store %[[VAL_17]] to %[[VAL_13]]#0 : !fir.ref +! CHECK: %[[VAL_18:.*]]:2 = hlfir.declare %[[VAL_15]] {uniq_name = "_QFdo_concurrent_with_locality_specsElocal_var"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_19:.*]]:2 = hlfir.declare %[[VAL_16]] {uniq_name = "_QFdo_concurrent_with_locality_specsElocal_init_var"} : (!fir.ref) -> (!fir.ref, !fir.ref) +! CHECK: %[[VAL_20:.*]] = fir.load %[[VAL_13]]#0 : !fir.ref +! CHECK: %[[VAL_21:.*]] = arith.constant 5 : i32 +! CHECK: %[[VAL_22:.*]] = arith.cmpi slt, %[[VAL_20]], %[[VAL_21]] : i32 +! CHECK: fir.if %[[VAL_22]] { +! CHECK: %[[VAL_23:.*]] = arith.constant 42 : i32 +! CHECK: hlfir.assign %[[VAL_23]] to %[[VAL_18]]#0 : i32, !fir.ref +! CHECK: } else { +! CHECK: %[[VAL_24:.*]] = arith.constant 84 : i32 +! CHECK: hlfir.assign %[[VAL_24]] to %[[VAL_19]]#0 : i32, !fir.ref +! CHECK: } +! CHECK: } +! CHECK: } +! CHECK: return +! CHECK: } diff --git a/flang/test/Lower/do_concurrent_local_default_init.f90 b/flang/test/Lower/do_concurrent_local_default_init.f90 index 7652e4fcd0402..207704ac1a990 100644 --- a/flang/test/Lower/do_concurrent_local_default_init.f90 +++ b/flang/test/Lower/do_concurrent_local_default_init.f90 @@ -29,7 +29,7 @@ subroutine test_default_init() ! 
CHECK-SAME: %[[VAL_0:.*]]: !fir.ref>>>> {fir.bindc_name = "p"}) { ! CHECK: %[[VAL_6:.*]] = fir.load %[[VAL_0]] : !fir.ref>>>> ! CHECK: %[[VAL_7:.*]] = fir.box_elesize %[[VAL_6]] : (!fir.box>>>) -> index -! CHECK: fir.do_loop +! CHECK: fir.do_concurrent.loop ! CHECK: %[[VAL_16:.*]] = fir.alloca !fir.box>>> {bindc_name = "p", pinned, uniq_name = "_QFtest_ptrEp"} ! CHECK: %[[VAL_17:.*]] = fir.zero_bits !fir.ptr>> ! CHECK: %[[VAL_18:.*]] = arith.constant 0 : index @@ -43,7 +43,7 @@ subroutine test_default_init() ! CHECK: } ! CHECK-LABEL: func.func @_QPtest_default_init( -! CHECK: fir.do_loop +! CHECK: fir.do_concurrent.loop ! CHECK: %[[VAL_26:.*]] = fir.alloca !fir.type<_QFtest_default_initTt{i:i32}> {bindc_name = "a", pinned, uniq_name = "_QFtest_default_initEa"} ! CHECK: %[[VAL_27:.*]] = fir.embox %[[VAL_26]] : (!fir.ref>) -> !fir.box> ! CHECK: %[[VAL_30:.*]] = fir.convert %[[VAL_27]] : (!fir.box>) -> !fir.box diff --git a/flang/test/Lower/loops.f90 b/flang/test/Lower/loops.f90 index ea65ba3e4d66d..60df27a591dc3 100644 --- a/flang/test/Lower/loops.f90 +++ b/flang/test/Lower/loops.f90 @@ -2,15 +2,6 @@ ! CHECK-LABEL: loop_test subroutine loop_test - ! CHECK: %[[VAL_2:.*]] = fir.alloca i16 {bindc_name = "i"} - ! CHECK: %[[VAL_3:.*]] = fir.alloca i16 {bindc_name = "i"} - ! CHECK: %[[VAL_4:.*]] = fir.alloca i16 {bindc_name = "i"} - ! CHECK: %[[VAL_5:.*]] = fir.alloca i8 {bindc_name = "k"} - ! CHECK: %[[VAL_6:.*]] = fir.alloca i8 {bindc_name = "j"} - ! CHECK: %[[VAL_7:.*]] = fir.alloca i8 {bindc_name = "i"} - ! CHECK: %[[VAL_8:.*]] = fir.alloca i32 {bindc_name = "k"} - ! CHECK: %[[VAL_9:.*]] = fir.alloca i32 {bindc_name = "j"} - ! CHECK: %[[VAL_10:.*]] = fir.alloca i32 {bindc_name = "i"} ! CHECK: %[[VAL_11:.*]] = fir.alloca !fir.array<5x5x5xi32> {bindc_name = "a", uniq_name = "_QFloop_testEa"} ! CHECK: %[[VAL_12:.*]] = fir.alloca i32 {bindc_name = "asum", uniq_name = "_QFloop_testEasum"} ! 
CHECK: %[[VAL_13:.*]] = fir.alloca i32 {bindc_name = "i", uniq_name = "_QFloop_testEi"} @@ -25,7 +16,7 @@ subroutine loop_test j = 200 k = 300 - ! CHECK-COUNT-3: fir.do_loop {{.*}} unordered + ! CHECK: fir.do_concurrent.loop (%{{.*}}, %{{.*}}, %{{.*}}) = {{.*}} do concurrent (i=1:5, j=1:5, k=1:5) ! shared(a) ! CHECK: fir.coordinate_of a(i,j,k) = 0 @@ -33,7 +24,7 @@ subroutine loop_test ! CHECK: fir.call @_FortranAioBeginExternalListOutput print*, 'A:', i, j, k - ! CHECK-COUNT-3: fir.do_loop {{.*}} unordered + ! CHECK: fir.do_concurrent.loop (%{{.*}}, %{{.*}}, %{{.*}}) = {{.*}} ! CHECK: fir.if do concurrent (integer(1)::i=1:5, j=1:5, k=1:5, i.ne.j .and. k.ne.3) shared(a) ! CHECK-COUNT-2: fir.coordinate_of @@ -53,7 +44,7 @@ subroutine loop_test ! CHECK: fir.call @_FortranAioBeginExternalListOutput print*, 'B:', i, j, k, '-', asum - ! CHECK: fir.do_loop {{.*}} unordered + ! CHECK: fir.do_concurrent.loop (%{{.*}}) = {{.*}} ! CHECK-COUNT-2: fir.if do concurrent (integer(2)::i=1:5, i.ne.3) if (i.eq.2 .or. i.eq.4) goto 5 ! fir.if @@ -62,7 +53,7 @@ subroutine loop_test 5 continue enddo - ! CHECK: fir.do_loop {{.*}} unordered + ! CHECK: fir.do_concurrent.loop (%{{.*}}) = {{.*}} ! CHECK-COUNT-2: fir.if do concurrent (integer(2)::i=1:5, i.ne.3) if (i.eq.2 .or. i.eq.4) then ! fir.if @@ -93,10 +84,6 @@ end subroutine loop_test ! CHECK-LABEL: c.func @_QPlis subroutine lis(n) - ! CHECK-DAG: fir.alloca i32 {bindc_name = "m"} - ! CHECK-DAG: fir.alloca i32 {bindc_name = "j"} - ! CHECK-DAG: fir.alloca i32 {bindc_name = "i"} - ! CHECK-DAG: fir.alloca i8 {bindc_name = "i"} ! CHECK-DAG: fir.alloca i32 {bindc_name = "j", uniq_name = "_QFlisEj"} ! CHECK-DAG: fir.alloca i32 {bindc_name = "k", uniq_name = "_QFlisEk"} ! CHECK-DAG: fir.alloca !fir.box>> {bindc_name = "p", uniq_name = "_QFlisEp"} @@ -117,8 +104,8 @@ subroutine lis(n) ! CHECK: } r = 0 - ! CHECK: fir.do_loop %arg1 = %{{.*}} to %{{.*}} step %{{.*}} unordered { - ! 
CHECK: fir.do_loop %arg2 = %{{.*}} to %{{.*}} step %c1{{.*}} iter_args(%arg3 = %{{.*}}) -> (index, i32) { + ! CHECK: fir.do_concurrent { + ! CHECK: fir.do_concurrent.loop (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { ! CHECK: } ! CHECK: } do concurrent (integer(kind=1)::i=n:1:-1) @@ -128,16 +115,18 @@ subroutine lis(n) enddo enddo - ! CHECK: fir.do_loop %arg1 = %{{.*}} to %{{.*}} step %c1{{.*}} unordered { - ! CHECK: fir.do_loop %arg2 = %{{.*}} to %{{.*}} step %c1{{.*}} unordered { + ! CHECK: fir.do_concurrent.loop (%{{.*}}, %{{.*}}) = (%{{.*}}, %{{.*}}) to (%{{.*}}, %{{.*}}) step (%{{.*}}, %{{.*}}) { ! CHECK: fir.if %{{.*}} { ! CHECK: %[[V_95:[0-9]+]] = fir.alloca !fir.array, %{{.*}}, %{{.*}} {bindc_name = "t", pinned, uniq_name = "_QFlisEt"} ! CHECK: %[[V_96:[0-9]+]] = fir.alloca !fir.box>> {bindc_name = "p", pinned, uniq_name = "_QFlisEp"} ! CHECK: fir.store %{{.*}} to %[[V_96]] : !fir.ref>>> ! CHECK: fir.do_loop %arg3 = %{{.*}} to %{{.*}} step %c1{{.*}} iter_args(%arg4 = %{{.*}}) -> (index, i32) { - ! CHECK: fir.do_loop %arg5 = %{{.*}} to %{{.*}} step %c1{{.*}} unordered { - ! CHECK: fir.load %[[V_96]] : !fir.ref>>> - ! CHECK: fir.convert %[[V_95]] : (!fir.ref>) -> !fir.ref> + ! CHECK: fir.do_concurrent { + ! CHECK: fir.alloca i32 {bindc_name = "m"} + ! CHECK: fir.do_concurrent.loop (%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) { + ! CHECK: fir.load %[[V_96]] : !fir.ref>>> + ! CHECK: fir.convert %[[V_95]] : (!fir.ref>) -> !fir.ref> + ! CHECK: } ! CHECK: } ! CHECK: } ! CHECK: fir.convert %[[V_95]] : (!fir.ref>) -> !fir.ref> diff --git a/flang/test/Lower/loops3.f90 b/flang/test/Lower/loops3.f90 index 78f39e1013082..84db1972cca16 100644 --- a/flang/test/Lower/loops3.f90 +++ b/flang/test/Lower/loops3.f90 @@ -12,9 +12,7 @@ subroutine loop_test ! CHECK: %[[VAL_0:.*]] = fir.alloca f32 {bindc_name = "m", uniq_name = "_QFloop_testEm"} ! CHECK: %[[VAL_1:.*]] = fir.address_of(@_QFloop_testEsum) : !fir.ref - ! 
CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered reduce(#fir.reduce_attr -> %[[VAL_1:.*]] : !fir.ref, #fir.reduce_attr -> %[[VAL_0:.*]] : !fir.ref) { - ! CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered reduce(#fir.reduce_attr -> %[[VAL_1:.*]] : !fir.ref, #fir.reduce_attr -> %[[VAL_0:.*]] : !fir.ref) { - ! CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered reduce(#fir.reduce_attr -> %[[VAL_1:.*]] : !fir.ref, #fir.reduce_attr -> %[[VAL_0:.*]] : !fir.ref) { + ! CHECK: fir.do_concurrent.loop ({{.*}}) = ({{.*}}) to ({{.*}}) step ({{.*}}) reduce(#fir.reduce_attr -> %[[VAL_1:.*]] : !fir.ref, #fir.reduce_attr -> %[[VAL_0:.*]] : !fir.ref) { do concurrent (i=1:5, j=1:5, k=1:5) local(tmp) reduce(+:sum) reduce(max:m) tmp = i + j + k sum = tmp + sum diff --git a/flang/test/Lower/nsw.f90 b/flang/test/Lower/nsw.f90 index 4ee9e5da829e6..2ec1efb2af42a 100644 --- a/flang/test/Lower/nsw.f90 +++ b/flang/test/Lower/nsw.f90 @@ -139,7 +139,6 @@ subroutine loop_params3(a,lb,ub,st) ! CHECK-LABEL: func.func @_QPloop_params3( ! CHECK: %[[VAL_4:.*]] = arith.constant 2 : i32 ! CHECK: %[[VAL_5:.*]] = arith.constant 1 : i32 -! CHECK: %[[VAL_9:.*]] = fir.declare %{{.*}}i"} : (!fir.ref) -> !fir.ref ! CHECK: %[[VAL_11:.*]] = fir.declare %{{.*}}lb"} : (!fir.ref, !fir.dscope) -> !fir.ref ! CHECK: %[[VAL_12:.*]] = fir.declare %{{.*}}ub"} : (!fir.ref, !fir.dscope) -> !fir.ref ! CHECK: %[[VAL_14:.*]] = fir.declare %{{.*}}i"} : (!fir.ref) -> !fir.ref @@ -153,4 +152,6 @@ subroutine loop_params3(a,lb,ub,st) ! CHECK: %[[VAL_31:.*]] = fir.load %[[VAL_15]] : !fir.ref ! CHECK: %[[VAL_32:.*]] = arith.muli %[[VAL_31]], %[[VAL_4]] overflow : i32 ! CHECK: %[[VAL_33:.*]] = fir.convert %[[VAL_32]] : (i32) -> index -! CHECK: fir.do_loop %[[VAL_34:.*]] = %[[VAL_28]] to %[[VAL_30]] step %[[VAL_33]] unordered { +! CHECK: fir.do_concurrent { +! CHECK: %[[VAL_9:.*]] = fir.declare %{{.*}}i"} : (!fir.ref) -> !fir.ref +! 
CHECK: fir.do_concurrent.loop (%[[VAL_34:.*]]) = (%[[VAL_28]]) to (%[[VAL_30]]) step (%[[VAL_33]]) { diff --git a/flang/test/Transforms/DoConcurrent/basic_host.f90 b/flang/test/Transforms/DoConcurrent/basic_host.f90 index 12f63031cbaee..b84d4481ac766 100644 --- a/flang/test/Transforms/DoConcurrent/basic_host.f90 +++ b/flang/test/Transforms/DoConcurrent/basic_host.f90 @@ -1,3 +1,6 @@ +! Fails until we update the pass to use the `fir.do_concurrent` op. +! XFAIL: * + ! Tests mapping of a basic `do concurrent` loop to `!$omp parallel do`. ! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=host %s -o - \ diff --git a/flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90 b/flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90 index f82696669eca6..4e13c0919589a 100644 --- a/flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90 +++ b/flang/test/Transforms/DoConcurrent/locally_destroyed_temp.f90 @@ -1,3 +1,6 @@ +! Fails until we update the pass to use the `fir.do_concurrent` op. +! XFAIL: * + ! Tests that "loop-local values" are properly handled by localizing them to the ! body of the loop nest. See `collectLoopLocalValues` and `localizeLoopLocalValue` ! for a definition of "loop-local values" and how they are handled. diff --git a/flang/test/Transforms/DoConcurrent/loop_nest_test.f90 b/flang/test/Transforms/DoConcurrent/loop_nest_test.f90 index 32bed61fe69e4..adc4a488d1ec9 100644 --- a/flang/test/Transforms/DoConcurrent/loop_nest_test.f90 +++ b/flang/test/Transforms/DoConcurrent/loop_nest_test.f90 @@ -1,3 +1,6 @@ +! Fails until we update the pass to use the `fir.do_concurrent` op. +! XFAIL: * + ! Tests loop-nest detection algorithm for do-concurrent mapping. ! 
REQUIRES: asserts diff --git a/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 b/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 index d0210726de83e..26800678d381c 100644 --- a/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 +++ b/flang/test/Transforms/DoConcurrent/multiple_iteration_ranges.f90 @@ -1,3 +1,6 @@ +! Fails until we update the pass to use the `fir.do_concurrent` op. +! XFAIL: * + ! Tests mapping of a `do concurrent` loop with multiple iteration ranges. ! RUN: split-file %s %t diff --git a/flang/test/Transforms/DoConcurrent/non_const_bounds.f90 b/flang/test/Transforms/DoConcurrent/non_const_bounds.f90 index cd1bd4f98a3f5..23a3aae976c07 100644 --- a/flang/test/Transforms/DoConcurrent/non_const_bounds.f90 +++ b/flang/test/Transforms/DoConcurrent/non_const_bounds.f90 @@ -1,3 +1,6 @@ +! Fails until we update the pass to use the `fir.do_concurrent` op. +! XFAIL: * + ! RUN: %flang_fc1 -emit-hlfir -fopenmp -fdo-concurrent-to-openmp=host %s -o - \ ! RUN: | FileCheck %s diff --git a/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 b/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 index 184fdfe00d397..d1c02101318ab 100644 --- a/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 +++ b/flang/test/Transforms/DoConcurrent/not_perfectly_nested.f90 @@ -1,3 +1,6 @@ +! Fails until we update the pass to use the `fir.do_concurrent` op. +! XFAIL: * + ! Tests that if `do concurrent` is not perfectly nested in its parent loop, that ! we skip converting the not-perfectly nested `do concurrent` loop. 
diff --git a/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir b/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir index d2ceafdda5b22..5ea5a049312a8 100644 --- a/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir +++ b/flang/test/Transforms/do_concurrent-to-do_loop-unodered.fir @@ -121,3 +121,65 @@ func.func @dc_2d_reduction(%i_lb: index, %i_ub: index, %i_st: index, // CHECK: } // CHECK: return // CHECK: } + +// ----- + +omp.private {type = private} @local_privatizer : i32 + +omp.private {type = firstprivate} @local_init_privatizer : i32 copy { +^bb0(%arg0: !fir.ref, %arg1: !fir.ref): + %0 = fir.load %arg0 : !fir.ref + fir.store %0 to %arg1 : !fir.ref + omp.yield(%arg1 : !fir.ref) +} + +func.func @do_concurrent_locality_specs() { + %3 = fir.alloca i32 {bindc_name = "local_init_var", uniq_name = "_QFdo_concurrentElocal_init_var"} + %4:2 = hlfir.declare %3 {uniq_name = "_QFdo_concurrentElocal_init_var"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %5 = fir.alloca i32 {bindc_name = "local_var", uniq_name = "_QFdo_concurrentElocal_var"} + %6:2 = hlfir.declare %5 {uniq_name = "_QFdo_concurrentElocal_var"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %c1 = arith.constant 1 : index + %c10 = arith.constant 1 : index + fir.do_concurrent { + %9 = fir.alloca i32 {bindc_name = "i"} + %10:2 = hlfir.declare %9 {uniq_name = "_QFdo_concurrentEi"} : (!fir.ref) -> (!fir.ref, !fir.ref) + fir.do_concurrent.loop (%arg0) = (%c1) to (%c10) step (%c1) private(@local_privatizer %6#0 -> %arg1, @local_init_privatizer %4#0 -> %arg2 : !fir.ref, !fir.ref) { + %11 = fir.convert %arg0 : (index) -> i32 + fir.store %11 to %10#0 : !fir.ref + %13:2 = hlfir.declare %arg1 {uniq_name = "_QFdo_concurrentElocal_var"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %15:2 = hlfir.declare %arg2 {uniq_name = "_QFdo_concurrentElocal_init_var"} : (!fir.ref) -> (!fir.ref, !fir.ref) + %17 = fir.load %10#0 : !fir.ref + %c5_i32 = arith.constant 5 : i32 + %18 = arith.cmpi slt, %17, %c5_i32 : i32 + fir.if 
%18 { + %c42_i32 = arith.constant 42 : i32 + hlfir.assign %c42_i32 to %13#0 : i32, !fir.ref + } else { + %c84_i32 = arith.constant 84 : i32 + hlfir.assign %c84_i32 to %15#0 : i32, !fir.ref + } + } + } + return +} + +// CHECK-LABEL: func.func @do_concurrent_locality_specs() { +// CHECK: %[[LOC_INIT_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "{{.*}}Elocal_init_var"} +// CHECK: fir.do_loop %{{.*}} = %{{.*}} to %{{.*}} step %{{.*}} unordered { +// Verify privatization of the `local` var. +// CHECK: %[[PRIV_LOC_ALLOC:.*]] = fir.alloca i32 + +// Verify privatization of the `local_init` var. +// CHECK: %[[PRIV_LOC_INIT_ALLOC:.*]] = fir.alloca i32 +// CHECK: %[[LOC_INIT_VAL:.*]] = fir.load %[[LOC_INIT_DECL]]#0 : !fir.ref +// CHECK: fir.store %[[LOC_INIT_VAL]] to %[[PRIV_LOC_INIT_ALLOC]] : !fir.ref + +// CHECK: %[[VAL_15:.*]]:2 = hlfir.declare %[[PRIV_LOC_ALLOC]] +// CHECK: %[[VAL_16:.*]]:2 = hlfir.declare %[[PRIV_LOC_INIT_ALLOC]] + +// CHECK: hlfir.assign %{{.*}} to %[[VAL_15]]#0 : i32, !fir.ref +// CHECK: hlfir.assign %{{.*}} to %[[VAL_16]]#0 : i32, !fir.ref +// CHECK: } +// CHECK: return +// CHECK: } +