Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 12 additions & 0 deletions flang/include/flang/Optimizer/Dialect/FIROps.td
Original file line number Diff line number Diff line change
Expand Up @@ -3894,6 +3894,18 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
return getReduceVars().size();
}

/// Offset of the first induction variable. Induction variables are the
/// leading entries (see `getInductionVars`, which slices from index 0), so
/// the offset is always zero.
unsigned getInductionVarsStart() {
  constexpr unsigned firstInductionVar = 0;
  return firstInductionVar;
}

/// Offset of the first `local` entry: the locals begin right after the
/// induction variables, so the offset equals the induction variable count.
unsigned getLocalOperandsStart() {
  const unsigned inductionVarCount = getNumInductionVars();
  return inductionVarCount;
}

/// Offset of the first `reduce` entry: the reduce entries begin right after
/// the `local` entries.
unsigned getReduceOperandsStart() {
  const unsigned localsBegin = getLocalOperandsStart();
  const unsigned localsCount = getNumLocalOperands();
  return localsBegin + localsCount;
}

/// Returns the leading block arguments of the loop body, i.e. the ones that
/// hold the induction variables.
mlir::Block::BlockArgListType getInductionVars() {
  mlir::Block::BlockArgListType bodyArgs = getBody()->getArguments();
  return bodyArgs.slice(0, getNumInductionVars());
}
Expand Down
200 changes: 122 additions & 78 deletions flang/lib/Optimizer/OpenMP/DoConcurrentConversion.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -138,6 +138,9 @@ void collectLoopLiveIns(fir::DoConcurrentLoopOp loop,

liveIns.push_back(operand->get());
});

for (mlir::Value local : loop.getLocalVars())
liveIns.push_back(local);
}

/// Collects values that are local to a loop: "loop-local values". A loop-local
Expand Down Expand Up @@ -298,8 +301,7 @@ class DoConcurrentConversion
.getIsTargetDevice();

mlir::omp::TargetOperands targetClauseOps;
genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper,
loopNestClauseOps,
genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, loopNestClauseOps,
isTargetDevice ? nullptr : &targetClauseOps);

LiveInShapeInfoMap liveInShapeInfoMap;
Expand All @@ -321,14 +323,13 @@ class DoConcurrentConversion
}

mlir::omp::ParallelOp parallelOp =
genParallelOp(doLoop.getLoc(), rewriter, ivInfos, mapper);
genParallelOp(rewriter, loop, ivInfos, mapper);

// Only set as composite when part of `distribute parallel do`.
parallelOp.setComposite(mapToDevice);

if (!mapToDevice)
genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, mapper,
loopNestClauseOps);
genLoopNestClauseOps(doLoop.getLoc(), rewriter, loop, loopNestClauseOps);

for (mlir::Value local : locals)
looputils::localizeLoopLocalValue(local, parallelOp.getRegion(),
Expand All @@ -337,10 +338,38 @@ class DoConcurrentConversion
if (mapToDevice)
genDistributeOp(doLoop.getLoc(), rewriter).setComposite(/*val=*/true);

mlir::omp::LoopNestOp ompLoopNest =
auto [loopNestOp, wsLoopOp] =
genWsLoopOp(rewriter, loop, mapper, loopNestClauseOps,
/*isComposite=*/mapToDevice);

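// `local` and `reduce` region arguments are cloned from the `do concurrent`
// loop onto the loopnest op when the loop's region is cloned. Instead, these
// region arguments should belong to the op that carries the corresponding
// clause: the parallel op (for `local` when mapping to the device) or the
// workshare loop. Redirect their uses accordingly.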
if (mapToDevice) {
for (auto [parallelArg, loopNestArg] : llvm::zip_equal(
parallelOp.getRegion().getArguments(),
loopNestOp.getRegion().getArguments().slice(
loop.getLocalOperandsStart(), loop.getNumLocalOperands())))
rewriter.replaceAllUsesWith(loopNestArg, parallelArg);

for (auto [wsloopArg, loopNestArg] : llvm::zip_equal(
wsLoopOp.getRegion().getArguments(),
loopNestOp.getRegion().getArguments().slice(
loop.getReduceOperandsStart(), loop.getNumReduceOperands())))
rewriter.replaceAllUsesWith(loopNestArg, wsloopArg);
} else {
for (auto [wsloopArg, loopNestArg] :
llvm::zip_equal(wsLoopOp.getRegion().getArguments(),
loopNestOp.getRegion().getArguments().drop_front(
loopNestClauseOps.loopLowerBounds.size())))
rewriter.replaceAllUsesWith(loopNestArg, wsloopArg);
}

for (unsigned i = 0;
i < loop.getLocalVars().size() + loop.getReduceVars().size(); ++i)
loopNestOp.getRegion().eraseArgument(
loopNestClauseOps.loopLowerBounds.size());

rewriter.setInsertionPoint(doLoop);
fir::FirOpBuilder builder(
rewriter,
Expand All @@ -361,7 +390,7 @@ class DoConcurrentConversion
// Mark `unordered` loops that are not perfectly nested to be skipped from
// the legality check of the `ConversionTarget` since we are not interested
// in mapping them to OpenMP.
ompLoopNest->walk([&](fir::DoConcurrentOp doLoop) {
loopNestOp->walk([&](fir::DoConcurrentOp doLoop) {
concurrentLoopsToSkip.insert(doLoop);
});

Expand All @@ -372,11 +401,21 @@ class DoConcurrentConversion

private:
mlir::omp::ParallelOp
genParallelOp(mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
genParallelOp(mlir::ConversionPatternRewriter &rewriter,
fir::DoConcurrentLoopOp loop,
looputils::InductionVariableInfos &ivInfos,
mlir::IRMapping &mapper) const {
auto parallelOp = mlir::omp::ParallelOp::create(rewriter, loc);
rewriter.createBlock(&parallelOp.getRegion());
mlir::omp::ParallelOperands parallelOps;

if (mapToDevice)
genPrivatizers(rewriter, mapper, loop, parallelOps);

mlir::Location loc = loop.getLoc();
auto parallelOp = mlir::omp::ParallelOp::create(rewriter, loc, parallelOps);
Fortran::common::openmp::EntryBlockArgs parallelArgs;
parallelArgs.priv.vars = parallelOps.privateVars;
Fortran::common::openmp::genEntryBlock(rewriter, parallelArgs,
parallelOp.getRegion());
rewriter.setInsertionPoint(mlir::omp::TerminatorOp::create(rewriter, loc));

genLoopNestIndVarAllocs(rewriter, ivInfos, mapper);
Expand Down Expand Up @@ -413,7 +452,7 @@ class DoConcurrentConversion

void genLoopNestClauseOps(
mlir::Location loc, mlir::ConversionPatternRewriter &rewriter,
fir::DoConcurrentLoopOp loop, mlir::IRMapping &mapper,
fir::DoConcurrentLoopOp loop,
mlir::omp::LoopNestOperands &loopNestClauseOps,
mlir::omp::TargetOperands *targetClauseOps = nullptr) const {
assert(loopNestClauseOps.loopLowerBounds.empty() &&
Expand Down Expand Up @@ -444,59 +483,14 @@ class DoConcurrentConversion
loopNestClauseOps.loopInclusive = rewriter.getUnitAttr();
}

mlir::omp::LoopNestOp
std::pair<mlir::omp::LoopNestOp, mlir::omp::WsloopOp>
genWsLoopOp(mlir::ConversionPatternRewriter &rewriter,
fir::DoConcurrentLoopOp loop, mlir::IRMapping &mapper,
const mlir::omp::LoopNestOperands &clauseOps,
bool isComposite) const {
mlir::omp::WsloopOperands wsloopClauseOps;

auto cloneFIRRegionToOMP = [&rewriter](mlir::Region &firRegion,
mlir::Region &ompRegion) {
if (!firRegion.empty()) {
rewriter.cloneRegionBefore(firRegion, ompRegion, ompRegion.begin());
auto firYield =
mlir::cast<fir::YieldOp>(ompRegion.back().getTerminator());
rewriter.setInsertionPoint(firYield);
mlir::omp::YieldOp::create(rewriter, firYield.getLoc(),
firYield.getOperands());
rewriter.eraseOp(firYield);
}
};

// For `local` (and `local_init`) operands, emit corresponding `private`
// clauses and attach these clauses to the workshare loop.
if (!loop.getLocalVars().empty())
for (auto [op, sym, arg] : llvm::zip_equal(
loop.getLocalVars(),
loop.getLocalSymsAttr().getAsRange<mlir::SymbolRefAttr>(),
loop.getRegionLocalArgs())) {
auto localizer = moduleSymbolTable.lookup<fir::LocalitySpecifierOp>(
sym.getLeafReference());
if (localizer.getLocalitySpecifierType() ==
fir::LocalitySpecifierType::LocalInit)
TODO(localizer.getLoc(),
"local_init conversion is not supported yet");

mlir::OpBuilder::InsertionGuard guard(rewriter);
rewriter.setInsertionPointAfter(localizer);

auto privatizer = mlir::omp::PrivateClauseOp::create(
rewriter, localizer.getLoc(), sym.getLeafReference().str() + ".omp",
localizer.getTypeAttr().getValue(),
mlir::omp::DataSharingClauseType::Private);

cloneFIRRegionToOMP(localizer.getInitRegion(),
privatizer.getInitRegion());
cloneFIRRegionToOMP(localizer.getDeallocRegion(),
privatizer.getDeallocRegion());

moduleSymbolTable.insert(privatizer);

wsloopClauseOps.privateVars.push_back(op);
wsloopClauseOps.privateSyms.push_back(
mlir::SymbolRefAttr::get(privatizer));
}
if (!mapToDevice)
genPrivatizers(rewriter, mapper, loop, wsloopClauseOps);

if (!loop.getReduceVars().empty()) {
for (auto [op, byRef, sym, arg] : llvm::zip_equal(
Expand All @@ -519,15 +513,15 @@ class DoConcurrentConversion
rewriter, firReducer.getLoc(), ompReducerName,
firReducer.getTypeAttr().getValue());

cloneFIRRegionToOMP(firReducer.getAllocRegion(),
cloneFIRRegionToOMP(rewriter, firReducer.getAllocRegion(),
ompReducer.getAllocRegion());
cloneFIRRegionToOMP(firReducer.getInitializerRegion(),
cloneFIRRegionToOMP(rewriter, firReducer.getInitializerRegion(),
ompReducer.getInitializerRegion());
cloneFIRRegionToOMP(firReducer.getReductionRegion(),
cloneFIRRegionToOMP(rewriter, firReducer.getReductionRegion(),
ompReducer.getReductionRegion());
cloneFIRRegionToOMP(firReducer.getAtomicReductionRegion(),
cloneFIRRegionToOMP(rewriter, firReducer.getAtomicReductionRegion(),
ompReducer.getAtomicReductionRegion());
cloneFIRRegionToOMP(firReducer.getCleanupRegion(),
cloneFIRRegionToOMP(rewriter, firReducer.getCleanupRegion(),
ompReducer.getCleanupRegion());
moduleSymbolTable.insert(ompReducer);
}
Expand Down Expand Up @@ -559,21 +553,10 @@ class DoConcurrentConversion

rewriter.setInsertionPointToEnd(&loopNestOp.getRegion().back());
mlir::omp::YieldOp::create(rewriter, loop->getLoc());
loop->getParentOfType<mlir::ModuleOp>().print(
llvm::errs(), mlir::OpPrintingFlags().assumeVerified());

// `local` region arguments are transferred/cloned from the `do concurrent`
// loop to the loopnest op when the region is cloned above. Instead, these
// region arguments should be on the workshare loop's region.
for (auto [wsloopArg, loopNestArg] :
llvm::zip_equal(wsloopOp.getRegion().getArguments(),
loopNestOp.getRegion().getArguments().drop_front(
clauseOps.loopLowerBounds.size())))
rewriter.replaceAllUsesWith(loopNestArg, wsloopArg);

for (unsigned i = 0;
i < loop.getLocalVars().size() + loop.getReduceVars().size(); ++i)
loopNestOp.getRegion().eraseArgument(clauseOps.loopLowerBounds.size());

return loopNestOp;
return {loopNestOp, wsloopOp};
}

void genBoundsOps(fir::FirOpBuilder &builder, mlir::Value liveIn,
Expand Down Expand Up @@ -817,6 +800,67 @@ class DoConcurrentConversion
return distOp;
}

/// Clones the blocks of `firRegion` to the front of `ompRegion` and replaces
/// the `fir.yield` terminating the last block of `ompRegion` with an
/// `omp.yield` that forwards the same operands. Does nothing when `firRegion`
/// is empty.
///
/// Note: this moves the rewriter's insertion point and does not restore it.
void cloneFIRRegionToOMP(mlir::ConversionPatternRewriter &rewriter,
                         mlir::Region &firRegion,
                         mlir::Region &ompRegion) const {
  // Nothing to clone for an empty FIR region.
  if (firRegion.empty())
    return;

  rewriter.cloneRegionBefore(firRegion, ompRegion, ompRegion.begin());

  // Swap the cloned FIR terminator for its OpenMP counterpart.
  mlir::Operation *terminator = ompRegion.back().getTerminator();
  auto clonedYield = mlir::cast<fir::YieldOp>(terminator);
  rewriter.setInsertionPoint(clonedYield);
  mlir::omp::YieldOp::create(rewriter, clonedYield.getLoc(),
                             clonedYield.getOperands());
  rewriter.eraseOp(clonedYield);
}

/// Generate OpenMP privatizers by cloning the bodies of the FIR localizers
/// referenced by the loop's `local` specifiers.
///
/// \param [in] rewriter - used to drive IR generation for privatizers.
/// \param [in] mapper - value mapping from FIR to OpenMP constructs.
/// \param [in] loop - FIR loop whose localizers are converted.
///
/// \param [out] privateClauseOps - receives the private variables and the
/// symbols of the generated OpenMP privatizers.
void genPrivatizers(mlir::ConversionPatternRewriter &rewriter,
Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Can you please add an overarching comment about what the function is doing including any conditions/prereqs for calling this function.

Copy link
Member Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Done. Let me know if it could be expanded more.

Copy link
Contributor

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Thank you, looks good. Just one (Ultra) nit - Is FIR privatizer the right term or should it be FIR localizer?

mlir::IRMapping &mapper, fir::DoConcurrentLoopOp loop,
mlir::omp::PrivateClauseOps &privateClauseOps) const {
// For `local` (and `local_init`) operands, emit corresponding `private`
// clauses and collect them in `privateClauseOps`; the caller decides which
// op (the parallel op or the workshare loop) these clauses are attached to.
if (!loop.getLocalVars().empty())
for (auto [var, sym, arg] : llvm::zip_equal(
loop.getLocalVars(),
loop.getLocalSymsAttr().getAsRange<mlir::SymbolRefAttr>(),
loop.getRegionLocalArgs())) {
auto localizer = moduleSymbolTable.lookup<fir::LocalitySpecifierOp>(
sym.getLeafReference());
if (localizer.getLocalitySpecifierType() ==
fir::LocalitySpecifierType::LocalInit)
TODO(localizer.getLoc(),
"local_init conversion is not supported yet");

mlir::OpBuilder::InsertionGuard guard(rewriter);
rewriter.setInsertionPointAfter(localizer);

auto privatizer = mlir::omp::PrivateClauseOp::create(
rewriter, localizer.getLoc(), sym.getLeafReference().str() + ".omp",
localizer.getTypeAttr().getValue(),
mlir::omp::DataSharingClauseType::Private);

cloneFIRRegionToOMP(rewriter, localizer.getInitRegion(),
privatizer.getInitRegion());
cloneFIRRegionToOMP(rewriter, localizer.getDeallocRegion(),
privatizer.getDeallocRegion());

moduleSymbolTable.insert(privatizer);

privateClauseOps.privateVars.push_back(mapToDevice ? mapper.lookup(var)
: var);
privateClauseOps.privateSyms.push_back(
mlir::SymbolRefAttr::get(privatizer));
}
}

bool mapToDevice;
llvm::DenseSet<fir::DoConcurrentOp> &concurrentLoopsToSkip;
mlir::SymbolTable &moduleSymbolTable;
Expand Down
49 changes: 49 additions & 0 deletions flang/test/Transforms/DoConcurrent/local_device.mlir
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
// RUN: fir-opt --omp-do-concurrent-conversion="map-to=device" %s -o - | FileCheck %s

fir.local {type = local} @_QFfooEmy_local_private_f32 : f32

// Function under test: contains a single `do concurrent` loop that localizes
// `my_local` through the `local` specifier declared above and writes to the
// loop-local copy inside the loop body.
func.func @_QPfoo() {
%0 = fir.dummy_scope : !fir.dscope
// Original (outer) storage for `my_local`; this is the value passed to the
// `local` specifier below.
%3 = fir.alloca f32 {bindc_name = "my_local", uniq_name = "_QFfooEmy_local"}
%4:2 = hlfir.declare %3 {uniq_name = "_QFfooEmy_local"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)

// Loop bounds: 1 to 10 (step %c1).
%c1 = arith.constant 1 : index
%c10 = arith.constant 10 : index

fir.do_concurrent {
// Storage for the iteration variable `i`.
%7 = fir.alloca i32 {bindc_name = "i"}
%8:2 = hlfir.declare %7 {uniq_name = "_QFfooEi"} : (!fir.ref<i32>) -> (!fir.ref<i32>, !fir.ref<i32>)

// %arg0 is the induction variable; %arg1 is the localized copy of `my_local`.
fir.do_concurrent.loop (%arg0) = (%c1) to (%c10) step (%c1) local(@_QFfooEmy_local_private_f32 %4#0 -> %arg1 : !fir.ref<f32>) {
%9 = fir.convert %arg0 : (index) -> i32
fir.store %9 to %8#0 : !fir.ref<i32>
// Re-declare the localized copy and assign 42.0 to it.
%10:2 = hlfir.declare %arg1 {uniq_name = "_QFfooEmy_local"} : (!fir.ref<f32>) -> (!fir.ref<f32>, !fir.ref<f32>)
%cst = arith.constant 4.200000e+01 : f32
hlfir.assign %cst to %10#0 : f32, !fir.ref<f32>
}
}
return
}

// CHECK: omp.private {type = private} @[[OMP_PRIVATIZER:.*.omp]] : f32

// CHECK: %[[LOCAL_DECL:.*]]:2 = hlfir.declare %{{.*}} {uniq_name = "{{.*}}my_local"}
// CHECK: %[[LOCAL_MAP:.*]] = omp.map.info var_ptr(%[[LOCAL_DECL]]#1 : {{.*}})

// CHECK: omp.target host_eval({{.*}}) map_entries({{.*}}, %[[LOCAL_MAP]] -> %[[LOCAL_MAP_ARG:.*]] : {{.*}}) {
// CHECK: %[[LOCAL_DEV_DECL:.*]]:2 = hlfir.declare %[[LOCAL_MAP_ARG]] {uniq_name = "_QFfooEmy_local"}

// CHECK: omp.teams {
// CHECK: omp.parallel private(@[[OMP_PRIVATIZER]] %[[LOCAL_DEV_DECL]]#0 -> %[[LOCAL_PRIV_ARG:.*]] : {{.*}}) {
// CHECK: omp.distribute {
// CHECK: omp.wsloop {
// CHECK: omp.loop_nest {{.*}} {
// CHECK: %[[LOCAL_LOOP_DECL:.*]]:2 = hlfir.declare %[[LOCAL_PRIV_ARG]] {uniq_name = "_QFfooEmy_local"}
// CHECK: hlfir.assign %{{.*}} to %[[LOCAL_LOOP_DECL]]#0
// CHECK: omp.yield
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: }
// CHECK: }
Loading