Skip to content
Merged
Show file tree
Hide file tree
Changes from 2 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
243 changes: 142 additions & 101 deletions flang/lib/Lower/OpenACC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -2251,6 +2251,49 @@ static void determineDefaultLoopParMode(
}
}

/// Visit the loop control bounds of a (possibly collapsed) DO loop nest.
///
/// Invokes \p callback once per visited loop, outermost first, with the
/// `LoopControl::Bounds` of that loop and the MLIR location generated for the
/// corresponding DO construct.
///
/// \param converter        used to generate the MLIR location of each loop.
/// \param outerDoConstruct outermost DO construct of the nest; visited first.
/// \param loopsToProcess   maximum number of loops to visit.
/// \param eval             evaluation whose first nested evaluation is the
///                         starting point of the search for inner loops.
/// \param callback         invoked with the bounds and location of each loop.
///
/// The walk stops early, without error, when no further nested DO construct
/// is found before \p loopsToProcess loops have been visited.
static void visitLoopControl(
    Fortran::lower::AbstractConverter &converter,
    const Fortran::parser::DoConstruct &outerDoConstruct,
    uint64_t loopsToProcess, Fortran::lower::pft::Evaluation &eval,
    std::function<void(const Fortran::parser::LoopControl::Bounds &,
                       mlir::Location)>
        callback) {
  Fortran::lower::pft::Evaluation *crtEval = &eval.getFirstNestedEvaluation();
  const Fortran::parser::DoConstruct *currentDo = &outerDoConstruct;
  for (uint64_t i = 0; i < loopsToProcess; ++i) {
    if (i > 0) {
      // Safely locate the next inner DoConstruct within the current
      // evaluation: take the first nested evaluation that is a DO construct.
      const Fortran::parser::DoConstruct *innerDo = nullptr;
      if (crtEval && crtEval->hasNestedEvaluations()) {
        for (Fortran::lower::pft::Evaluation &child :
             crtEval->getNestedEvaluations()) {
          if (auto *stmt = child.getIf<Fortran::parser::DoConstruct>()) {
            innerDo = stmt;
            // Descend so that the next iteration searches inside this loop.
            crtEval = &child;
            break;
          }
        }
      }
      if (!innerDo)
        break; // No deeper loop; stop collecting collapsed bounds.
      currentDo = innerDo;
    }

    const auto &loopControl = currentDo->GetLoopControl();
    // Diagnose malformed input in assert-enabled builds instead of failing
    // inside the optional dereference or the variant access below.
    assert(loopControl && "Expected loop control on the loop construct");
    assert(std::holds_alternative<Fortran::parser::LoopControl::Bounds>(
               loopControl->u) &&
           "Expected bounds on the loop construct");
    mlir::Location loc =
        converter.genLocation(Fortran::parser::FindSourceLocation(*currentDo));
    callback(std::get<Fortran::parser::LoopControl::Bounds>(loopControl->u),
             loc);
  }
}

// Extract loop bounds, steps, induction variables, and privatization info
// for both DO CONCURRENT and regular do loops
static void processDoLoopBounds(
Expand All @@ -2272,7 +2315,6 @@ static void processDoLoopBounds(
llvm::SmallVector<mlir::Location> &locs, uint64_t loopsToProcess) {
assert(loopsToProcess > 0 && "expect at least one loop");
locs.push_back(currentLocation); // Location of the directive
Fortran::lower::pft::Evaluation *crtEval = &eval.getFirstNestedEvaluation();
bool isDoConcurrent = outerDoConstruct.IsDoConcurrent();

if (isDoConcurrent) {
Expand Down Expand Up @@ -2313,57 +2355,29 @@ static void processDoLoopBounds(
inclusiveBounds.push_back(true);
}
} else {
for (uint64_t i = 0; i < loopsToProcess; ++i) {
const Fortran::parser::LoopControl *loopControl;
if (i == 0) {
loopControl = &*outerDoConstruct.GetLoopControl();
locs.push_back(converter.genLocation(
Fortran::parser::FindSourceLocation(outerDoConstruct)));
} else {
// Safely locate the next inner DoConstruct within this eval.
const Fortran::parser::DoConstruct *innerDo = nullptr;
if (crtEval && crtEval->hasNestedEvaluations()) {
for (Fortran::lower::pft::Evaluation &child :
crtEval->getNestedEvaluations()) {
if (auto *stmt = child.getIf<Fortran::parser::DoConstruct>()) {
innerDo = stmt;
// Prepare to descend for the next iteration
crtEval = &child;
break;
}
}
}
if (!innerDo)
break; // No deeper loop; stop collecting collapsed bounds.

loopControl = &*innerDo->GetLoopControl();
locs.push_back(converter.genLocation(
Fortran::parser::FindSourceLocation(*innerDo)));
}

const Fortran::parser::LoopControl::Bounds *bounds =
std::get_if<Fortran::parser::LoopControl::Bounds>(&loopControl->u);
assert(bounds && "Expected bounds on the loop construct");
lowerbounds.push_back(fir::getBase(converter.genExprValue(
*Fortran::semantics::GetExpr(bounds->lower), stmtCtx)));
upperbounds.push_back(fir::getBase(converter.genExprValue(
*Fortran::semantics::GetExpr(bounds->upper), stmtCtx)));
if (bounds->step)
steps.push_back(fir::getBase(converter.genExprValue(
*Fortran::semantics::GetExpr(bounds->step), stmtCtx)));
else // If `step` is not present, assume it is `1`.
steps.push_back(builder.createIntegerConstant(
currentLocation, upperbounds[upperbounds.size() - 1].getType(), 1));

Fortran::semantics::Symbol &ivSym =
bounds->name.thing.symbol->GetUltimate();
privatizeIv(converter, ivSym, currentLocation, ivTypes, ivLocs,
privateOperands, ivPrivate, privatizationRecipes);

inclusiveBounds.push_back(true);

// crtEval already updated when descending; no blind increment here.
}
visitLoopControl(
converter, outerDoConstruct, loopsToProcess, eval,
[&](const Fortran::parser::LoopControl::Bounds &bounds,
mlir::Location loc) {
locs.push_back(loc);
lowerbounds.push_back(fir::getBase(converter.genExprValue(
*Fortran::semantics::GetExpr(bounds.lower), stmtCtx)));
upperbounds.push_back(fir::getBase(converter.genExprValue(
*Fortran::semantics::GetExpr(bounds.upper), stmtCtx)));
if (bounds.step)
steps.push_back(fir::getBase(converter.genExprValue(
*Fortran::semantics::GetExpr(bounds.step), stmtCtx)));
else // If `step` is not present, assume it is `1`.
steps.push_back(builder.createIntegerConstant(
currentLocation, upperbounds[upperbounds.size() - 1].getType(),
1));
Fortran::semantics::Symbol &ivSym =
bounds.name.thing.symbol->GetUltimate();
privatizeIv(converter, ivSym, currentLocation, ivTypes, ivLocs,
privateOperands, ivPrivate, privatizationRecipes);

inclusiveBounds.push_back(true);
});
}
}

Expand Down Expand Up @@ -2499,6 +2513,34 @@ static void remapDataOperandSymbols(
}
}

/// Privatize the induction variables of a DO loop nest without collecting
/// any loop bounds.
///
/// For each loop visited by visitLoopControl (at most \p loopsToProcess,
/// outermost first), the loop location is appended to \p locs and privatizeIv
/// is called on the ultimate symbol of the loop's induction variable, which
/// receives \p privateOperands, \p ivPrivate and \p privatizationRecipes.
///
/// \p outerDoConstruct must not be a DO CONCURRENT construct.
static void privatizeInductionVariables(
    Fortran::lower::AbstractConverter &converter,
    mlir::Location currentLocation,
    const Fortran::parser::DoConstruct &outerDoConstruct,
    Fortran::lower::pft::Evaluation &eval,
    llvm::SmallVector<mlir::Value> &privateOperands,
    llvm::SmallVector<std::pair<mlir::Value, Fortran::semantics::SymbolRef>>
        &ivPrivate,
    llvm::SmallVector<mlir::Attribute> &privatizationRecipes,
    llvm::SmallVector<mlir::Location> &locs, uint64_t loopsToProcess) {
  // ivTypes and ivLocs are scratch outputs required by privatizeIv. They are
  // discarded since no acc.loop control arguments will be created.
  llvm::SmallVector<mlir::Type> ivTypes;
  llvm::SmallVector<mlir::Location> ivLocs;
  assert(!outerDoConstruct.IsDoConcurrent() &&
         "do concurrent loops are not expected to contain early exits");
  visitLoopControl(converter, outerDoConstruct, loopsToProcess, eval,
                   [&](const Fortran::parser::LoopControl::Bounds &bounds,
                       mlir::Location loc) {
                     locs.push_back(loc);
                     Fortran::semantics::Symbol &ivSym =
                         bounds.name.thing.symbol->GetUltimate();
                     privatizeIv(converter, ivSym, currentLocation, ivTypes,
                                 ivLocs, privateOperands, ivPrivate,
                                 privatizationRecipes);
                   });
}

static mlir::acc::LoopOp buildACCLoopOp(
Fortran::lower::AbstractConverter &converter,
mlir::Location currentLocation,
Expand Down Expand Up @@ -2528,13 +2570,22 @@ static mlir::acc::LoopOp buildACCLoopOp(
llvm::SmallVector<mlir::Location> locs;
llvm::SmallVector<mlir::Value> lowerbounds, upperbounds, steps;

// Look at the do/do concurrent loops to extract bounds information.
processDoLoopBounds(converter, currentLocation, stmtCtx, builder,
outerDoConstruct, eval, lowerbounds, upperbounds, steps,
privateOperands, ivPrivate, privatizationRecipes, ivTypes,
ivLocs, inclusiveBounds, locs, loopsToProcess);

// Prepare the operand segment size attribute and the operands value range.
// Look at the do/do concurrent loops to extract bounds information unless
// this loop is lowered in an unstructured fashion, in which case bounds are
// not represented on acc.loop and explicit control flow is used inside body.
if (!eval.lowerAsUnstructured()) {
processDoLoopBounds(converter, currentLocation, stmtCtx, builder,
outerDoConstruct, eval, lowerbounds, upperbounds, steps,
privateOperands, ivPrivate, privatizationRecipes,
ivTypes, ivLocs, inclusiveBounds, locs, loopsToProcess);
} else {
// When the loop contains early exits, privatize induction variables, but do
// not create acc.loop bounds. The control flow of the loop will be
// generated explicitly in the acc.loop body that is just a container.
privatizeInductionVariables(converter, currentLocation, outerDoConstruct,
eval, privateOperands, ivPrivate,
privatizationRecipes, locs, loopsToProcess);
}
llvm::SmallVector<mlir::Value> operands;
llvm::SmallVector<int32_t> operandSegments;
addOperands(operands, operandSegments, lowerbounds);
Expand Down Expand Up @@ -2563,20 +2614,36 @@ static mlir::acc::LoopOp buildACCLoopOp(
// Remap symbols from data clauses to use data operation results
remapDataOperandSymbols(converter, builder, loopOp, dataOperandSymbolPairs);

for (auto [arg, iv] :
llvm::zip(loopOp.getLoopRegions().front()->front().getArguments(),
ivPrivate)) {
// Store block argument to the related iv private variable.
mlir::Value privateValue =
converter.getSymbolAddress(std::get<Fortran::semantics::SymbolRef>(iv));
fir::StoreOp::create(builder, currentLocation, arg, privateValue);
if (!eval.lowerAsUnstructured()) {
for (auto [arg, iv] :
llvm::zip(loopOp.getLoopRegions().front()->front().getArguments(),
ivPrivate)) {
// Store block argument to the related iv private variable.
mlir::Value privateValue = converter.getSymbolAddress(
std::get<Fortran::semantics::SymbolRef>(iv));
fir::StoreOp::create(builder, currentLocation, arg, privateValue);
}
loopOp.setInclusiveUpperbound(inclusiveBounds);
} else {
loopOp.setUnstructuredAttr(builder.getUnitAttr());
}

loopOp.setInclusiveUpperbound(inclusiveBounds);

return loopOp;
}

/// Return true if a RETURN statement appears in the evaluations nested under
/// \p eval, at any nesting depth.
static bool hasEarlyReturn(Fortran::lower::pft::Evaluation &eval) {
  for (auto &e : eval.getNestedEvaluations()) {
    bool isReturnStmt = false;
    e.visit(Fortran::common::visitors{
        [&](const Fortran::parser::ReturnStmt &) { isReturnStmt = true; },
        [&](const auto &) {},
    });
    // Stop at the first RETURN found. The answer must not be overwritten by
    // the result of scanning a later sibling that contains no RETURN.
    if (isReturnStmt || (e.hasNestedEvaluations() && hasEarlyReturn(e)))
      return true;
  }
  return false;
}

static mlir::acc::LoopOp createLoopOp(
Fortran::lower::AbstractConverter &converter,
mlir::Location currentLocation,
Expand All @@ -2586,8 +2653,7 @@ static mlir::acc::LoopOp createLoopOp(
Fortran::lower::pft::Evaluation &eval,
const Fortran::parser::AccClauseList &accClauseList,
std::optional<mlir::acc::CombinedConstructsType> combinedConstructs =
std::nullopt,
bool needEarlyReturnHandling = false) {
std::nullopt) {
fir::FirOpBuilder &builder = converter.getFirOpBuilder();
llvm::SmallVector<mlir::Value> tileOperands, privateOperands,
reductionOperands, cacheOperands, vectorOperands, workerNumOperands,
Expand Down Expand Up @@ -2763,7 +2829,10 @@ static mlir::acc::LoopOp createLoopOp(

llvm::SmallVector<mlir::Type> retTy;
mlir::Value yieldValue;
if (needEarlyReturnHandling) {
if (eval.lowerAsUnstructured() && hasEarlyReturn(eval)) {
// When there is a return statement inside the loop, add a result to the
// acc.loop that will be used in a conditional branch after the loop to
// return.
mlir::Type i1Ty = builder.getI1Type();
yieldValue = builder.createIntegerConstant(currentLocation, i1Ty, 0);
retTy.push_back(i1Ty);
Expand Down Expand Up @@ -2844,19 +2913,6 @@ static mlir::acc::LoopOp createLoopOp(
return loopOp;
}

/// Return true if a RETURN statement appears in the evaluations nested under
/// \p eval, at any nesting depth.
static bool hasEarlyReturn(Fortran::lower::pft::Evaluation &eval) {
  for (auto &e : eval.getNestedEvaluations()) {
    bool isReturnStmt = false;
    e.visit(Fortran::common::visitors{
        [&](const Fortran::parser::ReturnStmt &) { isReturnStmt = true; },
        [&](const auto &) {},
    });
    // Stop at the first RETURN found. The answer must not be overwritten by
    // the result of scanning a later sibling that contains no RETURN.
    if (isReturnStmt || (e.hasNestedEvaluations() && hasEarlyReturn(e)))
      return true;
  }
  return false;
}

static mlir::Value
genACC(Fortran::lower::AbstractConverter &converter,
Fortran::semantics::SemanticsContext &semanticsContext,
Expand All @@ -2870,17 +2926,6 @@ genACC(Fortran::lower::AbstractConverter &converter,

mlir::Location currentLocation =
converter.genLocation(beginLoopDirective.source);
bool needEarlyExitHandling = false;
if (eval.lowerAsUnstructured()) {
needEarlyExitHandling = hasEarlyReturn(eval);
// If the loop is lowered in an unstructured fashion, lowering generates
// explicit control flow that duplicates the looping semantics of the
// loops.
if (!needEarlyExitHandling)
TODO(currentLocation,
"loop with early exit inside OpenACC loop construct");
}

Fortran::lower::StatementContext stmtCtx;

assert(loopDirective.v == llvm::acc::ACCD_loop &&
Expand All @@ -2893,8 +2938,8 @@ genACC(Fortran::lower::AbstractConverter &converter,
std::get<std::optional<Fortran::parser::DoConstruct>>(loopConstruct.t);
auto loopOp = createLoopOp(converter, currentLocation, semanticsContext,
stmtCtx, *outerDoConstruct, eval, accClauseList,
/*combinedConstructs=*/{}, needEarlyExitHandling);
if (needEarlyExitHandling)
/*combinedConstructs=*/{});
if (loopOp.getNumResults() == 1)
return loopOp.getResult(0);

return mlir::Value{};
Expand Down Expand Up @@ -3679,10 +3724,6 @@ genACC(Fortran::lower::AbstractConverter &converter,
converter.genLocation(beginCombinedDirective.source);
Fortran::lower::StatementContext stmtCtx;

if (eval.lowerAsUnstructured())
TODO(currentLocation,
"loop with early exit inside OpenACC combined construct");

if (combinedDirective.v == llvm::acc::ACCD_kernels_loop) {
createComputeOp<mlir::acc::KernelsOp>(
converter, currentLocation, eval, semanticsContext, stmtCtx,
Expand Down
4 changes: 2 additions & 2 deletions flang/test/Lower/OpenACC/acc-unstructured.f90
Original file line number Diff line number Diff line change
@@ -1,5 +1,4 @@
! RUN: bbc -fopenacc -emit-hlfir %s -o - | FileCheck %s
! XFAIL: *

subroutine test_unstructured1(a, b, c)
integer :: i, j, k
Expand Down Expand Up @@ -55,10 +54,11 @@ subroutine test_unstructured2(a, b, c)

! CHECK-LABEL: func.func @_QPtest_unstructured2
! CHECK: acc.parallel
! CHECK: acc.loop
! CHECK: acc.loop combined(parallel) private(@privatization_ref_i32 -> %{{.*}} : !fir.ref<i32>) {
! CHECK: fir.call @_FortranAStopStatementText
! CHECK: acc.yield
! CHECK: acc.yield
! CHECK: } attributes {independent = [#acc.device_type<none>], unstructured}
! CHECK: acc.yield

end subroutine
Expand Down
9 changes: 8 additions & 1 deletion mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td
Original file line number Diff line number Diff line change
Expand Up @@ -2487,6 +2487,12 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
device-type-aware getter methods. When modifying these operands, the
corresponding `device_type` attributes must be updated to maintain
consistency between operands and their target device types.

The `unstructured` attribute indicates that the loops inside the OpenACC
construct contain early exits and cannot be lowered to structured MLIR
operations. When this flag is set, the acc.loop should have no induction
variables and the loop must be implemented via explicit control flow
inside its body.
}];

let arguments = (ins
Expand Down Expand Up @@ -2520,7 +2526,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
OptionalAttr<SymbolRefArrayAttr>:$firstprivatizationRecipes,
Variadic<AnyType>:$reductionOperands,
OptionalAttr<SymbolRefArrayAttr>:$reductionRecipes,
OptionalAttr<OpenACC_CombinedConstructsAttr>:$combined
OptionalAttr<OpenACC_CombinedConstructsAttr>:$combined,
UnitAttr:$unstructured
);

let results = (outs Variadic<AnyType>:$results);
Expand Down
8 changes: 6 additions & 2 deletions mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -3065,8 +3065,12 @@ LogicalResult acc::LoopOp::verify() {
if (getRegion().empty())
return emitError("expected non-empty body.");

// When it is container-like - it is expected to hold a loop-like operation.
if (isContainerLike()) {
if (getUnstructured()) {
if (!isContainerLike())
return emitError(
"unstructured acc.loop must not have induction variables");
} else if (isContainerLike()) {
// When it is container-like - it is expected to hold a loop-like operation.
// Obtain the maximum collapse count - we use this to check that there
// are enough loops contained.
uint64_t collapseCount = getCollapseValue().value_or(1);
Expand Down
Loading