Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions flang/include/flang/Optimizer/Builder/HLFIRTools.h
Original file line number Diff line number Diff line change
Expand Up @@ -374,12 +374,14 @@ struct LoopNest {
/// loop constructs currently.
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
mlir::ValueRange extents, bool isUnordered = false,
bool emitWorkshareLoop = false);
bool emitWorkshareLoop = false,
bool couldVectorize = true);
inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
mlir::Value shape, bool isUnordered = false,
bool emitWorkshareLoop = false) {
bool emitWorkshareLoop = false,
bool couldVectorize = true) {
return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
isUnordered, emitWorkshareLoop);
isUnordered, emitWorkshareLoop, couldVectorize);
}

/// The type of a callback that generates the body of a reduction
Expand Down
4 changes: 4 additions & 0 deletions flang/include/flang/Optimizer/HLFIR/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ def InlineHLFIRAssign : Pass<"inline-hlfir-assign"> {
let summary = "Inline hlfir.assign operations";
}

def InlineHLFIRCopyIn : Pass<"inline-hlfir-copy-in"> {
let summary = "Inline hlfir.copy_in operations";
}

def PropagateFortranVariableAttributes : Pass<"propagate-fortran-attrs"> {
let summary = "Propagate FortranVariableFlagsAttr attributes through HLFIR";
}
Expand Down
13 changes: 12 additions & 1 deletion flang/lib/Optimizer/Builder/HLFIRTools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "mlir/IR/IRMapping.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/TypeSwitch.h"
#include <mlir/Dialect/LLVMIR/LLVMAttrs.h>
#include <mlir/Dialect/OpenMP/OpenMPDialect.h>
#include <optional>

Expand Down Expand Up @@ -932,7 +933,8 @@ mlir::Value hlfir::inlineElementalOp(
hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
mlir::ValueRange extents, bool isUnordered,
bool emitWorkshareLoop) {
bool emitWorkshareLoop,
bool couldVectorize) {
emitWorkshareLoop = emitWorkshareLoop && isUnordered;
hlfir::LoopNest loopNest;
assert(!extents.empty() && "must have at least one extent");
Expand Down Expand Up @@ -967,6 +969,15 @@ hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
auto ub = builder.createConvert(loc, indexType, extent);
auto doLoop =
builder.create<fir::DoLoopOp>(loc, one, ub, one, isUnordered);
if (!couldVectorize) {
mlir::LLVM::LoopVectorizeAttr va{mlir::LLVM::LoopVectorizeAttr::get(
builder.getContext(),
/*disable=*/builder.getBoolAttr(true), {}, {}, {}, {}, {}, {})};
mlir::LLVM::LoopAnnotationAttr la = mlir::LLVM::LoopAnnotationAttr::get(
builder.getContext(), {}, /*vectorize=*/va, {}, /*unroll*/ {},
/*unroll_and_jam*/ {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {});
doLoop.setLoopAnnotationAttr(la);
}
loopNest.body = doLoop.getBody();
builder.setInsertionPointToStart(loopNest.body);
// Reverse the indices so they are in column-major order.
Expand Down
1 change: 1 addition & 0 deletions flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ add_flang_library(HLFIRTransforms
ConvertToFIR.cpp
InlineElementals.cpp
InlineHLFIRAssign.cpp
InlineHLFIRCopyIn.cpp
LowerHLFIRIntrinsics.cpp
LowerHLFIROrderedAssignments.cpp
ScheduleOrderedAssignments.cpp
Expand Down
183 changes: 183 additions & 0 deletions flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
//===- InlineHLFIRCopyIn.cpp - Inline hlfir.copy_in ops -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Transform hlfir.copy_in array operations into loop nests that perform
// element-by-element assignments. For simplicity, the inlining is done only
// for trivial data types, when the copy_in does not require a corresponding
// copy_out, and when the input array is not behind a pointer. This may change
// in the future.
//===----------------------------------------------------------------------===//

#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/OpenMP/Passes.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

namespace hlfir {
#define GEN_PASS_DEF_INLINEHLFIRCOPYIN
#include "flang/Optimizer/HLFIR/Passes.h.inc"
} // namespace hlfir

#define DEBUG_TYPE "inline-hlfir-copy-in"

// Command-line switch "no-inline-hlfir-copy-in" (off by default). When it is
// set, the pass below registers no rewrite pattern, so hlfir.copy_in
// operations are left as they are.
static llvm::cl::opt<bool> noInlineHLFIRCopyIn(
"no-inline-hlfir-copy-in",
llvm::cl::desc("Do not inline hlfir.copy_in operations"),
llvm::cl::init(false));

namespace {
/// Rewrite pattern anchored on hlfir.copy_in operations. The actual
/// transformation lives in matchAndRewrite, which is defined out of line.
class InlineCopyInConversion : public mlir::OpRewritePattern<hlfir::CopyInOp> {
public:
  // Reuse the constructors provided by the base pattern class.
  using OpRewritePattern::OpRewritePattern;

  /// Attempt to rewrite \p copyIn using \p rewriter; reports success when the
  /// operation was replaced and a match failure otherwise.
  llvm::LogicalResult
  matchAndRewrite(hlfir::CopyInOp copyIn,
                  mlir::PatternRewriter &rewriter) const override;
};

/// Replace an hlfir.copy_in by an explicit run-time contiguity test.
///
/// A match failure is reported unless all of the following hold:
///  - the Fortran element type of the input variable is trivial;
///  - the was_copied result has exactly one use, and that user is an
///    hlfir.copy_out;
///  - the copied-in result type is not assumed-rank;
///  - the hlfir.copy_out has no variable operand (nothing to copy back).
///
/// On success a two-result conditional is generated on fir.is_contiguous_box:
///  - contiguous: yields the input box (reboxed when it has a pointer type)
///    together with `false`;
///  - otherwise: allocates a heap temporary, fills it with an unordered,
///    non-vectorized loop nest of element assignments and yields the boxed
///    temporary together with `true`.
/// The hlfir.copy_out is then re-pointed at a stack slot holding the resulting
/// box and at the "needs cleanup" flag, and the hlfir.copy_in is replaced by
/// the resulting box and the negated contiguity flag.
llvm::LogicalResult
InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn,
                                        mlir::PatternRewriter &rewriter) const {
  fir::FirOpBuilder builder(rewriter, copyIn.getOperation());
  mlir::Location loc = copyIn.getLoc();
  hlfir::Entity source{copyIn.getVar()};
  mlir::Type copiedInType = copyIn.getCopiedIn().getType();

  if (!fir::isa_trivial(source.getFortranElementType()))
    return rewriter.notifyMatchFailure(copyIn,
                                       "CopyInOp's data type is not trivial");

  // The only expected consumer of WasCopied is the matching CopyOutOp.
  mlir::Value wasCopiedFlag = copyIn.getWasCopied();
  if (!wasCopiedFlag.hasOneUse())
    return rewriter.notifyMatchFailure(
        copyIn, "CopyInOp's WasCopied has no single user");

  // A copy out is expected to always exist, be it to copy data back or merely
  // to release the temporary.
  mlir::Operation *soleUser = *wasCopiedFlag.user_begin();
  auto copyOut = mlir::dyn_cast<hlfir::CopyOutOp>(soleUser);
  if (!copyOut)
    return rewriter.notifyMatchFailure(copyIn,
                                       "CopyInOp has no direct CopyOut");

  if (mlir::cast<fir::BaseBoxType>(copiedInType).isAssumedRank())
    return rewriter.notifyMatchFailure(copyIn,
                                       "The result array is assumed-rank");

  // Inlining is limited to the intent(in)-like case in which the copy_out has
  // nothing to write back.
  if (copyOut.getVar())
    return rewriter.notifyMatchFailure(copyIn, "CopyIn needs a copy-out");

  source = hlfir::derefPointersAndAllocatables(loc, builder, source);
  mlir::Type arrayType =
      hlfir::getFortranElementOrSequenceType(source.getType());
  fir::BoxType boxType = fir::BoxType::get(arrayType);
  mlir::Value isContiguous =
      builder.create<fir::IsContiguousBoxOp>(loc, source);

  // Contiguous input: forward the original data, no temporary is created.
  auto emitPassThrough = [&]() {
    mlir::Value forwarded = source;
    if (fir::isPointerType(source.getType())) {
      forwarded = builder.create<fir::ReboxOp>(loc, boxType, source,
                                               mlir::Value{}, mlir::Value{});
    }
    mlir::Value noCleanup = builder.createBool(loc, false);
    builder.create<fir::ResultOp>(loc,
                                  mlir::ValueRange{forwarded, noCleanup});
  };

  // Non-contiguous input: copy element by element into a heap temporary.
  auto emitCopyToTemporary = [&]() {
    mlir::Value shape = hlfir::genShape(loc, builder, source);
    llvm::SmallVector<mlir::Value> extents =
        hlfir::getIndexExtents(loc, builder, shape);
    llvm::StringRef tmpName{".tmp.copy_in"};
    llvm::SmallVector<mlir::Value> lenParams;
    mlir::Value heapStorage = builder.createHeapTemporary(
        loc, arrayType, tmpName, extents, lenParams);

    auto tempDecl = builder.create<hlfir::DeclareOp>(
        loc, heapStorage, tmpName, shape, lenParams,
        /*dummy_scope=*/nullptr);
    hlfir::Entity temp{tempDecl.getBase()};

    const bool useWorkshare = flangomp::shouldUseWorkshareLowering(copyIn);
    hlfir::LoopNest nest =
        hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
                           useWorkshare, /*couldVectorize=*/false);

    // Loop body: temp(i...) = source(i...).
    builder.setInsertionPointToStart(nest.body);
    hlfir::Entity sourceElem = hlfir::loadTrivialScalar(
        loc, builder,
        hlfir::getElementAt(loc, builder, source, nest.oneBasedIndices));
    hlfir::Entity tempElem =
        hlfir::getElementAt(loc, builder, temp, nest.oneBasedIndices);
    builder.create<hlfir::AssignOp>(loc, sourceElem, tempElem);
    builder.setInsertionPointAfter(nest.outerOp);

    // The branch must yield a boxed array, so box the temporary here when the
    // declared base is not already a box.
    mlir::Value boxedTemp = temp;
    if (!mlir::isa<fir::BaseBoxType>(temp.getType())) {
      fir::ReferenceType refTy =
          fir::ReferenceType::get(temp.getElementOrSequenceType());
      mlir::Value asRef = builder.createConvert(loc, refTy, temp);
      boxedTemp = builder.create<fir::EmboxOp>(loc, boxType, asRef, shape);
    }

    mlir::Value doCleanup = builder.createBool(loc, true);
    builder.create<fir::ResultOp>(loc,
                                  mlir::ValueRange{boxedTemp, doCleanup});
  };

  mlir::Operation::result_range ifResults =
      builder
          .genIfOp(loc, {boxType, builder.getI1Type()}, isContiguous,
                   /*withElseRegion=*/true)
          .genThen(emitPassThrough)
          .genElse(emitCopyToTemporary)
          .getResults();

  mlir::OpResult resultBox = ifResults[0];
  mlir::OpResult needsCleanup = ifResults[1];

  // Hand the resulting box (through a stack slot) and the cleanup flag to the
  // copy_out so it can free the temporary when one was created.
  auto boxSlot = builder.create<fir::AllocaOp>(loc, resultBox.getType());
  auto boxStore = builder.create<fir::StoreOp>(loc, resultBox, boxSlot);
  rewriter.modifyOpInPlace(copyOut, [&]() {
    copyOut->setOperand(0, boxStore.getMemref());
    copyOut->setOperand(1, needsCleanup);
  });

  // was_copied is reported as the negation of the contiguity test.
  mlir::Value copyWasMade = builder.genNot(loc, isContiguous);
  rewriter.replaceOp(copyIn, {resultBox, copyWasMade});
  return mlir::success();
}

/// Pass that applies InlineCopyInConversion greedily to the operation it runs
/// on. When the "no-inline-hlfir-copy-in" option is set, the greedy driver is
/// still invoked but with an empty pattern set.
class InlineHLFIRCopyInPass
    : public hlfir::impl::InlineHLFIRCopyInBase<InlineHLFIRCopyInPass> {
public:
  void runOnOperation() override {
    mlir::MLIRContext *ctx = &getContext();

    mlir::RewritePatternSet patterns(ctx);
    if (!noInlineHLFIRCopyIn)
      patterns.insert<InlineCopyInConversion>(ctx);

    // Region simplification is turned off so that the pattern driver does not
    // merge blocks.
    mlir::GreedyRewriteConfig config;
    config.setRegionSimplificationLevel(
        mlir::GreedySimplifyRegionLevel::Disabled);

    llvm::LogicalResult status = mlir::applyPatternsGreedily(
        getOperation(), std::move(patterns), config);
    if (mlir::succeeded(status))
      return;

    mlir::emitError(getOperation()->getLoc(),
                    "failure in hlfir.copy_in inlining");
    signalPassFailure();
  }
};
} // namespace
5 changes: 5 additions & 0 deletions flang/lib/Optimizer/Passes/Pipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -260,6 +260,11 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm, bool enableOpenMP,
pm, hlfir::createOptimizedBufferization);
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createInlineHLFIRAssign);

if (optLevel == llvm::OptimizationLevel::O3) {
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createInlineHLFIRCopyIn);
}
}
pm.addPass(hlfir::createLowerHLFIROrderedAssignments());
pm.addPass(hlfir::createLowerHLFIRIntrinsics());
Expand Down
Loading
Loading