Skip to content
Merged
Show file tree
Hide file tree
Changes from 7 commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 5 additions & 3 deletions flang/include/flang/Optimizer/Builder/HLFIRTools.h
Original file line number Diff line number Diff line change
Expand Up @@ -374,12 +374,14 @@ struct LoopNest {
/// loop constructs currently.
LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
mlir::ValueRange extents, bool isUnordered = false,
bool emitWorkshareLoop = false);
bool emitWorkshareLoop = false,
bool couldVectorize = true);
inline LoopNest genLoopNest(mlir::Location loc, fir::FirOpBuilder &builder,
mlir::Value shape, bool isUnordered = false,
bool emitWorkshareLoop = false) {
bool emitWorkshareLoop = false,
bool couldVectorize = true) {
return genLoopNest(loc, builder, getIndexExtents(loc, builder, shape),
isUnordered, emitWorkshareLoop);
isUnordered, emitWorkshareLoop, couldVectorize);
}

/// The type of a callback that generates the body of a reduction
Expand Down
4 changes: 4 additions & 0 deletions flang/include/flang/Optimizer/HLFIR/Passes.td
Original file line number Diff line number Diff line change
Expand Up @@ -69,6 +69,10 @@ def InlineHLFIRAssign : Pass<"inline-hlfir-assign"> {
let summary = "Inline hlfir.assign operations";
}

// Pass declaration for inlining hlfir.copy_in operations. The pass is
// registered under the command-line name "inline-hlfir-copy-in".
def InlineHLFIRCopyIn : Pass<"inline-hlfir-copy-in"> {
let summary = "Inline hlfir.copy_in operations";
}

def PropagateFortranVariableAttributes : Pass<"propagate-fortran-attrs"> {
let summary = "Propagate FortranVariableFlagsAttr attributes through HLFIR";
}
Expand Down
13 changes: 12 additions & 1 deletion flang/lib/Optimizer/Builder/HLFIRTools.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -21,6 +21,7 @@
#include "mlir/IR/IRMapping.h"
#include "mlir/Support/LLVM.h"
#include "llvm/ADT/TypeSwitch.h"
#include <mlir/Dialect/LLVMIR/LLVMAttrs.h>
#include <mlir/Dialect/OpenMP/OpenMPDialect.h>
#include <optional>

Expand Down Expand Up @@ -932,7 +933,8 @@ mlir::Value hlfir::inlineElementalOp(
hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
fir::FirOpBuilder &builder,
mlir::ValueRange extents, bool isUnordered,
bool emitWorkshareLoop) {
bool emitWorkshareLoop,
bool couldVectorize) {
emitWorkshareLoop = emitWorkshareLoop && isUnordered;
hlfir::LoopNest loopNest;
assert(!extents.empty() && "must have at least one extent");
Expand Down Expand Up @@ -967,6 +969,15 @@ hlfir::LoopNest hlfir::genLoopNest(mlir::Location loc,
auto ub = builder.createConvert(loc, indexType, extent);
auto doLoop =
builder.create<fir::DoLoopOp>(loc, one, ub, one, isUnordered);
if (!couldVectorize) {
mlir::LLVM::LoopVectorizeAttr va{mlir::LLVM::LoopVectorizeAttr::get(
builder.getContext(),
/*disable=*/builder.getBoolAttr(true), {}, {}, {}, {}, {}, {})};
mlir::LLVM::LoopAnnotationAttr la = mlir::LLVM::LoopAnnotationAttr::get(
builder.getContext(), {}, /*vectorize=*/va, {}, /*unroll*/ {},
/*unroll_and_jam*/ {}, {}, {}, {}, {}, {}, {}, {}, {}, {}, {});
doLoop.setLoopAnnotationAttr(la);
}
loopNest.body = doLoop.getBody();
builder.setInsertionPointToStart(loopNest.body);
// Reverse the indices so they are in column-major order.
Expand Down
1 change: 1 addition & 0 deletions flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@ add_flang_library(HLFIRTransforms
ConvertToFIR.cpp
InlineElementals.cpp
InlineHLFIRAssign.cpp
InlineHLFIRCopyIn.cpp
LowerHLFIRIntrinsics.cpp
LowerHLFIROrderedAssignments.cpp
ScheduleOrderedAssignments.cpp
Expand Down
183 changes: 183 additions & 0 deletions flang/lib/Optimizer/HLFIR/Transforms/InlineHLFIRCopyIn.cpp
Original file line number Diff line number Diff line change
@@ -0,0 +1,183 @@
//===- InlineHLFIRCopyIn.cpp - Inline hlfir.copy_in ops -------------------===//
//
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
// See https://llvm.org/LICENSE.txt for license information.
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
//
//===----------------------------------------------------------------------===//
// Transform hlfir.copy_in array operations into loop nests performing element
// per element assignments. For simplicity, the inlining is done for trivial
// data types when the copy_in does not require a corresponding copy_out and
// when the input array is not behind a pointer. This may change in the future.
//===----------------------------------------------------------------------===//

#include "flang/Optimizer/Builder/FIRBuilder.h"
#include "flang/Optimizer/Builder/HLFIRTools.h"
#include "flang/Optimizer/Dialect/FIRType.h"
#include "flang/Optimizer/HLFIR/HLFIROps.h"
#include "flang/Optimizer/OpenMP/Passes.h"
#include "mlir/IR/PatternMatch.h"
#include "mlir/Support/LLVM.h"
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"

namespace hlfir {
#define GEN_PASS_DEF_INLINEHLFIRCOPYIN
#include "flang/Optimizer/HLFIR/Passes.h.inc"
} // namespace hlfir

#define DEBUG_TYPE "inline-hlfir-copy-in"

/// Command-line switch ("no-inline-hlfir-copy-in") that turns the
/// transformation off. It defaults to false; when it is set, the pass in this
/// file registers no rewrite pattern, so hlfir.copy_in operations are left
/// untouched by it.
static llvm::cl::opt<bool> noInlineHLFIRCopyIn(
"no-inline-hlfir-copy-in",
llvm::cl::desc("Do not inline hlfir.copy_in operations"),
llvm::cl::init(false));

namespace {
/// Rewrite pattern anchored on hlfir.copy_in. The eligibility checks and the
/// actual expansion are implemented in matchAndRewrite, which is defined out
/// of line below.
class InlineCopyInConversion : public mlir::OpRewritePattern<hlfir::CopyInOp> {
public:
  // Inherit the base-class constructors unchanged.
  using OpRewritePattern::OpRewritePattern;

  /// Try to replace \p copyIn with inline IR. Returns failure (leaving the IR
  /// untouched) when the operation is not eligible.
  llvm::LogicalResult
  matchAndRewrite(hlfir::CopyInOp copyIn,
                  mlir::PatternRewriter &rewriter) const override;
};

/// Expand an hlfir.copy_in into an explicit contiguity test followed by either
/// a pass-through of the input box or an element-by-element copy into a
/// temporary.
///
/// The rewrite is only attempted when all of the following hold (checked in
/// this order, each with its own match-failure message):
///   - the Fortran element type of the input is trivial;
///   - the "was copied" result has exactly one use;
///   - that single user is an hlfir.copy_out;
///   - the copied-in result type is not assumed-rank;
///   - the hlfir.copy_out has no variable operand (nothing to copy back).
///
/// No IR is created before these checks have passed.
///
/// \param copyIn   the operation to rewrite.
/// \param rewriter the driver-provided rewriter used for all IR mutation.
/// \returns success once \p copyIn has been replaced, failure otherwise.
llvm::LogicalResult
InlineCopyInConversion::matchAndRewrite(hlfir::CopyInOp copyIn,
                                        mlir::PatternRewriter &rewriter) const {
  fir::FirOpBuilder builder(rewriter, copyIn.getOperation());
  mlir::Location loc = copyIn.getLoc();
  hlfir::Entity source{copyIn.getVar()};
  mlir::Type copiedInType = copyIn.getCopiedIn().getType();

  if (!fir::isa_trivial(source.getFortranElementType()))
    return rewriter.notifyMatchFailure(copyIn,
                                       "CopyInOp's data type is not trivial");

  // The only expected consumer of the "was copied" flag is the matching
  // hlfir.copy_out.
  mlir::Value wasCopiedFlag = copyIn.getWasCopied();
  if (!wasCopiedFlag.hasOneUse())
    return rewriter.notifyMatchFailure(
        copyIn, "CopyInOp's WasCopied has no single user");

  // A copy_out is expected to exist in all cases, either to copy data back or
  // merely to release the temporary.
  auto copyOut = mlir::dyn_cast<hlfir::CopyOutOp>(*wasCopiedFlag.user_begin());
  if (!copyOut)
    return rewriter.notifyMatchFailure(copyIn,
                                       "CopyInOp has no direct CopyOut");

  if (mlir::cast<fir::BaseBoxType>(copiedInType).isAssumedRank())
    return rewriter.notifyMatchFailure(copyIn,
                                       "The result array is assumed-rank");

  // Restrict the inlining to the case where nothing has to be copied back,
  // i.e. intent(in).
  if (copyOut.getVar())
    return rewriter.notifyMatchFailure(copyIn, "CopyIn needs a copy-out");

  // From here on IR is generated; the creation order of the operations below
  // determines the emitted IR and must be kept.
  source = hlfir::derefPointersAndAllocatables(loc, builder, source);
  mlir::Type seqType = hlfir::getFortranElementOrSequenceType(source.getType());
  fir::BoxType boxType = fir::BoxType::get(seqType);
  mlir::Value isContiguous =
      builder.create<fir::IsContiguousBoxOp>(loc, source);

  // "then" region: the input is contiguous, so it is forwarded as is (reboxed
  // to the plain box type when it is a pointer) and flagged as not copied.
  auto forwardContiguousInput = [&]() {
    mlir::Value forwarded = source;
    if (fir::isPointerType(source.getType()))
      forwarded = builder.create<fir::ReboxOp>(loc, boxType, source,
                                               mlir::Value{}, mlir::Value{});
    mlir::Value notCopied = builder.createBool(loc, false);
    builder.create<fir::ResultOp>(loc, mlir::ValueRange{forwarded, notCopied});
  };

  // "else" region: the input is not contiguous, so a temporary is created and
  // filled element by element, then yielded together with a true flag.
  auto copyIntoTemporary = [&]() {
    mlir::Value shape = hlfir::genShape(loc, builder, source);
    llvm::SmallVector<mlir::Value> extents =
        hlfir::getIndexExtents(loc, builder, shape);
    llvm::StringRef tempName{".tmp.copy_in"};
    llvm::SmallVector<mlir::Value> noLenParams;
    mlir::Value storage = builder.createHeapTemporary(loc, seqType, tempName,
                                                      extents, noLenParams);

    auto declare =
        builder.create<hlfir::DeclareOp>(loc, storage, tempName, shape,
                                         noLenParams, /*dummy_scope=*/nullptr);
    hlfir::Entity tempArray{declare.getBase()};

    // Unordered loop nest over all extents, requested with
    // couldVectorize=false.
    hlfir::LoopNest nest =
        hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
                           flangomp::shouldUseWorkshareLowering(copyIn),
                           /*couldVectorize=*/false);
    builder.setInsertionPointToStart(nest.body);
    hlfir::Entity srcElement =
        hlfir::getElementAt(loc, builder, source, nest.oneBasedIndices);
    srcElement = hlfir::loadTrivialScalar(loc, builder, srcElement);
    hlfir::Entity dstElement =
        hlfir::getElementAt(loc, builder, tempArray, nest.oneBasedIndices);
    builder.create<hlfir::AssignOp>(loc, srcElement, dstElement);
    builder.setInsertionPointAfter(nest.outerOp);

    // The yielded value must be a boxed array; embox the temporary when the
    // declare did not already produce a box.
    mlir::Value boxedTemp = tempArray;
    if (!mlir::isa<fir::BaseBoxType>(tempArray.getType())) {
      fir::ReferenceType refType =
          fir::ReferenceType::get(tempArray.getElementOrSequenceType());
      mlir::Value asReference = builder.createConvert(loc, refType, tempArray);
      boxedTemp =
          builder.create<fir::EmboxOp>(loc, boxType, asReference, shape);
    }

    mlir::Value copied = builder.createBool(loc, true);
    builder.create<fir::ResultOp>(loc, mlir::ValueRange{boxedTemp, copied});
  };

  mlir::Operation::result_range ifResults =
      builder
          .genIfOp(loc, {boxType, builder.getI1Type()}, isContiguous,
                   /*withElseRegion=*/true)
          .genThen(forwardContiguousInput)
          .genElse(copyIntoTemporary)
          .getResults();

  mlir::OpResult resultBox = ifResults[0];
  mlir::OpResult needsCleanup = ifResults[1];

  // Redirect the paired copy_out so that it frees the temporary when one was
  // created: operand 0 becomes the address holding the result box, operand 1
  // the flag yielded by the fir.if.
  auto boxStorage = builder.create<fir::AllocaOp>(loc, resultBox.getType());
  auto storeBox = builder.create<fir::StoreOp>(loc, resultBox, boxStorage);
  rewriter.modifyOpInPlace(copyOut, [&] {
    copyOut->setOperand(0, storeBox.getMemref());
    copyOut->setOperand(1, needsCleanup);
  });

  mlir::Value wasCopiedReplacement = builder.genNot(loc, isContiguous);
  rewriter.replaceOp(copyIn, {resultBox, wasCopiedReplacement});
  return mlir::success();
}

/// Pass driver: applies InlineCopyInConversion greedily over the operation the
/// pass is run on, unless the "no-inline-hlfir-copy-in" option is set, in
/// which case the pattern set is left empty.
class InlineHLFIRCopyInPass
    : public hlfir::impl::InlineHLFIRCopyInBase<InlineHLFIRCopyInPass> {
public:
  void runOnOperation() override {
    mlir::MLIRContext *ctx = &getContext();

    // Region simplification is switched off so that the pattern driver does
    // not merge blocks.
    mlir::GreedyRewriteConfig driverConfig;
    driverConfig.setRegionSimplificationLevel(
        mlir::GreedySimplifyRegionLevel::Disabled);

    mlir::RewritePatternSet copyInPatterns(ctx);
    if (!noInlineHLFIRCopyIn)
      copyInPatterns.insert<InlineCopyInConversion>(ctx);

    const bool driverSucceeded = mlir::succeeded(mlir::applyPatternsGreedily(
        getOperation(), std::move(copyInPatterns), driverConfig));
    if (driverSucceeded)
      return;

    // Report the failure on the root operation and mark the pass as failed.
    mlir::emitError(getOperation()->getLoc(),
                    "failure in hlfir.copy_in inlining");
    signalPassFailure();
  }
};
} // namespace
5 changes: 5 additions & 0 deletions flang/lib/Optimizer/Passes/Pipelines.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -255,6 +255,11 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm, bool enableOpenMP,
pm, hlfir::createOptimizedBufferization);
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createInlineHLFIRAssign);

if (optLevel == llvm::OptimizationLevel::O3) {
addNestedPassToAllTopLevelOperations<PassConstructor>(
pm, hlfir::createInlineHLFIRCopyIn);
}
}
pm.addPass(hlfir::createLowerHLFIROrderedAssignments());
pm.addPass(hlfir::createLowerHLFIRIntrinsics());
Expand Down
Loading
Loading