Skip to content

Commit f6783fa

Browse files
committed
[flang] Extract hlfir.assign inlining from opt-bufferization.
Optimized bufferization can transform hlfir.assign into a loop nest doing element-by-element assignment, but it avoids doing so when the RHS is an hlfir.expr. This is done to let the ElementalAssignBufferization pattern try to do a better job. This patch moves the hlfir.assign inlining after opt-bufferization, and enables it for hlfir.expr RHS. The hlfir.expr RHS cases are present in tonto, and this patch results in some nice improvements. Note that other compilers also handle those cases using array temporaries, so this patch seems to just get rid of the Assign runtime overhead/inefficiency.
1 parent c1ecc0d commit f6783fa

File tree

12 files changed

+228
-178
lines changed

12 files changed

+228
-178
lines changed

flang/include/flang/Optimizer/HLFIR/Passes.td

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,4 +49,8 @@ def InlineElementals : Pass<"inline-elementals"> {
4949
let summary = "Inline chained hlfir.elemental operations";
5050
}
5151

52+
// Declares the hlfir.assign inlining pass under the "inline-hlfir-assign"
// pass argument. No anchor operation type is given to Pass<> here.
def InlineHLFIRAssign : Pass<"inline-hlfir-assign"> {
53+
let summary = "Inline hlfir.assign operations";
54+
}
55+
5256
#endif //FORTRAN_DIALECT_HLFIR_PASSES

flang/lib/Optimizer/HLFIR/Transforms/CMakeLists.txt

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -4,6 +4,7 @@ add_flang_library(HLFIRTransforms
44
BufferizeHLFIR.cpp
55
ConvertToFIR.cpp
66
InlineElementals.cpp
7+
InlineHLFIRAssign.cpp
78
LowerHLFIRIntrinsics.cpp
89
LowerHLFIROrderedAssignments.cpp
910
ScheduleOrderedAssignments.cpp
Lines changed: 152 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,152 @@
1+
//===- InlineHLFIRAssign.cpp - Inline hlfir.assign ops --------------------===//
2+
//
3+
// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4+
// See https://llvm.org/LICENSE.txt for license information.
5+
// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6+
//
7+
//===----------------------------------------------------------------------===//
8+
// Transform hlfir.assign array operations into loop nests performing element
9+
// per element assignments. Trivial data types are currently always inlined;
10+
// performance/code-size heuristics may be added in the future.
11+
//===----------------------------------------------------------------------===//
12+
13+
#include "flang/Optimizer/Analysis/AliasAnalysis.h"
14+
#include "flang/Optimizer/Builder/FIRBuilder.h"
15+
#include "flang/Optimizer/Builder/HLFIRTools.h"
16+
#include "flang/Optimizer/HLFIR/HLFIROps.h"
17+
#include "flang/Optimizer/HLFIR/Passes.h"
18+
#include "flang/Optimizer/OpenMP/Passes.h"
19+
#include "mlir/IR/PatternMatch.h"
20+
#include "mlir/Pass/Pass.h"
21+
#include "mlir/Support/LLVM.h"
22+
#include "mlir/Transforms/GreedyPatternRewriteDriver.h"
23+
24+
namespace hlfir {
25+
#define GEN_PASS_DEF_INLINEHLFIRASSIGN
26+
#include "flang/Optimizer/HLFIR/Passes.h.inc"
27+
} // namespace hlfir
28+
29+
#define DEBUG_TYPE "inline-hlfir-assign"
30+
31+
namespace {
32+
/// Expand hlfir.assign of array RHS to array LHS into a loop nest
33+
/// of element-by-element assignments:
34+
/// hlfir.assign %4 to %5 : !fir.ref<!fir.array<3x3xf32>>,
35+
/// !fir.ref<!fir.array<3x3xf32>>
36+
/// into:
37+
/// fir.do_loop %arg1 = %c1 to %c3 step %c1 unordered {
38+
/// fir.do_loop %arg2 = %c1 to %c3 step %c1 unordered {
39+
/// %6 = hlfir.designate %4 (%arg2, %arg1) :
40+
/// (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
41+
/// %7 = fir.load %6 : !fir.ref<f32>
42+
/// %8 = hlfir.designate %5 (%arg2, %arg1) :
43+
/// (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
44+
/// hlfir.assign %7 to %8 : f32, !fir.ref<f32>
45+
/// }
46+
/// }
47+
///
48+
/// The transformation is correct only when LHS and RHS do not alias.
49+
/// When RHS is an array expression, then there is no aliasing.
50+
/// This transformation does not support runtime checking for
51+
/// non-conforming LHS/RHS arrays' shapes currently.
52+
/// Rewrite pattern rooted at hlfir::AssignOp.
class InlineHLFIRAssignConversion
53+
: public mlir::OpRewritePattern<hlfir::AssignOp> {
54+
public:
55+
using mlir::OpRewritePattern<hlfir::AssignOp>::OpRewritePattern;
56+
57+
/// Replace \p assign with a loop nest of scalar hlfir.assign operations
/// when all legality checks below pass; otherwise report a match failure
/// and leave the IR unchanged.
llvm::LogicalResult
58+
matchAndRewrite(hlfir::AssignOp assign,
59+
mlir::PatternRewriter &rewriter) const override {
60+
// An allocatable assignment may require (re)allocation of the LHS, which
// the loop nest generated below does not handle.
if (assign.isAllocatableAssignment())
61+
return rewriter.notifyMatchFailure(assign,
62+
"AssignOp may imply allocation");
63+
64+
hlfir::Entity rhs{assign.getRhs()};
65+
66+
// Only array-to-array assignments are expanded: both sides must be arrays.
if (!rhs.isArray())
67+
return rewriter.notifyMatchFailure(assign,
68+
"AssignOp's RHS is not an array");
69+
70+
// The RHS elements are loaded as scalar values in the loop body
// (loadTrivialScalar), so the element type must be trivial.
mlir::Type rhsEleTy = rhs.getFortranElementType();
71+
if (!fir::isa_trivial(rhsEleTy))
72+
return rewriter.notifyMatchFailure(
73+
assign, "AssignOp's RHS data type is not trivial");
74+
75+
hlfir::Entity lhs{assign.getLhs()};
76+
if (!lhs.isArray())
77+
return rewriter.notifyMatchFailure(assign,
78+
"AssignOp's LHS is not an array");
79+
80+
mlir::Type lhsEleTy = lhs.getFortranElementType();
81+
if (!fir::isa_trivial(lhsEleTy))
82+
return rewriter.notifyMatchFailure(
83+
assign, "AssignOp's LHS data type is not trivial");
84+
85+
// Assignments with differing LHS/RHS element types are not inlined by
// this pattern.
if (lhsEleTy != rhsEleTy)
86+
return rewriter.notifyMatchFailure(assign,
87+
"RHS/LHS element types mismatch");
88+
89+
if (!mlir::isa<hlfir::ExprType>(rhs.getType())) {
90+
// If RHS is not an hlfir.expr, then we should prove that
91+
// LHS and RHS do not alias.
92+
// TODO: if they may alias, we can insert hlfir.as_expr for RHS,
93+
// and proceed with the inlining.
94+
fir::AliasAnalysis aliasAnalysis;
95+
mlir::AliasResult aliasRes = aliasAnalysis.alias(lhs, rhs);
96+
// TODO: use areIdenticalOrDisjointSlices() from
97+
// OptimizedBufferization.cpp to check if we can still do the expansion.
98+
// Anything other than a definite "no alias" answer blocks the expansion.
if (!aliasRes.isNo()) {
99+
LLVM_DEBUG(llvm::dbgs() << "InlineHLFIRAssign:\n"
100+
<< "\tLHS: " << lhs << "\n"
101+
<< "\tRHS: " << rhs << "\n"
102+
<< "\tALIAS: " << aliasRes << "\n");
103+
return rewriter.notifyMatchFailure(assign, "RHS/LHS may alias");
104+
}
105+
}
106+
107+
// All checks passed: generate the replacement right before the original
// hlfir.assign.
mlir::Location loc = assign->getLoc();
108+
fir::FirOpBuilder builder(rewriter, assign.getOperation());
109+
builder.setInsertionPoint(assign);
110+
// NOTE(review): presumably this reads through pointer/allocatable
// descriptors so that the entities below refer to the target data —
// confirm against hlfir::derefPointersAndAllocatables.
rhs = hlfir::derefPointersAndAllocatables(loc, builder, rhs);
111+
lhs = hlfir::derefPointersAndAllocatables(loc, builder, lhs);
112+
// The loop extents come from the LHS shape only; no check that the RHS
// shape conforms is generated here.
mlir::Value shape = hlfir::genShape(loc, builder, lhs);
113+
llvm::SmallVector<mlir::Value> extents =
114+
hlfir::getIndexExtents(loc, builder, shape);
115+
// The loop nest is created as unordered; the last argument forwards
// flangomp's decision on whether workshare lowering applies to this assign.
hlfir::LoopNest loopNest =
116+
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
117+
flangomp::shouldUseWorkshareLowering(assign));
118+
// Loop body: load the RHS element at the current one-based indices and
// assign it to the LHS element at the same indices.
builder.setInsertionPointToStart(loopNest.body);
119+
auto rhsArrayElement =
120+
hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);
121+
rhsArrayElement = hlfir::loadTrivialScalar(loc, builder, rhsArrayElement);
122+
auto lhsArrayElement =
123+
hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
124+
builder.create<hlfir::AssignOp>(loc, rhsArrayElement, lhsArrayElement);
125+
// The array assignment is now fully expressed by the loop nest.
rewriter.eraseOp(assign);
126+
return mlir::success();
127+
}
128+
};
129+
130+
/// Pass that applies InlineHLFIRAssignConversion to the operation it runs on
/// using the greedy pattern rewrite driver.
class InlineHLFIRAssignPass
131+
: public hlfir::impl::InlineHLFIRAssignBase<InlineHLFIRAssignPass> {
132+
public:
133+
/// Run the greedy driver with InlineHLFIRAssignConversion as the only
/// pattern; emit an error and signal pass failure if the driver fails.
void runOnOperation() override {
134+
mlir::MLIRContext *context = &getContext();
135+
136+
mlir::GreedyRewriteConfig config;
137+
// Prevent the pattern driver from merging blocks.
138+
config.enableRegionSimplification =
139+
mlir::GreedySimplifyRegionLevel::Disabled;
140+
141+
mlir::RewritePatternSet patterns(context);
142+
patterns.insert<InlineHLFIRAssignConversion>(context);
143+
144+
// A failure reported by the driver is turned into a diagnostic at the
// location of the processed operation plus a pass failure.
if (mlir::failed(mlir::applyPatternsGreedily(
145+
getOperation(), std::move(patterns), config))) {
146+
mlir::emitError(getOperation()->getLoc(),
147+
"failure in hlfir.assign inlining");
148+
signalPassFailure();
149+
}
150+
}
151+
};
152+
} // namespace

flang/lib/Optimizer/HLFIR/Transforms/OptimizedBufferization.cpp

Lines changed: 3 additions & 106 deletions
Original file line numberDiff line numberDiff line change
@@ -698,108 +698,6 @@ llvm::LogicalResult BroadcastAssignBufferization::matchAndRewrite(
698698
return mlir::success();
699699
}
700700

701-
/// Expand hlfir.assign of array RHS to array LHS into a loop nest
702-
/// of element-by-element assignments:
703-
/// hlfir.assign %4 to %5 : !fir.ref<!fir.array<3x3xf32>>,
704-
/// !fir.ref<!fir.array<3x3xf32>>
705-
/// into:
706-
/// fir.do_loop %arg1 = %c1 to %c3 step %c1 unordered {
707-
/// fir.do_loop %arg2 = %c1 to %c3 step %c1 unordered {
708-
/// %6 = hlfir.designate %4 (%arg2, %arg1) :
709-
/// (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
710-
/// %7 = fir.load %6 : !fir.ref<f32>
711-
/// %8 = hlfir.designate %5 (%arg2, %arg1) :
712-
/// (!fir.ref<!fir.array<3x3xf32>>, index, index) -> !fir.ref<f32>
713-
/// hlfir.assign %7 to %8 : f32, !fir.ref<f32>
714-
/// }
715-
/// }
716-
///
717-
/// The transformation is correct only when LHS and RHS do not alias.
718-
/// This transformation does not support runtime checking for
719-
/// non-conforming LHS/RHS arrays' shapes currently.
720-
class VariableAssignBufferization
721-
: public mlir::OpRewritePattern<hlfir::AssignOp> {
722-
private:
723-
public:
724-
using mlir::OpRewritePattern<hlfir::AssignOp>::OpRewritePattern;
725-
726-
llvm::LogicalResult
727-
matchAndRewrite(hlfir::AssignOp assign,
728-
mlir::PatternRewriter &rewriter) const override;
729-
};
730-
731-
llvm::LogicalResult VariableAssignBufferization::matchAndRewrite(
732-
hlfir::AssignOp assign, mlir::PatternRewriter &rewriter) const {
733-
if (assign.isAllocatableAssignment())
734-
return rewriter.notifyMatchFailure(assign, "AssignOp may imply allocation");
735-
736-
hlfir::Entity rhs{assign.getRhs()};
737-
738-
// To avoid conflicts with ElementalAssignBufferization pattern, we avoid
739-
// matching RHS when it is an `ExprType` defined by an `ElementalOp`; which is
740-
// among the main criteria matched by ElementalAssignBufferization.
741-
if (mlir::isa<hlfir::ExprType>(rhs.getType()) &&
742-
mlir::isa<hlfir::ElementalOp>(rhs.getDefiningOp()))
743-
return rewriter.notifyMatchFailure(
744-
assign, "RHS is an ExprType defined by ElementalOp");
745-
746-
if (!rhs.isArray())
747-
return rewriter.notifyMatchFailure(assign,
748-
"AssignOp's RHS is not an array");
749-
750-
mlir::Type rhsEleTy = rhs.getFortranElementType();
751-
if (!fir::isa_trivial(rhsEleTy))
752-
return rewriter.notifyMatchFailure(
753-
assign, "AssignOp's RHS data type is not trivial");
754-
755-
hlfir::Entity lhs{assign.getLhs()};
756-
if (!lhs.isArray())
757-
return rewriter.notifyMatchFailure(assign,
758-
"AssignOp's LHS is not an array");
759-
760-
mlir::Type lhsEleTy = lhs.getFortranElementType();
761-
if (!fir::isa_trivial(lhsEleTy))
762-
return rewriter.notifyMatchFailure(
763-
assign, "AssignOp's LHS data type is not trivial");
764-
765-
if (lhsEleTy != rhsEleTy)
766-
return rewriter.notifyMatchFailure(assign,
767-
"RHS/LHS element types mismatch");
768-
769-
fir::AliasAnalysis aliasAnalysis;
770-
mlir::AliasResult aliasRes = aliasAnalysis.alias(lhs, rhs);
771-
// TODO: use areIdenticalOrDisjointSlices() to check if
772-
// we can still do the expansion.
773-
if (!aliasRes.isNo()) {
774-
LLVM_DEBUG(llvm::dbgs() << "VariableAssignBufferization:\n"
775-
<< "\tLHS: " << lhs << "\n"
776-
<< "\tRHS: " << rhs << "\n"
777-
<< "\tALIAS: " << aliasRes << "\n");
778-
return rewriter.notifyMatchFailure(assign, "RHS/LHS may alias");
779-
}
780-
781-
mlir::Location loc = assign->getLoc();
782-
fir::FirOpBuilder builder(rewriter, assign.getOperation());
783-
builder.setInsertionPoint(assign);
784-
rhs = hlfir::derefPointersAndAllocatables(loc, builder, rhs);
785-
lhs = hlfir::derefPointersAndAllocatables(loc, builder, lhs);
786-
mlir::Value shape = hlfir::genShape(loc, builder, lhs);
787-
llvm::SmallVector<mlir::Value> extents =
788-
hlfir::getIndexExtents(loc, builder, shape);
789-
hlfir::LoopNest loopNest =
790-
hlfir::genLoopNest(loc, builder, extents, /*isUnordered=*/true,
791-
flangomp::shouldUseWorkshareLowering(assign));
792-
builder.setInsertionPointToStart(loopNest.body);
793-
auto rhsArrayElement =
794-
hlfir::getElementAt(loc, builder, rhs, loopNest.oneBasedIndices);
795-
rhsArrayElement = hlfir::loadTrivialScalar(loc, builder, rhsArrayElement);
796-
auto lhsArrayElement =
797-
hlfir::getElementAt(loc, builder, lhs, loopNest.oneBasedIndices);
798-
builder.create<hlfir::AssignOp>(loc, rhsArrayElement, lhsArrayElement);
799-
rewriter.eraseOp(assign);
800-
return mlir::success();
801-
}
802-
803701
using GenBodyFn =
804702
std::function<mlir::Value(fir::FirOpBuilder &, mlir::Location, mlir::Value,
805703
const llvm::SmallVectorImpl<mlir::Value> &)>;
@@ -1206,9 +1104,9 @@ class ReductionMaskConversion : public mlir::OpRewritePattern<Op> {
12061104
loc, resultArr, builder.createBool(loc, false));
12071105

12081106
// Check all the users - the destroy is no longer required, and any assign
1209-
// can use resultArr directly so that VariableAssignBufferization in this
1210-
// pass can optimize the results. Other operations are replaces with an
1211-
// AsExpr for the temporary resultArr.
1107+
// can use resultArr directly so that InlineHLFIRAssign pass
1108+
// can optimize the results. Other operations are replaced with an AsExpr
1109+
// for the temporary resultArr.
12121110
llvm::SmallVector<hlfir::DestroyOp> destroys;
12131111
llvm::SmallVector<hlfir::AssignOp> assigns;
12141112
for (auto user : mloc->getUsers()) {
@@ -1356,7 +1254,6 @@ class OptimizedBufferizationPass
13561254
// This requires small code reordering in ElementalAssignBufferization.
13571255
patterns.insert<ElementalAssignBufferization>(context);
13581256
patterns.insert<BroadcastAssignBufferization>(context);
1359-
patterns.insert<VariableAssignBufferization>(context);
13601257
patterns.insert<EvaluateIntoMemoryAssignBufferization>(context);
13611258
patterns.insert<ReductionConversion<hlfir::CountOp>>(context);
13621259
patterns.insert<ReductionConversion<hlfir::AnyOp>>(context);

flang/lib/Optimizer/Passes/Pipelines.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -234,6 +234,8 @@ void createHLFIRToFIRPassPipeline(mlir::PassManager &pm, bool enableOpenMP,
234234
pm.addPass(mlir::createCSEPass());
235235
addNestedPassToAllTopLevelOperations<PassConstructor>(
236236
pm, hlfir::createOptimizedBufferization);
237+
addNestedPassToAllTopLevelOperations<PassConstructor>(
238+
pm, hlfir::createInlineHLFIRAssign);
237239
}
238240
pm.addPass(hlfir::createLowerHLFIROrderedAssignments());
239241
pm.addPass(hlfir::createLowerHLFIRIntrinsics());

flang/test/Driver/mlir-pass-pipeline.f90

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -36,12 +36,16 @@
3636
! O2-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
3737
! O2-NEXT: 'fir.global' Pipeline
3838
! O2-NEXT: OptimizedBufferization
39+
! O2-NEXT: InlineHLFIRAssign
3940
! O2-NEXT: 'func.func' Pipeline
4041
! O2-NEXT: OptimizedBufferization
42+
! O2-NEXT: InlineHLFIRAssign
4143
! O2-NEXT: 'omp.declare_reduction' Pipeline
4244
! O2-NEXT: OptimizedBufferization
45+
! O2-NEXT: InlineHLFIRAssign
4346
! O2-NEXT: 'omp.private' Pipeline
4447
! O2-NEXT: OptimizedBufferization
48+
! O2-NEXT: InlineHLFIRAssign
4549
! ALL: LowerHLFIROrderedAssignments
4650
! ALL-NEXT: LowerHLFIRIntrinsics
4751
! ALL-NEXT: BufferizeHLFIR

flang/test/Fir/basic-program.fir

Lines changed: 4 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -37,12 +37,16 @@ func.func @_QQmain() {
3737
// PASSES-NEXT: Pipeline Collection : ['fir.global', 'func.func', 'omp.declare_reduction', 'omp.private']
3838
// PASSES-NEXT: 'fir.global' Pipeline
3939
// PASSES-NEXT: OptimizedBufferization
40+
// PASSES-NEXT: InlineHLFIRAssign
4041
// PASSES-NEXT: 'func.func' Pipeline
4142
// PASSES-NEXT: OptimizedBufferization
43+
// PASSES-NEXT: InlineHLFIRAssign
4244
// PASSES-NEXT: 'omp.declare_reduction' Pipeline
4345
// PASSES-NEXT: OptimizedBufferization
46+
// PASSES-NEXT: InlineHLFIRAssign
4447
// PASSES-NEXT: 'omp.private' Pipeline
4548
// PASSES-NEXT: OptimizedBufferization
49+
// PASSES-NEXT: InlineHLFIRAssign
4650
// PASSES-NEXT: LowerHLFIROrderedAssignments
4751
// PASSES-NEXT: LowerHLFIRIntrinsics
4852
// PASSES-NEXT: BufferizeHLFIR

0 commit comments

Comments
 (0)