Skip to content

Commit c1779f3

Browse files
authored
[flang] Implement !DIR$ [NO]INLINE and FORCEINLINE directives (#134350)
This patch adds support for three directives: `!dir$ inline`, `!dir$ forceinline` and `!dir$ noinline`. - `!dir$ noinline` tells the compiler not to perform inlining on specific function calls by adding the `noinline` metadata on the call. - `!dir$ inline` tells the compiler to attempt inlining on specific function calls by adding the `inlinehint` metadata on the call. - `!dir$ forceinline` tells the compiler to always perform inlining on specific function calls by adding the `alwaysinline` metadata on the call. Currently, these directives can be placed before a `DO` loop, a call statement, or an assignment statement. Other statements may be added in the future if needed. For the `inline` directive, the correct name might be `forceinline`, but I'm not sure.
1 parent 20c323a commit c1779f3

File tree

17 files changed

+375
-11
lines changed

17 files changed

+375
-11
lines changed

flang/docs/Directives.md

Lines changed: 8 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -53,6 +53,14 @@ A list of non-standard directives supported by Flang
5353
* `!dir$ novector` disabling vectorization on the following loop.
5454
* `!dir$ nounroll` disabling unrolling on the following loop.
5555
* `!dir$ nounroll_and_jam` disabling unrolling and jamming on the following loop.
56+
* `!dir$ inline` instructs the compiler to attempt to inline the called routines if the
57+
directive is specified before a call statement or all call statements within the loop
58+
body if specified before a DO LOOP or all function references if specified before an
59+
assignment statement.
60+
* `!dir$ forceinline` works in the same way as the `inline` directive, but it forces
61+
inlining by the compiler on a function call statement.
62+
* `!dir$ noinline` works in the same way as the `inline` directive, but prevents
63+
any attempt at inlining by the compiler on a function call statement.
5664

5765
# Directive Details
5866

flang/include/flang/Evaluate/call.h

Lines changed: 10 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -255,6 +255,13 @@ class ProcedureRef {
255255
bool IsElemental() const { return proc_.IsElemental(); }
256256
bool hasAlternateReturns() const { return hasAlternateReturns_; }
257257

258+
bool hasNoInline() const { return noInline_; }
259+
void setNoInline(bool ni) { noInline_ = ni; }
260+
bool hasAlwaysInline() const { return alwaysInline_; }
261+
void setAlwaysInline(bool ai) { alwaysInline_ = ai; }
262+
bool hasInlineHint() const { return inlineHint_; }
263+
void setInlineHint(bool ih) { inlineHint_ = ih; }
264+
258265
Expr<SomeType> *UnwrapArgExpr(int n) {
259266
if (static_cast<std::size_t>(n) < arguments_.size() && arguments_[n]) {
260267
return arguments_[n]->UnwrapExpr();
@@ -278,6 +285,9 @@ class ProcedureRef {
278285
ActualArguments arguments_;
279286
Chevrons chevrons_;
280287
bool hasAlternateReturns_;
288+
bool noInline_{false};
289+
bool alwaysInline_{false};
290+
bool inlineHint_{false};
281291
};
282292

283293
template <typename A> class FunctionRef : public ProcedureRef {

flang/include/flang/Optimizer/Dialect/FIRAttr.td

Lines changed: 20 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -219,4 +219,24 @@ def LocalitySpecifierTypeAttr : EnumAttr<FIROpsDialect, LocalitySpecifierType,
219219
let assemblyFormat = "`{` `type` `=` $value `}`";
220220
}
221221

222+
/// Fortran inline attribute
223+
def FIRinlineNone : I32BitEnumAttrCaseNone<"none">;
224+
def FIRinlineNo : I32BitEnumAttrCaseBit<"no_inline", 0>;
225+
def FIRinlineAlways : I32BitEnumAttrCaseBit<"always_inline", 1>;
226+
def FIRinlineHint : I32BitEnumAttrCaseBit<"inline_hint", 2>;
227+
228+
def fir_FortranInlineEnum
229+
: I32BitEnumAttr<"FortranInlineEnum", "Fortran inline attributes",
230+
[FIRinlineNone, FIRinlineNo, FIRinlineAlways,
231+
FIRinlineHint]> {
232+
let separator = ", ";
233+
let cppNamespace = "::fir";
234+
let genSpecializedAttr = 0;
235+
let printBitEnumPrimaryGroups = 1;
236+
}
237+
238+
def fir_FortranInlineAttr
239+
: EnumAttr<FIROpsDialect, fir_FortranInlineEnum, "inline_attrs"> {
240+
let assemblyFormat = "`<` $value `>`";
241+
}
222242
#endif // FIR_DIALECT_FIR_ATTRS

flang/include/flang/Optimizer/Dialect/FIROps.td

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2549,6 +2549,7 @@ def fir_CallOp : fir_Op<"call",
25492549
OptionalAttr<DictArrayAttr>:$arg_attrs,
25502550
OptionalAttr<DictArrayAttr>:$res_attrs,
25512551
OptionalAttr<fir_FortranProcedureFlagsAttr>:$procedure_attrs,
2552+
OptionalAttr<fir_FortranInlineAttr>:$inline_attr,
25522553
DefaultValuedAttr<Arith_FastMathAttr,
25532554
"::mlir::arith::FastMathFlags::none">:$fastmath
25542555
);

flang/include/flang/Parser/dump-parse-tree.h

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -206,8 +206,11 @@ class ParseTreeDumper {
206206
NODE(parser, CompilerDirective)
207207
NODE(CompilerDirective, AssumeAligned)
208208
NODE(CompilerDirective, IgnoreTKR)
209+
NODE(CompilerDirective, Inline)
210+
NODE(CompilerDirective, ForceInline)
209211
NODE(CompilerDirective, LoopCount)
210212
NODE(CompilerDirective, NameValue)
213+
NODE(CompilerDirective, NoInline)
211214
NODE(CompilerDirective, Unrecognized)
212215
NODE(CompilerDirective, VectorAlways)
213216
NODE(CompilerDirective, Unroll)

flang/include/flang/Parser/parse-tree.h

Lines changed: 7 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -3356,6 +3356,9 @@ struct StmtFunctionStmt {
33563356
// !DIR$ NOVECTOR
33573357
// !DIR$ NOUNROLL
33583358
// !DIR$ NOUNROLL_AND_JAM
3359+
// !DIR$ FORCEINLINE
3360+
// !DIR$ INLINE
3361+
// !DIR$ NOINLINE
33593362
// !DIR$ <anything else>
33603363
struct CompilerDirective {
33613364
UNION_CLASS_BOILERPLATE(CompilerDirective);
@@ -3384,11 +3387,14 @@ struct CompilerDirective {
33843387
EMPTY_CLASS(NoVector);
33853388
EMPTY_CLASS(NoUnroll);
33863389
EMPTY_CLASS(NoUnrollAndJam);
3390+
EMPTY_CLASS(ForceInline);
3391+
EMPTY_CLASS(Inline);
3392+
EMPTY_CLASS(NoInline);
33873393
EMPTY_CLASS(Unrecognized);
33883394
CharBlock source;
33893395
std::variant<std::list<IgnoreTKR>, LoopCount, std::list<AssumeAligned>,
33903396
VectorAlways, std::list<NameValue>, Unroll, UnrollAndJam, Unrecognized,
3391-
NoVector, NoUnroll, NoUnrollAndJam>
3397+
NoVector, NoUnroll, NoUnrollAndJam, ForceInline, Inline, NoInline>
33923398
u;
33933399
};
33943400

flang/lib/Lower/Bridge.cpp

Lines changed: 122 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -1884,6 +1884,26 @@ class FirConverter : public Fortran::lower::AbstractConverter {
18841884
setCurrentPosition(stmt.source);
18851885
assert(stmt.typedCall && "Call was not analyzed");
18861886
mlir::Value res{};
1887+
1888+
// Set 'no_inline', 'inline_hint' or 'always_inline' to true on the
1889+
// ProcedureRef. The NoInline and AlwaysInline attribute will be set in
1890+
// genProcedureRef later.
1891+
for (const auto *dir : eval.dirs) {
1892+
Fortran::common::visit(
1893+
Fortran::common::visitors{
1894+
[&](const Fortran::parser::CompilerDirective::ForceInline &) {
1895+
stmt.typedCall->setAlwaysInline(true);
1896+
},
1897+
[&](const Fortran::parser::CompilerDirective::Inline &) {
1898+
stmt.typedCall->setInlineHint(true);
1899+
},
1900+
[&](const Fortran::parser::CompilerDirective::NoInline &) {
1901+
stmt.typedCall->setNoInline(true);
1902+
},
1903+
[&](const auto &) {}},
1904+
dir->u);
1905+
}
1906+
18871907
if (lowerToHighLevelFIR()) {
18881908
std::optional<mlir::Type> resultType;
18891909
if (stmt.typedCall->hasAlternateReturns())
@@ -2200,6 +2220,50 @@ class FirConverter : public Fortran::lower::AbstractConverter {
22002220
// so no clean-up needs to be generated for these entities.
22012221
}
22022222

2223+
/// Apply the inlining directives in \p dirs to \p op and to the operations
/// that produce its operands.
///
/// The operand producers are visited first (recursively, through
/// `getDefiningOp`), then \p op itself is handled: if it is a `fir::CallOp`,
/// its inline attribute is set to `no_inline`, `inline_hint` or
/// `always_inline` for each NoInline, Inline or ForceInline directive in
/// \p dirs. The directives are processed in order, so when several of them
/// are present the setter call for the last one is the final one made.
/// Directives of any other kind are ignored. Nothing happens when \p dirs is
/// empty.
void attachInlineAttributes(
    mlir::Operation &op,
    const llvm::ArrayRef<const Fortran::parser::CompilerDirective *> &dirs) {
  if (dirs.empty())
    return;

  // Operands that are block arguments have no defining operation and are
  // skipped.
  for (mlir::Value operand : op.getOperands())
    if (mlir::Operation *producer = operand.getDefiningOp())
      attachInlineAttributes(*producer, dirs);

  // Only fir.call operations carry the inline attribute.
  auto callOp = mlir::dyn_cast<fir::CallOp>(op);
  if (!callOp)
    return;

  for (const Fortran::parser::CompilerDirective *directive : dirs) {
    Fortran::common::visit(
        Fortran::common::visitors{
            [&](const Fortran::parser::CompilerDirective::ForceInline &) {
              callOp.setInlineAttr(fir::FortranInlineEnum::always_inline);
            },
            [&](const Fortran::parser::CompilerDirective::Inline &) {
              callOp.setInlineAttr(fir::FortranInlineEnum::inline_hint);
            },
            [&](const Fortran::parser::CompilerDirective::NoInline &) {
              callOp.setInlineAttr(fir::FortranInlineEnum::no_inline);
            },
            [&](const auto &) {}},
        directive->u);
  }
}
2252+
2253+
/// Apply the inlining directives in \p dirs to the operations found directly
/// in the blocks of \p doLoop's region, by calling attachInlineAttributes on
/// each of them.
///
/// Does nothing when \p doLoop wraps no operation or when \p dirs is empty.
void attachAttributesToDoLoopOperations(
    fir::DoLoopOp &doLoop,
    llvm::SmallVectorImpl<const Fortran::parser::CompilerDirective *> &dirs) {
  if (dirs.empty() || !doLoop.getOperation())
    return;

  // `dirs` is not modified while iterating, so the emptiness check above
  // holds for every operation visited below.
  for (mlir::Block &body : doLoop.getRegion())
    for (mlir::Operation &nested : body)
      attachInlineAttributes(nested, dirs);
}
2266+
22032267
/// Generate FIR for a DO construct. There are six variants:
22042268
/// - unstructured infinite and while loops
22052269
/// - structured and unstructured increment loops
@@ -2351,6 +2415,11 @@ class FirConverter : public Fortran::lower::AbstractConverter {
23512415
if (!incrementLoopNestInfo.empty() &&
23522416
incrementLoopNestInfo.back().isConcurrent)
23532417
localSymbols.popScope();
2418+
2419+
// Add attribute(s) on operations in fir::DoLoopOp if necessary
2420+
for (IncrementLoopInfo &info : incrementLoopNestInfo)
2421+
if (auto loopOp = mlir::dyn_cast_if_present<fir::DoLoopOp>(info.loopOp))
2422+
attachAttributesToDoLoopOperations(loopOp, doStmtEval.dirs);
23542423
}
23552424

23562425
/// Generate FIR to evaluate loop control values (lower, upper and step).
@@ -3154,6 +3223,26 @@ class FirConverter : public Fortran::lower::AbstractConverter {
31543223
e->dirs.push_back(&dir);
31553224
}
31563225

3226+
void
3227+
attachInliningDirectiveToStmt(const Fortran::parser::CompilerDirective &dir,
3228+
Fortran::lower::pft::Evaluation *e) {
3229+
while (e->isDirective())
3230+
e = e->lexicalSuccessor;
3231+
3232+
// If the successor is a statement or a do loop, the compiler
3233+
// will perform inlining.
3234+
if (e->isA<Fortran::parser::CallStmt>() ||
3235+
e->isA<Fortran::parser::NonLabelDoStmt>() ||
3236+
e->isA<Fortran::parser::AssignmentStmt>()) {
3237+
e->dirs.push_back(&dir);
3238+
} else {
3239+
mlir::Location loc = toLocation();
3240+
mlir::emitWarning(loc,
3241+
"Inlining directive not in front of loops, function"
3242+
"call or assignment.\n");
3243+
}
3244+
}
3245+
31573246
void genFIR(const Fortran::parser::CompilerDirective &dir) {
31583247
Fortran::lower::pft::Evaluation &eval = getEval();
31593248

@@ -3177,6 +3266,15 @@ class FirConverter : public Fortran::lower::AbstractConverter {
31773266
[&](const Fortran::parser::CompilerDirective::NoUnrollAndJam &) {
31783267
attachDirectiveToLoop(dir, &eval);
31793268
},
3269+
[&](const Fortran::parser::CompilerDirective::ForceInline &) {
3270+
attachInliningDirectiveToStmt(dir, &eval);
3271+
},
3272+
[&](const Fortran::parser::CompilerDirective::Inline &) {
3273+
attachInliningDirectiveToStmt(dir, &eval);
3274+
},
3275+
[&](const Fortran::parser::CompilerDirective::NoInline &) {
3276+
attachInliningDirectiveToStmt(dir, &eval);
3277+
},
31803278
[&](const auto &) {}},
31813279
dir.u);
31823280
}
@@ -5086,7 +5184,9 @@ class FirConverter : public Fortran::lower::AbstractConverter {
50865184

50875185
void genDataAssignment(
50885186
const Fortran::evaluate::Assignment &assign,
5089-
const Fortran::evaluate::ProcedureRef *userDefinedAssignment) {
5187+
const Fortran::evaluate::ProcedureRef *userDefinedAssignment,
5188+
const llvm::ArrayRef<const Fortran::parser::CompilerDirective *> &dirs =
5189+
{}) {
50905190
mlir::Location loc = getCurrentLocation();
50915191
fir::FirOpBuilder &builder = getFirOpBuilder();
50925192

@@ -5166,10 +5266,20 @@ class FirConverter : public Fortran::lower::AbstractConverter {
51665266
genCUDADataTransfer(builder, loc, assign, lhs, rhs,
51675267
isWholeAllocatableAssignment,
51685268
keepLhsLengthInAllocatableAssignment);
5169-
else
5269+
else {
5270+
// If RHS or LHS have a CallOp in their expression, this operation will
5271+
// have the 'no_inline' or 'always_inline' attribute if there is a
5272+
// directive just before the assignment.
5273+
if (!dirs.empty()) {
5274+
if (rhs.getDefiningOp())
5275+
attachInlineAttributes(*rhs.getDefiningOp(), dirs);
5276+
if (lhs.getDefiningOp())
5277+
attachInlineAttributes(*lhs.getDefiningOp(), dirs);
5278+
}
51705279
hlfir::AssignOp::create(builder, loc, rhs, lhs,
51715280
isWholeAllocatableAssignment,
51725281
keepLhsLengthInAllocatableAssignment);
5282+
}
51735283
if (hasCUDAImplicitTransfer && !isInDeviceContext) {
51745284
localSymbols.popScope();
51755285
for (mlir::Value temp : implicitTemps)
@@ -5237,16 +5347,21 @@ class FirConverter : public Fortran::lower::AbstractConverter {
52375347
}
52385348

52395349
/// Shared for both assignments and pointer assignments.
5240-
void genAssignment(const Fortran::evaluate::Assignment &assign) {
5350+
void
5351+
genAssignment(const Fortran::evaluate::Assignment &assign,
5352+
const llvm::ArrayRef<const Fortran::parser::CompilerDirective *>
5353+
&dirs = {}) {
52415354
mlir::Location loc = toLocation();
52425355
if (lowerToHighLevelFIR()) {
52435356
Fortran::common::visit(
52445357
Fortran::common::visitors{
52455358
[&](const Fortran::evaluate::Assignment::Intrinsic &) {
5246-
genDataAssignment(assign, /*userDefinedAssignment=*/nullptr);
5359+
genDataAssignment(assign, /*userDefinedAssignment=*/nullptr,
5360+
dirs);
52475361
},
52485362
[&](const Fortran::evaluate::ProcedureRef &procRef) {
5249-
genDataAssignment(assign, /*userDefinedAssignment=*/&procRef);
5363+
genDataAssignment(assign, /*userDefinedAssignment=*/&procRef,
5364+
dirs);
52505365
},
52515366
[&](const Fortran::evaluate::Assignment::BoundsSpec &lbExprs) {
52525367
if (isInsideHlfirForallOrWhere())
@@ -5651,7 +5766,8 @@ class FirConverter : public Fortran::lower::AbstractConverter {
56515766
}
56525767

56535768
void genFIR(const Fortran::parser::AssignmentStmt &stmt) {
5654-
genAssignment(*stmt.typedAssignment->v);
5769+
Fortran::lower::pft::Evaluation &eval = getEval();
5770+
genAssignment(*stmt.typedAssignment->v, eval.dirs);
56555771
}
56565772

56575773
void genFIR(const Fortran::parser::SyncAllStmt &stmt) {

flang/lib/Lower/ConvertCall.cpp

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -700,9 +700,20 @@ Fortran::lower::genCallOpAndResult(
700700
callResult = dispatch.getResult(0);
701701
} else {
702702
// Standard procedure call with fir.call.
703+
fir::FortranInlineEnumAttr inlineAttr;
704+
705+
if (caller.getCallDescription().hasNoInline())
706+
inlineAttr = fir::FortranInlineEnumAttr::get(
707+
builder.getContext(), fir::FortranInlineEnum::no_inline);
708+
else if (caller.getCallDescription().hasInlineHint())
709+
inlineAttr = fir::FortranInlineEnumAttr::get(
710+
builder.getContext(), fir::FortranInlineEnum::inline_hint);
711+
else if (caller.getCallDescription().hasAlwaysInline())
712+
inlineAttr = fir::FortranInlineEnumAttr::get(
713+
builder.getContext(), fir::FortranInlineEnum::always_inline);
703714
auto call = fir::CallOp::create(
704715
builder, loc, funcType.getResults(), funcSymbolAttr, operands,
705-
/*arg_attrs=*/nullptr, /*res_attrs=*/nullptr, procAttrs);
716+
/*arg_attrs=*/nullptr, /*res_attrs=*/nullptr, procAttrs, inlineAttr);
706717

707718
callNumResults = call.getNumResults();
708719
if (callNumResults != 0)

flang/lib/Optimizer/CodeGen/CodeGen.cpp

Lines changed: 12 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -680,6 +680,18 @@ struct CallOpConversion : public fir::FIROpConversion<fir::CallOp> {
680680
if (mlir::ArrayAttr resAttrs = call.getResAttrsAttr())
681681
llvmCall.setResAttrsAttr(resAttrs);
682682

683+
if (auto inlineAttr = call.getInlineAttrAttr()) {
684+
llvmCall->removeAttr("inline_attr");
685+
if (inlineAttr.getValue() == fir::FortranInlineEnum::no_inline) {
686+
llvmCall.setNoInlineAttr(rewriter.getUnitAttr());
687+
} else if (inlineAttr.getValue() == fir::FortranInlineEnum::inline_hint) {
688+
llvmCall.setInlineHintAttr(rewriter.getUnitAttr());
689+
} else if (inlineAttr.getValue() ==
690+
fir::FortranInlineEnum::always_inline) {
691+
llvmCall.setAlwaysInlineAttr(rewriter.getUnitAttr());
692+
}
693+
}
694+
683695
if (memAttr)
684696
llvmCall.setMemoryEffectsAttr(
685697
mlir::cast<mlir::LLVM::MemoryEffectsAttr>(memAttr));

flang/lib/Optimizer/Transforms/PolymorphicOpConversion.cpp

Lines changed: 2 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -246,7 +246,8 @@ struct DispatchOpConv : public OpConversionPattern<fir::DispatchOp> {
246246
args.append(dispatch.getArgs().begin(), dispatch.getArgs().end());
247247
rewriter.replaceOpWithNewOp<fir::CallOp>(
248248
dispatch, resTypes, nullptr, args, dispatch.getArgAttrsAttr(),
249-
dispatch.getResAttrsAttr(), dispatch.getProcedureAttrsAttr());
249+
dispatch.getResAttrsAttr(), dispatch.getProcedureAttrsAttr(),
250+
/*inline_attr*/ fir::FortranInlineEnumAttr{});
250251
return mlir::success();
251252
}
252253

0 commit comments

Comments
 (0)