Skip to content

Commit 04763b7

Browse files
authored
Merge upstream PR stack for do concurrent reductions (llvm#3039)
2 parents 9969219 + 56f75ae commit 04763b7

28 files changed

+1147
-363
lines changed

flang/include/flang/Lower/OpenMP/Clauses.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -179,6 +179,7 @@ using IteratorSpecifier = tomp::type::IteratorSpecifierT<TypeTy, IdTy, ExprTy>;
179179
using DefinedOperator = tomp::type::DefinedOperatorT<IdTy, ExprTy>;
180180
using ProcedureDesignator = tomp::type::ProcedureDesignatorT<IdTy, ExprTy>;
181181
using ReductionOperator = tomp::type::ReductionIdentifierT<IdTy, ExprTy>;
182+
using ReductionOperatorList = List<ReductionOperator>;
182183
using DependenceType = tomp::type::DependenceType;
183184
using Prescriptiveness = tomp::type::Prescriptiveness;
184185

flang/include/flang/Lower/OpenMP/Utils.h

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -9,7 +9,7 @@
99
#ifndef FORTRAN_LOWER_OPENMPUTILS_H
1010
#define FORTRAN_LOWER_OPENMPUTILS_H
1111

12-
#include "Clauses.h"
12+
#include "flang/Lower/OpenMP/Clauses.h"
1313
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
1414
#include "mlir/IR/Location.h"
1515
#include "mlir/IR/Value.h"

flang/lib/Lower/OpenMP/ReductionProcessor.h renamed to flang/include/flang/Lower/Support/ReductionProcessor.h

Lines changed: 16 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -18,7 +18,6 @@
1818
#include "flang/Optimizer/Dialect/FIRType.h"
1919
#include "flang/Semantics/symbol.h"
2020
#include "flang/Semantics/type.h"
21-
#include "mlir/Dialect/OpenMP/OpenMPDialect.h"
2221
#include "mlir/IR/Location.h"
2322
#include "mlir/IR/Types.h"
2423

@@ -64,6 +63,9 @@ class ReductionProcessor {
6463
static ReductionIdentifier
6564
getReductionType(omp::clause::DefinedOperator::IntrinsicOperator intrinsicOp);
6665

66+
static ReductionIdentifier
67+
getReductionType(const fir::ReduceOperationEnum &pd);
68+
6769
static bool
6870
supportedIntrinsicProcReduction(const omp::clause::ProcedureDesignator &pd);
6971

@@ -77,10 +79,9 @@ class ReductionProcessor {
7779
const fir::KindMapping &kindMap,
7880
mlir::Type ty, bool isByRef);
7981

80-
static std::string
81-
getReductionName(omp::clause::DefinedOperator::IntrinsicOperator intrinsicOp,
82-
const fir::KindMapping &kindMap, mlir::Type ty,
83-
bool isByRef);
82+
static std::string getReductionName(ReductionIdentifier redId,
83+
const fir::KindMapping &kindMap,
84+
mlir::Type ty, bool isByRef);
8485

8586
/// This function returns the identity value of the operator \p
8687
/// reductionOpName. For example:
@@ -112,22 +113,23 @@ class ReductionProcessor {
112113
/// symbol table. The declaration has a constant initializer with the neutral
113114
/// value `initValue`, and the reduction combiner carried over from `reduce`.
114115
/// TODO: add atomic region.
115-
static mlir::omp::DeclareReductionOp
116-
createDeclareReduction(AbstractConverter &builder,
117-
llvm::StringRef reductionOpName,
118-
const ReductionIdentifier redId, mlir::Type type,
119-
mlir::Location loc, bool isByRef);
116+
template <typename OpType>
117+
static OpType createDeclareReduction(AbstractConverter &builder,
118+
llvm::StringRef reductionOpName,
119+
const ReductionIdentifier redId,
120+
mlir::Type type, mlir::Location loc,
121+
bool isByRef);
120122

121123
/// Creates a reduction declaration and associates it with an OpenMP block
122124
/// directive.
123-
template <class T>
125+
template <typename OpType, typename RedOperatorListTy>
124126
static void processReductionArguments(
125127
mlir::Location currentLocation, lower::AbstractConverter &converter,
126-
const T &reduction, llvm::SmallVectorImpl<mlir::Value> &reductionVars,
128+
const RedOperatorListTy &redOperatorList,
129+
llvm::SmallVectorImpl<mlir::Value> &reductionVars,
127130
llvm::SmallVectorImpl<bool> &reduceVarByRef,
128131
llvm::SmallVectorImpl<mlir::Attribute> &reductionDeclSymbols,
129-
llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSymbols,
130-
mlir::omp::ReductionModifierAttr *reductionMod = nullptr);
132+
const llvm::SmallVectorImpl<const semantics::Symbol *> &reductionSymbols);
131133
};
132134

133135
template <typename FloatOp, typename IntegerOp>

flang/include/flang/Optimizer/Dialect/FIRAttr.td

Lines changed: 1 addition & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -112,7 +112,7 @@ def fir_ReduceOperationEnum : I32BitEnumAttr<"ReduceOperationEnum",
112112
I32BitEnumAttrCaseBit<"MIN", 7, "min">,
113113
I32BitEnumAttrCaseBit<"IAND", 8, "iand">,
114114
I32BitEnumAttrCaseBit<"IOR", 9, "ior">,
115-
I32BitEnumAttrCaseBit<"EIOR", 10, "eior">
115+
I32BitEnumAttrCaseBit<"IEOR", 10, "ieor">
116116
]> {
117117
let separator = ", ";
118118
let cppNamespace = "::fir";

flang/include/flang/Optimizer/Dialect/FIROps.td

Lines changed: 137 additions & 19 deletions
Original file line numberDiff line numberDiff line change
@@ -3518,7 +3518,7 @@ def fir_BoxTotalElementsOp
35183518

35193519
def YieldOp : fir_Op<"yield",
35203520
[Pure, ReturnLike, Terminator,
3521-
ParentOneOf<["LocalitySpecifierOp"]>]> {
3521+
ParentOneOf<["LocalitySpecifierOp", "DeclareReductionOp"]>]> {
35223522
let summary = "loop yield and termination operation";
35233523
let description = [{
35243524
"fir.yield" yields SSA values from a fir dialect op region and
@@ -3656,6 +3656,103 @@ def fir_LocalitySpecifierOp : fir_Op<"local", [IsolatedFromAbove]> {
36563656
let hasRegionVerifier = 1;
36573657
}
36583658

3659+
def fir_DeclareReductionOp : fir_Op<"declare_reduction", [IsolatedFromAbove,
3660+
Symbol]> {
3661+
let summary = "declares a reduction kind";
3662+
let description = [{
3663+
Note: this operation is adapted from omp::DeclareReductionOp. There is a lot
3664+
of duplication at the moment. TODO Combine both ops into one. See:
3665+
https://discourse.llvm.org/t/dialect-for-data-locality-sharing-specifiers-clauses-in-openmp-openacc-and-do-concurrent/86108.
3666+
3667+
Declares a `do concurrent` reduction. This requires two mandatory and three
3668+
optional regions.
3669+
3670+
1. The optional alloc region specifies how to allocate the thread-local
3671+
reduction value. This region should not contain control flow and all
3672+
IR should be suitable for inlining straight into an entry block. In
3673+
the common case this is expected to contain only allocas. It is
3674+
expected to `fir.yield` the allocated value on all control paths.
3675+
If allocation is conditional (e.g. only allocate if the mold is
3676+
allocated), this should be done in the initializer region and this
3677+
region not included. The alloc region is not used for by-value
3678+
reductions (where allocation is implicit).
3679+
2. The initializer region specifies how to initialize the thread-local
3680+
reduction value. This is usually the neutral element of the reduction.
3681+
For convenience, the region has an argument that contains the value
3682+
of the reduction accumulator at the start of the reduction. If an alloc
3683+
region is specified, there is a second block argument containing the
3684+
address of the allocated memory. The initializer region is expected to
3685+
`fir.yield` the new value on all control flow paths.
3686+
3. The reduction region specifies how to combine two values into one, i.e.
3687+
the reduction operator. It accepts the two values as arguments and is
3688+
expected to `fir.yield` the combined value on all control flow paths.
3689+
4. The atomic reduction region is optional and specifies how two values
3690+
can be combined atomically given local accumulator variables. It is
3691+
expected to store the combined value in the first accumulator variable.
3692+
5. The cleanup region is optional and specifies how to clean up any memory
3693+
allocated by the initializer region. The region has an argument that
3694+
contains the value of the thread-local reduction accumulator. This will
3695+
be executed after the reduction has completed.
3696+
3697+
Note that the MLIR type system does not allow for type-polymorphic
3698+
reductions. Separate reduction declarations should be created for different
3699+
element and accumulator types.
3700+
3701+
For initializer and reduction regions, the operand to `fir.yield` must
3702+
match the parent operation's results.
3703+
}];
3704+
3705+
let arguments = (ins SymbolNameAttr:$sym_name,
3706+
TypeAttr:$type);
3707+
3708+
let regions = (region MaxSizedRegion<1>:$allocRegion,
3709+
AnyRegion:$initializerRegion,
3710+
AnyRegion:$reductionRegion,
3711+
AnyRegion:$atomicReductionRegion,
3712+
AnyRegion:$cleanupRegion);
3713+
3714+
let assemblyFormat = "$sym_name `:` $type attr-dict-with-keyword "
3715+
"( `alloc` $allocRegion^ )? "
3716+
"`init` $initializerRegion "
3717+
"`combiner` $reductionRegion "
3718+
"( `atomic` $atomicReductionRegion^ )? "
3719+
"( `cleanup` $cleanupRegion^ )? ";
3720+
3721+
let extraClassDeclaration = [{
3722+
mlir::BlockArgument getAllocMoldArg() {
3723+
auto &region = getAllocRegion();
3724+
return region.empty() ? nullptr : region.getArgument(0);
3725+
}
3726+
mlir::BlockArgument getInitializerMoldArg() {
3727+
return getInitializerRegion().getArgument(0);
3728+
}
3729+
mlir::BlockArgument getInitializerAllocArg() {
3730+
return getAllocRegion().empty() ?
3731+
nullptr : getInitializerRegion().getArgument(1);
3732+
}
3733+
mlir::BlockArgument getReductionLhsArg() {
3734+
return getReductionRegion().getArgument(0);
3735+
}
3736+
mlir::BlockArgument getReductionRhsArg() {
3737+
return getReductionRegion().getArgument(1);
3738+
}
3739+
mlir::BlockArgument getAtomicReductionLhsArg() {
3740+
auto &region = getAtomicReductionRegion();
3741+
return region.empty() ? nullptr : region.getArgument(0);
3742+
}
3743+
mlir::BlockArgument getAtomicReductionRhsArg() {
3744+
auto &region = getAtomicReductionRegion();
3745+
return region.empty() ? nullptr : region.getArgument(1);
3746+
}
3747+
mlir::BlockArgument getCleanupAllocArg() {
3748+
auto &region = getCleanupRegion();
3749+
return region.empty() ? nullptr : region.getArgument(0);
3750+
}
3751+
}];
3752+
3753+
let hasRegionVerifier = 1;
3754+
}
3755+
36593756
def fir_DoConcurrentOp : fir_Op<"do_concurrent",
36603757
[SingleBlock, AutomaticAllocationScope]> {
36613758
let summary = "do concurrent loop wrapper";
@@ -3694,6 +3791,25 @@ def fir_LocalSpecifier {
36943791
);
36953792
}
36963793

3794+
def fir_ReduceSpecifier {
3795+
dag arguments = (ins
3796+
Variadic<AnyType>:$reduce_vars,
3797+
OptionalAttr<DenseBoolArrayAttr>:$reduce_byref,
3798+
3799+
// This introduces redundancy in how reductions are modelled. In particular,
3800+
// a single reduction is represented by 2 attributes:
3801+
//
3802+
// 1. `$reduce_syms` which is a list of `DeclareReductionOp`s.
3803+
// 2. `$reduce_attrs` which is an array of `fir::ReduceAttr` values.
3804+
//
3805+
// The first makes it easier to map `do concurrent` to parallelization models
3806+
// (e.g. OpenMP and OpenACC) while the second makes it easier to map it to
3807+
// nests of `fir.do_loop ... unordered` ops.
3808+
OptionalAttr<SymbolRefArrayAttr>:$reduce_syms,
3809+
OptionalAttr<ArrayAttr>:$reduce_attrs
3810+
);
3811+
}
3812+
36973813
def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
36983814
[AttrSizedOperandSegments, DeclareOpInterfaceMethods<LoopLikeOpInterface,
36993815
["getLoopInductionVars"]>,
@@ -3703,7 +3819,7 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
37033819
let description = [{
37043820
An operation that models a Fortran `do concurrent` loop's header and block.
37053821
This is a single-region single-block terminator op that is expected to
3706-
terminate the region of a `omp.do_concurrent` wrapper op.
3822+
terminate the region of a `fir.do_concurrent` wrapper op.
37073823

37083824
This op borrows from both `scf.parallel` and `fir.do_loop` ops. Similar to
37093825
`scf.parallel`, a loop nest takes 3 groups of SSA values as operands that
@@ -3741,8 +3857,6 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
37413857
- `lowerBound`: The group of SSA values for the nest's lower bounds.
37423858
- `upperBound`: The group of SSA values for the nest's upper bounds.
37433859
- `step`: The group of SSA values for the nest's steps.
3744-
- `reduceOperands`: The reduction SSA values, if any.
3745-
- `reduceAttrs`: Attributes to store reduction operations, if any.
37463860
- `loopAnnotation`: Loop metadata to be passed down the compiler pipeline to
37473861
LLVM.
37483862
}];
@@ -3751,22 +3865,30 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
37513865
Variadic<Index>:$lowerBound,
37523866
Variadic<Index>:$upperBound,
37533867
Variadic<Index>:$step,
3754-
Variadic<AnyType>:$reduceOperands,
3755-
OptionalAttr<ArrayAttr>:$reduceAttrs,
37563868
OptionalAttr<LoopAnnotationAttr>:$loopAnnotation
37573869
);
37583870

3759-
let arguments = !con(opArgs, fir_LocalSpecifier.arguments);
3871+
let arguments = !con(opArgs,
3872+
fir_LocalSpecifier.arguments,
3873+
fir_ReduceSpecifier.arguments);
37603874

37613875
let regions = (region SizedRegion<1>:$region);
37623876

37633877
let hasCustomAssemblyFormat = 1;
37643878
let hasVerifier = 1;
37653879

37663880
let extraClassDeclaration = [{
3767-
unsigned getNumInductionVars() { return getLowerBound().size(); }
3881+
unsigned getNumInductionVars() {
3882+
return getLowerBound().size();
3883+
}
37683884

3769-
unsigned getNumLocalOperands() { return getLocalVars().size(); }
3885+
unsigned getNumLocalOperands() {
3886+
return getLocalVars().size();
3887+
}
3888+
3889+
unsigned getNumReduceOperands() {
3890+
return getReduceVars().size();
3891+
}
37703892

37713893
mlir::Block::BlockArgListType getInductionVars() {
37723894
return getBody()->getArguments().slice(0, getNumInductionVars());
@@ -3777,19 +3899,15 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
37773899
getNumLocalOperands());
37783900
}
37793901

3902+
mlir::Block::BlockArgListType getRegionReduceArgs() {
3903+
return getBody()->getArguments().slice(getNumInductionVars()
3904+
+ getNumLocalOperands(),
3905+
getNumReduceOperands());
3906+
}
3907+
37803908
/// Number of operands controlling the loop
37813909
unsigned getNumControlOperands() { return getLowerBound().size() * 3; }
37823910

3783-
// Get Number of reduction operands
3784-
unsigned getNumReduceOperands() {
3785-
return getReduceOperands().size();
3786-
}
3787-
3788-
mlir::Operation::operand_range getLocalOperands() {
3789-
return getOperands()
3790-
.slice(getNumControlOperands() + getNumReduceOperands(),
3791-
getNumLocalOperands());
3792-
}
37933911
}];
37943912
}
37953913

0 commit comments

Comments
 (0)