@@ -3518,7 +3518,7 @@ def fir_BoxTotalElementsOp
35183518
35193519def YieldOp : fir_Op<"yield",
35203520 [Pure, ReturnLike, Terminator,
3521- ParentOneOf<["LocalitySpecifierOp"]>]> {
3521+ ParentOneOf<["LocalitySpecifierOp", "DeclareReductionOp" ]>]> {
35223522 let summary = "loop yield and termination operation";
35233523 let description = [{
35243524 "fir.yield" yields SSA values from a fir dialect op region and
@@ -3656,6 +3656,103 @@ def fir_LocalitySpecifierOp : fir_Op<"local", [IsolatedFromAbove]> {
36563656 let hasRegionVerifier = 1;
36573657}
36583658
3659+ def fir_DeclareReductionOp : fir_Op<"declare_reduction", [IsolatedFromAbove,
3660+ Symbol]> {
3661+ let summary = "declares a reduction kind";
3662+ let description = [{
3663+ Note: this operation is adapted from omp::DeclareReductionOp. There is a lot
3664+ of duplication at the moment. TODO Combine both ops into one. See:
3665+ https://discourse.llvm.org/t/dialect-for-data-locality-sharing-specifiers-clauses-in-openmp-openacc-and-do-concurrent/86108.
3666+
3667+ Declares a `do concurrent` reduction. This requires two mandatory and three
3668+ optional regions.
3669+
3670+ 1. The optional alloc region specifies how to allocate the thread-local
3671+ reduction value. This region should not contain control flow and all
3672+ IR should be suitable for inlining straight into an entry block. In
3673+ the common case this is expected to contain only allocas. It is
3674+ expected to `fir.yield` the allocated value on all control paths.
3675+ If allocation is conditional (e.g. only allocate if the mold is
3676+ allocated), this should be done in the initializer region and this
3677+ region not included. The alloc region is not used for by-value
3678+ reductions (where allocation is implicit).
3679+ 2. The initializer region specifies how to initialize the thread-local
3680+ reduction value. This is usually the neutral element of the reduction.
3681+ For convenience, the region has an argument that contains the value
3682+ of the reduction accumulator at the start of the reduction. If an alloc
3683+ region is specified, there is a second block argument containing the
3684+ address of the allocated memory. The initializer region is expected to
3685+ `fir.yield` the new value on all control flow paths.
3686+ 3. The reduction region specifies how to combine two values into one, i.e.
3687+ the reduction operator. It accepts the two values as arguments and is
3688+ expected to `fir.yield` the combined value on all control flow paths.
3689+ 4. The atomic reduction region is optional and specifies how two values
3690+ can be combined atomically given local accumulator variables. It is
3691+ expected to store the combined value in the first accumulator variable.
3692+ 5. The cleanup region is optional and specifies how to clean up any memory
3693+ allocated by the initializer region. The region has an argument that
3694+ contains the value of the thread-local reduction accumulator. This will
3695+ be executed after the reduction has completed.
3696+
3697+ Note that the MLIR type system does not allow for type-polymorphic
3698+ reductions. Separate reduction declarations should be created for different
3699+ element and accumulator types.
3700+
3701+ For initializer and reduction regions, the operand to `fir.yield` must
3702+ match the parent operation's results.
3703+ }];
3704+
3705+ let arguments = (ins SymbolNameAttr:$sym_name,
3706+ TypeAttr:$type);
3707+
3708+ let regions = (region MaxSizedRegion<1>:$allocRegion,
3709+ AnyRegion:$initializerRegion,
3710+ AnyRegion:$reductionRegion,
3711+ AnyRegion:$atomicReductionRegion,
3712+ AnyRegion:$cleanupRegion);
3713+
3714+ let assemblyFormat = "$sym_name `:` $type attr-dict-with-keyword "
3715+ "( `alloc` $allocRegion^ )? "
3716+ "`init` $initializerRegion "
3717+ "`combiner` $reductionRegion "
3718+ "( `atomic` $atomicReductionRegion^ )? "
3719+ "( `cleanup` $cleanupRegion^ )? ";
3720+
3721+ let extraClassDeclaration = [{
3722+ mlir::BlockArgument getAllocMoldArg() {
3723+ auto &region = getAllocRegion();
3724+ return region.empty() ? nullptr : region.getArgument(0);
3725+ }
3726+ mlir::BlockArgument getInitializerMoldArg() {
3727+ return getInitializerRegion().getArgument(0);
3728+ }
3729+ mlir::BlockArgument getInitializerAllocArg() {
3730+ return getAllocRegion().empty() ?
3731+ nullptr : getInitializerRegion().getArgument(1);
3732+ }
3733+ mlir::BlockArgument getReductionLhsArg() {
3734+ return getReductionRegion().getArgument(0);
3735+ }
3736+ mlir::BlockArgument getReductionRhsArg() {
3737+ return getReductionRegion().getArgument(1);
3738+ }
3739+ mlir::BlockArgument getAtomicReductionLhsArg() {
3740+ auto &region = getAtomicReductionRegion();
3741+ return region.empty() ? nullptr : region.getArgument(0);
3742+ }
3743+ mlir::BlockArgument getAtomicReductionRhsArg() {
3744+ auto &region = getAtomicReductionRegion();
3745+ return region.empty() ? nullptr : region.getArgument(1);
3746+ }
3747+ mlir::BlockArgument getCleanupAllocArg() {
3748+ auto &region = getCleanupRegion();
3749+ return region.empty() ? nullptr : region.getArgument(0);
3750+ }
3751+ }];
3752+
3753+ let hasRegionVerifier = 1;
3754+ }
3755+
36593756def fir_DoConcurrentOp : fir_Op<"do_concurrent",
36603757 [SingleBlock, AutomaticAllocationScope]> {
36613758 let summary = "do concurrent loop wrapper";
@@ -3694,6 +3791,25 @@ def fir_LocalSpecifier {
36943791 );
36953792}
36963793
3794+ def fir_ReduceSpecifier {
3795+ dag arguments = (ins
3796+ Variadic<AnyType>:$reduce_vars,
3797+ OptionalAttr<DenseBoolArrayAttr>:$reduce_byref,
3798+
3799+ // This introduces redundancy in how reductions are modelled. In particular,
3800+ // a single reduction is represented by 2 attributes:
3801+ //
3802+ // 1. `$reduce_syms` which is a list of `DeclareReductionOp`s.
3803+ // 2. `$reduce_attrs` which is an array of `fir::ReduceAttr` values.
3804+ //
3805+ // The first makes it easier to map `do concurrent` to parallelization models
3806+ // (e.g. OpenMP and OpenACC) while the second makes it easier to map it to
3807+ // nests of `fir.do_loop ... unordered` ops.
3808+ OptionalAttr<SymbolRefArrayAttr>:$reduce_syms,
3809+ OptionalAttr<ArrayAttr>:$reduce_attrs
3810+ );
3811+ }
3812+
36973813def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
36983814 [AttrSizedOperandSegments, DeclareOpInterfaceMethods<LoopLikeOpInterface,
36993815 ["getLoopInductionVars"]>,
@@ -3703,7 +3819,7 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
37033819 let description = [{
37043820 An operation that models a Fortran `do concurrent` loop's header and block.
37053821 This is a single-region single-block terminator op that is expected to
3706- terminate the region of a `omp .do_concurrent` wrapper op.
3822+ terminate the region of a `fir.do_concurrent` wrapper op.
37073823
37083824 This op borrows from both `scf.parallel` and `fir.do_loop` ops. Similar to
37093825 `scf.parallel`, a loop nest takes 3 groups of SSA values as operands that
@@ -3741,8 +3857,6 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
37413857 - `lowerBound`: The group of SSA values for the nest's lower bounds.
37423858 - `upperBound`: The group of SSA values for the nest's upper bounds.
37433859 - `step`: The group of SSA values for the nest's steps.
3744- - `reduceOperands`: The reduction SSA values, if any.
3745- - `reduceAttrs`: Attributes to store reduction operations, if any.
37463860 - `loopAnnotation`: Loop metadata to be passed down the compiler pipeline to
37473861 LLVM.
37483862 }];
@@ -3751,12 +3865,12 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
37513865 Variadic<Index>:$lowerBound,
37523866 Variadic<Index>:$upperBound,
37533867 Variadic<Index>:$step,
3754- Variadic<AnyType>:$reduceOperands,
3755- OptionalAttr<ArrayAttr>:$reduceAttrs,
37563868 OptionalAttr<LoopAnnotationAttr>:$loopAnnotation
37573869 );
37583870
3759- let arguments = !con(opArgs, fir_LocalSpecifier.arguments);
3871+ let arguments = !con(opArgs,
3872+ fir_LocalSpecifier.arguments,
3873+ fir_ReduceSpecifier.arguments);
37603874
37613875 let regions = (region SizedRegion<1>:$region);
37623876
@@ -3777,12 +3891,18 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
37773891 getNumLocalOperands());
37783892 }
37793893
3894+ mlir::Block::BlockArgListType getRegionReduceArgs() {
3895+ // Reduce args sit after the induction variables and local operands.
3896+ auto offset = getNumInductionVars() + getNumLocalOperands();
3897+ return getBody()->getArguments().slice(offset, getNumReduceOperands());
3898+ }
3899+
37803900 /// Number of operands controlling the loop
37813901 unsigned getNumControlOperands() { return getLowerBound().size() * 3; }
37823902
37833903 // Get Number of reduction operands
37843904 unsigned getNumReduceOperands() {
3785- return getReduceOperands ().size();
3905+ return getReduceVars ().size();
37863906 }
37873907
37883908 mlir::Operation::operand_range getLocalOperands() {
0 commit comments