@@ -3518,7 +3518,7 @@ def fir_BoxTotalElementsOp
35183518
35193519def YieldOp : fir_Op<"yield",
35203520    [Pure, ReturnLike, Terminator,
3521-      ParentOneOf<["LocalitySpecifierOp"]>]> {
3521+      ParentOneOf<["LocalitySpecifierOp", "DeclareReductionOp" ]>]> {
35223522  let summary = "loop yield and termination operation";
35233523  let description = [{
35243524    "fir.yield" yields SSA values from a fir dialect op region and
@@ -3656,6 +3656,103 @@ def fir_LocalitySpecifierOp : fir_Op<"local", [IsolatedFromAbove]> {
36563656  let hasRegionVerifier = 1;
36573657}
36583658
3659+ def fir_DeclareReductionOp : fir_Op<"declare_reduction", [IsolatedFromAbove,
3660+                                                          Symbol]> {
3661+   let summary = "declares a reduction kind";
3662+   let description = [{
3663+     Note: this operation is adapted from omp::DeclareReductionOp. There is a lot
3664+     of duplication at the moment. TODO Combine both ops into one. See:
3665+     https://discourse.llvm.org/t/dialect-for-data-locality-sharing-specifiers-clauses-in-openmp-openacc-and-do-concurrent/86108.
3666+ 
3667+     Declares a `do concurrent` reduction. This requires two mandatory and three
3668+     optional regions.
3669+ 
3670+       1. The optional alloc region specifies how to allocate the thread-local
3671+          reduction value. This region should not contain control flow and all
3672+          IR should be suitable for inlining straight into an entry block. In
3673+          the common case this is expected to contain only allocas. It is
3674+          expected to `fir.yield` the allocated value on all control paths.
3675+          If allocation is conditional (e.g. only allocate if the mold is
3676+          allocated), this should be done in the initializer region and this
3677+          region not included. The alloc region is not used for by-value
3678+          reductions (where allocation is implicit).
3679+       2. The initializer region specifies how to initialize the thread-local
3680+          reduction value. This is usually the neutral element of the reduction.
3681+          For convenience, the region has an argument that contains the value
3682+          of the reduction accumulator at the start of the reduction. If an alloc
3683+          region is specified, there is a second block argument containing the
3684+          address of the allocated memory. The initializer region is expected to
3685+          `fir.yield` the new value on all control flow paths.
3686+       3. The reduction region specifies how to combine two values into one, i.e.
3687+          the reduction operator. It accepts the two values as arguments and is
3688+          expected to `fir.yield` the combined value on all control flow paths.
3689+       4. The atomic reduction region is optional and specifies how two values
3690+          can be combined atomically given local accumulator variables. It is
3691+          expected to store the combined value in the first accumulator variable.
3692+       5. The cleanup region is optional and specifies how to clean up any memory
3693+          allocated by the initializer region. The region has an argument that
3694+          contains the value of the thread-local reduction accumulator. This will
3695+          be executed after the reduction has completed.
3696+ 
3697+     Note that the MLIR type system does not allow for type-polymorphic
3698+     reductions. Separate reduction declarations should be created for different
3699+     element and accumulator types.
3700+ 
3701+     For initializer and reduction regions, the operand to `fir.yield` must
3702+     match the parent operation's results.
3703+   }];
3704+ 
3705+   let arguments = (ins SymbolNameAttr:$sym_name,
3706+                        TypeAttr:$type);
3707+ 
3708+   let regions = (region MaxSizedRegion<1>:$allocRegion,
3709+                         AnyRegion:$initializerRegion,
3710+                         AnyRegion:$reductionRegion,
3711+                         AnyRegion:$atomicReductionRegion,
3712+                         AnyRegion:$cleanupRegion);
3713+ 
3714+   let assemblyFormat = "$sym_name `:` $type attr-dict-with-keyword "
3715+                        "( `alloc` $allocRegion^ )? "
3716+                        "`init` $initializerRegion "
3717+                        "`combiner` $reductionRegion "
3718+                        "( `atomic` $atomicReductionRegion^ )? "
3719+                        "( `cleanup` $cleanupRegion^ )? ";
3720+ 
3721+   let extraClassDeclaration = [{
3722+     mlir::BlockArgument getAllocMoldArg() {
3723+       auto &region = getAllocRegion();
3724+       return region.empty() ? nullptr : region.getArgument(0);
3725+     }
3726+     mlir::BlockArgument getInitializerMoldArg() {
3727+       return getInitializerRegion().getArgument(0);
3728+     }
3729+     mlir::BlockArgument getInitializerAllocArg() {
3730+       return getAllocRegion().empty() ?
3731+           nullptr : getInitializerRegion().getArgument(1);
3732+     }
3733+     mlir::BlockArgument getReductionLhsArg() {
3734+       return getReductionRegion().getArgument(0);
3735+     }
3736+     mlir::BlockArgument getReductionRhsArg() {
3737+       return getReductionRegion().getArgument(1);
3738+     }
3739+     mlir::BlockArgument getAtomicReductionLhsArg() {
3740+       auto &region = getAtomicReductionRegion();
3741+       return region.empty() ? nullptr : region.getArgument(0);
3742+     }
3743+     mlir::BlockArgument getAtomicReductionRhsArg() {
3744+       auto &region = getAtomicReductionRegion();
3745+       return region.empty() ? nullptr : region.getArgument(1);
3746+     }
3747+     mlir::BlockArgument getCleanupAllocArg() {
3748+       auto &region = getCleanupRegion();
3749+       return region.empty() ? nullptr : region.getArgument(0);
3750+     }
3751+   }];
3752+ 
3753+   let hasRegionVerifier = 1;
3754+ }
3755+ 
36593756def fir_DoConcurrentOp : fir_Op<"do_concurrent",
36603757    [SingleBlock, AutomaticAllocationScope]> {
36613758  let summary = "do concurrent loop wrapper";
@@ -3694,6 +3791,25 @@ def fir_LocalSpecifier {
36943791  );
36953792}
36963793
3794+ def fir_ReduceSpecifier {
3795+   dag arguments = (ins
3796+     Variadic<AnyType>:$reduce_vars,
3797+     OptionalAttr<DenseBoolArrayAttr>:$reduce_byref,
3798+ 
3799+     // This introduces redundancy in how reductions are modelled. In particular,
3800+     // a single reduction is represented by 2 attributes:
3801+     //
3802+     // 1. `$reduce_syms` which is a list of `DeclareReductionOp`s.
3803+     // 2. `$reduce_attrs` which is an array of `fir::ReduceAttr` values.
3804+     //
3805+     // The first makes it easier to map `do concurrent` to parallelization
3806+     // models (e.g. OpenMP and OpenACC) while the second makes it easier to map
3807+     // it to nests of `fir.do_loop ... unordered` ops.
3808+     OptionalAttr<SymbolRefArrayAttr>:$reduce_syms,
3809+     OptionalAttr<ArrayAttr>:$reduce_attrs
3810+   );
3811+ }
3812+ 
36973813def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
36983814    [AttrSizedOperandSegments, DeclareOpInterfaceMethods<LoopLikeOpInterface,
36993815                                                         ["getLoopInductionVars"]>,
@@ -3703,7 +3819,7 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
37033819  let description = [{
37043820    An operation that models a Fortran `do concurrent` loop's header and block.
37053821    This is a single-region single-block terminator op that is expected to
3706-     terminate the region of a `omp .do_concurrent` wrapper op.
3822+     terminate the region of a `fir.do_concurrent` wrapper op.
37073823
37083824    This op borrows from both `scf.parallel` and `fir.do_loop` ops. Similar to
37093825    `scf.parallel`, a loop nest takes 3 groups of SSA values as operands that
@@ -3741,8 +3857,6 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
37413857    - `lowerBound`: The group of SSA values for the nest's lower bounds.
37423858    - `upperBound`: The group of SSA values for the nest's upper bounds.
37433859    - `step`: The group of SSA values for the nest's steps.
3744-     - `reduceOperands`: The reduction SSA values, if any.
3745-     - `reduceAttrs`: Attributes to store reduction operations, if any.
37463860    - `loopAnnotation`: Loop metadata to be passed down the compiler pipeline to
37473861      LLVM.
37483862  }];
@@ -3751,12 +3865,12 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
37513865    Variadic<Index>:$lowerBound,
37523866    Variadic<Index>:$upperBound,
37533867    Variadic<Index>:$step,
3754-     Variadic<AnyType>:$reduceOperands,
3755-     OptionalAttr<ArrayAttr>:$reduceAttrs,
37563868    OptionalAttr<LoopAnnotationAttr>:$loopAnnotation
37573869  );
37583870
3759-   let arguments = !con(opArgs, fir_LocalSpecifier.arguments);
3871+   let arguments = !con(opArgs,
3872+     fir_LocalSpecifier.arguments,
3873+     fir_ReduceSpecifier.arguments);
37603874
37613875  let regions = (region SizedRegion<1>:$region);
37623876
@@ -3777,12 +3891,18 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
37773891                                             getNumLocalOperands());
37783892    }
37793893
3894+     mlir::Block::BlockArgListType getRegionReduceArgs() {
3895+       return getBody()->getArguments().slice(getNumInductionVars()
3896+                                                + getNumLocalOperands(),
3897+                                              getNumReduceOperands());
3898+     }
3899+ 
37803900    /// Number of operands controlling the loop
37813901    unsigned getNumControlOperands() { return getLowerBound().size() * 3; }
37823902
37833903    // Get Number of reduction operands
37843904    unsigned getNumReduceOperands() {
3785-       return getReduceOperands ().size();
3905+       return getReduceVars ().size();
37863906    }
37873907
37883908    mlir::Operation::operand_range getLocalOperands() {
0 commit comments