@@ -3518,7 +3518,7 @@ def fir_BoxTotalElementsOp
35183518
35193519def YieldOp : fir_Op<"yield",
35203520 [Pure, ReturnLike, Terminator,
3521- ParentOneOf<["LocalitySpecifierOp"]>]> {
3521+ ParentOneOf<["LocalitySpecifierOp", "DeclareReductionOp" ]>]> {
35223522 let summary = "loop yield and termination operation";
35233523 let description = [{
35243524 "fir.yield" yields SSA values from a fir dialect op region and
@@ -3662,6 +3662,103 @@ def fir_LocalitySpecifierOp : fir_Op<"local", [IsolatedFromAbove]> {
36623662 let hasRegionVerifier = 1;
36633663}
36643664
3665+ def fir_DeclareReductionOp : fir_Op<"declare_reduction", [IsolatedFromAbove,
3666+ Symbol]> {
3667+ let summary = "declares a reduction kind";
3668+ let description = [{
3669+ Note: this operation is adapted from omp::DeclareReductionOp. There is a lot
3670+ of duplication at the moment. TODO: Combine both ops into one. See:
3671+ https://discourse.llvm.org/t/dialect-for-data-locality-sharing-specifiers-clauses-in-openmp-openacc-and-do-concurrent/86108.
3672+
3673+ Declares a `do concurrent` reduction. This requires two mandatory and three
3674+ optional regions.
3675+
3676+ 1. The optional alloc region specifies how to allocate the thread-local
3677+ reduction value. This region should not contain control flow and all
3678+ IR should be suitable for inlining straight into an entry block. In
3679+ the common case this is expected to contain only allocas. It is
3680+ expected to `fir.yield` the allocated value on all control paths.
3681+ If allocation is conditional (e.g. only allocate if the mold is
3682+ allocated), this should be done in the initializer region and this
3683+ region not included. The alloc region is not used for by-value
3684+ reductions (where allocation is implicit).
3685+ 2. The initializer region specifies how to initialize the thread-local
3686+ reduction value. This is usually the neutral element of the reduction.
3687+ For convenience, the region has an argument that contains the value
3688+ of the reduction accumulator at the start of the reduction. If an alloc
3689+ region is specified, there is a second block argument containing the
3690+ address of the allocated memory. The initializer region is expected to
3691+ `fir.yield` the new value on all control flow paths.
3692+ 3. The reduction region specifies how to combine two values into one, i.e.
3693+ the reduction operator. It accepts the two values as arguments and is
3694+ expected to `fir.yield` the combined value on all control flow paths.
3695+ 4. The atomic reduction region is optional and specifies how two values
3696+ can be combined atomically given local accumulator variables. It is
3697+ expected to store the combined value in the first accumulator variable.
3698+ 5. The cleanup region is optional and specifies how to clean up any memory
3699+ allocated by the initializer region. The region has an argument that
3700+ contains the value of the thread-local reduction accumulator. This will
3701+ be executed after the reduction has completed.
3702+
3703+ Note that the MLIR type system does not allow for type-polymorphic
3704+ reductions. Separate reduction declarations should be created for different
3705+ element and accumulator types.
3706+
3707+ For initializer and reduction regions, the operand to `fir.yield` must
3708+ match the parent operation's results.
3709+ }];
3710+
3711+ let arguments = (ins SymbolNameAttr:$sym_name,
3712+ TypeAttr:$type);
3713+
3714+ let regions = (region MaxSizedRegion<1>:$allocRegion,
3715+ AnyRegion:$initializerRegion,
3716+ AnyRegion:$reductionRegion,
3717+ AnyRegion:$atomicReductionRegion,
3718+ AnyRegion:$cleanupRegion);
3719+
3720+ let assemblyFormat = "$sym_name `:` $type attr-dict-with-keyword "
3721+ "( `alloc` $allocRegion^ )? "
3722+ "`init` $initializerRegion "
3723+ "`combiner` $reductionRegion "
3724+ "( `atomic` $atomicReductionRegion^ )? "
3725+ "( `cleanup` $cleanupRegion^ )? ";
3726+
3727+ let extraClassDeclaration = [{
3728+ mlir::BlockArgument getAllocMoldArg() {
3729+ auto &region = getAllocRegion();
3730+ return region.empty() ? nullptr : region.getArgument(0);
3731+ }
3732+ mlir::BlockArgument getInitializerMoldArg() {
3733+ return getInitializerRegion().getArgument(0);
3734+ }
3735+ mlir::BlockArgument getInitializerAllocArg() {
3736+ return getAllocRegion().empty() ?
3737+ nullptr : getInitializerRegion().getArgument(1);
3738+ }
3739+ mlir::BlockArgument getReductionLhsArg() {
3740+ return getReductionRegion().getArgument(0);
3741+ }
3742+ mlir::BlockArgument getReductionRhsArg() {
3743+ return getReductionRegion().getArgument(1);
3744+ }
3745+ mlir::BlockArgument getAtomicReductionLhsArg() {
3746+ auto &region = getAtomicReductionRegion();
3747+ return region.empty() ? nullptr : region.getArgument(0);
3748+ }
3749+ mlir::BlockArgument getAtomicReductionRhsArg() {
3750+ auto &region = getAtomicReductionRegion();
3751+ return region.empty() ? nullptr : region.getArgument(1);
3752+ }
3753+ mlir::BlockArgument getCleanupAllocArg() {
3754+ auto &region = getCleanupRegion();
3755+ return region.empty() ? nullptr : region.getArgument(0);
3756+ }
3757+ }];
3758+
3759+ let hasRegionVerifier = 1;
3760+ }
3761+
36653762def fir_DoConcurrentOp : fir_Op<"do_concurrent",
36663763 [SingleBlock, AutomaticAllocationScope]> {
36673764 let summary = "do concurrent loop wrapper";
@@ -3700,6 +3797,25 @@ def fir_LocalSpecifier {
37003797 );
37013798}
37023799
3800+ def fir_ReduceSpecifier {
3801+ dag arguments = (ins
3802+ Variadic<AnyType>:$reduce_vars,
3803+ OptionalAttr<DenseBoolArrayAttr>:$reduce_byref,
3804+
3805+ // This introduces redundancy in how reductions are modelled. In particular,
3806+ // a single reduction is represented by 2 attributes:
3807+ //
3808+ // 1. `$reduce_syms` which is a list of `DeclareReductionOp`s.
3809+ // 2. `$reduce_attrs` which is an array of `fir::ReduceAttr` values.
3810+ //
3811+ // The first makes it easier to map `do concurrent` to parallelization models
3812+ // (e.g. OpenMP and OpenACC) while the second makes it easier to map it to
3813+ // nests of `fir.do_loop ... unordered` ops.
3814+ OptionalAttr<SymbolRefArrayAttr>:$reduce_syms,
3815+ OptionalAttr<ArrayAttr>:$reduce_attrs
3816+ );
3817+ }
3818+
37033819def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
37043820 [AttrSizedOperandSegments, DeclareOpInterfaceMethods<LoopLikeOpInterface,
37053821 ["getLoopInductionVars"]>,
@@ -3709,7 +3825,7 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
37093825 let description = [{
37103826 An operation that models a Fortran `do concurrent` loop's header and block.
37113827 This is a single-region single-block terminator op that is expected to
3712- terminate the region of a `omp .do_concurrent` wrapper op.
3828+ terminate the region of a `fir.do_concurrent` wrapper op.
37133829
37143830 This op borrows from both `scf.parallel` and `fir.do_loop` ops. Similar to
37153831 `scf.parallel`, a loop nest takes 3 groups of SSA values as operands that
@@ -3747,8 +3863,6 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
37473863 - `lowerBound`: The group of SSA values for the nest's lower bounds.
37483864 - `upperBound`: The group of SSA values for the nest's upper bounds.
37493865 - `step`: The group of SSA values for the nest's steps.
3750- - `reduceOperands`: The reduction SSA values, if any.
3751- - `reduceAttrs`: Attributes to store reduction operations, if any.
37523866 - `loopAnnotation`: Loop metadata to be passed down the compiler pipeline to
37533867 LLVM.
37543868 }];
@@ -3757,12 +3871,12 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
37573871 Variadic<Index>:$lowerBound,
37583872 Variadic<Index>:$upperBound,
37593873 Variadic<Index>:$step,
3760- Variadic<AnyType>:$reduceOperands,
3761- OptionalAttr<ArrayAttr>:$reduceAttrs,
37623874 OptionalAttr<LoopAnnotationAttr>:$loopAnnotation
37633875 );
37643876
3765- let arguments = !con(opArgs, fir_LocalSpecifier.arguments);
3877+ let arguments = !con(opArgs,
3878+ fir_LocalSpecifier.arguments,
3879+ fir_ReduceSpecifier.arguments);
37663880
37673881 let regions = (region SizedRegion<1>:$region);
37683882
@@ -3783,12 +3897,18 @@ def fir_DoConcurrentLoopOp : fir_Op<"do_concurrent.loop",
37833897 getNumLocalOperands());
37843898 }
37853899
3900+ mlir::Block::BlockArgListType getRegionReduceArgs() {
3901+ return getBody()->getArguments().slice(getNumInductionVars()
3902+ + getNumLocalOperands(),
3903+ getNumReduceOperands());
3904+ }
3905+
37863906 /// Number of operands controlling the loop
37873907 unsigned getNumControlOperands() { return getLowerBound().size() * 3; }
37883908
37893909 // Get Number of reduction operands
37903910 unsigned getNumReduceOperands() {
3791- return getReduceOperands ().size();
3911+ return getReduceVars ().size();
37923912 }
37933913
37943914 mlir::Operation::operand_range getLocalOperands() {
0 commit comments