Skip to content

Commit 478048d

Browse files
[mlir][acc] Add firstprivate operands to acc.loop (llvm#161881)
Add support for firstprivate operands to the OpenACC loop construct, enabling representation of privatization scenarios that require initialization from original values.
1 parent aea5399 commit 478048d

File tree

4 files changed

+105
-10
lines changed

4 files changed

+105
-10
lines changed

flang/lib/Lower/OpenACC.cpp

Lines changed: 3 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -2222,6 +2222,9 @@ buildACCLoopOp(Fortran::lower::AbstractConverter &converter,
22222222
addOperands(operands, operandSegments, tileOperands);
22232223
addOperands(operands, operandSegments, cacheOperands);
22242224
addOperands(operands, operandSegments, privateOperands);
2225+
// Add an empty firstprivate operand segment since firstprivate is not
2226+
// permitted on the loop construct from the OpenACC language perspective.
2227+
addOperands(operands, operandSegments, {});
22252228
addOperands(operands, operandSegments, reductionOperands);
22262229

22272230
auto loopOp = createRegionOp<mlir::acc::LoopOp, mlir::acc::YieldOp>(

mlir/include/mlir/Dialect/OpenACC/OpenACCOps.td

Lines changed: 44 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -2383,15 +2383,38 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
23832383
let summary = "loop construct";
23842384

23852385
let description = [{
2386-
The "acc.loop" operation represents the OpenACC loop construct. The lower
2387-
and upper bounds specify a half-open range: the range includes the lower
2388-
bound but does not include the upper bound. If the `inclusive` attribute is
2389-
set then the upper bound is included.
2386+
The `acc.loop` operation represents the OpenACC loop construct and, when
2387+
bounds are included, the associated source language loop iterators. The
2388+
lower and upper bounds specify a half-open range: the range includes the
2389+
lower bound but does not include the upper bound. If the `inclusive`
2390+
attribute is set then the upper bound is included.
2391+
2392+
In cases where the OpenACC loop directive needs to capture multiple
2393+
source language loops, such as in the case of `collapse` or `tile`,
2394+
the multiple induction arguments are used to capture each case. Having
2395+
such a representation makes sure no intermediate transformation such
2396+
as Loop Invariant Code Motion breaks the property requested by the
2397+
clause on the loop constructs.
2398+
2399+
Each `acc.loop` holds private and reduction operands which are the
2400+
SSA values from the corresponding `acc.private` or `acc.reduction`
2401+
operations. Additionally, firstprivate operands are supported to
2402+
represent cases where privatization is needed with initialization
2403+
from an original value. While the OpenACC specification does not
2404+
explicitly support firstprivate on loop constructs, this extension
2405+
enables representing privatization scenarios that arise from an
2406+
optimization and codegen pipeline operating on the acc dialect.
2407+
2408+
The operation supports capturing information that it comes from combined
2409+
constructs (e.g., `parallel loop`, `kernels loop`, `serial loop`)
2410+
through the `combined` attribute despite requiring the `acc.loop`
2411+
to be decomposed from the compute operation representing the compute
2412+
construct.
23902413

23912414
Example:
23922415

23932416
```mlir
2394-
acc.loop gang() vector() (%arg3 : index, %arg4 : index, %arg5 : index) =
2417+
acc.loop gang() vector() (%arg3 : index, %arg4 : index, %arg5 : index) =
23952418
(%c0, %c0, %c0 : index, index, index) to
23962419
(%c10, %c10, %c10 : index, index, index) step
23972420
(%c1, %c1, %c1 : index, index, index) {
@@ -2400,10 +2423,12 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
24002423
} attributes { collapse = [3] }
24012424
```
24022425

2403-
`collapse`, `gang`, `worker`, `vector`, `seq`, `independent`, `auto` and
2404-
`tile` operands are supported with `device_type` information. They should
2405-
only be accessed by the extra provided getters. If modified, the
2406-
corresponding `device_type` attributes must be modified as well.
2426+
`collapse`, `gang`, `worker`, `vector`, `seq`, `independent`, `auto`,
2427+
`cache`, and `tile` operands are supported with `device_type`
2428+
information. These clauses should only be accessed through the provided
2429+
device-type-aware getter methods. When modifying these operands, the
2430+
corresponding `device_type` attributes must be updated to maintain
2431+
consistency between operands and their target device types.
24072432
}];
24082433

24092434
let arguments = (ins
@@ -2433,6 +2458,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
24332458
Variadic<OpenACC_AnyPointerOrMappableType>:$cacheOperands,
24342459
Variadic<OpenACC_AnyPointerOrMappableType>:$privateOperands,
24352460
OptionalAttr<SymbolRefArrayAttr>:$privatizationRecipes,
2461+
Variadic<OpenACC_AnyPointerOrMappableType>:$firstprivateOperands,
2462+
OptionalAttr<SymbolRefArrayAttr>:$firstprivatizationRecipes,
24362463
Variadic<AnyType>:$reductionOperands,
24372464
OptionalAttr<SymbolRefArrayAttr>:$reductionRecipes,
24382465
OptionalAttr<OpenACC_CombinedConstructsAttr>:$combined
@@ -2589,6 +2616,10 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
25892616
/// Adds a private clause variable to this operation, including its recipe.
25902617
void addPrivatization(MLIRContext *, mlir::acc::PrivateOp op,
25912618
mlir::acc::PrivateRecipeOp recipe);
2619+
/// Adds a firstprivate clause variable to this operation, including its
2620+
/// recipe.
2621+
void addFirstPrivatization(MLIRContext *, mlir::acc::FirstprivateOp op,
2622+
mlir::acc::FirstprivateRecipeOp recipe);
25922623
/// Adds a reduction clause variable to this operation, including its
25932624
/// recipe.
25942625
void addReduction(MLIRContext *, mlir::acc::ReductionOp op,
@@ -2609,6 +2640,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
26092640
type($vectorOperands), $vectorOperandsDeviceType, $vector)
26102641
| `private` `(` custom<SymOperandList>(
26112642
$privateOperands, type($privateOperands), $privatizationRecipes) `)`
2643+
| `firstprivate` `(` custom<SymOperandList>($firstprivateOperands,
2644+
type($firstprivateOperands), $firstprivatizationRecipes) `)`
26122645
| `tile` `(` custom<DeviceTypeOperandsWithSegment>($tileOperands,
26132646
type($tileOperands), $tileOperandsDeviceType, $tileOperandsSegments)
26142647
`)`
@@ -2665,6 +2698,8 @@ def OpenACC_LoopOp : OpenACC_Op<"loop",
26652698
/*cacheOperands=*/{},
26662699
/*privateOperands=*/{},
26672700
/*privatizationRecipes=*/nullptr,
2701+
/*firstprivateOperands=*/{},
2702+
/*firstprivatizationRecipes=*/nullptr,
26682703
/*reductionOperands=*/{},
26692704
/*reductionRecipes=*/nullptr,
26702705
/*combined=*/nullptr);

mlir/lib/Dialect/OpenACC/IR/OpenACC.cpp

Lines changed: 22 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -2674,6 +2674,11 @@ LogicalResult acc::LoopOp::verify() {
26742674
"privatizations", false)))
26752675
return failure();
26762676

2677+
if (failed(checkSymOperandList<mlir::acc::FirstprivateRecipeOp>(
2678+
*this, getFirstprivatizationRecipes(), getFirstprivateOperands(),
2679+
"firstprivate", "firstprivatizations", /*checkOperandType=*/false)))
2680+
return failure();
2681+
26772682
if (failed(checkSymOperandList<mlir::acc::ReductionRecipeOp>(
26782683
*this, getReductionRecipes(), getReductionOperands(), "reduction",
26792684
"reductions", false)))
@@ -2737,7 +2742,8 @@ LogicalResult acc::LoopOp::verify() {
27372742
}
27382743

27392744
unsigned LoopOp::getNumDataOperands() {
2740-
return getReductionOperands().size() + getPrivateOperands().size();
2745+
return getReductionOperands().size() + getPrivateOperands().size() +
2746+
getFirstprivateOperands().size();
27412747
}
27422748

27432749
Value LoopOp::getDataOperand(unsigned i) {
@@ -3117,6 +3123,21 @@ void acc::LoopOp::addPrivatization(MLIRContext *context,
31173123
setPrivatizationRecipesAttr(mlir::ArrayAttr::get(context, recipes));
31183124
}
31193125

3126+
void acc::LoopOp::addFirstPrivatization(
3127+
MLIRContext *context, mlir::acc::FirstprivateOp op,
3128+
mlir::acc::FirstprivateRecipeOp recipe) {
3129+
getFirstprivateOperandsMutable().append(op.getResult());
3130+
3131+
llvm::SmallVector<mlir::Attribute> recipes;
3132+
3133+
if (getFirstprivatizationRecipesAttr())
3134+
llvm::copy(getFirstprivatizationRecipesAttr(), std::back_inserter(recipes));
3135+
3136+
recipes.push_back(
3137+
mlir::SymbolRefAttr::get(context, recipe.getSymName().str()));
3138+
setFirstprivatizationRecipesAttr(mlir::ArrayAttr::get(context, recipes));
3139+
}
3140+
31203141
void acc::LoopOp::addReduction(MLIRContext *context, mlir::acc::ReductionOp op,
31213142
mlir::acc::ReductionRecipeOp recipe) {
31223143
getReductionOperandsMutable().append(op.getResult());

mlir/test/Dialect/OpenACC/ops.mlir

Lines changed: 36 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -358,6 +358,41 @@ func.func @acc_loop_multiple_block() {
358358

359359
// -----
360360

361+
acc.firstprivate.recipe @firstprivatization_memref_10xf32 : memref<10xf32> init {
362+
^bb0(%arg0: memref<10xf32>):
363+
%0 = memref.alloca() : memref<10xf32>
364+
acc.yield %0 : memref<10xf32>
365+
} copy {
366+
^bb0(%arg0: memref<10xf32>, %arg1: memref<10xf32>):
367+
memref.copy %arg0, %arg1 : memref<10xf32> to memref<10xf32>
368+
acc.terminator
369+
} destroy {
370+
^bb0(%arg0: memref<10xf32>):
371+
acc.terminator
372+
}
373+
374+
func.func @testloopfirstprivate(%a: memref<10xf32>, %b: memref<10xf32>) -> () {
375+
%c0 = arith.constant 0 : index
376+
%c10 = arith.constant 10 : index
377+
%c1 = arith.constant 1 : index
378+
%firstprivate = acc.firstprivate varPtr(%a : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32>
379+
acc.loop firstprivate(@firstprivatization_memref_10xf32 -> %firstprivate : memref<10xf32>) control(%iv : index) = (%c0 : index) to (%c10 : index) step (%c1 : index) {
380+
"test.openacc_dummy_op"() : () -> ()
381+
acc.yield
382+
} attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
383+
return
384+
}
385+
386+
// CHECK-LABEL: func.func @testloopfirstprivate(
387+
// CHECK-SAME: %[[ARG0:.*]]: memref<10xf32>, %[[ARG1:.*]]: memref<10xf32>)
388+
// CHECK: %[[FIRSTPRIVATE:.*]] = acc.firstprivate varPtr(%[[ARG0]] : memref<10xf32>) varType(tensor<10xf32>) -> memref<10xf32>
389+
// CHECK: acc.loop firstprivate(@firstprivatization_memref_10xf32 -> %[[FIRSTPRIVATE]] : memref<10xf32>) control(%{{.*}}) = (%{{.*}}) to (%{{.*}}) step (%{{.*}}) {
390+
// CHECK: "test.openacc_dummy_op"() : () -> ()
391+
// CHECK: acc.yield
392+
// CHECK: } attributes {inclusiveUpperbound = array<i1: true>, independent = [#acc.device_type<none>]}
393+
394+
// -----
395+
361396
acc.private.recipe @privatization_memref_10_f32 : memref<10xf32> init {
362397
^bb0(%arg0: memref<10xf32>):
363398
%0 = memref.alloc() : memref<10xf32>
@@ -535,6 +570,7 @@ acc.firstprivate.recipe @firstprivatization_memref_10xf32 : memref<10xf32> init
535570
acc.yield %0 : memref<10xf32>
536571
} copy {
537572
^bb0(%arg0: memref<10xf32>, %arg1: memref<10xf32>):
573+
memref.copy %arg0, %arg1 : memref<10xf32> to memref<10xf32>
538574
acc.terminator
539575
} destroy {
540576
^bb0(%arg0: memref<10xf32>):

0 commit comments

Comments
 (0)