Skip to content

Commit b1c4ebc

Browse files
committed
[OpenACC][CIR] Implement copyin/copyout/create lowering for compute/combined
This patch does the lowering of copyin (represented as a acc.copyin/acc.delete), copyout (acc.create/acc.copyin), and create (acc.create/acc.delete). Additionally, it found a few problems with #144806, so it fixes those as well.
1 parent edf0d0d commit b1c4ebc

File tree

5 files changed

+354
-3
lines changed

5 files changed

+354
-3
lines changed

clang/lib/CIR/CodeGen/CIRGenOpenACCClause.cpp

Lines changed: 51 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -858,6 +858,57 @@ class OpenACCClauseCIREmitter final
858858
}
859859
}
860860

861+
void VisitCopyInClause(const OpenACCCopyInClause &clause) {
862+
if constexpr (isOneOfTypes<OpTy, mlir::acc::ParallelOp, mlir::acc::SerialOp,
863+
mlir::acc::KernelsOp>) {
864+
for (auto var : clause.getVarList())
865+
addDataOperand<mlir::acc::CopyinOp, mlir::acc::DeleteOp>(
866+
var, mlir::acc::DataClause::acc_copyin, clause.getModifierList(),
867+
/*structured=*/true,
868+
/*implicit=*/false);
869+
} else if constexpr (isCombinedType<OpTy>) {
870+
applyToComputeOp(clause);
871+
} else {
872+
// TODO: When we've implemented this for everything, switch this to an
873+
// unreachable. data, declare, combined constructs remain.
874+
return clauseNotImplemented(clause);
875+
}
876+
}
877+
878+
void VisitCopyOutClause(const OpenACCCopyOutClause &clause) {
879+
if constexpr (isOneOfTypes<OpTy, mlir::acc::ParallelOp, mlir::acc::SerialOp,
880+
mlir::acc::KernelsOp>) {
881+
for (auto var : clause.getVarList())
882+
addDataOperand<mlir::acc::CreateOp, mlir::acc::CopyoutOp>(
883+
var, mlir::acc::DataClause::acc_copyout, clause.getModifierList(),
884+
/*structured=*/true,
885+
/*implicit=*/false);
886+
} else if constexpr (isCombinedType<OpTy>) {
887+
applyToComputeOp(clause);
888+
} else {
889+
// TODO: When we've implemented this for everything, switch this to an
890+
// unreachable. data, declare, combined constructs remain.
891+
return clauseNotImplemented(clause);
892+
}
893+
}
894+
895+
void VisitCreateClause(const OpenACCCreateClause &clause) {
896+
if constexpr (isOneOfTypes<OpTy, mlir::acc::ParallelOp, mlir::acc::SerialOp,
897+
mlir::acc::KernelsOp>) {
898+
for (auto var : clause.getVarList())
899+
addDataOperand<mlir::acc::CreateOp, mlir::acc::DeleteOp>(
900+
var, mlir::acc::DataClause::acc_create, clause.getModifierList(),
901+
/*structured=*/true,
902+
/*implicit=*/false);
903+
} else if constexpr (isCombinedType<OpTy>) {
904+
applyToComputeOp(clause);
905+
} else {
906+
// TODO: When we've implemented this for everything, switch this to an
907+
// unreachable. data, declare, combined constructs remain.
908+
return clauseNotImplemented(clause);
909+
}
910+
}
911+
861912
void VisitUseDeviceClause(const OpenACCUseDeviceClause &clause) {
862913
if constexpr (isOneOfTypes<OpTy, mlir::acc::HostDataOp>) {
863914
for (auto var : clause.getVarList())
Lines changed: 160 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,160 @@
1+
// RUN: %clang_cc1 -fopenacc -Wno-openacc-self-if-potential-conflict -emit-cir -fclangir %s -o - | FileCheck %s
2+
3+
void acc_combined(int parmVar) {
4+
// CHECK: cir.func{{.*}} @acc_combined(%[[ARG:.*]]: !s32i{{.*}}) {
5+
// CHECK-NEXT: %[[PARM:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["parmVar", init]
6+
7+
int localVar1;
8+
// CHECK-NEXT: %[[LV1:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["localVar1"]
9+
float localVar2;
10+
// CHECK-NEXT: %[[LV2:.*]] = cir.alloca !cir.float, !cir.ptr<!cir.float>, ["localVar2"]
11+
// CHECK-NEXT: cir.store %[[ARG]], %[[PARM]]
12+
#pragma acc parallel loop copyin(parmVar) copyout(localVar1) create(localVar2)
13+
for(int i = 0; i < 5; ++i);
14+
// CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[PARM]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "parmVar"}
15+
// CHECK-NEXT: %[[CREATE1:.*]] = acc.create varPtr(%[[LV1]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copyout>, name = "localVar1"}
16+
// CHECK-NEXT: %[[CREATE2:.*]] = acc.create varPtr(%[[LV2]] : !cir.ptr<!cir.float>) -> !cir.ptr<!cir.float> {name = "localVar2"}
17+
// CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[COPYIN1]], %[[CREATE1]], %[[CREATE2]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !cir.ptr<!cir.float>) {
18+
// CHECK-NEXT: acc.loop combined(parallel) {
19+
// CHECK: acc.yield
20+
// CHECK-NEXT: } loc
21+
// CHECK-NEXT: acc.yield
22+
// CHECK-NEXT: } loc
23+
// CHECK-NEXT: acc.delete accPtr(%[[CREATE2]] : !cir.ptr<!cir.float>) {dataClause = #acc<data_clause acc_create>, name = "localVar2"}
24+
// CHECK-NEXT: acc.copyout accPtr(%[[CREATE1]] : !cir.ptr<!s32i>) to varPtr(%[[LV1]] : !cir.ptr<!s32i>) {name = "localVar1"}
25+
// CHECK-NEXT: acc.delete accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copyin>, name = "parmVar"}
26+
27+
#pragma acc serial loop copyin(parmVar, localVar1)
28+
for(int i = 0; i < 5; ++i);
29+
// CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[PARM]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "parmVar"}
30+
// CHECK-NEXT: %[[COPYIN2:.*]] = acc.copyin varPtr(%[[LV1]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "localVar1"}
31+
// CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]], %[[COPYIN2]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>) {
32+
// CHECK-NEXT: acc.loop combined(serial) {
33+
// CHECK: acc.yield
34+
// CHECK-NEXT: } loc
35+
// CHECK-NEXT: acc.yield
36+
// CHECK-NEXT: } loc
37+
// CHECK-NEXT: acc.delete accPtr(%[[COPYIN2]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copyin>, name = "localVar1"}
38+
// CHECK-NEXT: acc.delete accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copyin>, name = "parmVar"}
39+
40+
#pragma acc kernels loop copyout(parmVar, localVar1)
41+
for(int i = 0; i < 5; ++i);
42+
// CHECK-NEXT: %[[CREATE1:.*]] = acc.create varPtr(%[[PARM]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copyout>, name = "parmVar"}
43+
// CHECK-NEXT: %[[CREATE2:.*]] = acc.create varPtr(%[[LV1]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copyout>, name = "localVar1"}
44+
// CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[CREATE1]], %[[CREATE2]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>) {
45+
// CHECK-NEXT: acc.loop combined(kernels) {
46+
// CHECK: acc.yield
47+
// CHECK-NEXT: } loc
48+
// CHECK-NEXT: acc.terminator
49+
// CHECK-NEXT: } loc
50+
// CHECK-NEXT: acc.copyout accPtr(%[[CREATE2]] : !cir.ptr<!s32i>) to varPtr(%[[LV1]] : !cir.ptr<!s32i>) {name = "localVar1"}
51+
// CHECK-NEXT: acc.copyout accPtr(%[[CREATE1]] : !cir.ptr<!s32i>) to varPtr(%[[PARM]] : !cir.ptr<!s32i>) {name = "parmVar"}
52+
53+
#pragma acc parallel loop create (parmVar, localVar2)
54+
for(int i = 0; i < 5; ++i);
55+
// CHECK-NEXT: %[[CREATE1:.*]] = acc.create varPtr(%[[PARM]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {name = "parmVar"}
56+
// CHECK-NEXT: %[[CREATE2:.*]] = acc.create varPtr(%[[LV2]] : !cir.ptr<!cir.float>) -> !cir.ptr<!cir.float> {name = "localVar2"}
57+
// CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[CREATE1]], %[[CREATE2]] : !cir.ptr<!s32i>, !cir.ptr<!cir.float>) {
58+
// CHECK-NEXT: acc.loop combined(parallel) {
59+
// CHECK: acc.yield
60+
// CHECK-NEXT: } loc
61+
// CHECK-NEXT: acc.yield
62+
// CHECK-NEXT: } loc
63+
// CHECK-NEXT: acc.delete accPtr(%[[CREATE2]] : !cir.ptr<!cir.float>) {dataClause = #acc<data_clause acc_create>, name = "localVar2"}
64+
// CHECK-NEXT: acc.delete accPtr(%[[CREATE1]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_create>, name = "parmVar"}
65+
66+
#pragma acc serial loop copyin(capture: parmVar) copyin(always: localVar1)
67+
for(int i = 0; i < 5; ++i);
68+
// CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[PARM]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {modifiers = #acc<data_clause_modifier capture>, name = "parmVar"}
69+
// CHECK-NEXT: %[[COPYIN2:.*]] = acc.copyin varPtr(%[[LV1]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {modifiers = #acc<data_clause_modifier always>, name = "localVar1"}
70+
// CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]], %[[COPYIN2]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>) {
71+
// CHECK-NEXT: acc.loop combined(serial) {
72+
// CHECK: acc.yield
73+
// CHECK-NEXT: } loc
74+
// CHECK-NEXT: acc.yield
75+
// CHECK-NEXT: } loc
76+
// CHECK-NEXT: acc.delete accPtr(%[[COPYIN2]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copyin>, modifiers = #acc<data_clause_modifier always>, name = "localVar1"}
77+
// CHECK-NEXT: acc.delete accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copyin>, modifiers = #acc<data_clause_modifier capture>, name = "parmVar"}
78+
79+
#pragma acc kernels loop copyout(capture: parmVar) copyout(always: localVar1)
80+
for(int i = 0; i < 5; ++i);
81+
// CHECK-NEXT: %[[CREATE1:.*]] = acc.create varPtr(%[[PARM]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copyout>, modifiers = #acc<data_clause_modifier capture>, name = "parmVar"}
82+
// CHECK-NEXT: %[[CREATE2:.*]] = acc.create varPtr(%[[LV1]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copyout>, modifiers = #acc<data_clause_modifier always>, name = "localVar1"}
83+
// CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[CREATE1]], %[[CREATE2]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>) {
84+
// CHECK-NEXT: acc.loop combined(kernels) {
85+
// CHECK: acc.yield
86+
// CHECK-NEXT: } loc
87+
// CHECK-NEXT: acc.terminator
88+
// CHECK-NEXT: } loc
89+
// CHECK-NEXT: acc.copyout accPtr(%[[CREATE2]] : !cir.ptr<!s32i>) to varPtr(%[[LV1]] : !cir.ptr<!s32i>) {modifiers = #acc<data_clause_modifier always>, name = "localVar1"}
90+
// CHECK-NEXT: acc.copyout accPtr(%[[CREATE1]] : !cir.ptr<!s32i>) to varPtr(%[[PARM]] : !cir.ptr<!s32i>) {modifiers = #acc<data_clause_modifier capture>, name = "parmVar"}
91+
92+
#pragma acc parallel loop create(capture: parmVar)
93+
for(int i = 0; i < 5; ++i);
94+
// CHECK-NEXT: %[[CREATE1:.*]] = acc.create varPtr(%[[PARM]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {modifiers = #acc<data_clause_modifier capture>, name = "parmVar"}
95+
// CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[CREATE1]] : !cir.ptr<!s32i>) {
96+
// CHECK-NEXT: acc.loop combined(parallel) {
97+
// CHECK: acc.yield
98+
// CHECK-NEXT: } loc
99+
// CHECK-NEXT: acc.yield
100+
// CHECK-NEXT: } loc
101+
// CHECK-NEXT: acc.delete accPtr(%[[CREATE1]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_create>, modifiers = #acc<data_clause_modifier capture>, name = "parmVar"}
102+
103+
#pragma acc serial loop copyin(capture, always: parmVar, localVar1)
104+
for(int i = 0; i < 5; ++i);
105+
// CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[PARM]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {modifiers = #acc<data_clause_modifier always,capture>, name = "parmVar"}
106+
// CHECK-NEXT: %[[COPYIN2:.*]] = acc.copyin varPtr(%[[LV1]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {modifiers = #acc<data_clause_modifier always,capture>, name = "localVar1"}
107+
// CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[COPYIN1]], %[[COPYIN2]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>) {
108+
// CHECK-NEXT: acc.loop combined(serial) {
109+
// CHECK: acc.yield
110+
// CHECK-NEXT: } loc
111+
// CHECK-NEXT: acc.yield
112+
// CHECK-NEXT: } loc
113+
// CHECK-NEXT: acc.delete accPtr(%[[COPYIN2]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copyin>, modifiers = #acc<data_clause_modifier always,capture>, name = "localVar1"}
114+
// CHECK-NEXT: acc.delete accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copyin>, modifiers = #acc<data_clause_modifier always,capture>, name = "parmVar"}
115+
116+
#pragma acc kernels loop copyin(readonly, always, alwaysin, capture: parmVar, localVar1, localVar2)
117+
for(int i = 0; i < 5; ++i);
118+
// CHECK-NEXT: %[[COPYIN1:.*]] = acc.copyin varPtr(%[[PARM]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {modifiers = #acc<data_clause_modifier always,readonly,capture>, name = "parmVar"}
119+
// CHECK-NEXT: %[[COPYIN2:.*]] = acc.copyin varPtr(%[[LV1]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {modifiers = #acc<data_clause_modifier always,readonly,capture>, name = "localVar1"}
120+
// CHECK-NEXT: %[[COPYIN3:.*]] = acc.copyin varPtr(%[[LV2]] : !cir.ptr<!cir.float>) -> !cir.ptr<!cir.float> {modifiers = #acc<data_clause_modifier always,readonly,capture>, name = "localVar2"}
121+
// CHECK-NEXT: acc.kernels combined(loop) dataOperands(%[[COPYIN1]], %[[COPYIN2]], %[[COPYIN3]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !cir.ptr<!cir.float>) {
122+
// CHECK-NEXT: acc.loop combined(kernels) {
123+
// CHECK: acc.yield
124+
// CHECK-NEXT: } loc
125+
// CHECK-NEXT: acc.terminator
126+
// CHECK-NEXT: } loc
127+
// CHECK-NEXT: acc.delete accPtr(%[[COPYIN3]] : !cir.ptr<!cir.float>) {dataClause = #acc<data_clause acc_copyin>, modifiers = #acc<data_clause_modifier always,readonly,capture>, name = "localVar2"}
128+
// CHECK-NEXT: acc.delete accPtr(%[[COPYIN2]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copyin>, modifiers = #acc<data_clause_modifier always,readonly,capture>, name = "localVar1"}
129+
// CHECK-NEXT: acc.delete accPtr(%[[COPYIN1]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_copyin>, modifiers = #acc<data_clause_modifier always,readonly,capture>, name = "parmVar"}
130+
131+
#pragma acc parallel loop copyout(zero, always, alwaysout, capture: parmVar, localVar1, localVar2)
132+
for(int i = 0; i < 5; ++i);
133+
// CHECK-NEXT: %[[CREATE1:.*]] = acc.create varPtr(%[[PARM]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copyout>, modifiers = #acc<data_clause_modifier always,zero,capture>, name = "parmVar"}
134+
// CHECK-NEXT: %[[CREATE2:.*]] = acc.create varPtr(%[[LV1]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {dataClause = #acc<data_clause acc_copyout>, modifiers = #acc<data_clause_modifier always,zero,capture>, name = "localVar1"}
135+
// CHECK-NEXT: %[[CREATE3:.*]] = acc.create varPtr(%[[LV2]] : !cir.ptr<!cir.float>) -> !cir.ptr<!cir.float> {dataClause = #acc<data_clause acc_copyout>, modifiers = #acc<data_clause_modifier always,zero,capture>, name = "localVar2"}
136+
// CHECK-NEXT: acc.parallel combined(loop) dataOperands(%[[CREATE1]], %[[CREATE2]], %[[CREATE3]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !cir.ptr<!cir.float>) {
137+
// CHECK-NEXT: acc.loop combined(parallel) {
138+
// CHECK: acc.yield
139+
// CHECK-NEXT: } loc
140+
// CHECK-NEXT: acc.yield
141+
// CHECK-NEXT: } loc
142+
// CHECK-NEXT: acc.copyout accPtr(%[[CREATE3]] : !cir.ptr<!cir.float>) to varPtr(%[[LV2]] : !cir.ptr<!cir.float>) {modifiers = #acc<data_clause_modifier always,zero,capture>, name = "localVar2"}
143+
// CHECK-NEXT: acc.copyout accPtr(%[[CREATE2]] : !cir.ptr<!s32i>) to varPtr(%[[LV1]] : !cir.ptr<!s32i>) {modifiers = #acc<data_clause_modifier always,zero,capture>, name = "localVar1"}
144+
// CHECK-NEXT: acc.copyout accPtr(%[[CREATE1]] : !cir.ptr<!s32i>) to varPtr(%[[PARM]] : !cir.ptr<!s32i>) {modifiers = #acc<data_clause_modifier always,zero,capture>, name = "parmVar"}
145+
146+
#pragma acc serial loop create(zero, capture: parmVar, localVar1, localVar2)
147+
for(int i = 0; i < 5; ++i);
148+
// CHECK-NEXT: %[[CREATE1:.*]] = acc.create varPtr(%[[PARM]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {modifiers = #acc<data_clause_modifier zero,capture>, name = "parmVar"}
149+
// CHECK-NEXT: %[[CREATE2:.*]] = acc.create varPtr(%[[LV1]] : !cir.ptr<!s32i>) -> !cir.ptr<!s32i> {modifiers = #acc<data_clause_modifier zero,capture>, name = "localVar1"}
150+
// CHECK-NEXT: %[[CREATE3:.*]] = acc.create varPtr(%[[LV2]] : !cir.ptr<!cir.float>) -> !cir.ptr<!cir.float> {modifiers = #acc<data_clause_modifier zero,capture>, name = "localVar2"}
151+
// CHECK-NEXT: acc.serial combined(loop) dataOperands(%[[CREATE1]], %[[CREATE2]], %[[CREATE3]] : !cir.ptr<!s32i>, !cir.ptr<!s32i>, !cir.ptr<!cir.float>) {
152+
// CHECK-NEXT: acc.loop combined(serial) {
153+
// CHECK: acc.yield
154+
// CHECK-NEXT: } loc
155+
// CHECK-NEXT: acc.yield
156+
// CHECK-NEXT: } loc
157+
// CHECK-NEXT: acc.delete accPtr(%[[CREATE3]] : !cir.ptr<!cir.float>) {dataClause = #acc<data_clause acc_create>, modifiers = #acc<data_clause_modifier zero,capture>, name = "localVar2"}
158+
// CHECK-NEXT: acc.delete accPtr(%[[CREATE2]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_create>, modifiers = #acc<data_clause_modifier zero,capture>, name = "localVar1"}
159+
// CHECK-NEXT: acc.delete accPtr(%[[CREATE1]] : !cir.ptr<!s32i>) {dataClause = #acc<data_clause acc_create>, modifiers = #acc<data_clause_modifier zero,capture>, name = "parmVar"}
160+
}

0 commit comments

Comments
 (0)