Skip to content

Commit f6c5637

Browse files
authored
Merge branch 'main' into dag-reduce-load-offset
2 parents bddc9f8 + 5e101de commit f6c5637

31 files changed

+1218
-381
lines changed

clang/lib/CIR/CodeGen/CIRGenStmtOpenACC.cpp

Lines changed: 33 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -170,6 +170,7 @@ class OpenACCClauseCIREmitter final
170170
break;
171171
}
172172
} else {
173+
// Combined Constructs left.
173174
return clauseNotImplemented(clause);
174175
}
175176
}
@@ -208,6 +209,7 @@ class OpenACCClauseCIREmitter final
208209
// they just modify the other clauses IR. So setting of `lastDeviceType`
209210
// (done above) is all we need.
210211
} else {
212+
// update, data, loop, routine, combined remain.
211213
return clauseNotImplemented(clause);
212214
}
213215
}
@@ -221,6 +223,7 @@ class OpenACCClauseCIREmitter final
221223
} else if constexpr (isOneOfTypes<OpTy, SerialOp>) {
222224
llvm_unreachable("num_workers not valid on serial");
223225
} else {
226+
// Combined Remain.
224227
return clauseNotImplemented(clause);
225228
}
226229
}
@@ -234,6 +237,7 @@ class OpenACCClauseCIREmitter final
234237
} else if constexpr (isOneOfTypes<OpTy, SerialOp>) {
235238
llvm_unreachable("vector_length not valid on serial");
236239
} else {
240+
// Combined remain.
237241
return clauseNotImplemented(clause);
238242
}
239243
}
@@ -250,6 +254,7 @@ class OpenACCClauseCIREmitter final
250254
createIntExpr(clause.getIntExpr()), &range));
251255
}
252256
} else {
257+
// Data, enter data, exit data, update, wait, combined remain.
253258
return clauseNotImplemented(clause);
254259
}
255260
}
@@ -266,21 +271,48 @@ class OpenACCClauseCIREmitter final
266271
llvm_unreachable("var-list version of self shouldn't get here");
267272
}
268273
} else {
274+
// update and combined remain.
269275
return clauseNotImplemented(clause);
270276
}
271277
}
272278

273279
void VisitIfClause(const OpenACCIfClause &clause) {
274-
if constexpr (isOneOfTypes<OpTy, ParallelOp, SerialOp, KernelsOp>) {
280+
if constexpr (isOneOfTypes<OpTy, ParallelOp, SerialOp, KernelsOp, InitOp,
281+
ShutdownOp, SetOp>) {
275282
operation.getIfCondMutable().append(
276283
createCondition(clause.getConditionExpr()));
277284
} else {
278285
// 'if' applies to most of the constructs, but hold off on lowering them
279286
// until we can write tests/know what we're doing with codegen to make
280287
// sure we get it right.
288+
// Enter data, exit data, host_data, update, wait, combined remain.
281289
return clauseNotImplemented(clause);
282290
}
283291
}
292+
293+
void VisitDeviceNumClause(const OpenACCDeviceNumClause &clause) {
294+
if constexpr (isOneOfTypes<OpTy, InitOp, ShutdownOp>) {
295+
operation.getDeviceNumOperandMutable().append(
296+
createIntExpr(clause.getIntExpr()));
297+
} else if constexpr (isOneOfTypes<OpTy, SetOp>) {
298+
// This is only a separate case because the getter name is different in
299+
// 'set' for some reason.
300+
operation.getDeviceNumMutable().append(
301+
createIntExpr(clause.getIntExpr()));
302+
} else {
303+
llvm_unreachable(
304+
"init, shutdown, set, are only valid device_num constructs");
305+
}
306+
}
307+
308+
void VisitDefaultAsyncClause(const OpenACCDefaultAsyncClause &clause) {
309+
if constexpr (isOneOfTypes<OpTy, SetOp>) {
310+
operation.getDefaultAsyncMutable().append(
311+
createIntExpr(clause.getIntExpr()));
312+
} else {
313+
llvm_unreachable("set, is only valid device_num constructs");
314+
}
315+
}
284316
};
285317

286318
template <typename OpTy>

clang/test/CIR/CodeGenOpenACC/init.c

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
// RUN: %clang_cc1 -fopenacc -emit-cir -fclangir %s -o - | FileCheck %s
22

3-
void acc_init(void) {
4-
// CHECK: cir.func @acc_init() {
3+
void acc_init(int cond) {
4+
// CHECK: cir.func @acc_init(%[[ARG:.*]]: !s32i{{.*}}) {
5+
// CHECK-NEXT: %[[COND:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["cond", init]
6+
// CHECK-NEXT: cir.store %[[ARG]], %[[COND]] : !s32i, !cir.ptr<!s32i>
57
#pragma acc init
68
// CHECK-NEXT: acc.init loc(#{{[a-zA-Z0-9]+}}){{$}}
79

@@ -17,4 +19,34 @@ void acc_init(void) {
1719
// CHECK-NEXT: acc.init attributes {device_types = [#acc.device_type<host>, #acc.device_type<multicore>]}
1820
#pragma acc init device_type(HoSt) device_type(MuLtIcORe)
1921
// CHECK-NEXT: acc.init attributes {device_types = [#acc.device_type<host>, #acc.device_type<multicore>]}
22+
23+
#pragma acc init if(cond)
24+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
25+
// CHECK-NEXT: %[[COND_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
26+
// CHECK-NEXT: %[[BOOL_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_CAST]] : !cir.bool to i1
27+
// CHECK-NEXT: acc.init if(%[[BOOL_CONV]])
28+
29+
#pragma acc init if(1)
30+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
31+
// CHECK-NEXT: %[[ONE_TO_BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[ONE_LITERAL]] : !s32i), !cir.bool
32+
// CHECK-NEXT: %[[BOOL_CONV:.*]] = builtin.unrealized_conversion_cast %[[ONE_TO_BOOL_CAST]] : !cir.bool to i1
33+
// CHECK-NEXT: acc.init if(%[[BOOL_CONV]])
34+
35+
#pragma acc init device_num(cond)
36+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
37+
// CHECK-NEXT: %[[COND_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
38+
// CHECK-NEXT: acc.init device_num(%[[COND_CONV]] : si32)
39+
40+
#pragma acc init device_num(1)
41+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
42+
// CHECK-NEXT: %[[ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
43+
// CHECK-NEXT: acc.init device_num(%[[ONE_CONV]] : si32)
44+
45+
#pragma acc init if(cond) device_num(cond) device_type(*)
46+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
47+
// CHECK-NEXT: %[[COND_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
48+
// CHECK-NEXT: %[[BOOL_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_CAST]] : !cir.bool to i1
49+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
50+
// CHECK-NEXT: %[[COND_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
51+
// CHECK-NEXT: acc.init device_num(%[[COND_CONV]] : si32) if(%[[BOOL_CONV]]) attributes {device_types = [#acc.device_type<star>]}
2052
}
Lines changed: 32 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
// RUN: %clang_cc1 -fopenacc -emit-cir -fclangir %s -o - | FileCheck %s
22

3-
void acc_set(void) {
4-
// CHECK: cir.func @acc_set() {
3+
void acc_set(int cond) {
4+
// CHECK: cir.func @acc_set(%[[ARG:.*]]: !s32i{{.*}}) {
5+
// CHECK-NEXT: %[[COND:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["cond", init]
6+
// CHECK-NEXT: cir.store %[[ARG]], %[[COND]] : !s32i, !cir.ptr<!s32i>
57

68
#pragma acc set device_type(*)
79
// CHECK-NEXT: acc.set attributes {device_type = #acc.device_type<star>}
@@ -10,5 +12,33 @@ void acc_set(void) {
1012
#pragma acc set device_type(radeon)
1113
// CHECK-NEXT: acc.set attributes {device_type = #acc.device_type<radeon>}
1214

15+
#pragma acc set default_async(cond)
16+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
17+
// CHECK-NEXT: %[[COND_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
18+
// CHECK-NEXT: acc.set default_async(%[[COND_CONV]] : si32)
19+
20+
#pragma acc set default_async(1)
21+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
22+
// CHECK-NEXT: %[[ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
23+
// CHECK-NEXT: acc.set default_async(%[[ONE_CONV]] : si32)
24+
25+
#pragma acc set device_num(cond) if (cond)
26+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
27+
// CHECK-NEXT: %[[COND_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
28+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
29+
// CHECK-NEXT: %[[COND_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
30+
// CHECK-NEXT: %[[BOOL_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_CAST]] : !cir.bool to i1
31+
// CHECK-NEXT: acc.set device_num(%[[COND_CONV]] : si32) if(%[[BOOL_CONV]])
32+
33+
#pragma acc set device_type(radeon) default_async(1) device_num(cond) if (cond)
34+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
35+
// CHECK-NEXT: %[[ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
36+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
37+
// CHECK-NEXT: %[[COND_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
38+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
39+
// CHECK-NEXT: %[[COND_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
40+
// CHECK-NEXT: %[[BOOL_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_CAST]] : !cir.bool to i1
41+
// CHECK-NEXT: acc.set default_async(%[[ONE_CONV]] : si32) device_num(%[[COND_CONV]] : si32) if(%[[BOOL_CONV]]) attributes {device_type = #acc.device_type<radeon>}
42+
1343
// CHECK-NEXT: cir.return
1444
}

clang/test/CIR/CodeGenOpenACC/shutdown.c

Lines changed: 34 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -1,7 +1,9 @@
11
// RUN: %clang_cc1 -fopenacc -emit-cir -fclangir %s -o - | FileCheck %s
22

3-
void acc_shutdown(void) {
4-
// CHECK: cir.func @acc_shutdown() {
3+
void acc_shutdown(int cond) {
4+
// CHECK: cir.func @acc_shutdown(%[[ARG:.*]]: !s32i{{.*}}) {
5+
// CHECK-NEXT: %[[COND:.*]] = cir.alloca !s32i, !cir.ptr<!s32i>, ["cond", init]
6+
// CHECK-NEXT: cir.store %[[ARG]], %[[COND]] : !s32i, !cir.ptr<!s32i>
57
#pragma acc shutdown
68
// CHECK-NEXT: acc.shutdown loc(#{{[a-zA-Z0-9]+}}){{$}}
79

@@ -17,4 +19,34 @@ void acc_shutdown(void) {
1719
// CHECK-NEXT: acc.shutdown attributes {device_types = [#acc.device_type<host>, #acc.device_type<multicore>]}
1820
#pragma acc shutdown device_type(HoSt) device_type(MuLtIcORe)
1921
// CHECK-NEXT: acc.shutdown attributes {device_types = [#acc.device_type<host>, #acc.device_type<multicore>]}
22+
23+
#pragma acc shutdown if(cond)
24+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
25+
// CHECK-NEXT: %[[COND_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
26+
// CHECK-NEXT: %[[BOOL_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_CAST]] : !cir.bool to i1
27+
// CHECK-NEXT: acc.shutdown if(%[[BOOL_CONV]])
28+
29+
#pragma acc shutdown if(1)
30+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
31+
// CHECK-NEXT: %[[ONE_TO_BOOL_CAST:.*]] = cir.cast(int_to_bool, %[[ONE_LITERAL]] : !s32i), !cir.bool
32+
// CHECK-NEXT: %[[BOOL_CONV:.*]] = builtin.unrealized_conversion_cast %[[ONE_TO_BOOL_CAST]] : !cir.bool to i1
33+
// CHECK-NEXT: acc.shutdown if(%[[BOOL_CONV]])
34+
35+
#pragma acc shutdown device_num(cond)
36+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
37+
// CHECK-NEXT: %[[COND_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
38+
// CHECK-NEXT: acc.shutdown device_num(%[[COND_CONV]] : si32)
39+
40+
#pragma acc shutdown device_num(1)
41+
// CHECK-NEXT: %[[ONE_LITERAL:.*]] = cir.const #cir.int<1> : !s32i
42+
// CHECK-NEXT: %[[ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[ONE_LITERAL]] : !s32i to si32
43+
// CHECK-NEXT: acc.shutdown device_num(%[[ONE_CONV]] : si32)
44+
45+
#pragma acc shutdown if(cond) device_num(cond) device_type(*)
46+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
47+
// CHECK-NEXT: %[[COND_CAST:.*]] = cir.cast(int_to_bool, %[[COND_LOAD]] : !s32i), !cir.bool
48+
// CHECK-NEXT: %[[BOOL_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_CAST]] : !cir.bool to i1
49+
// CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
50+
// CHECK-NEXT: %[[COND_CONV:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
51+
// CHECK-NEXT: acc.shutdown device_num(%[[COND_CONV]] : si32) if(%[[BOOL_CONV]]) attributes {device_types = [#acc.device_type<star>]}
2052
}

flang/lib/Optimizer/OpenACC/FIROpenACCTypeInterfaces.cpp

Lines changed: 71 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -188,6 +188,77 @@ OpenACCMappableModel<fir::SequenceType>::generateAccBounds(
188188
mlir::acc::DataBoundsType>(
189189
firBuilder, loc, exv, info);
190190
}
191+
192+
if (mlir::isa<hlfir::DeclareOp, fir::DeclareOp>(varPtr.getDefiningOp())) {
193+
mlir::Value zero =
194+
firBuilder.createIntegerConstant(loc, builder.getIndexType(), 0);
195+
mlir::Value one =
196+
firBuilder.createIntegerConstant(loc, builder.getIndexType(), 1);
197+
198+
mlir::Value shape;
199+
if (auto declareOp =
200+
mlir::dyn_cast_if_present<fir::DeclareOp>(varPtr.getDefiningOp()))
201+
shape = declareOp.getShape();
202+
else if (auto declareOp = mlir::dyn_cast_if_present<hlfir::DeclareOp>(
203+
varPtr.getDefiningOp()))
204+
shape = declareOp.getShape();
205+
206+
const bool strideIncludeLowerExtent = true;
207+
208+
llvm::SmallVector<mlir::Value> accBounds;
209+
if (auto shapeOp =
210+
mlir::dyn_cast_if_present<fir::ShapeOp>(shape.getDefiningOp())) {
211+
mlir::Value cummulativeExtent = one;
212+
for (auto extent : shapeOp.getExtents()) {
213+
mlir::Value upperbound =
214+
builder.create<mlir::arith::SubIOp>(loc, extent, one);
215+
mlir::Value stride = one;
216+
if (strideIncludeLowerExtent) {
217+
stride = cummulativeExtent;
218+
cummulativeExtent = builder.create<mlir::arith::MulIOp>(
219+
loc, cummulativeExtent, extent);
220+
}
221+
auto accBound = builder.create<mlir::acc::DataBoundsOp>(
222+
loc, mlir::acc::DataBoundsType::get(builder.getContext()),
223+
/*lowerbound=*/zero, /*upperbound=*/upperbound,
224+
/*extent=*/extent, /*stride=*/stride, /*strideInBytes=*/false,
225+
/*startIdx=*/one);
226+
accBounds.push_back(accBound);
227+
}
228+
} else if (auto shapeShiftOp =
229+
mlir::dyn_cast_if_present<fir::ShapeShiftOp>(
230+
shape.getDefiningOp())) {
231+
mlir::Value lowerbound;
232+
mlir::Value cummulativeExtent = one;
233+
for (auto [idx, val] : llvm::enumerate(shapeShiftOp.getPairs())) {
234+
if (idx % 2 == 0) {
235+
lowerbound = val;
236+
} else {
237+
mlir::Value extent = val;
238+
mlir::Value upperbound =
239+
builder.create<mlir::arith::SubIOp>(loc, extent, one);
240+
upperbound = builder.create<mlir::arith::AddIOp>(loc, lowerbound,
241+
upperbound);
242+
mlir::Value stride = one;
243+
if (strideIncludeLowerExtent) {
244+
stride = cummulativeExtent;
245+
cummulativeExtent = builder.create<mlir::arith::MulIOp>(
246+
loc, cummulativeExtent, extent);
247+
}
248+
auto accBound = builder.create<mlir::acc::DataBoundsOp>(
249+
loc, mlir::acc::DataBoundsType::get(builder.getContext()),
250+
/*lowerbound=*/zero, /*upperbound=*/upperbound,
251+
/*extent=*/extent, /*stride=*/stride, /*strideInBytes=*/false,
252+
/*startIdx=*/lowerbound);
253+
accBounds.push_back(accBound);
254+
}
255+
}
256+
}
257+
258+
if (!accBounds.empty())
259+
return accBounds;
260+
}
261+
191262
assert(false && "array with unknown dimension expected to have descriptor");
192263
return {};
193264
}

0 commit comments

Comments
 (0)