@@ -252,4 +252,71 @@ extern "C" void acc_combined(int N) {
252252 // CHECK-NEXT: acc.yield
253253 // CHECK-NEXT: } loc
254254
255+ #pragma acc parallel loop tile(1, 2, 3)
256+ for (unsigned I = 0 ; I < N; ++I)
257+ for (unsigned J = 0 ; J < N; ++J)
258+ for (unsigned K = 0 ; K < N; ++K);
259+ // CHECK-NEXT: acc.parallel combined(loop) {
260+ // CHECK: %[[ONE_CONST:.*]] = arith.constant 1 : i64
261+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
262+ // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
263+ // CHECK-NEXT: acc.loop combined(parallel) tile({%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64}) {
264+ // CHECK: acc.yield
265+ // CHECK-NEXT: } loc
266+ // CHECK-NEXT: acc.yield
267+ // CHECK-NEXT: } loc
268+ #pragma acc serial loop tile(2) device_type(radeon)
269+ for (unsigned I = 0 ; I < N; ++I)
270+ for (unsigned J = 0 ; J < N; ++J)
271+ for (unsigned K = 0 ; K < N; ++K);
272+ // CHECK-NEXT: acc.serial combined(loop) {
273+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
274+ // CHECK-NEXT: acc.loop combined(serial) tile({%[[TWO_CONST]] : i64}) {
275+ // CHECK: acc.yield
276+ // CHECK-NEXT: } loc
277+ // CHECK-NEXT: acc.yield
278+ // CHECK-NEXT: } loc
279+ #pragma acc kernels loop tile(2) device_type(radeon) tile (1, *)
280+ for (unsigned I = 0 ; I < N; ++I)
281+ for (unsigned J = 0 ; J < N; ++J)
282+ for (unsigned K = 0 ; K < N; ++K);
283+ // CHECK-NEXT: acc.kernels combined(loop) {
284+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
285+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
286+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
287+ // CHECK-NEXT: acc.loop combined(kernels) tile({%[[TWO_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[STAR_CONST]] : i64} [#acc.device_type<radeon>]) {
288+ // CHECK: acc.yield
289+ // CHECK-NEXT: } loc
290+ // CHECK-NEXT: acc.terminator
291+ // CHECK-NEXT: } loc
292+ #pragma acc parallel loop tile(*) device_type(radeon, nvidia) tile (1, 2)
293+ for (unsigned I = 0 ; I < N; ++I)
294+ for (unsigned J = 0 ; J < N; ++J)
295+ for (unsigned K = 0 ; K < N; ++K);
296+ // CHECK-NEXT: acc.parallel combined(loop) {
297+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
298+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
299+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
300+ // CHECK-NEXT: acc.loop combined(parallel) tile({%[[STAR_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<radeon>], {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<nvidia>]) {
301+ // CHECK: acc.yield
302+ // CHECK-NEXT: } loc
303+ // CHECK-NEXT: acc.yield
304+ // CHECK-NEXT: } loc
305+ #pragma acc serial loop tile(1) device_type(radeon, nvidia) tile(2, 3) device_type(host) tile(*, *, *)
306+ for (unsigned I = 0 ; I < N; ++I)
307+ for (unsigned J = 0 ; J < N; ++J)
308+ for (unsigned K = 0 ; K < N; ++K);
309+ // CHECK-NEXT: acc.serial combined(loop) {
310+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
311+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
312+ // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
313+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
314+ // CHECK-NEXT: %[[STAR2_CONST:.*]] = arith.constant -1 : i64
315+ // CHECK-NEXT: %[[STAR3_CONST:.*]] = arith.constant -1 : i64
316+ // CHECK-NEXT: acc.loop combined(serial) tile({%[[ONE_CONST]] : i64}, {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<radeon>], {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<nvidia>], {%[[STAR_CONST]] : i64, %[[STAR2_CONST]] : i64, %[[STAR3_CONST]] : i64} [#acc.device_type<host>]) {
317+ // CHECK: acc.yield
318+ // CHECK-NEXT: } loc
319+ // CHECK-NEXT: acc.yield
320+ // CHECK-NEXT: } loc
321+
255322}
0 commit comments