@@ -947,4 +947,67 @@ extern "C" void acc_combined(int N, int cond) {
947947 // CHECK-NEXT: acc.yield
948948 // CHECK-NEXT: } loc
949949
950+ #pragma acc parallel loop async
951+ for (unsigned I = 0 ; I < N; ++I);
952+ // CHECK-NEXT: acc.parallel combined(loop) async {
953+ // CHECK-NEXT: acc.loop combined(parallel) {
954+ // CHECK: acc.yield
955+ // CHECK-NEXT: } loc
956+ // CHECK-NEXT: acc.yield
957+ // CHECK-NEXT: } loc
958+
959+ #pragma acc serial loop async(cond)
960+ for (unsigned I = 0 ; I < N; ++I);
961+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
962+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
963+ // CHECK-NEXT: acc.serial combined(loop) async(%[[CONV_CAST]] : si32) {
964+ // CHECK-NEXT: acc.loop combined(serial) {
965+ // CHECK: acc.yield
966+ // CHECK-NEXT: } loc
967+ // CHECK-NEXT: acc.yield
968+ // CHECK-NEXT: } loc
969+
970+ #pragma acc kernels loop async device_type(nvidia, radeon) async
971+ for (unsigned I = 0 ; I < N; ++I);
972+ // CHECK-NEXT: acc.kernels combined(loop) async([#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>]) {
973+ // CHECK-NEXT: acc.loop combined(kernels) {
974+ // CHECK: acc.yield
975+ // CHECK-NEXT: } loc
976+ // CHECK-NEXT: acc.terminator
977+ // CHECK-NEXT: } loc
978+
979+ #pragma acc parallel loop async(3) device_type(nvidia, radeon) async(cond)
980+ for (unsigned I = 0 ; I < N; ++I);
981+ // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
982+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
983+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
984+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
985+ // CHECK-NEXT: acc.parallel combined(loop) async(%[[THREE_CAST]] : si32, %[[CONV_CAST]] : si32 [#acc.device_type<nvidia>], %[[CONV_CAST]] : si32 [#acc.device_type<radeon>]) {
986+ // CHECK-NEXT: acc.loop combined(parallel) {
987+ // CHECK: acc.yield
988+ // CHECK-NEXT: } loc
989+ // CHECK-NEXT: acc.yield
990+ // CHECK-NEXT: } loc
991+
992+ #pragma acc serial loop async device_type(nvidia, radeon) async(cond)
993+ for (unsigned I = 0 ; I < N; ++I);
994+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
995+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
996+ // CHECK-NEXT: acc.serial combined(loop) async([#acc.device_type<none>], %[[CONV_CAST]] : si32 [#acc.device_type<nvidia>], %[[CONV_CAST]] : si32 [#acc.device_type<radeon>]) {
997+ // CHECK-NEXT: acc.loop combined(serial) {
998+ // CHECK: acc.yield
999+ // CHECK-NEXT: } loc
1000+ // CHECK-NEXT: acc.yield
1001+ // CHECK-NEXT: } loc
1002+
1003+ #pragma acc kernels loop async(3) device_type(nvidia, radeon) async
1004+ for (unsigned I = 0 ; I < N; ++I);
1005+ // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
1006+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
1007+ // CHECK-NEXT: acc.kernels combined(loop) async([#acc.device_type<nvidia>, #acc.device_type<radeon>], %[[THREE_CAST]] : si32) {
1008+ // CHECK-NEXT: acc.loop combined(kernels) {
1009+ // CHECK: acc.yield
1010+ // CHECK-NEXT: } loc
1011+ // CHECK-NEXT: acc.terminator
1012+ // CHECK-NEXT: } loc
9501013}
0 commit comments