@@ -813,4 +813,70 @@ extern "C" void acc_combined(int N, int cond) {
813813 // CHECK-NEXT: acc.yield
814814 // CHECK-NEXT: } loc
815815
816+ #pragma acc parallel loop num_workers(cond)
817+ for (unsigned I = 0 ; I < N; ++I);
818+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
819+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
820+ // CHECK-NEXT: acc.parallel combined(loop) num_workers(%[[CONV_CAST]] : si32) {
821+ // CHECK-NEXT: acc.loop combined(parallel) {
822+ // CHECK: acc.yield
823+ // CHECK-NEXT: } loc
824+ // CHECK-NEXT: acc.yield
825+ // CHECK-NEXT: } loc
826+
827+ #pragma acc kernels loop num_workers(cond) device_type(nvidia) num_workers(2u)
828+ for (unsigned I = 0 ; I < N; ++I);
829+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
830+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
831+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !u32i
832+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !u32i to ui32
833+ // CHECK-NEXT: acc.kernels combined(loop) num_workers(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : ui32 [#acc.device_type<nvidia>]) {
834+ // CHECK-NEXT: acc.loop combined(kernels) {
835+ // CHECK: acc.yield
836+ // CHECK-NEXT: } loc
837+ // CHECK-NEXT: acc.terminator
838+ // CHECK-NEXT: } loc
839+
840+ #pragma acc parallel loop num_workers(cond) device_type(nvidia, host) num_workers(2) device_type(radeon) num_workers(3)
841+ for (unsigned I = 0 ; I < N; ++I);
842+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
843+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
844+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
845+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
846+ // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
847+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
848+ // CHECK-NEXT: acc.parallel combined(loop) num_workers(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[TWO_CAST]] : si32 [#acc.device_type<host>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) {
849+ // CHECK-NEXT: acc.loop combined(parallel) {
850+ // CHECK: acc.yield
851+ // CHECK-NEXT: } loc
852+ // CHECK-NEXT: acc.yield
853+ // CHECK-NEXT: } loc
854+
855+ #pragma acc kernels loop num_workers(cond) device_type(nvidia) num_workers(2) device_type(radeon, multicore) num_workers(4)
856+ for (unsigned I = 0 ; I < N; ++I);
857+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
858+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
859+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
860+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
861+ // CHECK-NEXT: %[[FOUR_LITERAL:.*]] = cir.const #cir.int<4> : !s32i
862+ // CHECK-NEXT: %[[FOUR_CAST:.*]] = builtin.unrealized_conversion_cast %[[FOUR_LITERAL]] : !s32i to si32
863+ // CHECK-NEXT: acc.kernels combined(loop) num_workers(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[FOUR_CAST]] : si32 [#acc.device_type<radeon>], %[[FOUR_CAST]] : si32 [#acc.device_type<multicore>]) {
864+ // CHECK-NEXT: acc.loop combined(kernels) {
865+ // CHECK: acc.yield
866+ // CHECK-NEXT: } loc
867+ // CHECK-NEXT: acc.terminator
868+ // CHECK-NEXT: } loc
869+
870+ #pragma acc parallel loop device_type(nvidia) num_workers(2) device_type(radeon) num_workers(3)
871+ for (unsigned I = 0 ; I < N; ++I);
872+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
873+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
874+ // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
875+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
876+ // CHECK-NEXT: acc.parallel combined(loop) num_workers(%[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) {
877+ // CHECK-NEXT: acc.loop combined(parallel) {
878+ // CHECK: acc.yield
879+ // CHECK-NEXT: } loc
880+ // CHECK-NEXT: acc.yield
881+ // CHECK-NEXT: } loc
816882}
0 commit comments