@@ -879,4 +879,72 @@ extern "C" void acc_combined(int N, int cond) {
879879 // CHECK-NEXT: } loc
880880 // CHECK-NEXT: acc.yield
881881 // CHECK-NEXT: } loc
882+ //
883+ #pragma acc parallel loop vector_length(cond)
884+ for (unsigned I = 0 ; I < N; ++I);
885+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
886+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
887+ // CHECK-NEXT: acc.parallel combined(loop) vector_length(%[[CONV_CAST]] : si32) {
888+ // CHECK-NEXT: acc.loop combined(parallel) {
889+ // CHECK: acc.yield
890+ // CHECK-NEXT: } loc
891+ // CHECK-NEXT: acc.yield
892+ // CHECK-NEXT: } loc
893+
894+ #pragma acc kernels loop vector_length(cond) device_type(nvidia) vector_length(2u)
895+ for (unsigned I = 0 ; I < N; ++I);
896+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
897+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
898+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !u32i
899+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !u32i to ui32
900+ // CHECK-NEXT: acc.kernels combined(loop) vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : ui32 [#acc.device_type<nvidia>]) {
901+ // CHECK-NEXT: acc.loop combined(kernels) {
902+ // CHECK: acc.yield
903+ // CHECK-NEXT: } loc
904+ // CHECK-NEXT: acc.terminator
905+ // CHECK-NEXT: } loc
906+
907+ #pragma acc parallel loop vector_length(cond) device_type(nvidia, host) vector_length(2) device_type(radeon) vector_length(3)
908+ for (unsigned I = 0 ; I < N; ++I);
909+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
910+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
911+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
912+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
913+ // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
914+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
915+ // CHECK-NEXT: acc.parallel combined(loop) vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[TWO_CAST]] : si32 [#acc.device_type<host>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) {
916+ // CHECK-NEXT: acc.loop combined(parallel) {
917+ // CHECK: acc.yield
918+ // CHECK-NEXT: } loc
919+ // CHECK-NEXT: acc.yield
920+ // CHECK-NEXT: } loc
921+
922+ #pragma acc kernels loop vector_length(cond) device_type(nvidia) vector_length(2) device_type(radeon, multicore) vector_length(4)
923+ for (unsigned I = 0 ; I < N; ++I);
924+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
925+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
926+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
927+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
928+ // CHECK-NEXT: %[[FOUR_LITERAL:.*]] = cir.const #cir.int<4> : !s32i
929+ // CHECK-NEXT: %[[FOUR_CAST:.*]] = builtin.unrealized_conversion_cast %[[FOUR_LITERAL]] : !s32i to si32
930+ // CHECK-NEXT: acc.kernels combined(loop) vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[FOUR_CAST]] : si32 [#acc.device_type<radeon>], %[[FOUR_CAST]] : si32 [#acc.device_type<multicore>]) {
931+ // CHECK-NEXT: acc.loop combined(kernels) {
932+ // CHECK: acc.yield
933+ // CHECK-NEXT: } loc
934+ // CHECK-NEXT: acc.terminator
935+ // CHECK-NEXT: } loc
936+
937+ #pragma acc parallel loop device_type(nvidia) vector_length(2) device_type(radeon) vector_length(3)
938+ for (unsigned I = 0 ; I < N; ++I);
939+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
940+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
941+ // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
942+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
943+ // CHECK-NEXT: acc.parallel combined(loop) vector_length(%[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) {
944+ // CHECK-NEXT: acc.loop combined(parallel) {
945+ // CHECK: acc.yield
946+ // CHECK-NEXT: } loc
947+ // CHECK-NEXT: acc.yield
948+ // CHECK-NEXT: } loc
949+
882950}
0 commit comments