@@ -879,4 +879,72 @@ extern "C" void acc_combined(int N, int cond) {
879
879
// CHECK-NEXT: } loc
880
880
// CHECK-NEXT: acc.yield
881
881
// CHECK-NEXT: } loc
882
+ //
883
+ #pragma acc parallel loop vector_length(cond)
884
+ for (unsigned I = 0 ; I < N; ++I);
885
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
886
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
887
+ // CHECK-NEXT: acc.parallel combined(loop) vector_length(%[[CONV_CAST]] : si32) {
888
+ // CHECK-NEXT: acc.loop combined(parallel) {
889
+ // CHECK: acc.yield
890
+ // CHECK-NEXT: } loc
891
+ // CHECK-NEXT: acc.yield
892
+ // CHECK-NEXT: } loc
893
+
894
+ #pragma acc kernels loop vector_length(cond) device_type(nvidia) vector_length(2u)
895
+ for (unsigned I = 0 ; I < N; ++I);
896
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
897
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
898
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !u32i
899
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !u32i to ui32
900
+ // CHECK-NEXT: acc.kernels combined(loop) vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : ui32 [#acc.device_type<nvidia>]) {
901
+ // CHECK-NEXT: acc.loop combined(kernels) {
902
+ // CHECK: acc.yield
903
+ // CHECK-NEXT: } loc
904
+ // CHECK-NEXT: acc.terminator
905
+ // CHECK-NEXT: } loc
906
+
907
+ #pragma acc parallel loop vector_length(cond) device_type(nvidia, host) vector_length(2) device_type(radeon) vector_length(3)
908
+ for (unsigned I = 0 ; I < N; ++I);
909
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
910
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
911
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
912
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
913
+ // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
914
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
915
+ // CHECK-NEXT: acc.parallel combined(loop) vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[TWO_CAST]] : si32 [#acc.device_type<host>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) {
916
+ // CHECK-NEXT: acc.loop combined(parallel) {
917
+ // CHECK: acc.yield
918
+ // CHECK-NEXT: } loc
919
+ // CHECK-NEXT: acc.yield
920
+ // CHECK-NEXT: } loc
921
+
922
+ #pragma acc kernels loop vector_length(cond) device_type(nvidia) vector_length(2) device_type(radeon, multicore) vector_length(4)
923
+ for (unsigned I = 0 ; I < N; ++I);
924
+ // CHECK-NEXT: %[[COND_LOAD:.*]] = cir.load %[[COND]] : !cir.ptr<!s32i>, !s32i
925
+ // CHECK-NEXT: %[[CONV_CAST:.*]] = builtin.unrealized_conversion_cast %[[COND_LOAD]] : !s32i to si32
926
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
927
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
928
+ // CHECK-NEXT: %[[FOUR_LITERAL:.*]] = cir.const #cir.int<4> : !s32i
929
+ // CHECK-NEXT: %[[FOUR_CAST:.*]] = builtin.unrealized_conversion_cast %[[FOUR_LITERAL]] : !s32i to si32
930
+ // CHECK-NEXT: acc.kernels combined(loop) vector_length(%[[CONV_CAST]] : si32, %[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[FOUR_CAST]] : si32 [#acc.device_type<radeon>], %[[FOUR_CAST]] : si32 [#acc.device_type<multicore>]) {
931
+ // CHECK-NEXT: acc.loop combined(kernels) {
932
+ // CHECK: acc.yield
933
+ // CHECK-NEXT: } loc
934
+ // CHECK-NEXT: acc.terminator
935
+ // CHECK-NEXT: } loc
936
+
937
+ #pragma acc parallel loop device_type(nvidia) vector_length(2) device_type(radeon) vector_length(3)
938
+ for (unsigned I = 0 ; I < N; ++I);
939
+ // CHECK-NEXT: %[[TWO_LITERAL:.*]] = cir.const #cir.int<2> : !s32i
940
+ // CHECK-NEXT: %[[TWO_CAST:.*]] = builtin.unrealized_conversion_cast %[[TWO_LITERAL]] : !s32i to si32
941
+ // CHECK-NEXT: %[[THREE_LITERAL:.*]] = cir.const #cir.int<3> : !s32i
942
+ // CHECK-NEXT: %[[THREE_CAST:.*]] = builtin.unrealized_conversion_cast %[[THREE_LITERAL]] : !s32i to si32
943
+ // CHECK-NEXT: acc.parallel combined(loop) vector_length(%[[TWO_CAST]] : si32 [#acc.device_type<nvidia>], %[[THREE_CAST]] : si32 [#acc.device_type<radeon>]) {
944
+ // CHECK-NEXT: acc.loop combined(parallel) {
945
+ // CHECK: acc.yield
946
+ // CHECK-NEXT: } loc
947
+ // CHECK-NEXT: acc.yield
948
+ // CHECK-NEXT: } loc
949
+
882
950
}
0 commit comments