@@ -109,4 +109,88 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
109109 // CHECK: acc.loop {
110110 // CHECK: acc.yield
111111 // CHECK-NEXT: } attributes {auto_ = [#acc.device_type<none>]} loc
112+
113+ #pragma acc loop collapse(1) device_type(radeon)
114+ for (unsigned I = 0 ; I < N; ++I)
115+ for (unsigned J = 0 ; J < N; ++J)
116+ for (unsigned K = 0 ; K < N; ++K);
117+ // CHECK: acc.loop {
118+ // CHECK: acc.yield
119+ // CHECK-NEXT: } attributes {collapse = [1], collapseDeviceType = [#acc.device_type<none>]}
120+
121+ #pragma acc loop collapse(1) device_type(radeon) collapse (2)
122+ for (unsigned I = 0 ; I < N; ++I)
123+ for (unsigned J = 0 ; J < N; ++J)
124+ for (unsigned K = 0 ; K < N; ++K);
125+ // CHECK: acc.loop {
126+ // CHECK: acc.yield
127+ // CHECK-NEXT: } attributes {collapse = [1, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>]}
128+
129+ #pragma acc loop collapse(1) device_type(radeon, nvidia) collapse (2)
130+ for (unsigned I = 0 ; I < N; ++I)
131+ for (unsigned J = 0 ; J < N; ++J)
132+ for (unsigned K = 0 ; K < N; ++K);
133+ // CHECK: acc.loop {
134+ // CHECK: acc.yield
135+ // CHECK-NEXT: } attributes {collapse = [1, 2, 2], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>]}
136+ #pragma acc loop collapse(1) device_type(radeon, nvidia) collapse(2) device_type(host) collapse(3)
137+ for (unsigned I = 0 ; I < N; ++I)
138+ for (unsigned J = 0 ; J < N; ++J)
139+ for (unsigned K = 0 ; K < N; ++K);
140+ // CHECK: acc.loop {
141+ // CHECK: acc.yield
142+ // CHECK-NEXT: } attributes {collapse = [1, 2, 2, 3], collapseDeviceType = [#acc.device_type<none>, #acc.device_type<radeon>, #acc.device_type<nvidia>, #acc.device_type<host>]}
143+
144+ #pragma acc loop tile(1, 2, 3)
145+ for (unsigned I = 0 ; I < N; ++I)
146+ for (unsigned J = 0 ; J < N; ++J)
147+ for (unsigned K = 0 ; K < N; ++K);
148+ // CHECK: %[[ONE_CONST:.*]] = arith.constant 1 : i64
149+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
150+ // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
151+ // CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64}) {
152+ // CHECK: acc.yield
153+ // CHECK-NEXT: } loc
154+ #pragma acc loop tile(2) device_type(radeon)
155+ for (unsigned I = 0 ; I < N; ++I)
156+ for (unsigned J = 0 ; J < N; ++J)
157+ for (unsigned K = 0 ; K < N; ++K);
158+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
159+ // CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}) {
160+ // CHECK: acc.yield
161+ // CHECK-NEXT: } loc
162+ #pragma acc loop tile(2) device_type(radeon) tile (1, *)
163+ for (unsigned I = 0 ; I < N; ++I)
164+ for (unsigned J = 0 ; J < N; ++J)
165+ for (unsigned K = 0 ; K < N; ++K);
166+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
167+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
168+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
169+ // CHECK-NEXT: acc.loop tile({%[[TWO_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[STAR_CONST]] : i64} [#acc.device_type<radeon>]) {
170+ // CHECK: acc.yield
171+ // CHECK-NEXT: } loc
172+ #pragma acc loop tile(*) device_type(radeon, nvidia) tile (1, 2)
173+ for (unsigned I = 0 ; I < N; ++I)
174+ for (unsigned J = 0 ; J < N; ++J)
175+ for (unsigned K = 0 ; K < N; ++K);
176+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
177+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
178+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
179+ // CHECK-NEXT: acc.loop tile({%[[STAR_CONST]] : i64}, {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<radeon>], {%[[ONE_CONST]] : i64, %[[TWO_CONST]] : i64} [#acc.device_type<nvidia>]) {
180+ // CHECK: acc.yield
181+ // CHECK-NEXT: } loc
182+ #pragma acc loop tile(1) device_type(radeon, nvidia) tile(2, 3) device_type(host) tile(*, *, *)
183+ for (unsigned I = 0 ; I < N; ++I)
184+ for (unsigned J = 0 ; J < N; ++J)
185+ for (unsigned K = 0 ; K < N; ++K);
186+ // CHECK-NEXT: %[[ONE_CONST:.*]] = arith.constant 1 : i64
187+ // CHECK-NEXT: %[[TWO_CONST:.*]] = arith.constant 2 : i64
188+ // CHECK-NEXT: %[[THREE_CONST:.*]] = arith.constant 3 : i64
189+ // CHECK-NEXT: %[[STAR_CONST:.*]] = arith.constant -1 : i64
190+ // CHECK-NEXT: %[[STAR2_CONST:.*]] = arith.constant -1 : i64
191+ // CHECK-NEXT: %[[STAR3_CONST:.*]] = arith.constant -1 : i64
192+ // CHECK-NEXT: acc.loop tile({%[[ONE_CONST]] : i64}, {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<radeon>], {%[[TWO_CONST]] : i64, %[[THREE_CONST]] : i64} [#acc.device_type<nvidia>], {%[[STAR_CONST]] : i64, %[[STAR2_CONST]] : i64, %[[STAR3_CONST]] : i64} [#acc.device_type<host>]) {
193+ // CHECK: acc.yield
194+ // CHECK-NEXT: } loc
195+
112196}
0 commit comments