@@ -193,4 +193,134 @@ extern "C" void acc_loop(int *A, int *B, int *C, int N) {
193193 // CHECK: acc.yield
194194 // CHECK-NEXT: } loc
195195
196+
197+ #pragma acc kernels
198+ {
199+
200+ #pragma acc loop worker
201+ for (unsigned I = 0 ; I < N; ++I);
202+ // CHECK: acc.loop worker {
203+ // CHECK: acc.yield
204+ // CHECK-NEXT: } loc
205+
206+ #pragma acc loop worker(N)
207+ for (unsigned I = 0 ; I < N; ++I);
208+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
209+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
210+ // CHECK-NEXT: acc.loop worker(%[[N_CONV]] : si32) {
211+ // CHECK: acc.yield
212+ // CHECK-NEXT: } loc
213+
214+ #pragma acc loop worker device_type(nvidia, radeon) worker
215+ for (unsigned I = 0 ; I < N; ++I);
216+ // CHECK-NEXT: acc.loop worker([#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>]) {
217+ // CHECK: acc.yield
218+ // CHECK-NEXT: } loc
219+
220+ #pragma acc loop worker(N) device_type(nvidia, radeon) worker
221+ for (unsigned I = 0 ; I < N; ++I);
222+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
223+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
224+ // CHECK-NEXT: acc.loop worker([#acc.device_type<nvidia>, #acc.device_type<radeon>], %[[N_CONV]] : si32) {
225+ // CHECK: acc.yield
226+ // CHECK-NEXT: } loc
227+
228+ #pragma acc loop worker device_type(nvidia, radeon) worker(N)
229+ for (unsigned I = 0 ; I < N; ++I);
230+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
231+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
232+ // CHECK-NEXT: acc.loop worker([#acc.device_type<none>], %[[N_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_CONV]] : si32 [#acc.device_type<radeon>]) {
233+ // CHECK: acc.yield
234+ // CHECK-NEXT: } loc
235+
236+ #pragma acc loop worker(N) device_type(nvidia, radeon) worker(N + 1)
237+ for (unsigned I = 0 ; I < N; ++I);
238+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
239+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
240+ // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
241+ // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
242+ // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD2]], %[[ONE_CONST]]) nsw : !s32i
243+ // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
244+ // CHECK-NEXT: acc.loop worker(%[[N_CONV]] : si32, %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
245+ // CHECK: acc.yield
246+ // CHECK-NEXT: } loc
247+
248+ #pragma acc loop device_type(nvidia, radeon) worker(num:N + 1)
249+ for (unsigned I = 0 ; I < N; ++I);
250+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
251+ // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
252+ // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD]], %[[ONE_CONST]]) nsw : !s32i
253+ // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
254+ // CHECK-NEXT: acc.loop worker(%[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
255+ // CHECK: acc.yield
255+ // CHECK-NEXT: } loc
255+
256+ #pragma acc loop vector
257+ for (unsigned I = 0 ; I < N; ++I);
258+ // CHECK: acc.loop vector {
259+ // CHECK: acc.yield
260+ // CHECK-NEXT: } loc
261+
262+ #pragma acc loop vector(N)
263+ for (unsigned I = 0 ; I < N; ++I);
264+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
265+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
266+ // CHECK-NEXT: acc.loop vector(%[[N_CONV]] : si32) {
267+ // CHECK: acc.yield
268+ // CHECK-NEXT: } loc
269+
270+ #pragma acc loop vector device_type(nvidia, radeon) vector
271+ for (unsigned I = 0 ; I < N; ++I);
272+ // CHECK-NEXT: acc.loop vector([#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>]) {
273+ // CHECK: acc.yield
274+ // CHECK-NEXT: } loc
275+
276+ #pragma acc loop vector(N) device_type(nvidia, radeon) vector
277+ for (unsigned I = 0 ; I < N; ++I);
278+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
279+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
280+ // CHECK-NEXT: acc.loop vector([#acc.device_type<nvidia>, #acc.device_type<radeon>], %[[N_CONV]] : si32) {
281+ // CHECK: acc.yield
282+ // CHECK-NEXT: } loc
283+
284+ #pragma acc loop vector(N) device_type(nvidia, radeon) vector(N + 1)
285+ for (unsigned I = 0 ; I < N; ++I);
286+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
287+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
288+ // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
289+ // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
290+ // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD2]], %[[ONE_CONST]]) nsw : !s32i
291+ // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
292+ // CHECK-NEXT: acc.loop vector(%[[N_CONV]] : si32, %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
293+ // CHECK: acc.yield
294+ // CHECK-NEXT: } loc
295+
296+ #pragma acc loop device_type(nvidia, radeon) vector(length:N + 1)
297+ for (unsigned I = 0 ; I < N; ++I);
298+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
299+ // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
300+ // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD]], %[[ONE_CONST]]) nsw : !s32i
301+ // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
302+ // CHECK-NEXT: acc.loop vector(%[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
303+ // CHECK: acc.yield
304+ // CHECK-NEXT: } loc
305+
306+ #pragma acc loop worker vector device_type(nvidia) worker vector
307+ for (unsigned I = 0 ; I < N; ++I);
308+ // CHECK-NEXT: acc.loop worker([#acc.device_type<none>, #acc.device_type<nvidia>]) vector([#acc.device_type<none>, #acc.device_type<nvidia>]) {
309+ // CHECK: acc.yield
310+ // CHECK-NEXT: } loc
311+
312+ #pragma acc loop worker(N) vector(N) device_type(nvidia) worker(N) vector(N)
313+ for (unsigned I = 0 ; I < N; ++I);
314+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
315+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
316+ // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
317+ // CHECK-NEXT: %[[N_CONV2:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD2]] : !s32i to si32
318+ // CHECK-NEXT: %[[N_LOAD3:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
319+ // CHECK-NEXT: %[[N_CONV3:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD3]] : !s32i to si32
320+ // CHECK-NEXT: %[[N_LOAD4:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
321+ // CHECK-NEXT: %[[N_CONV4:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD4]] : !s32i to si32
322+ // CHECK-NEXT: acc.loop worker(%[[N_CONV]] : si32, %[[N_CONV3]] : si32 [#acc.device_type<nvidia>]) vector(%[[N_CONV2]] : si32, %[[N_CONV4]] : si32 [#acc.device_type<nvidia>]) {
323+ // CHECK: acc.yield
324+ // CHECK-NEXT: } loc
325+ }
196326}
0 commit comments