@@ -401,4 +401,176 @@ extern "C" void acc_combined(int N) {
401
401
// CHECK-NEXT: } loc
402
402
// CHECK-NEXT: acc.terminator
403
403
// CHECK-NEXT: } loc
404
+
405
+ #pragma acc kernels loop worker
406
+ for (unsigned I = 0 ; I < N; ++I);
407
+ // CHECK-NEXT: acc.kernels combined(loop) {
408
+ // CHECK-NEXT: acc.loop combined(kernels) worker {
409
+ // CHECK: acc.yield
410
+ // CHECK-NEXT: } loc
411
+ // CHECK: acc.terminator
412
+ // CHECK-NEXT: } loc
413
+
414
+ #pragma acc kernels loop worker(N)
415
+ for (unsigned I = 0 ; I < N; ++I);
416
+ // CHECK-NEXT: acc.kernels combined(loop) {
417
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
418
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
419
+ // CHECK-NEXT: acc.loop combined(kernels) worker(%[[N_CONV]] : si32) {
420
+ // CHECK: acc.yield
421
+ // CHECK-NEXT: } loc
422
+ // CHECK: acc.terminator
423
+ // CHECK-NEXT: } loc
424
+
425
+ #pragma acc kernels loop worker device_type(nvidia, radeon) worker
426
+ for (unsigned I = 0 ; I < N; ++I);
427
+ // CHECK-NEXT: acc.kernels combined(loop) {
428
+ // CHECK-NEXT: acc.loop combined(kernels) worker([#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>]) {
429
+ // CHECK: acc.yield
430
+ // CHECK-NEXT: } loc
431
+ // CHECK: acc.terminator
432
+ // CHECK-NEXT: } loc
433
+
434
+ #pragma acc kernels loop worker(N) device_type(nvidia, radeon) worker
435
+ for (unsigned I = 0 ; I < N; ++I);
436
+ // CHECK-NEXT: acc.kernels combined(loop) {
437
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
438
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
439
+ // CHECK-NEXT: acc.loop combined(kernels) worker([#acc.device_type<nvidia>, #acc.device_type<radeon>], %[[N_CONV]] : si32) {
440
+ // CHECK: acc.yield
441
+ // CHECK-NEXT: } loc
442
+ // CHECK: acc.terminator
443
+ // CHECK-NEXT: } loc
444
+
445
+ #pragma acc kernels loop worker device_type(nvidia, radeon) worker(N)
446
+ for (unsigned I = 0 ; I < N; ++I);
447
+ // CHECK-NEXT: acc.kernels combined(loop) {
448
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
449
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
450
+ // CHECK-NEXT: acc.loop combined(kernels) worker([#acc.device_type<none>], %[[N_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_CONV]] : si32 [#acc.device_type<radeon>]) {
451
+ // CHECK: acc.yield
452
+ // CHECK-NEXT: } loc
453
+ // CHECK: acc.terminator
454
+ // CHECK-NEXT: } loc
455
+
456
+ #pragma acc kernels loop worker(N) device_type(nvidia, radeon) worker(N + 1)
457
+ for (unsigned I = 0 ; I < N; ++I);
458
+ // CHECK-NEXT: acc.kernels combined(loop) {
459
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
460
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
461
+ // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
462
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
463
+ // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD2]], %[[ONE_CONST]]) nsw : !s32i
464
+ // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
465
+ // CHECK-NEXT: acc.loop combined(kernels) worker(%[[N_CONV]] : si32, %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
466
+ // CHECK: acc.yield
467
+ // CHECK-NEXT: } loc
468
+ // CHECK: acc.terminator
469
+ // CHECK-NEXT: } loc
470
+
471
+ #pragma acc kernels loop device_type(nvidia, radeon) worker(num:N + 1)
472
+ for (unsigned I = 0 ; I < N; ++I);
473
+ // CHECK-NEXT: acc.kernels combined(loop) {
474
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
475
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
476
+ // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD]], %[[ONE_CONST]]) nsw : !s32i
477
+ // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
478
+ // CHECK-NEXT: acc.loop combined(kernels) worker(%[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
479
+ // CHECK: acc.terminator
480
+ // CHECK-NEXT: } loc
481
+
482
+
483
+ #pragma acc kernels loop worker vector device_type(nvidia) worker vector
484
+ for (unsigned I = 0 ; I < N; ++I);
485
+ // CHECK-NEXT: acc.kernels combined(loop) {
486
+ // CHECK-NEXT: acc.loop combined(kernels) worker([#acc.device_type<none>, #acc.device_type<nvidia>]) vector([#acc.device_type<none>, #acc.device_type<nvidia>])
487
+ // CHECK: acc.yield
488
+ // CHECK-NEXT: } loc
489
+ // CHECK: acc.terminator
490
+ // CHECK-NEXT: } loc
491
+
492
+ #pragma acc kernels loop vector
493
+ for (unsigned I = 0 ; I < N; ++I);
494
+ // CHECK-NEXT: acc.kernels combined(loop) {
495
+ // CHECK: acc.loop combined(kernels) vector {
496
+ // CHECK: acc.yield
497
+ // CHECK-NEXT: } loc
498
+ // CHECK: acc.terminator
499
+ // CHECK-NEXT: } loc
500
+
501
+ #pragma acc kernels loop vector(N)
502
+ for (unsigned I = 0 ; I < N; ++I);
503
+ // CHECK-NEXT: acc.kernels combined(loop) {
504
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
505
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
506
+ // CHECK-NEXT: acc.loop combined(kernels) vector(%[[N_CONV]] : si32) {
507
+ // CHECK: acc.yield
508
+ // CHECK-NEXT: } loc
509
+ // CHECK: acc.terminator
510
+ // CHECK-NEXT: } loc
511
+
512
+ #pragma acc kernels loop vector device_type(nvidia, radeon) vector
513
+ for (unsigned I = 0 ; I < N; ++I);
514
+ // CHECK-NEXT: acc.kernels combined(loop) {
515
+ // CHECK-NEXT: acc.loop combined(kernels) vector([#acc.device_type<none>, #acc.device_type<nvidia>, #acc.device_type<radeon>]) {
516
+ // CHECK: acc.yield
517
+ // CHECK-NEXT: } loc
518
+ // CHECK: acc.terminator
519
+ // CHECK-NEXT: } loc
520
+
521
+ #pragma acc kernels loop vector(N) device_type(nvidia, radeon) vector
522
+ for (unsigned I = 0 ; I < N; ++I);
523
+ // CHECK-NEXT: acc.kernels combined(loop) {
524
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
525
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
526
+ // CHECK-NEXT: acc.loop combined(kernels) vector([#acc.device_type<nvidia>, #acc.device_type<radeon>], %[[N_CONV]] : si32) {
527
+ // CHECK: acc.yield
528
+ // CHECK-NEXT: } loc
529
+ // CHECK: acc.terminator
530
+ // CHECK-NEXT: } loc
531
+
532
+ #pragma acc kernels loop vector(N) device_type(nvidia, radeon) vector(N + 1)
533
+ for (unsigned I = 0 ; I < N; ++I);
534
+ // CHECK-NEXT: acc.kernels combined(loop) {
535
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
536
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
537
+ // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
538
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
539
+ // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD2]], %[[ONE_CONST]]) nsw : !s32i
540
+ // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
541
+ // CHECK-NEXT: acc.loop combined(kernels) vector(%[[N_CONV]] : si32, %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
542
+ // CHECK: acc.yield
543
+ // CHECK-NEXT: } loc
544
+ // CHECK: acc.terminator
545
+ // CHECK-NEXT: } loc
546
+
547
+ #pragma acc kernels loop device_type(nvidia, radeon) vector(length:N + 1)
548
+ for (unsigned I = 0 ; I < N; ++I);
549
+ // CHECK-NEXT: acc.kernels combined(loop) {
550
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
551
+ // CHECK-NEXT: %[[ONE_CONST:.*]] = cir.const #cir.int<1> : !s32i
552
+ // CHECK-NEXT: %[[N_PLUS_ONE:.*]] = cir.binop(add, %[[N_LOAD]], %[[ONE_CONST]]) nsw : !s32i
553
+ // CHECK-NEXT: %[[N_PLUS_ONE_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_PLUS_ONE]] : !s32i to si32
554
+ // CHECK-NEXT: acc.loop combined(kernels) vector(%[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<nvidia>], %[[N_PLUS_ONE_CONV]] : si32 [#acc.device_type<radeon>]) {
555
+ // CHECK: acc.yield
556
+ // CHECK-NEXT: } loc
557
+ // CHECK: acc.terminator
558
+ // CHECK-NEXT: } loc
559
+
560
+ #pragma acc kernels loop worker(N) vector(N) device_type(nvidia) worker(N) vector(N)
561
+ for (unsigned I = 0 ; I < N; ++I);
562
+ // CHECK-NEXT: acc.kernels combined(loop) {
563
+ // CHECK-NEXT: %[[N_LOAD:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
564
+ // CHECK-NEXT: %[[N_CONV:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD]] : !s32i to si32
565
+ // CHECK-NEXT: %[[N_LOAD2:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
566
+ // CHECK-NEXT: %[[N_CONV2:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD2]] : !s32i to si32
567
+ // CHECK-NEXT: %[[N_LOAD3:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
568
+ // CHECK-NEXT: %[[N_CONV3:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD3]] : !s32i to si32
569
+ // CHECK-NEXT: %[[N_LOAD4:.*]] = cir.load %[[ALLOCA_N]] : !cir.ptr<!s32i>, !s32i
570
+ // CHECK-NEXT: %[[N_CONV4:.*]] = builtin.unrealized_conversion_cast %[[N_LOAD4]] : !s32i to si32
571
+ // CHECK-NEXT: acc.loop combined(kernels) worker(%[[N_CONV]] : si32, %[[N_CONV3]] : si32 [#acc.device_type<nvidia>]) vector(%[[N_CONV2]] : si32, %[[N_CONV4]] : si32 [#acc.device_type<nvidia>]) {
572
+ // CHECK: acc.yield
573
+ // CHECK-NEXT: } loc
574
+ // CHECK: acc.terminator
575
+ // CHECK-NEXT: } loc
404
576
}
0 commit comments