@@ -240,6 +240,23 @@ func.func @loop_nest_unroll_full() {
240240 return
241241} // UNROLL-FULL }
242242
// Full-unroll test for an affine loop nest inside a gpu.func that is nested
// in a gpu.module. The checks below expect the 4-iteration inner loop to be
// replaced by four copies of its constant, while the outer loop stays a loop.
gpu.module @unroll_full {
  // UNROLL-FULL-LABEL: func @gpu_loop_nest_simplest() {
  gpu.func @gpu_loop_nest_simplest() {
    // UNROLL-FULL: affine.for %arg0 = 0 to 100 step 2 {
    affine.for %i = 0 to 100 step 2 {
      // UNROLL-FULL: %c1_i32 = arith.constant 1 : i32
      // UNROLL-FULL-NEXT: %c1_i32_0 = arith.constant 1 : i32
      // UNROLL-FULL-NEXT: %c1_i32_1 = arith.constant 1 : i32
      // UNROLL-FULL-NEXT: %c1_i32_2 = arith.constant 1 : i32
      affine.for %j = 0 to 4 {
        %x = arith.constant 1 : i32
      }
    }           // UNROLL-FULL: }
    // Check the full terminator name, as the other GPU tests in this file do,
    // rather than the bare substring "return".
    gpu.return  // UNROLL-FULL: gpu.return
  }
}
259+
243260// SHORT-LABEL: func @loop_nest_outer_unroll() {
244261func.func @loop_nest_outer_unroll () {
245262 // SHORT: affine.for %arg0 = 0 to 4 {
@@ -260,6 +277,28 @@ func.func @loop_nest_outer_unroll() {
260277 return // SHORT: return
261278} // SHORT }
262279
// Outer-loop unrolling test for a gpu.func nested in a gpu.module. The SHORT
// checks below expect the 2-iteration outer loop to be replaced by two copies
// of the 4-iteration inner loop.
gpu.module @short {
  // SHORT-LABEL: func @gpu_loop_nest_outer_unroll() {
  gpu.func @gpu_loop_nest_outer_unroll() {
    // SHORT: affine.for %arg0 = 0 to 4 {
    // SHORT-NEXT: %0 = affine.apply [[$MAP0]](%arg0)
    // SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
    // SHORT-NEXT: }
    // SHORT-NEXT: affine.for %arg0 = 0 to 4 {
    // SHORT-NEXT: %0 = affine.apply [[$MAP0]](%arg0)
    // SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
    // SHORT-NEXT: }
    affine.for %i = 0 to 2 {
      affine.for %j = 0 to 4 {
        // Generic-form op names must not be padded inside the quotes, so that
        // they name the same ops the checks above look for.
        %x = "affine.apply" (%j) { map = affine_map<(d0) -> (d0 + 1)> } :
          (index) -> (index)
        %y = "addi32"(%x, %x) : (index, index) -> index
      }
    }
    gpu.return  // SHORT: gpu.return
  }             // SHORT: }
}
301+
263302// We are doing a minimal FileCheck here. We just need this test case to
264303// successfully run. Both %x and %y will get unrolled here as the min trip
265304// count threshold is set to 2.
@@ -345,6 +384,37 @@ func.func @unroll_unit_stride_no_cleanup() {
345384 return
346385}
347386
// Unroll-by-4 test for a gpu.func nested in a gpu.module. The first inner
// loop runs 8 iterations; the checks below expect it to become a step-4 loop
// holding four copies of the body. As the function name indicates, no cleanup
// loop is expected. The second inner loop is empty.
gpu.module @unroll_by_4 {
  // UNROLL-BY-4-LABEL: func @gpu_unroll_unit_stride_no_cleanup() {
  gpu.func @gpu_unroll_unit_stride_no_cleanup() {
    // UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
    affine.for %i = 0 to 100 {
      // UNROLL-BY-4: for [[L1:%arg[0-9]+]] = 0 to 8 step 4 {
      // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
      // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
      // UNROLL-BY-4-NEXT: %2 = affine.apply #map{{[0-9]*}}([[L1]])
      // UNROLL-BY-4-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> i32
      // UNROLL-BY-4-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
      // UNROLL-BY-4-NEXT: %5 = affine.apply #map{{[0-9]*}}([[L1]])
      // UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
      // UNROLL-BY-4-NEXT: %7 = "addi32"(%6, %6) : (i32, i32) -> i32
      // UNROLL-BY-4-NEXT: %8 = affine.apply #map{{[0-9]*}}([[L1]])
      // UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
      // UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
      // UNROLL-BY-4-NEXT: }
      affine.for %j = 0 to 8 {
        // The quoted op name carries no padding so that it prints exactly as
        // the "addi32" the checks above expect.
        %x = "addi32"(%j, %j) : (index, index) -> i32
        %y = "addi32"(%x, %x) : (i32, i32) -> i32
      }
      // empty loop
      // UNROLL-BY-4: affine.for %arg1 = 0 to 8 {
      affine.for %k = 0 to 8 {
      }
    }
    gpu.return
  }
}
417+
348418// UNROLL-BY-4-LABEL: func @unroll_unit_stride_cleanup() {
349419func.func @unroll_unit_stride_cleanup () {
350420 // UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
@@ -632,6 +702,19 @@ func.func @unroll_by_one_should_promote_single_iteration_loop() {
632702// UNROLL-BY-1-NEXT: return
633703}
634704
// Unroll-by-1 test for a gpu.func nested in a gpu.module. The loop runs a
// single iteration; the checks below expect it to be promoted into its
// parent, leaving a constant 0 index feeding "foo" directly.
gpu.module @unroll_by_1 {
  // UNROLL-BY-1-LABEL: func @gpu_unroll_by_one_should_promote_single_iteration_loop()
  gpu.func @gpu_unroll_by_one_should_promote_single_iteration_loop() {
    affine.for %i = 0 to 1 {
      // The quoted op name carries no padding so that it prints exactly as
      // the "foo" the checks below expect.
      %x = "foo"(%i) : (index) -> i32
    }
    gpu.return
  // UNROLL-BY-1-NEXT: %c0 = arith.constant 0 : index
  // UNROLL-BY-1-NEXT: %0 = "foo"(%c0) : (index) -> i32
  // UNROLL-BY-1-NEXT: gpu.return
  }
}
717+
635718// Test unrolling with affine.for iter_args.
636719
637720// UNROLL-BY-4-LABEL: loop_unroll_with_iter_args_and_cleanup
@@ -706,6 +789,23 @@ func.func @unroll_cleanup_loop_with_larger_unroll_factor() {
706789// UNROLL-CLEANUP-LOOP-NEXT: return
707790}
708791
// Cleanup-loop unrolling test for a gpu.func nested in a gpu.module. The loop
// runs 3 iterations and, per the function name, the unroll factor is larger
// than that. The checks below expect three straight-line "foo" calls, the
// first on a constant 0 index and the other two on affine.apply results.
gpu.module @unroll_cleanup_loop {
  // UNROLL-CLEANUP-LOOP-LABEL: func @gpu_unroll_cleanup_loop_with_larger_unroll_factor()
  gpu.func @gpu_unroll_cleanup_loop_with_larger_unroll_factor() {
    affine.for %i = 0 to 3 {
      // The quoted op name carries no padding so that it prints exactly as
      // the "foo" the checks below expect.
      %x = "foo"(%i) : (index) -> i32
    }
    gpu.return
  // UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
  // UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
  // UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
  // UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
  // UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
  // UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
  // UNROLL-CLEANUP-LOOP-NEXT: gpu.return
  }
}
808+
709809// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_smaller_unroll_factor()
710810func.func @unroll_cleanup_loop_with_smaller_unroll_factor () {
711811 affine.for %i = 0 to 7 {
0 commit comments