1- // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-full=true}),gpu.module(gpu.func(affine-loop-unroll{unroll-full=true})))" | FileCheck %s --check-prefix UNROLL-FULL
2- // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-full=true unroll-full-threshold=2}),gpu.module(gpu.func(affine-loop-unroll{unroll-full=true unroll-full-threshold=2})))" | FileCheck %s --check-prefix SHORT
3- // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=4}),gpu.module(gpu.func(affine-loop-unroll{unroll-factor=4})))" | FileCheck %s --check-prefix UNROLL-BY-4
4- // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=1}),gpu.module(gpu.func(affine-loop-unroll{unroll-factor=1})))" | FileCheck %s --check-prefix UNROLL-BY-1
5- // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=5 cleanup-unroll=true}),gpu.module(gpu.func(affine-loop-unroll{unroll-factor=5 cleanup-unroll=true})))" | FileCheck %s --check-prefix UNROLL-CLEANUP-LOOP
1+ // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-full=true}))" | FileCheck %s --check-prefix UNROLL-FULL
2+ // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-full=true unroll-full-threshold=2}))" | FileCheck %s --check-prefix SHORT
3+ // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=4}))" | FileCheck %s --check-prefix UNROLL-BY-4
4+ // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=1}))" | FileCheck %s --check-prefix UNROLL-BY-1
5+ // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll{unroll-factor=5 cleanup-unroll=true}))" | FileCheck %s --check-prefix UNROLL-CLEANUP-LOOP
6+ // RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(gpu.module(gpu.func(affine-loop-unroll{unroll-full=true})))" | FileCheck %s --check-prefix GPU-UNROLL-FULL
67
78// UNROLL-FULL-DAG: [[$MAP0:#map[0-9]*]] = affine_map<(d0) -> (d0 + 1)>
89// UNROLL-FULL-DAG: [[$MAP1:#map[0-9]*]] = affine_map<(d0) -> (d0 + 2)>
@@ -241,19 +242,19 @@ func.func @loop_nest_unroll_full() {
241242} // UNROLL-FULL }
242243
243244gpu.module @unroll_full {
244- // UNROLL-FULL-LABEL: func @gpu_loop_nest_simplest() {
245+ // GPU-UNROLL-FULL-LABEL: func @gpu_loop_nest_simplest() {
245246 gpu.func @gpu_loop_nest_simplest () {
246- // UNROLL-FULL: affine.for %arg0 = 0 to 100 step 2 {
247+ // GPU-UNROLL-FULL: affine.for %arg0 = 0 to 100 step 2 {
247248 affine.for %i = 0 to 100 step 2 {
248- // UNROLL-FULL: %c1_i32 = arith.constant 1 : i32
249- // UNROLL-FULL-NEXT: %c1_i32_0 = arith.constant 1 : i32
250- // UNROLL-FULL-NEXT: %c1_i32_1 = arith.constant 1 : i32
251- // UNROLL-FULL-NEXT: %c1_i32_2 = arith.constant 1 : i32
249+ // GPU-UNROLL-FULL: %c1_i32 = arith.constant 1 : i32
250+ // GPU-UNROLL-FULL-NEXT: %c1_i32_0 = arith.constant 1 : i32
251+ // GPU-UNROLL-FULL-NEXT: %c1_i32_1 = arith.constant 1 : i32
252+ // GPU-UNROLL-FULL-NEXT: %c1_i32_2 = arith.constant 1 : i32
252253 affine.for %j = 0 to 4 {
253254 %x = arith.constant 1 : i32
254255 }
255- } // UNROLL-FULL: }
256- gpu.return // UNROLL-FULL: return
256+ } // GPU-UNROLL-FULL: }
257+ gpu.return // GPU-UNROLL-FULL: return
257258 }
258259}
259260
@@ -277,28 +278,6 @@ func.func @loop_nest_outer_unroll() {
277278 return // SHORT: return
278279} // SHORT }
279280
280- gpu.module @short {
281- // SHORT-LABEL: func @gpu_loop_nest_outer_unroll() {
282- gpu.func @gpu_loop_nest_outer_unroll () {
283- // SHORT: affine.for %arg0 = 0 to 4 {
284- // SHORT-NEXT: %0 = affine.apply [[$MAP0]](%arg0)
285- // SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
286- // SHORT-NEXT: }
287- // SHORT-NEXT: affine.for %arg0 = 0 to 4 {
288- // SHORT-NEXT: %0 = affine.apply [[$MAP0]](%arg0)
289- // SHORT-NEXT: %1 = "addi32"(%0, %0) : (index, index) -> index
290- // SHORT-NEXT: }
291- affine.for %i = 0 to 2 {
292- affine.for %j = 0 to 4 {
293- %x = " affine.apply" (%j ) { map = affine_map <(d0 ) -> (d0 + 1 )> } :
294- (index ) -> (index )
295- %y = " addi32" (%x , %x ) : (index , index ) -> index
296- }
297- }
298- gpu.return // SHORT: gpu.return
299- } // SHORT }
300- }
301-
302281// We are doing a minimal FileCheck here. We just need this test case to
303282// successfully run. Both %x and %y will get unrolled here as the min trip
304283// count threshold set to 2.
@@ -384,37 +363,6 @@ func.func @unroll_unit_stride_no_cleanup() {
384363 return
385364}
386365
387- gpu.module @unroll_by_4 {
388- // UNROLL-BY-4-LABEL: func @gpu_unroll_unit_stride_no_cleanup() {
389- gpu.func @gpu_unroll_unit_stride_no_cleanup () {
390- // UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
391- affine.for %i = 0 to 100 {
392- // UNROLL-BY-4: for [[L1:%arg[0-9]+]] = 0 to 8 step 4 {
393- // UNROLL-BY-4-NEXT: %0 = "addi32"([[L1]], [[L1]]) : (index, index) -> i32
394- // UNROLL-BY-4-NEXT: %1 = "addi32"(%0, %0) : (i32, i32) -> i32
395- // UNROLL-BY-4-NEXT: %2 = affine.apply #map{{[0-9]*}}([[L1]])
396- // UNROLL-BY-4-NEXT: %3 = "addi32"(%2, %2) : (index, index) -> i32
397- // UNROLL-BY-4-NEXT: %4 = "addi32"(%3, %3) : (i32, i32) -> i32
398- // UNROLL-BY-4-NEXT: %5 = affine.apply #map{{[0-9]*}}([[L1]])
399- // UNROLL-BY-4-NEXT: %6 = "addi32"(%5, %5) : (index, index) -> i32
400- // UNROLL-BY-4-NEXT: %7 = "addi32"(%6, %6) : (i32, i32) -> i32
401- // UNROLL-BY-4-NEXT: %8 = affine.apply #map{{[0-9]*}}([[L1]])
402- // UNROLL-BY-4-NEXT: %9 = "addi32"(%8, %8) : (index, index) -> i32
403- // UNROLL-BY-4-NEXT: %10 = "addi32"(%9, %9) : (i32, i32) -> i32
404- // UNROLL-BY-4-NEXT: }
405- affine.for %j = 0 to 8 {
406- %x = " addi32" (%j , %j ) : (index , index ) -> i32
407- %y = " addi32" (%x , %x ) : (i32 , i32 ) -> i32
408- }
409- // empty loop
410- // UNROLL-BY-4: affine.for %arg1 = 0 to 8 {
411- affine.for %k = 0 to 8 {
412- }
413- }
414- gpu.return
415- }
416- }
417-
418366// UNROLL-BY-4-LABEL: func @unroll_unit_stride_cleanup() {
419367func.func @unroll_unit_stride_cleanup () {
420368 // UNROLL-BY-4: affine.for %arg0 = 0 to 100 {
@@ -702,19 +650,6 @@ func.func @unroll_by_one_should_promote_single_iteration_loop() {
702650// UNROLL-BY-1-NEXT: return
703651}
704652
705- gpu.module @unroll_by_1 {
706- // UNROLL-BY-1-LABEL: func @gpu_unroll_by_one_should_promote_single_iteration_loop()
707- gpu.func @gpu_unroll_by_one_should_promote_single_iteration_loop () {
708- affine.for %i = 0 to 1 {
709- %x = " foo" (%i ) : (index ) -> i32
710- }
711- gpu.return
712- // UNROLL-BY-1-NEXT: %c0 = arith.constant 0 : index
713- // UNROLL-BY-1-NEXT: %0 = "foo"(%c0) : (index) -> i32
714- // UNROLL-BY-1-NEXT: gpu.return
715- }
716- }
717-
718653// Test unrolling with affine.for iter_args.
719654
720655// UNROLL-BY-4-LABEL: loop_unroll_with_iter_args_and_cleanup
@@ -789,23 +724,6 @@ func.func @unroll_cleanup_loop_with_larger_unroll_factor() {
789724// UNROLL-CLEANUP-LOOP-NEXT: return
790725}
791726
792- gpu.module @unroll_cleanup_loop {
793- // UNROLL-CLEANUP-LOOP-LABEL: func @gpu_unroll_cleanup_loop_with_larger_unroll_factor()
794- gpu.func @gpu_unroll_cleanup_loop_with_larger_unroll_factor () {
795- affine.for %i = 0 to 3 {
796- %x = " foo" (%i ) : (index ) -> i32
797- }
798- gpu.return
799- // UNROLL-CLEANUP-LOOP-NEXT: %[[C0:.*]] = arith.constant 0 : index
800- // UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[C0]]) : (index) -> i32
801- // UNROLL-CLEANUP-LOOP-NEXT: %[[V1:.*]] = affine.apply {{.*}}
802- // UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V1]]) : (index) -> i32
803- // UNROLL-CLEANUP-LOOP-NEXT: %[[V2:.*]] = affine.apply {{.*}}
804- // UNROLL-CLEANUP-LOOP-NEXT: {{.*}} = "foo"(%[[V2]]) : (index) -> i32
805- // UNROLL-CLEANUP-LOOP-NEXT: gpu.return
806- }
807- }
808-
809727// UNROLL-CLEANUP-LOOP-LABEL: func @unroll_cleanup_loop_with_smaller_unroll_factor()
810728func.func @unroll_cleanup_loop_with_smaller_unroll_factor () {
811729 affine.for %i = 0 to 7 {
0 commit comments