|
1 | | -// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll-jam="unroll-jam-factor=2" | FileCheck %s |
2 | | -// RUN: mlir-opt -allow-unregistered-dialect %s -affine-loop-unroll-jam="unroll-jam-factor=4" | FileCheck --check-prefix=UJAM-FOUR %s |
| 1 | +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll-jam{unroll-jam-factor=2}))" | FileCheck %s |
| 2 | +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(func.func(affine-loop-unroll-jam{unroll-jam-factor=4}))" | FileCheck --check-prefix=UJAM-FOUR %s |
| 3 | +// RUN: mlir-opt -allow-unregistered-dialect %s -pass-pipeline="builtin.module(gpu.module(gpu.func(affine-loop-unroll-jam{unroll-jam-factor=2})))" | FileCheck --check-prefix=GPU-HJAM %s |
3 | 4 |
|
4 | 5 | // CHECK-DAG: [[$MAP_PLUS_1:#map[0-9]*]] = affine_map<(d0) -> (d0 + 1)> |
5 | 6 | // CHECK-DAG: [[$MAP_DIV_OFFSET:#map[0-9]*]] = affine_map<()[s0] -> (((s0 - 1) floordiv 2) * 2 + 1)> |
|
10 | 11 | // UJAM-FOUR-DAG: [[$MAP_PLUS_2:#map[0-9]*]] = affine_map<(d0) -> (d0 + 2)> |
11 | 12 | // UJAM-FOUR-DAG: [[$MAP_PLUS_3:#map[0-9]*]] = affine_map<(d0) -> (d0 + 3)> |
12 | 13 |
|
| 14 | +// GPU-HJAM-DAG: [[$MAP_PLUS_1:#map[0-9]*]] = affine_map<(d0) -> (d0 + 1)> |
| 15 | + |
13 | 16 | // CHECK-LABEL: func @unroll_jam_imperfect_nest() { |
14 | 17 | func.func @unroll_jam_imperfect_nest() { |
15 | 18 | affine.for %i = 0 to 101 { |
@@ -46,6 +49,44 @@ func.func @unroll_jam_imperfect_nest() { |
46 | 49 | // CHECK-NEXT: "foo"(%c100, %{{.*}}) |
47 | 50 | // CHECK-NEXT: return |
48 | 51 |
|
| 52 | +gpu.module @unroll_jam { |
| 53 | + // GPU-HJAM-LABEL: gpu.func @unroll_jam_imperfect_nest() { |
| 54 | + gpu.func @unroll_jam_imperfect_nest() { |
| 55 | + affine.for %i = 0 to 101 { |
| 56 | + %x = "addi32"(%i, %i) : (index, index) -> i32 |
| 57 | + affine.for %j = 0 to 17 { |
| 58 | + %y = "addi32"(%i, %i) : (index, index) -> i32 |
| 59 | + %z = "addi32"(%y, %y) : (i32, i32) -> i32 |
| 60 | + } |
| 61 | + %w = "foo"(%i, %x) : (index, i32) -> i32 |
| 62 | + } |
| 63 | + gpu.return |
| 64 | + } |
| 65 | + // GPU-HJAM: affine.for [[IV0:%arg[0-9]+]] = 0 to 100 step 2 { |
| 66 | + // GPU-HJAM-NEXT: [[RES1:%[0-9]+]] = "addi32"([[IV0]], [[IV0]]) |
| 67 | + // GPU-HJAM-NEXT: [[INC:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]]) |
| 68 | + // GPU-HJAM-NEXT: [[RES2:%[0-9]+]] = "addi32"([[INC]], [[INC]]) |
| 69 | + // GPU-HJAM-NEXT: affine.for %{{.*}} = 0 to 17 { |
| 70 | + // GPU-HJAM-NEXT: [[RES3:%[0-9]+]] = "addi32"([[IV0]], [[IV0]]) |
| 71 | + // GPU-HJAM-NEXT: "addi32"([[RES3]], [[RES3]]) : (i32, i32) -> i32 |
| 72 | + // GPU-HJAM-NEXT: [[INC1:%[0-9]+]] = affine.apply [[$MAP_PLUS_1]]([[IV0]]) |
| 73 | + // GPU-HJAM-NEXT: [[RES4:%[0-9]+]] = "addi32"([[INC1]], [[INC1]]) |
| 74 | + // GPU-HJAM-NEXT: "addi32"([[RES4]], [[RES4]]) : (i32, i32) -> i32 |
| 75 | + // GPU-HJAM-NEXT: } |
| 76 | + // GPU-HJAM: "foo"([[IV0]], [[RES1]]) |
| 77 | + // GPU-HJAM-NEXT: affine.apply [[$MAP_PLUS_1]]([[IV0]]) |
| 78 | + // GPU-HJAM-NEXT: "foo"({{.*}}, [[RES2]]) |
| 79 | + // GPU-HJAM: } |
| 80 | + // Cleanup loop: the outer trip count (101) is not a multiple of the unroll-jam factor (2), so the last iteration (%i = 100) is emitted once after the jammed loop. |
| 81 | + // GPU-HJAM: "addi32"(%c100, %c100) |
| 82 | + // GPU-HJAM-NEXT: affine.for [[IV0]] = 0 to 17 { |
| 83 | + // GPU-HJAM-NEXT: [[RESC:%[0-9]+]] = "addi32"(%c100, %c100) |
| 84 | + // GPU-HJAM-NEXT: "addi32"([[RESC]], [[RESC]]) : (i32, i32) -> i32 |
| 85 | + // GPU-HJAM-NEXT: } |
| 86 | + // GPU-HJAM-NEXT: "foo"(%c100, %{{.*}}) |
| 87 | + // GPU-HJAM-NEXT: gpu.return |
| 88 | +} |
| 89 | + |
49 | 90 | // CHECK-LABEL: func @loop_nest_unknown_count_1 |
50 | 91 | // CHECK-SAME: [[N:arg[0-9]+]]: index |
51 | 92 | func.func @loop_nest_unknown_count_1(%N : index) { |
|
0 commit comments