Skip to content

Commit 9a9169c

Browse files
committed
multiply lower bound in loop range folding
1 parent c77b107 commit 9a9169c

File tree

2 files changed

+62
-20
lines changed

2 files changed

+62
-20
lines changed

mlir/lib/Dialect/SCF/Transforms/LoopRangeFolding.cpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -72,9 +72,11 @@ void ForLoopRangeFolding::runOnOperation() {
7272
op.setUpperBound(ubFold->getResult(0));
7373

7474
} else if (isa<arith::MulIOp>(user)) {
75+
Operation *lbFold = b.clone(*user, lbMap);
7576
Operation *ubFold = b.clone(*user, ubMap);
7677
Operation *stepFold = b.clone(*user, stepMap);
7778

79+
op.setLowerBound(lbFold->getResult(0));
7880
op.setUpperBound(ubFold->getResult(0));
7981
op.setStep(stepFold->getResult(0));
8082
}

mlir/test/Dialect/SCF/loop-range.mlir

Lines changed: 60 additions & 20 deletions
Original file line numberDiff line numberDiff line change
@@ -21,12 +21,13 @@ func.func @fold_one_loop(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
2121
// CHECK: %[[C4:.*]] = arith.constant 4 : index
2222
// CHECK: %[[I0:.*]] = arith.addi %[[ARG2]], %[[C0]] : index
2323
// CHECK: %[[I1:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
24-
// CHECK: %[[I2:.*]] = arith.muli %[[I1]], %[[C4]] : index
25-
// CHECK: %[[I3:.*]] = arith.muli %[[C1]], %[[C4]] : index
26-
// CHECK: scf.for %[[I:.*]] = %[[I0]] to %[[I2]] step %[[I3]] {
27-
// CHECK: %[[I4:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
28-
// CHECK: %[[I5:.*]] = arith.muli %[[I4]], %[[I4]] : i32
29-
// CHECK: memref.store %[[I5]], %[[ARG0]]{{\[}}%[[I]]
24+
// CHECK: %[[I2:.*]] = arith.muli %[[I0]], %[[C4]] : index
25+
// CHECK: %[[I3:.*]] = arith.muli %[[I1]], %[[C4]] : index
26+
// CHECK: %[[I4:.*]] = arith.muli %[[C1]], %[[C4]] : index
27+
// CHECK: scf.for %[[I:.*]] = %[[I2]] to %[[I3]] step %[[I4]] {
28+
// CHECK: %[[I5:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
29+
// CHECK: %[[I6:.*]] = arith.muli %[[I5]], %[[I5]] : i32
30+
// CHECK: memref.store %[[I6]], %[[ARG0]]{{\[}}%[[I]]
3031

3132
func.func @fold_one_loop2(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
3233
%c0 = arith.constant 0 : index
@@ -54,12 +55,48 @@ func.func @fold_one_loop2(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
5455
// CHECK: scf.for %[[J:.*]] = %[[C0]] to %[[C10]] step %[[C1]] {
5556
// CHECK: %[[I0:.*]] = arith.addi %[[ARG2]], %[[C0]] : index
5657
// CHECK: %[[I1:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
57-
// CHECK: %[[I2:.*]] = arith.muli %[[I1]], %[[C4]] : index
58-
// CHECK: %[[I3:.*]] = arith.muli %[[C1]], %[[C4]] : index
59-
// CHECK: scf.for %[[I:.*]] = %[[I0]] to %[[I2]] step %[[I3]] {
60-
// CHECK: %[[I4:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
61-
// CHECK: %[[I5:.*]] = arith.muli %[[I4]], %[[I4]] : i32
62-
// CHECK: memref.store %[[I5]], %[[ARG0]]{{\[}}%[[I]]
58+
// CHECK: %[[I2:.*]] = arith.muli %[[I0]], %[[C4]] : index
59+
// CHECK: %[[I3:.*]] = arith.muli %[[I1]], %[[C4]] : index
60+
// CHECK: %[[I4:.*]] = arith.muli %[[C1]], %[[C4]] : index
61+
// CHECK: scf.for %[[I:.*]] = %[[I2]] to %[[I3]] step %[[I4]] {
62+
// CHECK: %[[I5:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
63+
// CHECK: %[[I6:.*]] = arith.muli %[[I5]], %[[I5]] : i32
64+
// CHECK: memref.store %[[I6]], %[[ARG0]]{{\[}}%[[I]]
65+
66+
func.func @fold_one_loop3(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
67+
%c0 = arith.constant 0 : index
68+
%c1 = arith.constant 1 : index
69+
%c4 = arith.constant 4 : index
70+
%c10 = arith.constant 10 : index
71+
scf.for %j = %c0 to %c10 step %c1 {
72+
scf.for %i = %c1 to %arg1 step %c1 {
73+
%0 = arith.addi %arg2, %i : index
74+
%1 = arith.muli %0, %c4 : index
75+
%2 = memref.load %arg0[%1] : memref<?xi32>
76+
%3 = arith.muli %2, %2 : i32
77+
memref.store %3, %arg0[%1] : memref<?xi32>
78+
}
79+
}
80+
return
81+
}
82+
83+
// CHECK-LABEL: func @fold_one_loop3
84+
// CHECK-SAME: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}
85+
// CHECK: %[[C0:.*]] = arith.constant 0 : index
86+
// CHECK: %[[C1:.*]] = arith.constant 1 : index
87+
// CHECK: %[[C4:.*]] = arith.constant 4 : index
88+
// CHECK: %[[C10:.*]] = arith.constant 10 : index
89+
// CHECK: scf.for %[[J:.*]] = %[[C0]] to %[[C10]] step %[[C1]] {
90+
// CHECK: %[[I0:.*]] = arith.addi %[[ARG2]], %[[C1]] : index
91+
// CHECK: %[[I1:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
92+
// CHECK: %[[I2:.*]] = arith.muli %[[I0]], %[[C4]] : index
93+
// CHECK: %[[I3:.*]] = arith.muli %[[I1]], %[[C4]] : index
94+
// CHECK: %[[I4:.*]] = arith.muli %[[C1]], %[[C4]] : index
95+
// CHECK: scf.for %[[I:.*]] = %[[I2]] to %[[I3]] step %[[I4]] {
96+
// CHECK: %[[I5:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
97+
// CHECK: %[[I6:.*]] = arith.muli %[[I5]], %[[I5]] : i32
98+
// CHECK: memref.store %[[I6]], %[[ARG0]]{{\[}}%[[I]]
99+
63100

64101
func.func @fold_two_loops(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
65102
%c0 = arith.constant 0 : index
@@ -86,14 +123,17 @@ func.func @fold_two_loops(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
86123
// CHECK: %[[C10:.*]] = arith.constant 10 : index
87124
// CHECK: %[[I0:.*]] = arith.addi %[[ARG2]], %[[C0]] : index
88125
// CHECK: %[[I1:.*]] = arith.addi %[[ARG2]], %[[C10]] : index
89-
// CHECK: scf.for %[[J:.*]] = %[[I0]] to %[[I1]] step %[[C1]] {
90-
// CHECK: %[[I1:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
91-
// CHECK: %[[I2:.*]] = arith.muli %[[I1]], %[[C4]] : index
92-
// CHECK: %[[I3:.*]] = arith.muli %[[C1]], %[[C4]] : index
93-
// CHECK: scf.for %[[I:.*]] = %[[J]] to %[[I2]] step %[[I3]] {
94-
// CHECK: %[[I4:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
95-
// CHECK: %[[I5:.*]] = arith.muli %[[I4]], %[[I4]] : i32
96-
// CHECK: memref.store %[[I5]], %[[ARG0]]{{\[}}%[[I]]
126+
// CHECK: %[[I2:.*]] = arith.muli %[[I0]], %[[C4]] : index
127+
// CHECK: %[[I3:.*]] = arith.muli %[[I1]], %[[C4]] : index
128+
// CHECK: %[[I4:.*]] = arith.muli %[[C1]], %[[C4]] : index
129+
// CHECK: scf.for %[[J:.*]] = %[[I2]] to %[[I3]] step %[[I4]] {
130+
// CHECK: %[[I5:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
131+
// CHECK: %[[I6:.*]] = arith.muli %[[I5]], %[[C4]] : index
132+
// CHECK: %[[I7:.*]] = arith.muli %[[C1]], %[[C4]] : index
133+
// CHECK: scf.for %[[I:.*]] = %[[J]] to %[[I6]] step %[[I7]] {
134+
// CHECK: %[[I8:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
135+
// CHECK: %[[I9:.*]] = arith.muli %[[I8]], %[[I8]] : i32
136+
// CHECK: memref.store %[[I9]], %[[ARG0]]{{\[}}%[[I]]
97137

98138
// If an instruction's operands are not defined outside the loop, we cannot
99139
// perform the optimization, as is the case with the arith.muli below. (If

0 commit comments

Comments
 (0)