@@ -21,12 +21,13 @@ func.func @fold_one_loop(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
2121// CHECK: %[[C4:.*]] = arith.constant 4 : index
2222// CHECK: %[[I0:.*]] = arith.addi %[[ARG2]], %[[C0]] : index
2323// CHECK: %[[I1:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
24- // CHECK: %[[I2:.*]] = arith.muli %[[I1]], %[[C4]] : index
25- // CHECK: %[[I3:.*]] = arith.muli %[[C1]], %[[C4]] : index
26- // CHECK: scf.for %[[I:.*]] = %[[I0]] to %[[I2]] step %[[I3]] {
27- // CHECK: %[[I4:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
28- // CHECK: %[[I5:.*]] = arith.muli %[[I4]], %[[I4]] : i32
29- // CHECK: memref.store %[[I5]], %[[ARG0]]{{\[}}%[[I]]
24+ // CHECK: %[[I2:.*]] = arith.muli %[[I0]], %[[C4]] : index
25+ // CHECK: %[[I3:.*]] = arith.muli %[[I1]], %[[C4]] : index
26+ // CHECK: %[[I4:.*]] = arith.muli %[[C1]], %[[C4]] : index
27+ // CHECK: scf.for %[[I:.*]] = %[[I2]] to %[[I3]] step %[[I4]] {
28+ // CHECK: %[[I5:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
29+ // CHECK: %[[I6:.*]] = arith.muli %[[I5]], %[[I5]] : i32
30+ // CHECK: memref.store %[[I6]], %[[ARG0]]{{\[}}%[[I]]
3031
3132func.func @fold_one_loop2 (%arg0: memref <?xi32 >, %arg1: index , %arg2: index ) {
3233 %c0 = arith.constant 0 : index
@@ -54,12 +55,48 @@ func.func @fold_one_loop2(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
5455// CHECK: scf.for %[[J:.*]] = %[[C0]] to %[[C10]] step %[[C1]] {
5556// CHECK: %[[I0:.*]] = arith.addi %[[ARG2]], %[[C0]] : index
5657// CHECK: %[[I1:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
57- // CHECK: %[[I2:.*]] = arith.muli %[[I1]], %[[C4]] : index
58- // CHECK: %[[I3:.*]] = arith.muli %[[C1]], %[[C4]] : index
59- // CHECK: scf.for %[[I:.*]] = %[[I0]] to %[[I2]] step %[[I3]] {
60- // CHECK: %[[I4:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
61- // CHECK: %[[I5:.*]] = arith.muli %[[I4]], %[[I4]] : i32
62- // CHECK: memref.store %[[I5]], %[[ARG0]]{{\[}}%[[I]]
58+ // CHECK: %[[I2:.*]] = arith.muli %[[I0]], %[[C4]] : index
59+ // CHECK: %[[I3:.*]] = arith.muli %[[I1]], %[[C4]] : index
60+ // CHECK: %[[I4:.*]] = arith.muli %[[C1]], %[[C4]] : index
61+ // CHECK: scf.for %[[I:.*]] = %[[I2]] to %[[I3]] step %[[I4]] {
62+ // CHECK: %[[I5:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
63+ // CHECK: %[[I6:.*]] = arith.muli %[[I5]], %[[I5]] : i32
64+ // CHECK: memref.store %[[I6]], %[[ARG0]]{{\[}}%[[I]]
65+
66+ func.func @fold_one_loop3 (%arg0: memref <?xi32 >, %arg1: index , %arg2: index ) {  // Test input: inner loop starts at %c1 and its induction variable is used only as (%arg2 + %i) * %c4.
67+ %c0 = arith.constant 0 : index
68+ %c1 = arith.constant 1 : index
69+ %c4 = arith.constant 4 : index
70+ %c10 = arith.constant 10 : index
71+ scf.for %j = %c0 to %c10 step %c1 {  // Outer loop from %c0 to %c10; %j is not used inside the body.
72+ scf.for %i = %c1 to %arg1 step %c1 {  // Inner loop from %c1 to %arg1 with step %c1.
73+ %0 = arith.addi %arg2 , %i : index  // Offset the induction variable by %arg2 ...
74+ %1 = arith.muli %0 , %c4 : index  // ... then scale by 4; the CHECK lines expect both ops folded into the loop's bounds and step.
75+ %2 = memref.load %arg0 [%1 ] : memref <?xi32 >
76+ %3 = arith.muli %2 , %2 : i32  // Square the loaded element.
77+ memref.store %3 , %arg0 [%1 ] : memref <?xi32 >  // Store the result back at the same index.
78+ }
79+ }
80+ return
81+ }
82+
83+ // CHECK-LABEL: func @fold_one_loop3
84+ // CHECK-SAME: (%[[ARG0:.*]]: {{.*}}, %[[ARG1:.*]]: {{.*}}, %[[ARG2:.*]]: {{.*}}
85+ // CHECK: %[[C0:.*]] = arith.constant 0 : index
86+ // CHECK: %[[C1:.*]] = arith.constant 1 : index
87+ // CHECK: %[[C4:.*]] = arith.constant 4 : index
88+ // CHECK: %[[C10:.*]] = arith.constant 10 : index
89+ // CHECK: scf.for %[[J:.*]] = %[[C0]] to %[[C10]] step %[[C1]] {
90+ // CHECK: %[[I0:.*]] = arith.addi %[[ARG2]], %[[C1]] : index
91+ // CHECK: %[[I1:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
92+ // CHECK: %[[I2:.*]] = arith.muli %[[I0]], %[[C4]] : index
93+ // CHECK: %[[I3:.*]] = arith.muli %[[I1]], %[[C4]] : index
94+ // CHECK: %[[I4:.*]] = arith.muli %[[C1]], %[[C4]] : index
95+ // CHECK: scf.for %[[I:.*]] = %[[I2]] to %[[I3]] step %[[I4]] {
96+ // CHECK: %[[I5:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
97+ // CHECK: %[[I6:.*]] = arith.muli %[[I5]], %[[I5]] : i32
98+ // CHECK: memref.store %[[I6]], %[[ARG0]]{{\[}}%[[I]]
99+
63100
64101func.func @fold_two_loops (%arg0: memref <?xi32 >, %arg1: index , %arg2: index ) {
65102 %c0 = arith.constant 0 : index
@@ -86,14 +123,17 @@ func.func @fold_two_loops(%arg0: memref<?xi32>, %arg1: index, %arg2: index) {
86123// CHECK: %[[C10:.*]] = arith.constant 10 : index
87124// CHECK: %[[I0:.*]] = arith.addi %[[ARG2]], %[[C0]] : index
88125// CHECK: %[[I1:.*]] = arith.addi %[[ARG2]], %[[C10]] : index
89- // CHECK: scf.for %[[J:.*]] = %[[I0]] to %[[I1]] step %[[C1]] {
90- // CHECK: %[[I1:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
91- // CHECK: %[[I2:.*]] = arith.muli %[[I1]], %[[C4]] : index
92- // CHECK: %[[I3:.*]] = arith.muli %[[C1]], %[[C4]] : index
93- // CHECK: scf.for %[[I:.*]] = %[[J]] to %[[I2]] step %[[I3]] {
94- // CHECK: %[[I4:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
95- // CHECK: %[[I5:.*]] = arith.muli %[[I4]], %[[I4]] : i32
96- // CHECK: memref.store %[[I5]], %[[ARG0]]{{\[}}%[[I]]
126+ // CHECK: %[[I2:.*]] = arith.muli %[[I0]], %[[C4]] : index
127+ // CHECK: %[[I3:.*]] = arith.muli %[[I1]], %[[C4]] : index
128+ // CHECK: %[[I4:.*]] = arith.muli %[[C1]], %[[C4]] : index
129+ // CHECK: scf.for %[[J:.*]] = %[[I2]] to %[[I3]] step %[[I4]] {
130+ // CHECK: %[[I5:.*]] = arith.addi %[[ARG2]], %[[ARG1]] : index
131+ // CHECK: %[[I6:.*]] = arith.muli %[[I5]], %[[C4]] : index
132+ // CHECK: %[[I7:.*]] = arith.muli %[[C1]], %[[C4]] : index
133+ // CHECK: scf.for %[[I:.*]] = %[[J]] to %[[I6]] step %[[I7]] {
134+ // CHECK: %[[I8:.*]] = memref.load %[[ARG0]]{{\[}}%[[I]]
135+ // CHECK: %[[I9:.*]] = arith.muli %[[I8]], %[[I8]] : i32
136+ // CHECK: memref.store %[[I9]], %[[ARG0]]{{\[}}%[[I]]
97137
98138// If an instruction's operands are not defined outside the loop, we cannot
99139// perform the optimization, as is the case with the arith.muli below. (If
0 commit comments