10
10
#include "defs.h"
11
11
#include "globals.c"
12
12
13
+ /* Determines if an instruction can be fused with a following OP_assign.
14
+ * Fusible instructions are those whose results can be directly written
15
+ * to the final destination register, eliminating intermediate moves.
16
+ */
13
17
bool is_fusible_insn (ph2_ir_t * ph2_ir )
14
18
{
15
19
switch (ph2_ir -> op ) {
16
- case OP_add :
20
+ case OP_add : /* Arithmetic operations */
17
21
case OP_sub :
18
22
case OP_mul :
19
23
case OP_div :
20
24
case OP_mod :
21
- case OP_lshift :
25
+ case OP_lshift : /* Shift operations */
22
26
case OP_rshift :
23
- case OP_bit_and :
27
+ case OP_bit_and : /* Bitwise operations */
24
28
case OP_bit_or :
25
29
case OP_bit_xor :
26
- case OP_log_and :
30
+ case OP_log_and : /* Logical operations */
27
31
case OP_log_or :
28
32
case OP_log_not :
29
- case OP_negate :
30
- case OP_load :
33
+ case OP_negate : /* Unary operations */
34
+ case OP_load : /* Memory operations */
31
35
case OP_global_load :
32
36
case OP_load_data_address :
33
37
return true;
@@ -36,39 +40,43 @@ bool is_fusible_insn(ph2_ir_t *ph2_ir)
36
40
}
37
41
}
38
42
43
+ /* Main peephole optimization function that applies pattern matching
44
+ * and transformation rules to consecutive IR instructions.
45
+ * Returns true if any optimization was applied, false otherwise.
46
+ */
39
47
bool insn_fusion (ph2_ir_t * ph2_ir )
40
48
{
41
49
ph2_ir_t * next = ph2_ir -> next ;
42
50
if (!next )
43
51
return false;
44
52
53
+ /* ALU instruction fusion.
54
+ * Eliminates redundant move operations following arithmetic/logical
55
+ * operations. This is the most fundamental optimization that removes
56
+ * temporary register usage.
57
+ */
45
58
if (next -> op == OP_assign ) {
46
59
if (is_fusible_insn (ph2_ir ) && ph2_ir -> dest == next -> src0 ) {
47
- /* eliminates:
48
- * {ALU rn, rs1, rs2; mv rd, rn;}
49
- * reduces to:
50
- * {ALU rd, rs1, rs2;}
60
+ /* Pattern: {ALU rn, rs1, rs2; mv rd, rn} → {ALU rd, rs1, rs2}
61
+ * Example: {add t1, a, b; mv result, t1} → {add result, a, b}
51
62
*/
52
63
ph2_ir -> dest = next -> dest ;
53
64
ph2_ir -> next = next -> next ;
54
65
return true;
55
66
}
56
67
}
57
68
69
+ /* Arithmetic identity with zero constant */
58
70
if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 0 ) {
59
71
if (next -> op == OP_add &&
60
72
(ph2_ir -> dest == next -> src0 || ph2_ir -> dest == next -> src1 )) {
61
- /* eliminates:
62
- * {li rn, 0; add rd, rs1, rn;} or
63
- * {li rn, 0; add rd, rn, rs1;}
64
- * reduces to:
65
- * {mv rd, rs1;}, based on identity property of addition
73
+ /* Pattern: {li 0; add x, 0} → {mov x} (additive identity: x+0 = x)
74
+ * Handles both operand positions due to addition commutativity
75
+ * Example: {li t1, 0; add result, var, t1} → {mov result, var}
66
76
*/
67
- /* Determine the non-zero source operand */
68
77
int non_zero_src =
69
78
(ph2_ir -> dest == next -> src0 ) ? next -> src1 : next -> src0 ;
70
79
71
- /* Transform instruction sequence from addition with zero to move */
72
80
ph2_ir -> op = OP_assign ;
73
81
ph2_ir -> src0 = non_zero_src ;
74
82
ph2_ir -> dest = next -> dest ;
@@ -78,10 +86,8 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
78
86
79
87
if (next -> op == OP_sub ) {
80
88
if (ph2_ir -> dest == next -> src1 ) {
81
- /* eliminates:
82
- * {li rn, 0; sub rd, rs1, rn;}
83
- * reduces to:
84
- * {mv rd, rs1;}
89
+ /* Pattern: {li 0; sub x, 0} → {mov x} (x - 0 = x)
90
+ * Example: {li t1, 0; sub result, var, t1} → {mov result, var}
85
91
*/
86
92
ph2_ir -> op = OP_assign ;
87
93
ph2_ir -> src0 = next -> src0 ;
@@ -91,10 +97,8 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
91
97
}
92
98
93
99
if (ph2_ir -> dest == next -> src0 ) {
94
- /* eliminates:
95
- * {li rn, 0; sub rd, rn, rs1;}
96
- * reduces to:
97
- * {negate rd, rs1;}
100
+ /* Pattern: {li 0; sub 0, x} → {neg x} (0 - x = -x)
101
+ * Example: {li t1, 0; sub result, t1, var} → {neg result, var}
98
102
*/
99
103
ph2_ir -> op = OP_negate ;
100
104
ph2_ir -> src0 = next -> src1 ;
@@ -106,11 +110,9 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
106
110
107
111
if (next -> op == OP_mul &&
108
112
(ph2_ir -> dest == next -> src0 || ph2_ir -> dest == next -> src1 )) {
109
- /* eliminates:
110
- * {li rn, 0; mul rd, rs1, rn;} or
111
- * {li rn, 0; mul rd, rn, rs1;}
112
- * reduces to:
113
- * {li rd, 0}, based on zero property of multiplication
113
+ /* Pattern: {li 0; mul x, 0} → {li 0} (absorbing element: x * 0 = 0)
114
+ * Example: {li t1, 0; mul result, var, t1} → {li result, 0}
115
+ * Eliminates multiplication entirely
114
116
*/
115
117
ph2_ir -> op = OP_load_constant ;
116
118
ph2_ir -> src0 = 0 ;
@@ -120,14 +122,15 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
120
122
}
121
123
}
122
124
125
+ /* Multiplicative identity with one constant */
123
126
if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 1 ) {
124
127
if (next -> op == OP_mul &&
125
128
(ph2_ir -> dest == next -> src0 || ph2_ir -> dest == next -> src1 )) {
126
- /* eliminates:
127
- * {li rn, 1; mul rd, rs1, rn;} or
128
- * {li rn, 1; mul rd, rn, rs1;}
129
- * reduces to:
130
- * {mv rd, rs1;}, based on identity property of multiplication
129
+ /* Pattern: {li 1; mul x, 1} → {mov x} (multiplicative identity:
130
+ * x * 1 = x)
131
+ * Example: {li t1, 1; mul result, var, t1} → {mov result, var}
132
+ * Handles both operand positions due to multiplication
133
+ * commutativity
131
134
*/
132
135
ph2_ir -> op = OP_assign ;
133
136
ph2_ir -> src0 = ph2_ir -> dest == next -> src0 ? next -> src1 : next -> src0 ;
@@ -137,15 +140,112 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
137
140
}
138
141
}
139
142
140
- /* Other instruction fusion should be done here, and for any successful fusion,
141
- * it should return true. This is meant to allow peephole optimization to do
142
- * multiple passes over the IR list to maximize optimization as much as
143
- * possible.
143
+ /* Bitwise identity operations */
144
+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == -1 &&
145
+ next -> op == OP_bit_and && ph2_ir -> dest == next -> src1 ) {
146
+ /* Pattern: {li -1; and x, -1} → {mov x} (x & 0xFFFFFFFF = x)
147
+ * Example: {li t1, -1; and result, var, t1} → {mov result, var}
148
+ * Eliminates bitwise AND with all-ones mask
149
+ */
150
+ ph2_ir -> op = OP_assign ;
151
+ ph2_ir -> src0 = next -> src0 ;
152
+ ph2_ir -> dest = next -> dest ;
153
+ ph2_ir -> next = next -> next ;
154
+ return true;
155
+ }
156
+
157
+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 0 &&
158
+ (next -> op == OP_lshift || next -> op == OP_rshift ) &&
159
+ ph2_ir -> dest == next -> src1 ) {
160
+ /* Pattern: {li 0; shl/shr x, 0} → {mov x} (x << 0 = x >> 0 = x)
161
+ * Example: {li t1, 0; shl result, var, t1} → {mov result, var}
162
+ * Eliminates no-op shift operations
163
+ */
164
+ ph2_ir -> op = OP_assign ;
165
+ ph2_ir -> src0 = next -> src0 ;
166
+ ph2_ir -> dest = next -> dest ;
167
+ ph2_ir -> next = next -> next ;
168
+ return true;
169
+ }
170
+
171
+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 0 &&
172
+ next -> op == OP_bit_or && ph2_ir -> dest == next -> src1 ) {
173
+ /* Pattern: {li 0; or x, 0} → {mov x} (x | 0 = x)
174
+ * Example: {li t1, 0; or result, var, t1} → {mov result, var}
175
+ * Eliminates bitwise OR with zero (identity element)
176
+ */
177
+ ph2_ir -> op = OP_assign ;
178
+ ph2_ir -> src0 = next -> src0 ;
179
+ ph2_ir -> dest = next -> dest ;
180
+ ph2_ir -> next = next -> next ;
181
+ return true;
182
+ }
183
+
184
+ /* Power-of-2 multiplication to shift conversion.
185
+ * Shift operations are significantly faster than multiplication
144
186
*/
187
+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 > 0 &&
188
+ next -> op == OP_mul && ph2_ir -> dest == next -> src1 ) {
189
+ int power = ph2_ir -> src0 ;
190
+ /* Detect power-of-2 using bit manipulation: (n & (n-1)) == 0 for powers
191
+ * of 2
192
+ */
193
+ if (power && (power & (power - 1 )) == 0 ) {
194
+ /* Calculate log2(power) to determine shift amount */
195
+ int shift_amount = 0 ;
196
+ int tmp = power ;
197
+ while (tmp > 1 ) {
198
+ tmp >>= 1 ;
199
+ shift_amount ++ ;
200
+ }
201
+ /* Pattern: {li 2^n; mul x, 2^n} → {li n; shl x, n}
202
+ * Example: {li t1, 4; mul result, var, t1} →
203
+ * {li t1, 2; shl result, var, t1}
204
+ */
205
+ ph2_ir -> op = OP_load_constant ;
206
+ ph2_ir -> src0 = shift_amount ;
207
+ next -> op = OP_lshift ;
208
+ return true;
209
+ }
210
+ }
211
+
212
+ /* XOR identity operation */
213
+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 0 &&
214
+ next -> op == OP_bit_xor && ph2_ir -> dest == next -> src1 ) {
215
+ /* Pattern: {li 0; xor x, 0} → {mov x} (x ^ 0 = x)
216
+ * Example: {li t1, 0; xor result, var, t1} → {mov result, var}
217
+ * Completes bitwise identity optimization coverage
218
+ */
219
+ ph2_ir -> op = OP_assign ;
220
+ ph2_ir -> src0 = next -> src0 ;
221
+ ph2_ir -> dest = next -> dest ;
222
+ ph2_ir -> next = next -> next ;
223
+ return true;
224
+ }
225
+
226
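+ /* Extended multiplicative identity (operand position variant)
227
+ * Handles the case where constant 1 is in src0 position of multiplication. NOTE(review): the earlier {li 1; mul} check also matches dest == next->src0, so this branch may be redundant — confirm.
228
+ */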
229
+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 1 &&
230
+ next -> op == OP_mul && ph2_ir -> dest == next -> src0 ) {
231
+ /* Pattern: {li 1; mul 1, x} → {mov x} (1 * x = x)
232
+ * Example: {li t1, 1; mul result, t1, var} → {mov result, var}
233
+ * Covers multiplication commutativity edge case
234
+ */
235
+ ph2_ir -> op = OP_assign ;
236
+ ph2_ir -> src0 = next -> src1 ;
237
+ ph2_ir -> dest = next -> dest ;
238
+ ph2_ir -> next = next -> next ;
239
+ return true;
240
+ }
145
241
146
242
return false;
147
243
}
148
244
245
+ /* Main peephole optimization driver.
246
+ * It iterates through all functions, basic blocks, and IR instructions to apply
247
+ * local optimizations on adjacent instruction pairs.
248
+ */
149
249
void peephole (void )
150
250
{
151
251
for (func_t * func = FUNC_LIST .head ; func ; func = func -> next ) {
@@ -154,10 +254,17 @@ void peephole(void)
154
254
ph2_ir_t * next = ir -> next ;
155
255
if (!next )
156
256
continue ;
257
+
258
+ /* Self-assignment elimination
259
+ * Removes trivial assignments where destination equals source
260
+ * Pattern: {mov x, x} → eliminated
261
+ * Common in compiler-generated intermediate code
262
+ */
157
263
if (next -> op == OP_assign && next -> dest == next -> src0 ) {
158
264
ir -> next = next -> next ;
159
265
continue ;
160
266
}
267
+
161
268
insn_fusion (ir );
162
269
}
163
270
}
0 commit comments