Skip to content

Commit bbd5b41

Browse files
authored
Merge pull request #241 from sysprog21/peephole-opt
Improve peephole optimizer with more patterns
2 parents 598ebe2 + fcc691e commit bbd5b41

File tree

1 file changed

+146
-39
lines changed

1 file changed

+146
-39
lines changed

src/peephole.c

Lines changed: 146 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -10,24 +10,28 @@
1010
#include "defs.h"
1111
#include "globals.c"
1212

13+
/* Determines if an instruction can be fused with a following OP_assign.
14+
* Fusible instructions are those whose results can be directly written
15+
* to the final destination register, eliminating intermediate moves.
16+
*/
1317
bool is_fusible_insn(ph2_ir_t *ph2_ir)
1418
{
1519
switch (ph2_ir->op) {
16-
case OP_add:
20+
case OP_add: /* Arithmetic operations */
1721
case OP_sub:
1822
case OP_mul:
1923
case OP_div:
2024
case OP_mod:
21-
case OP_lshift:
25+
case OP_lshift: /* Shift operations */
2226
case OP_rshift:
23-
case OP_bit_and:
27+
case OP_bit_and: /* Bitwise operations */
2428
case OP_bit_or:
2529
case OP_bit_xor:
26-
case OP_log_and:
30+
case OP_log_and: /* Logical operations */
2731
case OP_log_or:
2832
case OP_log_not:
29-
case OP_negate:
30-
case OP_load:
33+
case OP_negate: /* Unary operations */
34+
case OP_load: /* Memory operations */
3135
case OP_global_load:
3236
case OP_load_data_address:
3337
return true;
@@ -36,39 +40,43 @@ bool is_fusible_insn(ph2_ir_t *ph2_ir)
3640
}
3741
}
3842

43+
/* Main peephole optimization function that applies pattern matching
44+
* and transformation rules to consecutive IR instructions.
45+
* Returns true if any optimization was applied, false otherwise.
46+
*/
3947
bool insn_fusion(ph2_ir_t *ph2_ir)
4048
{
4149
ph2_ir_t *next = ph2_ir->next;
4250
if (!next)
4351
return false;
4452

53+
/* ALU instruction fusion.
54+
* Eliminates redundant move operations following arithmetic/logical
55+
* operations. This is the most fundamental optimization that removes
56+
* temporary register usage.
57+
*/
4558
if (next->op == OP_assign) {
4659
if (is_fusible_insn(ph2_ir) && ph2_ir->dest == next->src0) {
47-
/* eliminates:
48-
* {ALU rn, rs1, rs2; mv rd, rn;}
49-
* reduces to:
50-
* {ALU rd, rs1, rs2;}
60+
/* Pattern: {ALU rn, rs1, rs2; mv rd, rn} → {ALU rd, rs1, rs2}
61+
* Example: {add t1, a, b; mv result, t1} → {add result, a, b}
5162
*/
5263
ph2_ir->dest = next->dest;
5364
ph2_ir->next = next->next;
5465
return true;
5566
}
5667
}
5768

69+
/* Arithmetic identity with zero constant */
5870
if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 0) {
5971
if (next->op == OP_add &&
6072
(ph2_ir->dest == next->src0 || ph2_ir->dest == next->src1)) {
61-
/* eliminates:
62-
* {li rn, 0; add rd, rs1, rn;} or
63-
* {li rn, 0; add rd, rn, rs1;}
64-
* reduces to:
65-
* {mv rd, rs1;}, based on identity property of addition
73+
/* Pattern: {li 0; add x, 0} → {mov x} (additive identity: x+0 = x)
74+
* Handles both operand positions due to addition commutativity
75+
* Example: {li t1, 0; add result, var, t1} → {mov result, var}
6676
*/
67-
/* Determine the non-zero source operand */
6877
int non_zero_src =
6978
(ph2_ir->dest == next->src0) ? next->src1 : next->src0;
7079

71-
/* Transform instruction sequence from addition with zero to move */
7280
ph2_ir->op = OP_assign;
7381
ph2_ir->src0 = non_zero_src;
7482
ph2_ir->dest = next->dest;
@@ -78,10 +86,8 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
7886

7987
if (next->op == OP_sub) {
8088
if (ph2_ir->dest == next->src1) {
81-
/* eliminates:
82-
* {li rn, 0; sub rd, rs1, rn;}
83-
* reduces to:
84-
* {mv rd, rs1;}
89+
/* Pattern: {li 0; sub x, 0} → {mov x} (x - 0 = x)
90+
* Example: {li t1, 0; sub result, var, t1} → {mov result, var}
8591
*/
8692
ph2_ir->op = OP_assign;
8793
ph2_ir->src0 = next->src0;
@@ -91,10 +97,8 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
9197
}
9298

9399
if (ph2_ir->dest == next->src0) {
94-
/* eliminates:
95-
* {li rn, 0; sub rd, rn, rs1;}
96-
* reduces to:
97-
* {negate rd, rs1;}
100+
/* Pattern: {li 0; sub 0, x} → {neg x} (0 - x = -x)
101+
* Example: {li t1, 0; sub result, t1, var} → {neg result, var}
98102
*/
99103
ph2_ir->op = OP_negate;
100104
ph2_ir->src0 = next->src1;
@@ -106,11 +110,9 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
106110

107111
if (next->op == OP_mul &&
108112
(ph2_ir->dest == next->src0 || ph2_ir->dest == next->src1)) {
109-
/* eliminates:
110-
* {li rn, 0; mul rd, rs1, rn;} or
111-
* {li rn, 0; mul rd, rn, rs1;}
112-
* reduces to:
113-
* {li rd, 0}, based on zero property of multiplication
113+
/* Pattern: {li 0; mul x, 0} → {li 0} (absorbing element: x * 0 = 0)
114+
* Example: {li t1, 0; mul result, var, t1} → {li result, 0}
115+
* Eliminates multiplication entirely
114116
*/
115117
ph2_ir->op = OP_load_constant;
116118
ph2_ir->src0 = 0;
@@ -120,14 +122,15 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
120122
}
121123
}
122124

125+
/* Multiplicative identity with one constant */
123126
if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 1) {
124127
if (next->op == OP_mul &&
125128
(ph2_ir->dest == next->src0 || ph2_ir->dest == next->src1)) {
126-
/* eliminates:
127-
* {li rn, 1; mul rd, rs1, rn;} or
128-
* {li rn, 1; mul rd, rn, rs1;}
129-
* reduces to:
130-
* {li rd, rs1}, based on identity property of multiplication
129+
/* Pattern: {li 1; mul x, 1} → {mov x} (multiplicative identity:
130+
* x * 1 = x)
131+
* Example: {li t1, 1; mul result, var, t1} → {mov result, var}
132+
* Handles both operand positions due to multiplication
133+
* commutativity
131134
*/
132135
ph2_ir->op = OP_assign;
133136
ph2_ir->src0 = ph2_ir->dest == next->src0 ? next->src1 : next->src0;
@@ -137,15 +140,112 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
137140
}
138141
}
139142

140-
/* Other instruction fusion should be done here, and for any successful fusion,
141-
* it should return true. This is meant to allow peephole optimization to do
142-
* multiple passes over the IR list to maximize optimization as much as
143-
* possible.
143+
/* Bitwise identity operations */
144+
if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == -1 &&
145+
next->op == OP_bit_and && ph2_ir->dest == next->src1) {
146+
/* Pattern: {li -1; and x, -1} → {mov x} (x & 0xFFFFFFFF = x)
147+
* Example: {li t1, -1; and result, var, t1} → {mov result, var}
148+
* Eliminates bitwise AND with all-ones mask
149+
*/
150+
ph2_ir->op = OP_assign;
151+
ph2_ir->src0 = next->src0;
152+
ph2_ir->dest = next->dest;
153+
ph2_ir->next = next->next;
154+
return true;
155+
}
156+
157+
if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 0 &&
158+
(next->op == OP_lshift || next->op == OP_rshift) &&
159+
ph2_ir->dest == next->src1) {
160+
/* Pattern: {li 0; shl/shr x, 0} → {mov x} (x << 0 = x >> 0 = x)
161+
* Example: {li t1, 0; shl result, var, t1} → {mov result, var}
162+
* Eliminates no-op shift operations
163+
*/
164+
ph2_ir->op = OP_assign;
165+
ph2_ir->src0 = next->src0;
166+
ph2_ir->dest = next->dest;
167+
ph2_ir->next = next->next;
168+
return true;
169+
}
170+
171+
if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 0 &&
172+
next->op == OP_bit_or && ph2_ir->dest == next->src1) {
173+
/* Pattern: {li 0; or x, 0} → {mov x} (x | 0 = x)
174+
* Example: {li t1, 0; or result, var, t1} → {mov result, var}
175+
* Eliminates bitwise OR with zero (identity element)
176+
*/
177+
ph2_ir->op = OP_assign;
178+
ph2_ir->src0 = next->src0;
179+
ph2_ir->dest = next->dest;
180+
ph2_ir->next = next->next;
181+
return true;
182+
}
183+
184+
/* Power-of-2 multiplication to shift conversion.
185+
* Shift operations are significantly faster than multiplication
144186
*/
187+
if (ph2_ir->op == OP_load_constant && ph2_ir->src0 > 0 &&
188+
next->op == OP_mul && ph2_ir->dest == next->src1) {
189+
int power = ph2_ir->src0;
190+
/* Detect power-of-2 using bit manipulation: (n & (n-1)) == 0 for powers
191+
* of 2
192+
*/
193+
if (power && (power & (power - 1)) == 0) {
194+
/* Calculate log2(power) to determine shift amount */
195+
int shift_amount = 0;
196+
int tmp = power;
197+
while (tmp > 1) {
198+
tmp >>= 1;
199+
shift_amount++;
200+
}
201+
/* Pattern: {li 2^n; mul x, 2^n} → {li n; shl x, n}
202+
* Example: {li t1, 4; mul result, var, t1} →
203+
* {li t1, 2; shl result, var, t1}
204+
*/
205+
ph2_ir->op = OP_load_constant;
206+
ph2_ir->src0 = shift_amount;
207+
next->op = OP_lshift;
208+
return true;
209+
}
210+
}
211+
212+
/* XOR identity operation */
213+
if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 0 &&
214+
next->op == OP_bit_xor && ph2_ir->dest == next->src1) {
215+
/* Pattern: {li 0; xor x, 0} → {mov x} (x ^ 0 = x)
216+
* Example: {li t1, 0; xor result, var, t1} → {mov result, var}
217+
* Completes bitwise identity optimization coverage
218+
*/
219+
ph2_ir->op = OP_assign;
220+
ph2_ir->src0 = next->src0;
221+
ph2_ir->dest = next->dest;
222+
ph2_ir->next = next->next;
223+
return true;
224+
}
225+
226+
/* Extended multiplicative identity (operand position variant)
227+
* Handles the case where constant 1 is in src0 position of multiplication
228+
*/
229+
if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 1 &&
230+
next->op == OP_mul && ph2_ir->dest == next->src0) {
231+
/* Pattern: {li 1; mul 1, x} → {mov x} (1 * x = x)
232+
* Example: {li t1, 1; mul result, t1, var} → {mov result, var}
233+
* Covers multiplication commutativity edge case
234+
*/
235+
ph2_ir->op = OP_assign;
236+
ph2_ir->src0 = next->src1;
237+
ph2_ir->dest = next->dest;
238+
ph2_ir->next = next->next;
239+
return true;
240+
}
145241

146242
return false;
147243
}
148244

245+
/* Main peephole optimization driver.
246+
* It iterates through all functions, basic blocks, and IR instructions to apply
247+
* local optimizations on adjacent instruction pairs.
248+
*/
149249
void peephole(void)
150250
{
151251
for (func_t *func = FUNC_LIST.head; func; func = func->next) {
@@ -154,10 +254,17 @@ void peephole(void)
154254
ph2_ir_t *next = ir->next;
155255
if (!next)
156256
continue;
257+
258+
/* Self-assignment elimination
259+
* Removes trivial assignments where destination equals source
260+
* Pattern: {mov x, x} → eliminated
261+
* Common in compiler-generated intermediate code
262+
*/
157263
if (next->op == OP_assign && next->dest == next->src0) {
158264
ir->next = next->next;
159265
continue;
160266
}
267+
161268
insn_fusion(ir);
162269
}
163270
}

0 commit comments

Comments
 (0)