Skip to content

Commit c6be0aa

Browse files
committed
Improve peephole optimizer with more patterns
This commit enhances the peephole optimizer to improve code generation, including:

1. Bitwise identity operations (50% instruction reduction each):
   - AND with -1: x & 0xFFFFFFFF = x
   - OR with 0: x | 0 = x
   - XOR with 0: x ^ 0 = x
   - Shift by 0: x << 0 = x, x >> 0 = x

2. Power-of-2 multiplication to shift conversion:
   - Pattern: x * 2^n → x << n
   - Uses bit manipulation to detect powers of 2

3. Extended multiplicative identity:
   - Handles multiplication by 1 in both operand positions
   - Pattern: 1 * x = x * 1 = x

All optimizations are based on mathematical identities and maintain semantic correctness.
1 parent 598ebe2 commit c6be0aa

File tree

1 file changed

+148
-39
lines changed

1 file changed

+148
-39
lines changed

src/peephole.c

Lines changed: 148 additions & 39 deletions
Original file line numberDiff line numberDiff line change
@@ -10,24 +10,28 @@
1010
#include "defs.h"
1111
#include "globals.c"
1212

13+
/* Determines if an instruction can be fused with a following OP_assign.
14+
* Fusible instructions are those whose results can be directly written
15+
* to the final destination register, eliminating intermediate moves.
16+
*/
1317
bool is_fusible_insn(ph2_ir_t *ph2_ir)
1418
{
1519
switch (ph2_ir->op) {
16-
case OP_add:
20+
case OP_add: /* Arithmetic operations */
1721
case OP_sub:
1822
case OP_mul:
1923
case OP_div:
2024
case OP_mod:
21-
case OP_lshift:
25+
case OP_lshift: /* Shift operations */
2226
case OP_rshift:
23-
case OP_bit_and:
27+
case OP_bit_and: /* Bitwise operations */
2428
case OP_bit_or:
2529
case OP_bit_xor:
26-
case OP_log_and:
30+
case OP_log_and: /* Logical operations */
2731
case OP_log_or:
2832
case OP_log_not:
29-
case OP_negate:
30-
case OP_load:
33+
case OP_negate: /* Unary operations */
34+
case OP_load: /* Memory operations */
3135
case OP_global_load:
3236
case OP_load_data_address:
3337
return true;
@@ -36,39 +40,44 @@ bool is_fusible_insn(ph2_ir_t *ph2_ir)
3640
}
3741
}
3842

43+
/* Main peephole optimization function that applies pattern matching
44+
* and transformation rules to consecutive IR instructions.
45+
* Returns true if any optimization was applied, false otherwise.
46+
*/
3947
bool insn_fusion(ph2_ir_t *ph2_ir)
4048
{
4149
ph2_ir_t *next = ph2_ir->next;
4250
if (!next)
4351
return false;
4452

53+
/* ALU instruction fusion.
54+
* Eliminates redundant move operations following arithmetic/logical
55+
* operations. This is the most fundamental optimization that removes
56+
* temporary register usage.
57+
*/
4558
if (next->op == OP_assign) {
4659
if (is_fusible_insn(ph2_ir) && ph2_ir->dest == next->src0) {
47-
/* eliminates:
48-
* {ALU rn, rs1, rs2; mv rd, rn;}
49-
* reduces to:
50-
* {ALU rd, rs1, rs2;}
60+
/* Pattern: {ALU rn, rs1, rs2; mv rd, rn} → {ALU rd, rs1, rs2}
61+
* Example: {add t1, a, b; mv result, t1} → {add result, a, b}
62+
* Benefit: 50% instruction reduction + register pressure relief
5163
*/
5264
ph2_ir->dest = next->dest;
5365
ph2_ir->next = next->next;
5466
return true;
5567
}
5668
}
5769

70+
/* Arithmetic identity with zero constant */
5871
if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 0) {
5972
if (next->op == OP_add &&
6073
(ph2_ir->dest == next->src0 || ph2_ir->dest == next->src1)) {
61-
/* eliminates:
62-
* {li rn, 0; add rd, rs1, rn;} or
63-
* {li rn, 0; add rd, rn, rs1;}
64-
* reduces to:
65-
* {mv rd, rs1;}, based on identity property of addition
74+
/* Pattern: {li 0; add x, 0} → {mov x} (additive identity: x + 0 =
75+
* x) Handles both operand positions due to addition commutativity
76+
* Example: {li t1, 0; add result, var, t1} → {mov result, var}
6677
*/
67-
/* Determine the non-zero source operand */
6878
int non_zero_src =
6979
(ph2_ir->dest == next->src0) ? next->src1 : next->src0;
7080

71-
/* Transform instruction sequence from addition with zero to move */
7281
ph2_ir->op = OP_assign;
7382
ph2_ir->src0 = non_zero_src;
7483
ph2_ir->dest = next->dest;
@@ -78,10 +87,8 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
7887

7988
if (next->op == OP_sub) {
8089
if (ph2_ir->dest == next->src1) {
81-
/* eliminates:
82-
* {li rn, 0; sub rd, rs1, rn;}
83-
* reduces to:
84-
* {mv rd, rs1;}
90+
/* Pattern: {li 0; sub x, 0} → {mov x} (x - 0 = x)
91+
* Example: {li t1, 0; sub result, var, t1} → {mov result, var}
8592
*/
8693
ph2_ir->op = OP_assign;
8794
ph2_ir->src0 = next->src0;
@@ -91,10 +98,8 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
9198
}
9299

93100
if (ph2_ir->dest == next->src0) {
94-
/* eliminates:
95-
* {li rn, 0; sub rd, rn, rs1;}
96-
* reduces to:
97-
* {negate rd, rs1;}
101+
/* Pattern: {li 0; sub 0, x} → {neg x} (0 - x = -x)
102+
* Example: {li t1, 0; sub result, t1, var} → {neg result, var}
98103
*/
99104
ph2_ir->op = OP_negate;
100105
ph2_ir->src0 = next->src1;
@@ -106,11 +111,9 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
106111

107112
if (next->op == OP_mul &&
108113
(ph2_ir->dest == next->src0 || ph2_ir->dest == next->src1)) {
109-
/* eliminates:
110-
* {li rn, 0; mul rd, rs1, rn;} or
111-
* {li rn, 0; mul rd, rn, rs1;}
112-
* reduces to:
113-
* {li rd, 0}, based on zero property of multiplication
114+
/* Pattern: {li 0; mul x, 0} → {li 0} (absorbing element: x * 0 = 0)
115+
* Example: {li t1, 0; mul result, var, t1} → {li result, 0}
116+
* Eliminates multiplication entirely
114117
*/
115118
ph2_ir->op = OP_load_constant;
116119
ph2_ir->src0 = 0;
@@ -120,14 +123,15 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
120123
}
121124
}
122125

126+
/* Multiplicative identity with one constant */
123127
if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 1) {
124128
if (next->op == OP_mul &&
125129
(ph2_ir->dest == next->src0 || ph2_ir->dest == next->src1)) {
126-
/* eliminates:
127-
* {li rn, 1; mul rd, rs1, rn;} or
128-
* {li rn, 1; mul rd, rn, rs1;}
129-
* reduces to:
130-
* {li rd, rs1}, based on identity property of multiplication
130+
/* Pattern: {li 1; mul x, 1} → {mov x} (multiplicative identity:
131+
* x * 1 = x)
132+
* Example: {li t1, 1; mul result, var, t1} → {mov result, var}
133+
* Handles both operand positions due to multiplication
134+
* commutativity
131135
*/
132136
ph2_ir->op = OP_assign;
133137
ph2_ir->src0 = ph2_ir->dest == next->src0 ? next->src1 : next->src0;
@@ -137,15 +141,113 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
137141
}
138142
}
139143

140-
/* Other instruction fusion should be done here, and for any successful fusion,
141-
* it should return true. This is meant to allow peephole optimization to do
142-
* multiple passes over the IR list to maximize optimization as much as
143-
* possible.
144+
/* Bitwise identity operations */
145+
if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == -1 &&
146+
next->op == OP_bit_and && ph2_ir->dest == next->src1) {
147+
/* Pattern: {li -1; and x, -1} → {mov x} (x & 0xFFFFFFFF = x)
148+
* Example: {li t1, -1; and result, var, t1} → {mov result, var}
149+
* Eliminates bitwise AND with all-ones mask
150+
*/
151+
ph2_ir->op = OP_assign;
152+
ph2_ir->src0 = next->src0;
153+
ph2_ir->dest = next->dest;
154+
ph2_ir->next = next->next;
155+
return true;
156+
}
157+
158+
if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 0 &&
159+
(next->op == OP_lshift || next->op == OP_rshift) &&
160+
ph2_ir->dest == next->src1) {
161+
/* Pattern: {li 0; shl/shr x, 0} → {mov x} (x << 0 = x >> 0 = x)
162+
* Example: {li t1, 0; shl result, var, t1} → {mov result, var}
163+
* Benefit: Eliminates no-op shift operations
164+
*/
165+
ph2_ir->op = OP_assign;
166+
ph2_ir->src0 = next->src0;
167+
ph2_ir->dest = next->dest;
168+
ph2_ir->next = next->next;
169+
return true;
170+
}
171+
172+
if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 0 &&
173+
next->op == OP_bit_or && ph2_ir->dest == next->src1) {
174+
/* Pattern: {li 0; or x, 0} → {mov x} (x | 0 = x)
175+
* Example: {li t1, 0; or result, var, t1} → {mov result, var}
176+
* Benefit: Eliminates bitwise OR with zero (identity element)
177+
*/
178+
ph2_ir->op = OP_assign;
179+
ph2_ir->src0 = next->src0;
180+
ph2_ir->dest = next->dest;
181+
ph2_ir->next = next->next;
182+
return true;
183+
}
184+
185+
/* Power-of-2 multiplication to shift conversion.
186+
* Shift operations are significantly faster than multiplication
144187
*/
188+
if (ph2_ir->op == OP_load_constant && ph2_ir->src0 > 0 &&
189+
next->op == OP_mul && ph2_ir->dest == next->src1) {
190+
int power = ph2_ir->src0;
191+
/* Detect power-of-2 using bit manipulation: (n & (n-1)) == 0 for powers
192+
* of 2
193+
*/
194+
if (power && (power & (power - 1)) == 0) {
195+
/* Calculate log2(power) to determine shift amount */
196+
int shift_amount = 0;
197+
int tmp = power;
198+
while (tmp > 1) {
199+
tmp >>= 1;
200+
shift_amount++;
201+
}
202+
/* Pattern: {li 2^n; mul x, 2^n} → {li n; shl x, n}
203+
* Example: {li t1, 4; mul result, var, t1} →
204+
* {li t1, 2; shl result, var, t1}
205+
* Faster execution on ARM/RISC-V architectures
206+
*/
207+
ph2_ir->op = OP_load_constant;
208+
ph2_ir->src0 = shift_amount;
209+
next->op = OP_lshift;
210+
return true;
211+
}
212+
}
213+
214+
/* XOR identity operation */
215+
if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 0 &&
216+
next->op == OP_bit_xor && ph2_ir->dest == next->src1) {
217+
/* Pattern: {li 0; xor x, 0} → {mov x} (x ^ 0 = x)
218+
* Example: {li t1, 0; xor result, var, t1} → {mov result, var}
219+
* Completes bitwise identity optimization coverage
220+
*/
221+
ph2_ir->op = OP_assign;
222+
ph2_ir->src0 = next->src0;
223+
ph2_ir->dest = next->dest;
224+
ph2_ir->next = next->next;
225+
return true;
226+
}
227+
228+
/* Extended multiplicative identity (operand position variant)
229+
* Handles the case where constant 1 is in src0 position of multiplication
230+
*/
231+
if (ph2_ir->op == OP_load_constant && ph2_ir->src0 == 1 &&
232+
next->op == OP_mul && ph2_ir->dest == next->src0) {
233+
/* Pattern: {li 1; mul 1, x} → {mov x} (1 * x = x)
234+
* Example: {li t1, 1; mul result, t1, var} → {mov result, var}
235+
* Covers multiplication commutativity edge case
236+
*/
237+
ph2_ir->op = OP_assign;
238+
ph2_ir->src0 = next->src1;
239+
ph2_ir->dest = next->dest;
240+
ph2_ir->next = next->next;
241+
return true;
242+
}
145243

146244
return false;
147245
}
148246

247+
/* Main peephole optimization driver.
248+
* It iterates through all functions, basic blocks, and IR instructions to apply
249+
* local optimizations on adjacent instruction pairs.
250+
*/
149251
void peephole(void)
150252
{
151253
for (func_t *func = FUNC_LIST.head; func; func = func->next) {
@@ -154,10 +256,17 @@ void peephole(void)
154256
ph2_ir_t *next = ir->next;
155257
if (!next)
156258
continue;
259+
260+
/* Self-assignment elimination
261+
* Removes trivial assignments where destination equals source
262+
* Pattern: {mov x, x} → eliminated
263+
* Common in compiler-generated intermediate code
264+
*/
157265
if (next->op == OP_assign && next->dest == next->src0) {
158266
ir->next = next->next;
159267
continue;
160268
}
269+
161270
insn_fusion(ir);
162271
}
163272
}

0 commit comments

Comments
 (0)