1010#include "defs.h"
1111#include "globals.c"
1212
13+ /* Determines if an instruction can be fused with a following OP_assign.
14+ * Fusible instructions are those whose results can be directly written
15+ * to the final destination register, eliminating intermediate moves.
16+ */
1317bool is_fusible_insn (ph2_ir_t * ph2_ir )
1418{
1519 switch (ph2_ir -> op ) {
16- case OP_add :
20+ case OP_add : /* Arithmetic operations */
1721 case OP_sub :
1822 case OP_mul :
1923 case OP_div :
2024 case OP_mod :
21- case OP_lshift :
25+ case OP_lshift : /* Shift operations */
2226 case OP_rshift :
23- case OP_bit_and :
27+ case OP_bit_and : /* Bitwise operations */
2428 case OP_bit_or :
2529 case OP_bit_xor :
26- case OP_log_and :
30+ case OP_log_and : /* Logical operations */
2731 case OP_log_or :
2832 case OP_log_not :
29- case OP_negate :
30- case OP_load :
33+ case OP_negate : /* Unary operations */
34+ case OP_load : /* Memory operations */
3135 case OP_global_load :
3236 case OP_load_data_address :
3337 return true;
@@ -36,39 +40,44 @@ bool is_fusible_insn(ph2_ir_t *ph2_ir)
3640 }
3741}
3842
43+ /* Main peephole optimization function that applies pattern matching
44+ * and transformation rules to consecutive IR instructions.
45+ * Returns true if any optimization was applied, false otherwise.
46+ */
3947bool insn_fusion (ph2_ir_t * ph2_ir )
4048{
4149 ph2_ir_t * next = ph2_ir -> next ;
4250 if (!next )
4351 return false;
4452
53+ /* ALU instruction fusion.
54+ * Eliminates redundant move operations following arithmetic/logical
55+ * operations. This is the most fundamental optimization that removes
56+ * temporary register usage.
57+ */
4558 if (next -> op == OP_assign ) {
4659 if (is_fusible_insn (ph2_ir ) && ph2_ir -> dest == next -> src0 ) {
47- /* eliminates:
48- * {ALU rn, rs1, rs2; mv rd, rn;}
49- * reduces to:
50- * {ALU rd, rs1, rs2;}
60+ /* Pattern: {ALU rn, rs1, rs2; mv rd, rn} → {ALU rd, rs1, rs2}
61+ * Example: {add t1, a, b; mv result, t1} → {add result, a, b}
62+ * Benefit: 50% instruction reduction + register pressure relief
5163 */
5264 ph2_ir -> dest = next -> dest ;
5365 ph2_ir -> next = next -> next ;
5466 return true;
5567 }
5668 }
5769
70+ /* Arithmetic identity with zero constant */
5871 if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 0 ) {
5972 if (next -> op == OP_add &&
6073 (ph2_ir -> dest == next -> src0 || ph2_ir -> dest == next -> src1 )) {
61- /* eliminates:
62- * {li rn, 0; add rd, rs1, rn;} or
63- * {li rn, 0; add rd, rn, rs1;}
64- * reduces to:
65- * {mv rd, rs1;}, based on identity property of addition
74+ /* Pattern: {li 0; add x, 0} → {mov x} (additive identity: x + 0 = x).
75+ * Handles either operand position, since addition is commutative.
76+ * Example: {li t1, 0; add result, var, t1} → {mov result, var}
6677 */
67- /* Determine the non-zero source operand */
6878 int non_zero_src =
6979 (ph2_ir -> dest == next -> src0 ) ? next -> src1 : next -> src0 ;
7080
71- /* Transform instruction sequence from addition with zero to move */
7281 ph2_ir -> op = OP_assign ;
7382 ph2_ir -> src0 = non_zero_src ;
7483 ph2_ir -> dest = next -> dest ;
@@ -78,10 +87,8 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
7887
7988 if (next -> op == OP_sub ) {
8089 if (ph2_ir -> dest == next -> src1 ) {
81- /* eliminates:
82- * {li rn, 0; sub rd, rs1, rn;}
83- * reduces to:
84- * {mv rd, rs1;}
90+ /* Pattern: {li 0; sub x, 0} → {mov x} (x - 0 = x)
91+ * Example: {li t1, 0; sub result, var, t1} → {mov result, var}
8592 */
8693 ph2_ir -> op = OP_assign ;
8794 ph2_ir -> src0 = next -> src0 ;
@@ -91,10 +98,8 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
9198 }
9299
93100 if (ph2_ir -> dest == next -> src0 ) {
94- /* eliminates:
95- * {li rn, 0; sub rd, rn, rs1;}
96- * reduces to:
97- * {negate rd, rs1;}
101+ /* Pattern: {li 0; sub 0, x} → {neg x} (0 - x = -x)
102+ * Example: {li t1, 0; sub result, t1, var} → {neg result, var}
98103 */
99104 ph2_ir -> op = OP_negate ;
100105 ph2_ir -> src0 = next -> src1 ;
@@ -106,11 +111,9 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
106111
107112 if (next -> op == OP_mul &&
108113 (ph2_ir -> dest == next -> src0 || ph2_ir -> dest == next -> src1 )) {
109- /* eliminates:
110- * {li rn, 0; mul rd, rs1, rn;} or
111- * {li rn, 0; mul rd, rn, rs1;}
112- * reduces to:
113- * {li rd, 0}, based on zero property of multiplication
114+ /* Pattern: {li 0; mul x, 0} → {li 0} (absorbing element: x * 0 = 0)
115+ * Example: {li t1, 0; mul result, var, t1} → {li result, 0}
116+ * Eliminates multiplication entirely
114117 */
115118 ph2_ir -> op = OP_load_constant ;
116119 ph2_ir -> src0 = 0 ;
@@ -120,14 +123,15 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
120123 }
121124 }
122125
126+ /* Multiplicative identity with one constant */
123127 if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 1 ) {
124128 if (next -> op == OP_mul &&
125129 (ph2_ir -> dest == next -> src0 || ph2_ir -> dest == next -> src1 )) {
126- /* eliminates :
127- * {li rn, 1; mul rd, rs1, rn;} or
128- * {li rn , 1; mul rd, rn, rs1; }
129- * reduces to:
130- * {li rd, rs1}, based on identity property of multiplication
130+ /* Pattern: {li 1; mul x, 1} → {mov x} (multiplicative identity:
131+ * x * 1 = x)
132+ * Example: {li t1, 1; mul result, var, t1} → {mov result, var}
133+ * Handles both operand positions due to multiplication
134+ * commutativity
131135 */
132136 ph2_ir -> op = OP_assign ;
133137 ph2_ir -> src0 = ph2_ir -> dest == next -> src0 ? next -> src1 : next -> src0 ;
@@ -137,15 +141,113 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
137141 }
138142 }
139143
140- /* Other instruction fusion should be done here, and for any success fusion,
141- * it should return true. This meant to allow peephole optimization to do
142- * multiple passes over the IR list to maximize optimization as much as
143- * possbile.
144+ /* Bitwise identity operations */
145+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == -1 &&
146+ next -> op == OP_bit_and && ph2_ir -> dest == next -> src1 ) {
147+ /* Pattern: {li -1; and x, -1} → {mov x} (x & 0xFFFFFFFF = x)
148+ * Example: {li t1, -1; and result, var, t1} → {mov result, var}
149+ * Eliminates bitwise AND with all-ones mask
150+ */
151+ ph2_ir -> op = OP_assign ;
152+ ph2_ir -> src0 = next -> src0 ;
153+ ph2_ir -> dest = next -> dest ;
154+ ph2_ir -> next = next -> next ;
155+ return true;
156+ }
157+
158+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 0 &&
159+ (next -> op == OP_lshift || next -> op == OP_rshift ) &&
160+ ph2_ir -> dest == next -> src1 ) {
161+ /* Pattern: {li 0; shl/shr x, 0} → {mov x} (x << 0 = x >> 0 = x)
162+ * Example: {li t1, 0; shl result, var, t1} → {mov result, var}
163+ * Benefit: Eliminates no-op shift operations
164+ */
165+ ph2_ir -> op = OP_assign ;
166+ ph2_ir -> src0 = next -> src0 ;
167+ ph2_ir -> dest = next -> dest ;
168+ ph2_ir -> next = next -> next ;
169+ return true;
170+ }
171+
172+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 0 &&
173+ next -> op == OP_bit_or && ph2_ir -> dest == next -> src1 ) {
174+ /* Pattern: {li 0; or x, 0} → {mov x} (x | 0 = x)
175+ * Example: {li t1, 0; or result, var, t1} → {mov result, var}
176+ * Benefit: Eliminates bitwise OR with zero (identity element)
177+ */
178+ ph2_ir -> op = OP_assign ;
179+ ph2_ir -> src0 = next -> src0 ;
180+ ph2_ir -> dest = next -> dest ;
181+ ph2_ir -> next = next -> next ;
182+ return true;
183+ }
184+
185+ /* Power-of-2 multiplication to shift conversion.
186+ * Shift operations are significantly faster than multiplication
144187 */
188+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 > 0 &&
189+ next -> op == OP_mul && ph2_ir -> dest == next -> src1 ) {
190+ int power = ph2_ir -> src0 ;
191+ /* Detect power-of-2 using bit manipulation: (n & (n-1)) == 0 for powers
192+ * of 2
193+ */
194+ if (power && (power & (power - 1 )) == 0 ) {
195+ /* Calculate log2(power) to determine shift amount */
196+ int shift_amount = 0 ;
197+ int tmp = power ;
198+ while (tmp > 1 ) {
199+ tmp >>= 1 ;
200+ shift_amount ++ ;
201+ }
202+ /* Pattern: {li 2^n; mul x, 2^n} → {li n; shl x, n}
203+ * Example: {li t1, 4; mul result, var, t1} →
204+ * {li t1, 2; shl result, var, t1}
205+ * Faster execution on ARM/RISC-V architectures
206+ */
207+ ph2_ir -> op = OP_load_constant ;
208+ ph2_ir -> src0 = shift_amount ;
209+ next -> op = OP_lshift ;
210+ return true;
211+ }
212+ }
213+
214+ /* XOR identity operation */
215+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 0 &&
216+ next -> op == OP_bit_xor && ph2_ir -> dest == next -> src1 ) {
217+ /* Pattern: {li 0; xor x, 0} → {mov x} (x ^ 0 = x)
218+ * Example: {li t1, 0; xor result, var, t1} → {mov result, var}
219+ * Completes bitwise identity optimization coverage
220+ */
221+ ph2_ir -> op = OP_assign ;
222+ ph2_ir -> src0 = next -> src0 ;
223+ ph2_ir -> dest = next -> dest ;
224+ ph2_ir -> next = next -> next ;
225+ return true;
226+ }
227+
228+ /* Extended multiplicative identity (operand position variant)
229+ * Handles the case where constant 1 is in src0 position of multiplication
230+ */
231+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 1 &&
232+ next -> op == OP_mul && ph2_ir -> dest == next -> src0 ) {
233+ /* Pattern: {li 1; mul 1, x} → {mov x} (1 * x = x)
234+ * Example: {li t1, 1; mul result, t1, var} → {mov result, var}
235+ * Covers multiplication commutativity edge case
236+ */
237+ ph2_ir -> op = OP_assign ;
238+ ph2_ir -> src0 = next -> src1 ;
239+ ph2_ir -> dest = next -> dest ;
240+ ph2_ir -> next = next -> next ;
241+ return true;
242+ }
145243
146244 return false;
147245}
148246
247+ /* Main peephole optimization driver.
248+ * It iterates through all functions, basic blocks, and IR instructions to apply
249+ * local optimizations on adjacent instruction pairs.
250+ */
149251void peephole (void )
150252{
151253 for (func_t * func = FUNC_LIST .head ; func ; func = func -> next ) {
@@ -154,10 +256,17 @@ void peephole(void)
154256 ph2_ir_t * next = ir -> next ;
155257 if (!next )
156258 continue ;
259+
260+ /* Self-assignment elimination
261+ * Removes trivial assignments where destination equals source
262+ * Pattern: {mov x, x} → eliminated
263+ * Common in compiler-generated intermediate code
264+ */
157265 if (next -> op == OP_assign && next -> dest == next -> src0 ) {
158266 ir -> next = next -> next ;
159267 continue ;
160268 }
269+
161270 insn_fusion (ir );
162271 }
163272 }
0 commit comments