1010#include "defs.h"
1111#include "globals.c"
1212
13+ /* Determines if an instruction can be fused with a following OP_assign.
14+ * Fusible instructions are those whose results can be directly written
15+ * to the final destination register, eliminating intermediate moves.
16+ */
1317bool is_fusible_insn (ph2_ir_t * ph2_ir )
1418{
1519 switch (ph2_ir -> op ) {
16- case OP_add :
20+ case OP_add : /* Arithmetic operations */
1721 case OP_sub :
1822 case OP_mul :
1923 case OP_div :
2024 case OP_mod :
21- case OP_lshift :
25+ case OP_lshift : /* Shift operations */
2226 case OP_rshift :
23- case OP_bit_and :
27+ case OP_bit_and : /* Bitwise operations */
2428 case OP_bit_or :
2529 case OP_bit_xor :
26- case OP_log_and :
30+ case OP_log_and : /* Logical operations */
2731 case OP_log_or :
2832 case OP_log_not :
29- case OP_negate :
30- case OP_load :
33+ case OP_negate : /* Unary operations */
34+ case OP_load : /* Memory operations */
3135 case OP_global_load :
3236 case OP_load_data_address :
3337 return true;
@@ -36,39 +40,43 @@ bool is_fusible_insn(ph2_ir_t *ph2_ir)
3640 }
3741}
3842
43+ /* Main peephole optimization function that applies pattern matching
44+ * and transformation rules to consecutive IR instructions.
45+ * Returns true if any optimization was applied, false otherwise.
46+ */
3947bool insn_fusion (ph2_ir_t * ph2_ir )
4048{
4149 ph2_ir_t * next = ph2_ir -> next ;
4250 if (!next )
4351 return false;
4452
53+ /* ALU instruction fusion.
54+ * Eliminates redundant move operations following arithmetic/logical
55+ * operations. This is the most fundamental optimization that removes
56+ * temporary register usage.
57+ */
4558 if (next -> op == OP_assign ) {
4659 if (is_fusible_insn (ph2_ir ) && ph2_ir -> dest == next -> src0 ) {
47- /* eliminates:
48- * {ALU rn, rs1, rs2; mv rd, rn;}
49- * reduces to:
50- * {ALU rd, rs1, rs2;}
60+ /* Pattern: {ALU rn, rs1, rs2; mv rd, rn} → {ALU rd, rs1, rs2}
61+ * Example: {add t1, a, b; mv result, t1} → {add result, a, b}
5162 */
5263 ph2_ir -> dest = next -> dest ;
5364 ph2_ir -> next = next -> next ;
5465 return true;
5566 }
5667 }
5768
69+ /* Arithmetic identity with zero constant */
5870 if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 0 ) {
5971 if (next -> op == OP_add &&
6072 (ph2_ir -> dest == next -> src0 || ph2_ir -> dest == next -> src1 )) {
61- /* eliminates:
62- * {li rn, 0; add rd, rs1, rn;} or
63- * {li rn, 0; add rd, rn, rs1;}
64- * reduces to:
65- * {mv rd, rs1;}, based on identity property of addition
73+ /* Pattern: {li 0; add x, 0} → {mov x} (additive identity: x+0 = x)
74+ * Handles both operand positions due to addition commutativity
75+ * Example: {li t1, 0; add result, var, t1} → {mov result, var}
6676 */
67- /* Determine the non-zero source operand */
6877 int non_zero_src =
6978 (ph2_ir -> dest == next -> src0 ) ? next -> src1 : next -> src0 ;
7079
71- /* Transform instruction sequence from addition with zero to move */
7280 ph2_ir -> op = OP_assign ;
7381 ph2_ir -> src0 = non_zero_src ;
7482 ph2_ir -> dest = next -> dest ;
@@ -78,10 +86,8 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
7886
7987 if (next -> op == OP_sub ) {
8088 if (ph2_ir -> dest == next -> src1 ) {
81- /* eliminates:
82- * {li rn, 0; sub rd, rs1, rn;}
83- * reduces to:
84- * {mv rd, rs1;}
89+ /* Pattern: {li 0; sub x, 0} → {mov x} (x - 0 = x)
90+ * Example: {li t1, 0; sub result, var, t1} → {mov result, var}
8591 */
8692 ph2_ir -> op = OP_assign ;
8793 ph2_ir -> src0 = next -> src0 ;
@@ -91,10 +97,8 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
9197 }
9298
9399 if (ph2_ir -> dest == next -> src0 ) {
94- /* eliminates:
95- * {li rn, 0; sub rd, rn, rs1;}
96- * reduces to:
97- * {negate rd, rs1;}
100+ /* Pattern: {li 0; sub 0, x} → {neg x} (0 - x = -x)
101+ * Example: {li t1, 0; sub result, t1, var} → {neg result, var}
98102 */
99103 ph2_ir -> op = OP_negate ;
100104 ph2_ir -> src0 = next -> src1 ;
@@ -106,11 +110,9 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
106110
107111 if (next -> op == OP_mul &&
108112 (ph2_ir -> dest == next -> src0 || ph2_ir -> dest == next -> src1 )) {
109- /* eliminates:
110- * {li rn, 0; mul rd, rs1, rn;} or
111- * {li rn, 0; mul rd, rn, rs1;}
112- * reduces to:
113- * {li rd, 0}, based on zero property of multiplication
113+ /* Pattern: {li 0; mul x, 0} → {li 0} (absorbing element: x * 0 = 0)
114+ * Example: {li t1, 0; mul result, var, t1} → {li result, 0}
115+ * Eliminates multiplication entirely
114116 */
115117 ph2_ir -> op = OP_load_constant ;
116118 ph2_ir -> src0 = 0 ;
@@ -120,14 +122,15 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
120122 }
121123 }
122124
125+ /* Multiplicative identity with one constant */
123126 if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 1 ) {
124127 if (next -> op == OP_mul &&
125128 (ph2_ir -> dest == next -> src0 || ph2_ir -> dest == next -> src1 )) {
126- /* eliminates:
127- * {li rn, 1; mul rd, rs1, rn;} or
128- * {li rn, 1; mul rd, rn, rs1;}
129- * reduces to:
130- * {mv rd, rs1;}, based on identity property of multiplication
129+ /* Pattern: {li 1; mul x, 1} → {mov x} (multiplicative identity:
130+ * x * 1 = x)
131+ * Example: {li t1, 1; mul result, var, t1} → {mov result, var}
132+ * Handles both operand positions due to multiplication
133+ * commutativity
131134 */
132135 ph2_ir -> op = OP_assign ;
133136 ph2_ir -> src0 = ph2_ir -> dest == next -> src0 ? next -> src1 : next -> src0 ;
@@ -137,15 +140,112 @@ bool insn_fusion(ph2_ir_t *ph2_ir)
137140 }
138141 }
139142
140- /* Other instruction fusion should be done here, and for any successful
141- * fusion, it should return true. This is meant to allow peephole optimization
142- * to do multiple passes over the IR list to maximize optimization as much as
143- * possible.
143+ /* Bitwise identity operations */
144+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == -1 &&
145+ next -> op == OP_bit_and && ph2_ir -> dest == next -> src1 ) {
146+ /* Pattern: {li -1; and x, -1} → {mov x} (x & 0xFFFFFFFF = x)
147+ * Example: {li t1, -1; and result, var, t1} → {mov result, var}
148+ * Eliminates bitwise AND with all-ones mask
149+ */
150+ ph2_ir -> op = OP_assign ;
151+ ph2_ir -> src0 = next -> src0 ;
152+ ph2_ir -> dest = next -> dest ;
153+ ph2_ir -> next = next -> next ;
154+ return true;
155+ }
156+
157+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 0 &&
158+ (next -> op == OP_lshift || next -> op == OP_rshift ) &&
159+ ph2_ir -> dest == next -> src1 ) {
160+ /* Pattern: {li 0; shl/shr x, 0} → {mov x} (x << 0 = x >> 0 = x)
161+ * Example: {li t1, 0; shl result, var, t1} → {mov result, var}
162+ * Eliminates no-op shift operations
163+ */
164+ ph2_ir -> op = OP_assign ;
165+ ph2_ir -> src0 = next -> src0 ;
166+ ph2_ir -> dest = next -> dest ;
167+ ph2_ir -> next = next -> next ;
168+ return true;
169+ }
170+
171+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 0 &&
172+ next -> op == OP_bit_or && ph2_ir -> dest == next -> src1 ) {
173+ /* Pattern: {li 0; or x, 0} → {mov x} (x | 0 = x)
174+ * Example: {li t1, 0; or result, var, t1} → {mov result, var}
175+ * Eliminates bitwise OR with zero (identity element)
176+ */
177+ ph2_ir -> op = OP_assign ;
178+ ph2_ir -> src0 = next -> src0 ;
179+ ph2_ir -> dest = next -> dest ;
180+ ph2_ir -> next = next -> next ;
181+ return true;
182+ }
183+
184+ /* Power-of-2 multiplication to shift conversion.
185+ * Shift operations are significantly faster than multiplication
144186 */
187+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 > 0 &&
188+ next -> op == OP_mul && ph2_ir -> dest == next -> src1 ) {
189+ int power = ph2_ir -> src0 ;
190+ /* Detect power-of-2 using bit manipulation: (n & (n-1)) == 0 for powers
191+ * of 2
192+ */
193+ if (power && (power & (power - 1 )) == 0 ) {
194+ /* Calculate log2(power) to determine shift amount */
195+ int shift_amount = 0 ;
196+ int tmp = power ;
197+ while (tmp > 1 ) {
198+ tmp >>= 1 ;
199+ shift_amount ++ ;
200+ }
201+ /* Pattern: {li 2^n; mul x, 2^n} → {li n; shl x, n}
202+ * Example: {li t1, 4; mul result, var, t1} →
203+ * {li t1, 2; shl result, var, t1}
204+ */
205+ ph2_ir -> op = OP_load_constant ;
206+ ph2_ir -> src0 = shift_amount ;
207+ next -> op = OP_lshift ;
208+ return true;
209+ }
210+ }
211+
212+ /* XOR identity operation */
213+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 0 &&
214+ next -> op == OP_bit_xor && ph2_ir -> dest == next -> src1 ) {
215+ /* Pattern: {li 0; xor x, 0} → {mov x} (x ^ 0 = x)
216+ * Example: {li t1, 0; xor result, var, t1} → {mov result, var}
217+ * Completes bitwise identity optimization coverage
218+ */
219+ ph2_ir -> op = OP_assign ;
220+ ph2_ir -> src0 = next -> src0 ;
221+ ph2_ir -> dest = next -> dest ;
222+ ph2_ir -> next = next -> next ;
223+ return true;
224+ }
225+
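226+ /* Extended multiplicative identity (operand position variant).
227+ * Handles constant 1 in the src0 position of the multiplication. NOTE(review): the earlier {li 1; mul} check also tests next->src0, so this branch may be redundant; confirm.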
228+ */
229+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 1 &&
230+ next -> op == OP_mul && ph2_ir -> dest == next -> src0 ) {
231+ /* Pattern: {li 1; mul 1, x} → {mov x} (1 * x = x)
232+ * Example: {li t1, 1; mul result, t1, var} → {mov result, var}
233+ * Covers multiplication commutativity edge case
234+ */
235+ ph2_ir -> op = OP_assign ;
236+ ph2_ir -> src0 = next -> src1 ;
237+ ph2_ir -> dest = next -> dest ;
238+ ph2_ir -> next = next -> next ;
239+ return true;
240+ }
145241
146242 return false;
147243}
148244
245+ /* Main peephole optimization driver.
246+ * It iterates through all functions, basic blocks, and IR instructions to apply
247+ * local optimizations on adjacent instruction pairs.
248+ */
149249void peephole (void )
150250{
151251 for (func_t * func = FUNC_LIST .head ; func ; func = func -> next ) {
@@ -154,10 +254,17 @@ void peephole(void)
154254 ph2_ir_t * next = ir -> next ;
155255 if (!next )
156256 continue ;
257+
258+ /* Self-assignment elimination
259+ * Removes trivial assignments where destination equals source
260+ * Pattern: {mov x, x} → eliminated
261+ * Common in compiler-generated intermediate code
262+ */
157263 if (next -> op == OP_assign && next -> dest == next -> src0 ) {
158264 ir -> next = next -> next ;
159265 continue ;
160266 }
267+
161268 insn_fusion (ir );
162269 }
163270 }
0 commit comments