@@ -31,23 +31,109 @@ bool is_fusible_insn(ph2_ir_t *ph2_ir)
3131 }
3232}
3333
34- void insn_fusion (ph2_ir_t * ph2_ir )
34+ bool insn_fusion (ph2_ir_t * ph2_ir )
3535{
3636 ph2_ir_t * next = ph2_ir -> next ;
3737 if (!next )
38- return ;
38+ return false ;
3939
4040 if (next -> op == OP_assign ) {
41- /* eliminate {ALU rn, rs1, rs2; mv rd, rn;} */
42- if (!is_fusible_insn (ph2_ir ))
43- return ;
44- if (ph2_ir -> dest == next -> src0 ) {
41+ if (is_fusible_insn (ph2_ir ) && ph2_ir -> dest == next -> src0 ) {
42+ /* eliminates:
43+ * {ALU rn, rs1, rs2; mv rd, rn;}
44+ * reduces to:
45+ * {ALU rd, rs1, rs2;}
46+ */
4547 ph2_ir -> dest = next -> dest ;
4648 ph2_ir -> next = next -> next ;
47- return ;
49+ return true ;
4850 }
4951 }
50- /* other insn fusions */
52+
53+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 0 ) {
54+ if (next -> op == OP_add &&
55+ (ph2_ir -> dest == next -> src0 || ph2_ir -> dest == next -> src1 )) {
56+ /* eliminates:
57+ * {li rn, 0; add rd, rs1, rn;} or
58+ * {li rn, 0; add rd, rn, rs1;}
59+ * reduces to:
60+ * {mv rd, rs1;}, based on identity property of addition
61+ */
62+ ph2_ir -> op = OP_assign ;
63+ ph2_ir -> src0 = ph2_ir -> dest == next -> src0 ? next -> src1 : next -> src0 ;
64+ ph2_ir -> dest = next -> dest ;
65+ ph2_ir -> next = next -> next ;
66+ return true;
67+ }
68+
69+ if (next -> op == OP_sub ) {
70+ if (ph2_ir -> dest == next -> src1 ) {
71+ /* eliminates:
72+ * {li rn, 0; sub rd, rs1, rn;}
73+ * reduces to:
74+ * {mv rd, rs1;}
75+ */
76+ ph2_ir -> op = OP_assign ;
77+ ph2_ir -> src0 = next -> src0 ;
78+ ph2_ir -> dest = next -> dest ;
79+ ph2_ir -> next = next -> next ;
80+ return true;
81+ }
82+
83+ if (ph2_ir -> dest == next -> src0 ) {
84+ /* eliminates:
85+ * {li rn, 0; sub rd, rn, rs1;}
86+ * reduces to:
87+ * {negate rd, rs1;}
88+ */
89+ ph2_ir -> op = OP_negate ;
90+ ph2_ir -> src0 = next -> src1 ;
91+ ph2_ir -> dest = next -> dest ;
92+ ph2_ir -> next = next -> next ;
93+ return true;
94+ }
95+ }
96+
97+ if (next -> op == OP_mul &&
98+ (ph2_ir -> dest == next -> src0 || ph2_ir -> dest == next -> src1 )) {
99+ /* eliminates:
100+ * {li rn, 0; mul rd, rs1, rn;} or
101+ * {li rn, 0; mul rd, rn, rs1;}
102+ * reduces to:
103+ * {li rd, 0}, based on zero property of multiplication
104+ */
105+ ph2_ir -> op = OP_load_constant ;
106+ ph2_ir -> src0 = 0 ;
107+ ph2_ir -> dest = next -> dest ;
108+ ph2_ir -> next = next -> next ;
109+ return true;
110+ }
111+ }
112+
113+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 1 ) {
114+ if (next -> op == OP_mul &&
115+ (ph2_ir -> dest == next -> src0 || ph2_ir -> dest == next -> src1 )) {
116+ /* eliminates:
117+ * {li rn, 1; mul rd, rs1, rn;} or
118+ * {li rn, 1; mul rd, rn, rs1;}
119+ * reduces to:
119+ * {mv rd, rs1;}, based on identity property of multiplication
121+ */
122+ ph2_ir -> op = OP_assign ;
123+ ph2_ir -> src0 = ph2_ir -> dest == next -> src0 ? next -> src1 : next -> src0 ;
124+ ph2_ir -> dest = next -> dest ;
125+ ph2_ir -> next = next -> next ;
126+ return true;
127+ }
128+ }
129+
130+ /* Other instruction fusions should be done here, and any successful
131+ * fusion should return true. This is meant to allow peephole optimization
132+ * to make multiple passes over the IR list to maximize optimization as
133+ * much as possible.
134+ */
135+
136+ return false;
51137}
52138
53139/* FIXME: release detached basic blocks */
@@ -63,6 +149,7 @@ void peephole()
63149 ir -> next = next -> next ;
64150 continue ;
65151 }
152+
66153 insn_fusion (ir );
67154 }
68155 }
0 commit comments