@@ -31,23 +31,114 @@ bool is_fusible_insn(ph2_ir_t *ph2_ir)
3131 }
3232}
3333
34- void insn_fusion (ph2_ir_t * ph2_ir )
34+ bool insn_fusion (ph2_ir_t * ph2_ir )
3535{
3636 ph2_ir_t * next = ph2_ir -> next ;
3737 if (!next )
38- return ;
38+ return false ;
3939
4040 if (next -> op == OP_assign ) {
41- /* eliminate {ALU rn, rs1, rs2; mv rd, rn;} */
42- if (!is_fusible_insn (ph2_ir ))
43- return ;
44- if (ph2_ir -> dest == next -> src0 ) {
41+ if (is_fusible_insn (ph2_ir ) && ph2_ir -> dest == next -> src0 ) {
42+ /* eliminates:
43+ * {ALU rn, rs1, rs2; mv rd, rn;}
44+ * reduces to:
45+ * {ALU rd, rs1, rs2;}
46+ */
4547 ph2_ir -> dest = next -> dest ;
4648 ph2_ir -> next = next -> next ;
47- return ;
49+ return true ;
4850 }
4951 }
50- /* other insn fusions */
52+
53+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 0 ) {
54+ if (next -> op == OP_add &&
55+ (ph2_ir -> dest == next -> src0 || ph2_ir -> dest == next -> src1 )) {
56+ /* eliminates:
57+ * {li rn, 0; add rd, rs1, rn;} or
58+ * {li rn, 0; add rd, rn, rs1;}
59+ * reduces to:
60+ * {mv rd, rs1;}, based on identity property of addition
61+ */
62+ /* Determine the non-zero source operand */
63+ int non_zero_src =
64+ (ph2_ir -> dest == next -> src0 ) ? next -> src1 : next -> src0 ;
65+
66+ /* Transform instruction sequence from addition with zero to move */
67+ ph2_ir -> op = OP_assign ;
68+ ph2_ir -> src0 = non_zero_src ;
69+ ph2_ir -> dest = next -> dest ;
70+ ph2_ir -> next = next -> next ;
71+ return true;
72+ }
73+
74+ if (next -> op == OP_sub ) {
75+ if (ph2_ir -> dest == next -> src1 ) {
76+ /* eliminates:
77+ * {li rn, 0; sub rd, rs1, rn;}
78+ * reduces to:
79+ * {mv rd, rs1;}
80+ */
81+ ph2_ir -> op = OP_assign ;
82+ ph2_ir -> src0 = next -> src0 ;
83+ ph2_ir -> dest = next -> dest ;
84+ ph2_ir -> next = next -> next ;
85+ return true;
86+ }
87+
88+ if (ph2_ir -> dest == next -> src0 ) {
89+ /* eliminates:
90+ * {li rn, 0; sub rd, rn, rs1;}
91+ * reduces to:
92+ * {negate rd, rs1;}
93+ */
94+ ph2_ir -> op = OP_negate ;
95+ ph2_ir -> src0 = next -> src1 ;
96+ ph2_ir -> dest = next -> dest ;
97+ ph2_ir -> next = next -> next ;
98+ return true;
99+ }
100+ }
101+
102+ if (next -> op == OP_mul &&
103+ (ph2_ir -> dest == next -> src0 || ph2_ir -> dest == next -> src1 )) {
104+ /* eliminates:
105+ * {li rn, 0; mul rd, rs1, rn;} or
106+ * {li rn, 0; mul rd, rn, rs1;}
107+ * reduces to:
108+ * {li rd, 0}, based on zero property of multiplication
109+ */
110+ ph2_ir -> op = OP_load_constant ;
111+ ph2_ir -> src0 = 0 ;
112+ ph2_ir -> dest = next -> dest ;
113+ ph2_ir -> next = next -> next ;
114+ return true;
115+ }
116+ }
117+
118+ if (ph2_ir -> op == OP_load_constant && ph2_ir -> src0 == 1 ) {
119+ if (next -> op == OP_mul &&
120+ (ph2_ir -> dest == next -> src0 || ph2_ir -> dest == next -> src1 )) {
121+ /* eliminates:
122+ * {li rn, 1; mul rd, rs1, rn;} or
123+ * {li rn, 1; mul rd, rn, rs1;}
124+ * reduces to:
125+ * {mv rd, rs1;}, based on identity property of multiplication
126+ */
127+ ph2_ir -> op = OP_assign ;
128+ ph2_ir -> src0 = ph2_ir -> dest == next -> src0 ? next -> src1 : next -> src0 ;
129+ ph2_ir -> dest = next -> dest ;
130+ ph2_ir -> next = next -> next ;
131+ return true;
132+ }
133+ }
134+
135+ /* Other instruction fusions should be added here; every successful fusion
136+ * must return true. This is meant to allow the peephole optimization to make
137+ * multiple passes over the IR list to maximize optimization as much as
138+ * possible.
139+ */
140+
141+ return false;
51142}
52143
53144/* FIXME: release detached basic blocks */
0 commit comments