|
8 | 8 | // |
9 | 9 | // This pass performs the following peephole optimizations at the MIR level. |
10 | 10 | // |
11 | | -// 1. MOVi32imm + ANDWrr ==> ANDWri + ANDWri |
12 | | -// MOVi64imm + ANDXrr ==> ANDXri + ANDXri |
| 11 | +// 1. MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri |
| 12 | +// MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri |
13 | 13 | // |
14 | 14 | // 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri |
15 | 15 | // MOVi64imm + ADDXrr ==> ADDXri + ADDXri |
@@ -126,7 +126,7 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass { |
126 | 126 | bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI); |
127 | 127 |
|
128 | 128 | template <typename T> |
129 | | - bool visitAND(unsigned Opc, MachineInstr &MI); |
| 129 | + bool visitAND(unsigned Opc, MachineInstr &MI, unsigned OtherOpc = 0); |
130 | 130 | bool visitORR(MachineInstr &MI); |
131 | 131 | bool visitCSEL(MachineInstr &MI); |
132 | 132 | bool visitINSERT(MachineInstr &MI); |
@@ -194,24 +194,24 @@ static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) { |
194 | 194 | } |
195 | 195 |
|
196 | 196 | template <typename T> |
197 | | -bool AArch64MIPeepholeOpt::visitAND( |
198 | | - unsigned Opc, MachineInstr &MI) { |
| 197 | +bool AArch64MIPeepholeOpt::visitAND(unsigned Opc, MachineInstr &MI, |
| 198 | + unsigned OtherOpc) { |
199 | 199 | // Try below transformation. |
200 | 200 | // |
201 | | - // MOVi32imm + ANDWrr ==> ANDWri + ANDWri |
202 | | - // MOVi64imm + ANDXrr ==> ANDXri + ANDXri |
| 201 | + // MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri |
| 202 | + // MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri |
203 | 203 | // |
204 | 204 | // The mov pseudo instruction could be expanded to multiple mov instructions |
205 | 205 | // later. Let's try to split the constant operand of mov instruction into two |
206 | 206 | // bitmask immediates. It makes only two AND instructions instead of multiple |
207 | | - // mov + and instructions. |
| 207 | + // mov + AND instructions. |
208 | 208 |
|
209 | 209 | return splitTwoPartImm<T>( |
210 | 210 | MI, |
211 | | - [Opc](T Imm, unsigned RegSize, T &Imm0, |
212 | | - T &Imm1) -> std::optional<OpcodePair> { |
| 211 | + [Opc, OtherOpc](T Imm, unsigned RegSize, T &Imm0, |
| 212 | + T &Imm1) -> std::optional<OpcodePair> { |
213 | 213 | if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1)) |
214 | | - return std::make_pair(Opc, Opc); |
| 214 | + return std::make_pair(Opc, !OtherOpc ? Opc : OtherOpc); |
215 | 215 | return std::nullopt; |
216 | 216 | }, |
217 | 217 | [&TII = TII](MachineInstr &MI, OpcodePair Opcode, unsigned Imm0, |
@@ -864,6 +864,12 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) { |
864 | 864 | case AArch64::ANDXrr: |
865 | 865 | Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI); |
866 | 866 | break; |
| 867 | + case AArch64::ANDSWrr: |
| 868 | + Changed |= visitAND<uint32_t>(AArch64::ANDWri, MI, AArch64::ANDSWri); |
| 869 | + break; |
| 870 | + case AArch64::ANDSXrr: |
| 871 | + Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI, AArch64::ANDSXri); |
| 872 | + break; |
867 | 873 | case AArch64::ORRWrs: |
868 | 874 | Changed |= visitORR(MI); |
869 | 875 | break; |
|
0 commit comments