|
8 | 8 | //
|
9 | 9 | // This pass performs below peephole optimizations on MIR level.
|
10 | 10 | //
|
11 |
| -// 1. MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri |
12 |
| -// MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri |
| 11 | +// 1. MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri |
| 12 | +// MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri |
13 | 13 | //
|
14 | 14 | // 2. MOVi32imm + ADDWrr ==> ADDWri + ADDWri
|
15 |
| -// MOVi64imm + ADDXrr ==> ANDXri + ANDXri |
| 15 | +// MOVi64imm + ADDXrr ==> ADDXri + ADDXri |
16 | 16 | //
|
17 | 17 | // 3. MOVi32imm + SUBWrr ==> SUBWri + SUBWri
|
18 | 18 | // MOVi64imm + SUBXrr ==> SUBXri + SUBXri
|
@@ -125,8 +125,14 @@ struct AArch64MIPeepholeOpt : public MachineFunctionPass {
|
125 | 125 | template <typename T>
|
126 | 126 | bool visitADDSSUBS(OpcodePair PosOpcs, OpcodePair NegOpcs, MachineInstr &MI);
|
127 | 127 |
|
| 128 | + // Strategy used by trySplitLogicalImm to split a logical immediate bitmask: Intersect selects splitBitmaskImm (passed for the AND/ANDS opcodes), Disjoint selects splitDisjointBitmaskImm (passed for the EOR/ORR opcodes). |
| 129 | + enum class SplitStrategy { |
| 130 | + Intersect, |
| 131 | + Disjoint, |
| 132 | + }; |
128 | 133 | template <typename T>
|
129 |
| - bool visitAND(unsigned Opc, MachineInstr &MI, unsigned OtherOpc = 0); |
| 134 | + bool trySplitLogicalImm(unsigned Opc, MachineInstr &MI, |
| 135 | + SplitStrategy Strategy, unsigned OtherOpc = 0); |
130 | 136 | bool visitORR(MachineInstr &MI);
|
131 | 137 | bool visitCSEL(MachineInstr &MI);
|
132 | 138 | bool visitINSERT(MachineInstr &MI);
|
@@ -158,14 +164,6 @@ INITIALIZE_PASS(AArch64MIPeepholeOpt, "aarch64-mi-peephole-opt",
|
158 | 164 | template <typename T>
|
159 | 165 | static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
|
160 | 166 | T UImm = static_cast<T>(Imm);
|
161 |
| - if (AArch64_AM::isLogicalImmediate(UImm, RegSize)) |
162 |
| - return false; |
163 |
| - |
164 |
| - // If this immediate can be handled by one instruction, do not split it. |
165 |
| - SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn; |
166 |
| - AArch64_IMM::expandMOVImm(UImm, RegSize, Insn); |
167 |
| - if (Insn.size() == 1) |
168 |
| - return false; |
169 | 167 |
|
170 | 168 | // The bitmask immediate consists of consecutive ones. Let's say there is
|
171 | 169 | // constant 0b00000000001000000000010000000000 which does not consist of
|
@@ -194,23 +192,72 @@ static bool splitBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, T &Imm2Enc) {
|
194 | 192 | }
|
195 | 193 |
|
196 | 194 | template <typename T>
|
197 |
| -bool AArch64MIPeepholeOpt::visitAND(unsigned Opc, MachineInstr &MI, |
198 |
| - unsigned OtherOpc) { |
199 |
| - // Try below transformation. |
| 195 | +static bool splitDisjointBitmaskImm(T Imm, unsigned RegSize, T &Imm1Enc, |
| 196 | + T &Imm2Enc) { |
| 197 | + // Try to split a bitmask of the form 0b00000000011000000000011110000000 into |
| 198 | + // two disjoint masks such as 0b00000000011000000000000000000000 and |
| 199 | + // 0b00000000000000000000011110000000, so that both the inclusive and the |
| 200 | + // exclusive OR of the new masks equal the original mask. On success the encoded masks are written to Imm1Enc/Imm2Enc and true is returned; otherwise false is returned and the outputs are left untouched. |
| 201 | + unsigned LowestBitSet = llvm::countr_zero(Imm); |
| 202 | + unsigned LowestGapBitUnset = |
| 203 | + LowestBitSet + llvm::countr_one(Imm >> LowestBitSet); |
| 204 | + |
| 205 | + // Create a mask for the least significant group of consecutive ones, i.e. bits [LowestBitSet, LowestGapBitUnset). NOTE(review): assumes Imm != 0 and that this group does not reach the top bit of T (otherwise a shift here or above is by the full width of T) - the caller appears to filter such values first; confirm. |
| 206 | + T NewImm1 = (static_cast<T>(1) << LowestGapBitUnset) - |
| 207 | + (static_cast<T>(1) << LowestBitSet); |
| 208 | + // Create a disjoint mask for the remaining ones. Only this mask is checked for encodability below. NOTE(review): NewImm1 is encoded without a check - presumably a single run of ones is always a valid logical immediate; confirm. |
| 209 | + T NewImm2 = Imm & ~NewImm1; |
| 210 | + assert(((NewImm1 & NewImm2) == 0) && "Non-disjoint immediates!"); |
| 211 | + |
| 212 | + if (AArch64_AM::isLogicalImmediate(NewImm2, RegSize)) { |
| 213 | + assert(((NewImm1 | NewImm2) == Imm) && "Invalid immediates!"); |
| 214 | + Imm1Enc = AArch64_AM::encodeLogicalImmediate(NewImm1, RegSize); |
| 215 | + Imm2Enc = AArch64_AM::encodeLogicalImmediate(NewImm2, RegSize); |
| 216 | + return true; |
| 217 | + } |
| 218 | + |
| 219 | + return false; |
| 220 | +} |
| 221 | + |
| 222 | +template <typename T> |
| 223 | +bool AArch64MIPeepholeOpt::trySplitLogicalImm(unsigned Opc, MachineInstr &MI, |
| 224 | + SplitStrategy Strategy, |
| 225 | + unsigned OtherOpc) { |
| 226 | + // Try the transformations below. |
200 | 227 | //
|
201 |
| - // MOVi32imm + ANDS?Wrr ==> ANDWri + ANDS?Wri |
202 |
| - // MOVi64imm + ANDS?Xrr ==> ANDXri + ANDS?Xri |
| 228 | + // MOVi32imm + (ANDS?|EOR|ORR)Wrr ==> (AND|EOR|ORR)Wri + (ANDS?|EOR|ORR)Wri |
| 229 | + // MOVi64imm + (ANDS?|EOR|ORR)Xrr ==> (AND|EOR|ORR)Xri + (ANDS?|EOR|ORR)Xri |
203 | 230 | //
|
204 | 231 | // The mov pseudo instruction could be expanded to multiple mov instructions
|
205 | 232 | // later. Let's try to split the constant operand of mov instruction into two
|
206 |
| - // bitmask immediates. It makes only two AND instructions instead of multiple |
207 |
| - // mov + and instructions. |
| 233 | + // bitmask immediates based on the given split strategy. It makes only two |
| 234 | + // logical instructions instead of multiple mov + logic instructions. |
208 | 235 |
|
209 | 236 | return splitTwoPartImm<T>(
|
210 | 237 | MI,
|
211 |
| - [Opc, OtherOpc](T Imm, unsigned RegSize, T &Imm0, |
212 |
| - T &Imm1) -> std::optional<OpcodePair> { |
213 |
| - if (splitBitmaskImm(Imm, RegSize, Imm0, Imm1)) |
| 238 | + [Opc, Strategy, OtherOpc](T Imm, unsigned RegSize, T &Imm0, |
| 239 | + T &Imm1) -> std::optional<OpcodePair> { |
| 240 | + // If this immediate is already a suitable bitmask, don't do anything. |
| 241 | + // TODO: Should we just combine the two instructions in this case? |
| 242 | + if (AArch64_AM::isLogicalImmediate(Imm, RegSize)) |
| 243 | + return std::nullopt; |
| 244 | + |
| 245 | + // If this immediate can be handled by one instruction, do not split it. |
| 246 | + SmallVector<AArch64_IMM::ImmInsnModel, 4> Insn; |
| 247 | + AArch64_IMM::expandMOVImm(Imm, RegSize, Insn); |
| 248 | + if (Insn.size() == 1) |
| 249 | + return std::nullopt; |
| 250 | + |
| 251 | + bool SplitSucc = false; |
| 252 | + switch (Strategy) { |
| 253 | + case SplitStrategy::Intersect: |
| 254 | + SplitSucc = splitBitmaskImm(Imm, RegSize, Imm0, Imm1); |
| 255 | + break; |
| 256 | + case SplitStrategy::Disjoint: |
| 257 | + SplitSucc = splitDisjointBitmaskImm(Imm, RegSize, Imm0, Imm1); |
| 258 | + break; |
| 259 | + } |
| 260 | + if (SplitSucc) |
214 | 261 | return std::make_pair(Opc, !OtherOpc ? Opc : OtherOpc);
|
215 | 262 | return std::nullopt;
|
216 | 263 | },
|
@@ -859,16 +906,36 @@ bool AArch64MIPeepholeOpt::runOnMachineFunction(MachineFunction &MF) {
|
859 | 906 | Changed |= visitINSERT(MI);
|
860 | 907 | break;
|
861 | 908 | case AArch64::ANDWrr:
|
862 |
| - Changed |= visitAND<uint32_t>(AArch64::ANDWri, MI); |
| 909 | + Changed |= trySplitLogicalImm<uint32_t>(AArch64::ANDWri, MI, |
| 910 | + SplitStrategy::Intersect); |
863 | 911 | break;
|
864 | 912 | case AArch64::ANDXrr:
|
865 |
| - Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI); |
| 913 | + Changed |= trySplitLogicalImm<uint64_t>(AArch64::ANDXri, MI, |
| 914 | + SplitStrategy::Intersect); |
866 | 915 | break;
|
867 | 916 | case AArch64::ANDSWrr:
|
868 |
| - Changed |= visitAND<uint32_t>(AArch64::ANDWri, MI, AArch64::ANDSWri); |
| 917 | + Changed |= trySplitLogicalImm<uint32_t>( |
| 918 | + AArch64::ANDWri, MI, SplitStrategy::Intersect, AArch64::ANDSWri); |
869 | 919 | break;
|
870 | 920 | case AArch64::ANDSXrr:
|
871 |
| - Changed |= visitAND<uint64_t>(AArch64::ANDXri, MI, AArch64::ANDSXri); |
| 921 | + Changed |= trySplitLogicalImm<uint64_t>( |
| 922 | + AArch64::ANDXri, MI, SplitStrategy::Intersect, AArch64::ANDSXri); |
| 923 | + break; |
| 924 | + case AArch64::EORWrr: |
| 925 | + Changed |= trySplitLogicalImm<uint32_t>(AArch64::EORWri, MI, |
| 926 | + SplitStrategy::Disjoint); |
| 927 | + break; |
| 928 | + case AArch64::EORXrr: |
| 929 | + Changed |= trySplitLogicalImm<uint64_t>(AArch64::EORXri, MI, |
| 930 | + SplitStrategy::Disjoint); |
| 931 | + break; |
| 932 | + case AArch64::ORRWrr: |
| 933 | + Changed |= trySplitLogicalImm<uint32_t>(AArch64::ORRWri, MI, |
| 934 | + SplitStrategy::Disjoint); |
| 935 | + break; |
| 936 | + case AArch64::ORRXrr: |
| 937 | + Changed |= trySplitLogicalImm<uint64_t>(AArch64::ORRXri, MI, |
| 938 | + SplitStrategy::Disjoint); |
872 | 939 | break;
|
873 | 940 | case AArch64::ORRWrs:
|
874 | 941 | Changed |= visitORR(MI);
|
|
0 commit comments